x86/EPT: work around hardware erratum setting A bit

Since commit 191b3f3344ee ("p2m/ept: enable PML in p2m-ept for
log-dirty"), the A and D bits of EPT paging entries are set
unconditionally, regardless of whether PML is enabled or not. This
causes a regression in Xen 4.6 on some processors due to Intel Errata
AVR41 -- HVM guests get severe memory corruption when the A bit is set
due to incorrect TLB flushing on mov to cr3. The errata affects the
Atom C2000 family (Avoton).

To fix, do not set the A bit on this processor family.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>

Move feature suppression to feature detection code. Add command line
override.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -705,19 +705,28 @@ virtualization, to allow the L1 hypervis
 does not provide VM\_ENTRY\_LOAD\_GUEST\_PAT.
 
 ### ept (Intel)
-> `= List of ( pml )`
+> `= List of ( pml | ad )`
+
+Controls EPT related features.
+
+> Sub-options:
+
+> `pml`
 
 > Default: `false`
 
-Controls EPT related features. Currently only Page Modification Logging (PML) is
-the controllable feature as boolean type.
+>> PML is a new hardware feature in Intel's Broadwell Server and further
+>> platforms which reduces hypervisor overhead of log-dirty mechanism by
+>> automatically recording GPAs (guest physical addresses) when guest memory
+>> gets dirty, and therefore significantly reducing number of EPT violation
+>> caused by write protection of guest memory, which is a necessity to
+>> implement log-dirty mechanism before PML.
+
+> `ad`
+
+> Default: Hardware dependent
 
-PML is a new hardware feature in Intel's Broadwell Server and further platforms
-which reduces hypervisor overhead of log-dirty mechanism by automatically
-recording GPAs (guest physical addresses) when guest memory gets dirty, and
-therefore significantly reducing number of EPT violation caused by write
-protection of guest memory, which is a necessity to implement log-dirty
-mechanism before PML.
+>> Have hardware keep accessed/dirty (A/D) bits updated.
 
 ### gdb
 > `= <baud>[/<clock_hz>][,DPS[,<io-base>[,<irq>[,<port-bdf>[,<bridge-bdf>]]]] | pci | amt ] `
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -64,12 +64,14 @@ static unsigned int __read_mostly ple_wi
 integer_param("ple_window", ple_window);
 
 static bool_t __read_mostly opt_pml_enabled = 0;
+static s8 __read_mostly opt_ept_ad = -1;
 
 /*
  * The 'ept' parameter controls functionalities that depend on, or impact the
  * EPT mechanism. Optional comma separated value may contain:
  *
  *  pml                 Enable PML
+ *  ad                  Use A/D bits
  */
 static void __init parse_ept_param(char *s)
 {
@@ -87,6 +89,8 @@ static void __init parse_ept_param(char
 
         if ( !strcmp(s, "pml") )
             opt_pml_enabled = val;
+        else if ( !strcmp(s, "ad") )
+            opt_ept_ad = val;
 
         s = ss + 1;
     } while ( ss );
@@ -268,6 +272,13 @@ static int vmx_init_vmcs_config(void)
     {
         rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, _vmx_ept_vpid_cap);
 
+        if ( !opt_ept_ad )
+            _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
+        else if ( /* Work around Erratum AVR41 on Avoton processors. */
+                  boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
+                  opt_ept_ad < 0 )
+            _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
+
         /*
          * Additional sanity checking before using EPT:
          * 1) the CPU we are running on must support EPT WB, as we will set
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -130,14 +130,14 @@ static void ept_p2m_type_to_flags(struct
             break;
         case p2m_ram_rw:
             entry->r = entry->w = entry->x = 1;
-            entry->a = entry->d = 1;
+            entry->a = entry->d = !!cpu_has_vmx_ept_ad;
             break;
         case p2m_mmio_direct:
             entry->r = entry->x = 1;
             entry->w = !rangeset_contains_singleton(mmio_ro_ranges,
                                                     entry->mfn);
-            entry->a = 1;
-            entry->d = entry->w;
+            entry->a = !!cpu_has_vmx_ept_ad;
+            entry->d = entry->w && cpu_has_vmx_ept_ad;
             break;
         case p2m_ram_logdirty:
             entry->r = entry->x = 1;
@@ -152,7 +152,7 @@ static void ept_p2m_type_to_flags(struct
                 entry->w = 1;
             else
                 entry->w = 0;
-            entry->a = 1;
+            entry->a = !!cpu_has_vmx_ept_ad;
             /* For both PML or non-PML cases we clear D bit anyway */
             entry->d = 0;
             break;
@@ -160,20 +160,20 @@ static void ept_p2m_type_to_flags(struct
         case p2m_ram_shared:
             entry->r = entry->x = 1;
             entry->w = 0;
-            entry->a = 1;
+            entry->a = !!cpu_has_vmx_ept_ad;
             entry->d = 0;
             break;
         case p2m_grant_map_rw:
         case p2m_map_foreign:
             entry->r = entry->w = 1;
             entry->x = 0;
-            entry->a = entry->d = 1;
+            entry->a = entry->d = !!cpu_has_vmx_ept_ad;
             break;
         case p2m_grant_map_ro:
         case p2m_mmio_write_dm:
             entry->r = 1;
             entry->w = entry->x = 0;
-            entry->a = 1;
+            entry->a = !!cpu_has_vmx_ept_ad;
             entry->d = 0;
             break;
     }
@@ -233,7 +233,7 @@ static int ept_set_middle_entry(struct p
 
     ept_entry->r = ept_entry->w = ept_entry->x = 1;
     /* Manually set A bit to avoid overhead of MMU having to write it later. */
-    ept_entry->a = 1;
+    ept_entry->a = !!cpu_has_vmx_ept_ad;
 
     ept_entry->suppress_ve = 1;
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -262,6 +262,7 @@ extern uint8_t posted_intr_vector;
     (vmx_ept_vpid_cap & VMX_EPT_SUPERPAGE_1GB)
 #define cpu_has_vmx_ept_2mb                     \
     (vmx_ept_vpid_cap & VMX_EPT_SUPERPAGE_2MB)
+#define cpu_has_vmx_ept_ad      (vmx_ept_vpid_cap & VMX_EPT_AD_BIT)
 #define cpu_has_vmx_ept_invept_single_context   \
     (vmx_ept_vpid_cap & VMX_EPT_INVEPT_SINGLE_CONTEXT)