[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v3 4/4] nested vmx: enable VMCS shadowing feature
The current logic for handling the non-root VMREAD/VMWRITE is by VM-Exit and emulate, which may bring certain overhead. On new Intel platform, it introduces a new feature called VMCS shadowing, where non-root VMREAD/VMWRITE will not trigger VM-Exit, and the hardware will read/write the virtual VMCS instead. This is proved to have performance improvement with the feature. Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx> --- xen/arch/x86/hvm/vmx/vmcs.c | 68 +++++++++++++++++++++++++++++++++++- xen/arch/x86/hvm/vmx/vvmx.c | 27 ++++++++++++++ xen/include/asm-x86/hvm/vmx/vmcs.h | 18 +++++++++- 3 files changed, 111 insertions(+), 2 deletions(-) diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c index 82a8d91..95ddf35 100644 --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -32,6 +32,7 @@ #include <asm/hvm/support.h> #include <asm/hvm/vmx/vmx.h> #include <asm/hvm/vmx/vmcs.h> +#include <asm/hvm/nestedhvm.h> #include <asm/flushtlb.h> #include <xen/event.h> #include <xen/kernel.h> @@ -91,6 +92,7 @@ static void __init vmx_display_features(void) P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest"); P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization"); P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery"); + P(cpu_has_vmx_vmcs_shadowing, "VMCS shadowing"); #undef P if ( !printed ) @@ -132,6 +134,7 @@ static int vmx_init_vmcs_config(void) u32 _vmx_cpu_based_exec_control; u32 _vmx_secondary_exec_control = 0; u64 _vmx_ept_vpid_cap = 0; + u64 _vmx_misc_cap = 0; u32 _vmx_vmexit_control; u32 _vmx_vmentry_control; bool_t mismatch = 0; @@ -179,6 +182,9 @@ static int vmx_init_vmcs_config(void) SECONDARY_EXEC_ENABLE_RDTSCP | SECONDARY_EXEC_PAUSE_LOOP_EXITING | SECONDARY_EXEC_ENABLE_INVPCID); + rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap); + if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL ) + opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING; if ( opt_vpid_enabled ) opt |= SECONDARY_EXEC_ENABLE_VPID; if ( opt_unrestricted_guest_enabled ) 
@@ -382,6 +388,8 @@ static void __vmx_clear_vmcs(void *info) if ( arch_vmx->active_cpu == smp_processor_id() ) { __vmpclear(virt_to_maddr(arch_vmx->vmcs)); + if ( arch_vmx->vmcs_shadow_maddr ) + __vmpclear(arch_vmx->vmcs_shadow_maddr); arch_vmx->active_cpu = -1; arch_vmx->launched = 0; @@ -710,6 +718,8 @@ void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to) spin_lock(&vmx->vmcs_lock); __vmpclear(virt_to_maddr(from)); + if ( vmx->vmcs_shadow_maddr ) + __vmpclear(vmx->vmcs_shadow_maddr); __vmptrld(virt_to_maddr(to)); vmx->vmcs = to; @@ -761,6 +771,7 @@ static int construct_vmcs(struct vcpu *v) unsigned long sysenter_eip; u32 vmexit_ctl = vmx_vmexit_control; u32 vmentry_ctl = vmx_vmentry_control; + int ret = 0; vmx_vmcs_enter(v); @@ -816,7 +827,10 @@ static int construct_vmcs(struct vcpu *v) unsigned long *msr_bitmap = alloc_xenheap_page(); if ( msr_bitmap == NULL ) - return -ENOMEM; + { + ret = -ENOMEM; + goto out; + } memset(msr_bitmap, ~0, PAGE_SIZE); v->arch.hvm_vmx.msr_bitmap = msr_bitmap; @@ -843,6 +857,47 @@ static int construct_vmcs(struct vcpu *v) } } + /* non-root VMREAD/VMWRITE bitmap. */ + if ( cpu_has_vmx_vmcs_shadowing && nestedhvm_enabled(d) ) + { + struct page_info *vmread_bitmap, *vmwrite_bitmap; + + vmread_bitmap = alloc_domheap_page(d, 0); + if ( !vmread_bitmap ) + { + gdprintk(XENLOG_ERR, "nest: allocation for vmread bitmap failed\n"); + ret = -ENOMEM; + goto out1; + } + v->arch.hvm_vmx.vmread_bitmap = page_to_virt(vmread_bitmap); + + vmwrite_bitmap = alloc_domheap_page(d, 0); + if ( !vmwrite_bitmap ) + { + gdprintk(XENLOG_ERR, "nest: allocation for vmwrite bitmap failed\n"); + ret = -ENOMEM; + goto out2; + } + v->arch.hvm_vmx.vmwrite_bitmap = page_to_virt(vmwrite_bitmap); + + clear_page(v->arch.hvm_vmx.vmread_bitmap); + clear_page(v->arch.hvm_vmx.vmwrite_bitmap); + + /* + * For the following 4 encodings, we need to handle them in VMM. + * Let them vmexit as usual. 
+ */ + set_bit(IO_BITMAP_A, v->arch.hvm_vmx.vmwrite_bitmap); + set_bit(IO_BITMAP_A_HIGH, v->arch.hvm_vmx.vmwrite_bitmap); + set_bit(IO_BITMAP_B, v->arch.hvm_vmx.vmwrite_bitmap); + set_bit(IO_BITMAP_B_HIGH, v->arch.hvm_vmx.vmwrite_bitmap); + + __vmwrite(VMREAD_BITMAP, + virt_to_maddr(v->arch.hvm_vmx.vmread_bitmap)); + __vmwrite(VMWRITE_BITMAP, + virt_to_maddr(v->arch.hvm_vmx.vmwrite_bitmap)); + } + /* I/O access bitmap. */ __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0)); __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE)); @@ -997,6 +1052,13 @@ static int construct_vmcs(struct vcpu *v) vmx_vlapic_msr_changed(v); return 0; + +out2: + free_domheap_page(virt_to_page(v->arch.hvm_vmx.vmread_bitmap)); +out1: + free_xenheap_page(v->arch.hvm_vmx.msr_bitmap); +out: + return ret; } int vmx_read_guest_msr(u32 msr, u64 *val) @@ -1154,6 +1216,10 @@ void vmx_destroy_vmcs(struct vcpu *v) free_xenheap_page(v->arch.hvm_vmx.host_msr_area); free_xenheap_page(v->arch.hvm_vmx.msr_area); free_xenheap_page(v->arch.hvm_vmx.msr_bitmap); + if ( v->arch.hvm_vmx.vmread_bitmap ) + free_domheap_page(virt_to_page(v->arch.hvm_vmx.vmread_bitmap)); + if ( v->arch.hvm_vmx.vmwrite_bitmap ) + free_domheap_page(virt_to_page(v->arch.hvm_vmx.vmwrite_bitmap)); } void vm_launch_fail(void) diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c index 93a1502..ce05744 100644 --- a/xen/arch/x86/hvm/vmx/vvmx.c +++ b/xen/arch/x86/hvm/vmx/vvmx.c @@ -996,6 +996,28 @@ static bool_t nvmx_vpid_enabled(struct nestedvcpu *nvcpu) return 0; } +static void nvmx_set_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs) +{ + unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs); + paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT; + + __vmpclear(vvmcs_maddr); + vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK; + v->arch.hvm_vmx.vmcs_shadow_maddr = vvmcs_maddr; + __vmwrite(VMCS_LINK_POINTER, vvmcs_maddr); +} + +static void nvmx_clear_vmcs_pointer(struct vcpu *v, struct 
vmcs_struct *vvmcs) +{ + unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs); + paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT; + + __vmpclear(vvmcs_maddr); + vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK; + v->arch.hvm_vmx.vmcs_shadow_maddr = 0; + __vmwrite(VMCS_LINK_POINTER, ~0ul); +} + static void virtual_vmentry(struct cpu_user_regs *regs) { struct vcpu *v = current; @@ -1437,6 +1459,9 @@ int nvmx_handle_vmptrld(struct cpu_user_regs *regs) __map_msr_bitmap(v); } + if ( cpu_has_vmx_vmcs_shadowing ) + nvmx_set_vmcs_pointer(v, nvcpu->nv_vvmcx); + vmreturn(regs, VMSUCCEED); out: @@ -1487,6 +1512,8 @@ int nvmx_handle_vmclear(struct cpu_user_regs *regs) if ( gpa == nvcpu->nv_vvmcxaddr ) { + if ( cpu_has_vmx_vmcs_shadowing ) + nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx); clear_vvmcs_launched(&nvmx->launched_list, domain_page_map_to_mfn(nvcpu->nv_vvmcx)); nvmx_purge_vvmcs(v); diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h index 652dc21..0d246b0 100644 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -81,6 +81,8 @@ struct vmx_domain { struct arch_vmx_struct { /* Virtual address of VMCS. */ struct vmcs_struct *vmcs; + /* VMCS shadow machine address. */ + paddr_t vmcs_shadow_maddr; /* Protects remote usage of VMCS (VMPTRLD/VMCLEAR). 
*/ spinlock_t vmcs_lock; @@ -125,6 +127,10 @@ struct arch_vmx_struct { /* Remember EFLAGS while in virtual 8086 mode */ uint32_t vm86_saved_eflags; int hostenv_migrated; + + /* Bitmap to control vmexit policy for Non-root VMREAD/VMWRITE */ + unsigned long *vmread_bitmap; + unsigned long *vmwrite_bitmap; }; int vmx_create_vmcs(struct vcpu *v); @@ -191,6 +197,7 @@ extern u32 vmx_vmentry_control; #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMCS_SHADOWING 0x00004000 extern u32 vmx_secondary_exec_control; extern bool_t cpu_has_vmx_ins_outs_instr_info; @@ -205,6 +212,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info; #define VMX_EPT_INVEPT_SINGLE_CONTEXT 0x02000000 #define VMX_EPT_INVEPT_ALL_CONTEXT 0x04000000 +#define VMX_MISC_VMWRITE_ALL 0x20000000 + #define VMX_VPID_INVVPID_INSTRUCTION 0x100000000ULL #define VMX_VPID_INVVPID_INDIVIDUAL_ADDR 0x10000000000ULL #define VMX_VPID_INVVPID_SINGLE_CONTEXT 0x20000000000ULL @@ -244,7 +253,10 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info; (vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT) #define cpu_has_vmx_virtual_intr_delivery \ (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) -#define cpu_has_vmx_vmcs_shadowing 0 +#define cpu_has_vmx_vmcs_shadowing \ + (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VMCS_SHADOWING) + +#define VMCS_RID_TYPE_MASK 0x80000000 /* GUEST_INTERRUPTIBILITY_INFO flags. 
*/ #define VMX_INTR_SHADOW_STI 0x00000001 @@ -305,6 +317,10 @@ enum vmcs_field { EOI_EXIT_BITMAP2_HIGH = 0x00002021, EOI_EXIT_BITMAP3 = 0x00002022, EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, + VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, -- 1.7.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our servers 24x7x365 and backed by RackSpace's Fanatical Support®. |