
[Xen-devel] [PATCH v3 4/4] nested vmx: enable VMCS shadowing feature



The current logic handles non-root VMREAD/VMWRITE by letting them trigger a
VM exit and emulating the instructions, which brings a certain amount of overhead.

Newer Intel platforms introduce a feature called VMCS shadowing, with which
non-root VMREAD/VMWRITE no longer trigger a VM exit; instead, the hardware
reads from and writes to the virtual VMCS directly. Enabling this feature
has been shown to improve nested virtualization performance.
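
For illustration only (not part of the patch), here is a minimal sketch of how
the per-field bitmaps are assumed to gate the exit behaviour: the bit index is
taken from the low 15 bits of the VMCS field encoding, and a set bit keeps the
usual VM exit. The helper name below is hypothetical.

    /*
     * Illustrative sketch, not part of this patch.  Assumes each
     * VMREAD/VMWRITE bitmap is one 4 KiB page indexed by bits 14:0 of
     * the field encoding, and that a set bit means the access still
     * causes a VM exit.
     */
    #include <stdbool.h>
    #include <stdint.h>

    static bool field_access_exits(const unsigned long *bitmap,
                                   uint32_t encoding)
    {
        unsigned int bit = encoding & 0x7fff;          /* bits 14:0 */
        unsigned int bpl = 8 * sizeof(unsigned long);  /* bits per long */

        return (bitmap[bit / bpl] >> (bit % bpl)) & 1;
    }

This is also why the patch leaves the four IO_BITMAP_* encodings set in the
vmwrite bitmap: writes to those fields must still exit so they can be handled
by the VMM.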

Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
---
 xen/arch/x86/hvm/vmx/vmcs.c        |   68 +++++++++++++++++++++++++++++++++++-
 xen/arch/x86/hvm/vmx/vvmx.c        |   27 ++++++++++++++
 xen/include/asm-x86/hvm/vmx/vmcs.h |   18 +++++++++-
 3 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 82a8d91..95ddf35 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -32,6 +32,7 @@
 #include <asm/hvm/support.h>
 #include <asm/hvm/vmx/vmx.h>
 #include <asm/hvm/vmx/vmcs.h>
+#include <asm/hvm/nestedhvm.h>
 #include <asm/flushtlb.h>
 #include <xen/event.h>
 #include <xen/kernel.h>
@@ -91,6 +92,7 @@ static void __init vmx_display_features(void)
     P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
     P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
     P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery");
+    P(cpu_has_vmx_vmcs_shadowing, "VMCS shadowing");
 #undef P
 
     if ( !printed )
@@ -132,6 +134,7 @@ static int vmx_init_vmcs_config(void)
     u32 _vmx_cpu_based_exec_control;
     u32 _vmx_secondary_exec_control = 0;
     u64 _vmx_ept_vpid_cap = 0;
+    u64 _vmx_misc_cap = 0;
     u32 _vmx_vmexit_control;
     u32 _vmx_vmentry_control;
     bool_t mismatch = 0;
@@ -179,6 +182,9 @@ static int vmx_init_vmcs_config(void)
                SECONDARY_EXEC_ENABLE_RDTSCP |
                SECONDARY_EXEC_PAUSE_LOOP_EXITING |
                SECONDARY_EXEC_ENABLE_INVPCID);
+        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
+        if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
+            opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
         if ( opt_vpid_enabled )
             opt |= SECONDARY_EXEC_ENABLE_VPID;
         if ( opt_unrestricted_guest_enabled )
@@ -382,6 +388,8 @@ static void __vmx_clear_vmcs(void *info)
     if ( arch_vmx->active_cpu == smp_processor_id() )
     {
         __vmpclear(virt_to_maddr(arch_vmx->vmcs));
+        if ( arch_vmx->vmcs_shadow_maddr )
+            __vmpclear(arch_vmx->vmcs_shadow_maddr);
 
         arch_vmx->active_cpu = -1;
         arch_vmx->launched   = 0;
@@ -710,6 +718,8 @@ void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to)
     spin_lock(&vmx->vmcs_lock);
 
     __vmpclear(virt_to_maddr(from));
+    if ( vmx->vmcs_shadow_maddr )
+        __vmpclear(vmx->vmcs_shadow_maddr);
     __vmptrld(virt_to_maddr(to));
 
     vmx->vmcs = to;
@@ -761,6 +771,7 @@ static int construct_vmcs(struct vcpu *v)
     unsigned long sysenter_eip;
     u32 vmexit_ctl = vmx_vmexit_control;
     u32 vmentry_ctl = vmx_vmentry_control;
+    int ret = 0;
 
     vmx_vmcs_enter(v);
 
@@ -816,7 +827,10 @@ static int construct_vmcs(struct vcpu *v)
         unsigned long *msr_bitmap = alloc_xenheap_page();
 
         if ( msr_bitmap == NULL )
-            return -ENOMEM;
+        {
+            ret = -ENOMEM;
+            goto out;
+        }
 
         memset(msr_bitmap, ~0, PAGE_SIZE);
         v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
@@ -843,6 +857,47 @@ static int construct_vmcs(struct vcpu *v)
         }
     }
 
+    /* non-root VMREAD/VMWRITE bitmap. */
+    if ( cpu_has_vmx_vmcs_shadowing && nestedhvm_enabled(d) )
+    {
+        struct page_info *vmread_bitmap, *vmwrite_bitmap;
+
+        vmread_bitmap = alloc_domheap_page(d, 0);
+        if ( !vmread_bitmap )
+        {
+            gdprintk(XENLOG_ERR, "nest: allocation for vmread bitmap 
failed\n");
+            ret = -ENOMEM;
+            goto out1;
+        }
+        v->arch.hvm_vmx.vmread_bitmap = page_to_virt(vmread_bitmap);
+
+        vmwrite_bitmap = alloc_domheap_page(d, 0);
+        if ( !vmwrite_bitmap )
+        {
+            gdprintk(XENLOG_ERR, "nest: allocation for vmwrite bitmap 
failed\n");
+            ret = -ENOMEM;
+            goto out2;
+        }
+        v->arch.hvm_vmx.vmwrite_bitmap = page_to_virt(vmwrite_bitmap);
+
+        clear_page(v->arch.hvm_vmx.vmread_bitmap);
+        clear_page(v->arch.hvm_vmx.vmwrite_bitmap);
+
+        /* 
+         * For the following 4 encodings, we need to handle them in VMM.
+         * Let them vmexit as usual.
+         */
+        set_bit(IO_BITMAP_A, v->arch.hvm_vmx.vmwrite_bitmap);
+        set_bit(IO_BITMAP_A_HIGH, v->arch.hvm_vmx.vmwrite_bitmap);
+        set_bit(IO_BITMAP_B, v->arch.hvm_vmx.vmwrite_bitmap);
+        set_bit(IO_BITMAP_B_HIGH, v->arch.hvm_vmx.vmwrite_bitmap);
+
+        __vmwrite(VMREAD_BITMAP,
+                  virt_to_maddr(v->arch.hvm_vmx.vmread_bitmap));
+        __vmwrite(VMWRITE_BITMAP,
+                  virt_to_maddr(v->arch.hvm_vmx.vmwrite_bitmap));
+    }
+
     /* I/O access bitmap. */
     __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
     __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
@@ -997,6 +1052,13 @@ static int construct_vmcs(struct vcpu *v)
     vmx_vlapic_msr_changed(v);
 
     return 0;
+
+out2:
+    free_domheap_page(virt_to_page(v->arch.hvm_vmx.vmread_bitmap));
+out1:
+    free_xenheap_page(v->arch.hvm_vmx.msr_bitmap);
+out:
+    return ret;
 }
 
 int vmx_read_guest_msr(u32 msr, u64 *val)
@@ -1154,6 +1216,10 @@ void vmx_destroy_vmcs(struct vcpu *v)
     free_xenheap_page(v->arch.hvm_vmx.host_msr_area);
     free_xenheap_page(v->arch.hvm_vmx.msr_area);
     free_xenheap_page(v->arch.hvm_vmx.msr_bitmap);
+    if ( v->arch.hvm_vmx.vmread_bitmap )
+        free_domheap_page(virt_to_page(v->arch.hvm_vmx.vmread_bitmap));
+    if ( v->arch.hvm_vmx.vmwrite_bitmap )
+        free_domheap_page(virt_to_page(v->arch.hvm_vmx.vmwrite_bitmap));
 }
 
 void vm_launch_fail(void)
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 93a1502..ce05744 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -996,6 +996,28 @@ static bool_t nvmx_vpid_enabled(struct nestedvcpu *nvcpu)
     return 0;
 }
 
+static void nvmx_set_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
+{
+    unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
+    paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
+
+    __vmpclear(vvmcs_maddr);
+    vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK;
+    v->arch.hvm_vmx.vmcs_shadow_maddr = vvmcs_maddr;
+    __vmwrite(VMCS_LINK_POINTER, vvmcs_maddr);
+}
+
+static void nvmx_clear_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
+{
+    unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
+    paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
+
+    __vmpclear(vvmcs_maddr);
+    vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK;
+    v->arch.hvm_vmx.vmcs_shadow_maddr = 0;
+    __vmwrite(VMCS_LINK_POINTER, ~0ul);
+}
+
 static void virtual_vmentry(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
@@ -1437,6 +1459,9 @@ int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
         __map_msr_bitmap(v);
     }
 
+    if ( cpu_has_vmx_vmcs_shadowing )
+        nvmx_set_vmcs_pointer(v, nvcpu->nv_vvmcx);
+
     vmreturn(regs, VMSUCCEED);
 
 out:
@@ -1487,6 +1512,8 @@ int nvmx_handle_vmclear(struct cpu_user_regs *regs)
     
     if ( gpa == nvcpu->nv_vvmcxaddr ) 
     {
+        if ( cpu_has_vmx_vmcs_shadowing )
+            nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
         clear_vvmcs_launched(&nvmx->launched_list,
             domain_page_map_to_mfn(nvcpu->nv_vvmcx));
         nvmx_purge_vvmcs(v);
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 652dc21..0d246b0 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -81,6 +81,8 @@ struct vmx_domain {
 struct arch_vmx_struct {
     /* Virtual address of VMCS. */
     struct vmcs_struct  *vmcs;
+    /* VMCS shadow machine address. */
+    paddr_t             vmcs_shadow_maddr;
 
     /* Protects remote usage of VMCS (VMPTRLD/VMCLEAR). */
     spinlock_t           vmcs_lock;
@@ -125,6 +127,10 @@ struct arch_vmx_struct {
     /* Remember EFLAGS while in virtual 8086 mode */
     uint32_t             vm86_saved_eflags;
     int                  hostenv_migrated;
+
+    /* Bitmap to control vmexit policy for Non-root VMREAD/VMWRITE */
+    unsigned long       *vmread_bitmap;
+    unsigned long       *vmwrite_bitmap;
 };
 
 int vmx_create_vmcs(struct vcpu *v);
@@ -191,6 +197,7 @@ extern u32 vmx_vmentry_control;
 #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY    0x00000200
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING       0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID           0x00001000
+#define SECONDARY_EXEC_ENABLE_VMCS_SHADOWING    0x00004000
 extern u32 vmx_secondary_exec_control;
 
 extern bool_t cpu_has_vmx_ins_outs_instr_info;
@@ -205,6 +212,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info;
 #define VMX_EPT_INVEPT_SINGLE_CONTEXT           0x02000000
 #define VMX_EPT_INVEPT_ALL_CONTEXT              0x04000000
 
+#define VMX_MISC_VMWRITE_ALL                    0x20000000
+
 #define VMX_VPID_INVVPID_INSTRUCTION                        0x100000000ULL
 #define VMX_VPID_INVVPID_INDIVIDUAL_ADDR                    0x10000000000ULL
 #define VMX_VPID_INVVPID_SINGLE_CONTEXT                     0x20000000000ULL
@@ -244,7 +253,10 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info;
     (vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT)
 #define cpu_has_vmx_virtual_intr_delivery \
     (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
-#define cpu_has_vmx_vmcs_shadowing 0
+#define cpu_has_vmx_vmcs_shadowing \
+    (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VMCS_SHADOWING)
+
+#define VMCS_RID_TYPE_MASK              0x80000000
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define VMX_INTR_SHADOW_STI             0x00000001
@@ -305,6 +317,10 @@ enum vmcs_field {
     EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
     EOI_EXIT_BITMAP3                = 0x00002022,
     EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
+    VMREAD_BITMAP                   = 0x00002026,
+    VMREAD_BITMAP_HIGH              = 0x00002027,
+    VMWRITE_BITMAP                  = 0x00002028,
+    VMWRITE_BITMAP_HIGH             = 0x00002029,
     GUEST_PHYSICAL_ADDRESS          = 0x00002400,
     GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
     VMCS_LINK_POINTER               = 0x00002800,
-- 
1.7.1

