
Re: [Xen-devel] [V10 PATCH 19/23] PVH xen: vmcs related changes



I did a quick edit to see what a unified function would look like (it
hasn't even been compile-tested).  I think it looks just fine.

 -George

    PVH xen: vmcs related changes

    This patch contains the VMCS changes needed for PVH, mainly creating a
    VMCS for a PVH guest.

    This version has been modified to unify the PVH and HVM VMCS
    construction functions.

    Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx>
    Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 36f167f..0dc1ce5 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -634,7 +634,7 @@ void vmx_vmcs_exit(struct vcpu *v)
     {
         /* Don't confuse vmx_do_resume (for @v or @current!) */
         vmx_clear_vmcs(v);
-        if ( is_hvm_vcpu(current) )
+        if ( !is_pv_vcpu(current) )
             vmx_load_vmcs(current);

         spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
@@ -856,6 +856,54 @@ static void vmx_set_common_host_vmcs_fields(struct vcpu *v)
     __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
 }

+static int pvh_check_requirements(struct vcpu *v)
+{
+    u64 required, tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+
+    if ( !paging_mode_hap(v->domain) )
+    {
+        printk(XENLOG_G_INFO "HAP is required for PVH guest.\n");
+        return -EINVAL;
+    }
+    if ( !cpu_has_vmx_pat )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU does not have PAT support\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_msr_bitmap )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU does not have msr bitmap\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_vpid )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU doesn't have VPID support\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_secondary_exec_control )
+    {
+        printk(XENLOG_G_INFO "CPU Secondary exec is required to run PVH\n");
+        return -ENOSYS;
+    }
+
+    if ( v->domain->arch.vtsc )
+    {
+        printk(XENLOG_G_INFO
+               "At present PVH only supports the default timer mode\n");
+        return -ENOSYS;
+    }
+
+    required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR;
+    if ( (tmpval & required) != required )
+    {
+        printk(XENLOG_G_INFO "PVH: required CR4 features not available: %lx\n",
+               required);
+        return -ENOSYS;
+    }
+
+    return 0;
+}
+
 static int construct_vmcs(struct vcpu *v)
 {
     struct domain *d = v->domain;
@@ -864,6 +912,15 @@ static int construct_vmcs(struct vcpu *v)
 
+    /* PVH has extra hardware requirements; fail before the VMCS is loaded. */
+    if ( is_pvh_vcpu(v) )
+    {
+        int rc = pvh_check_requirements(v);
+
+        if ( rc )
+            return rc;
+    }
+
     vmx_vmcs_enter(v);
 
     /* VMCS controls. */
     __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);

@@ -871,6 +926,20 @@ static int construct_vmcs(struct vcpu *v)
     if ( d->arch.vtsc )
         v->arch.hvm_vmx.exec_control |= CPU_BASED_RDTSC_EXITING;

+    if ( is_pvh_vcpu(v) )
+    {
+        v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW;
+
+        /* ?  Do we actually want these two cleared for PVH? */
+        v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+        v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;
+
+        ASSERT(v->arch.hvm_vmx.exec_control &
+               CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+        ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_RDTSC_EXITING);
+        ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP);
+    }
+
     v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;

     /* Disable VPID for now: we decide when to enable it on VMENTER. */
@@ -900,7 +969,30 @@ static int construct_vmcs(struct vcpu *v)
     /* Do not enable Monitor Trap Flag unless start single step debug */
     v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;

+    if ( is_pvh_vcpu(v) )
+    {
+        /* FIXME: just disable the things we need to disable. */
+        v->arch.hvm_vmx.secondary_exec_control = SECONDARY_EXEC_ENABLE_EPT;
+        v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_VPID;
+        v->arch.hvm_vmx.secondary_exec_control |=
+            SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+    }
+
     vmx_update_cpu_exec_control(v);
+
+    if ( is_pvh_vcpu(v) )
+    {
+        /*
+         * Note: we run with the default VM_ENTRY_LOAD_DEBUG_CTLS of 1,
+         * which means that on vmentry the CPU loads DR7 and
+         * IA32_DEBUGCTL from the VMCS rather than preserving the host
+         * values; 0 would make it keep the host values instead.
+         */
+        vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_EFER;
+        vmentry_ctl &= ~VM_ENTRY_SMM;
+        vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR;
+        /* PVH 32bitfixme. */
+        vmentry_ctl |= VM_ENTRY_IA32E_MODE; /* GUEST_EFER.LME/LMA ignored */
+    }
+
     __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
     __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);

@@ -933,6 +1025,9 @@ static int construct_vmcs(struct vcpu *v)
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP,
                                       MSR_TYPE_R | MSR_TYPE_W);
         if ( cpu_has_vmx_pat && paging_mode_hap(d) )
             vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT,
                                           MSR_TYPE_R | MSR_TYPE_W);
+        if ( is_pvh_vcpu(v) )
+            vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE,
+                                          MSR_TYPE_R | MSR_TYPE_W);
     }

     /* I/O access bitmap. */
@@ -980,6 +1074,17 @@ static int construct_vmcs(struct vcpu *v)

     __vmwrite(GUEST_ACTIVITY_STATE, 0);

+    if ( is_pvh_vcpu(v) )
+    {
+        /* These are mostly irrelevant, as we load the descriptors directly. */
+        __vmwrite(GUEST_CS_SELECTOR, 0);
+        __vmwrite(GUEST_DS_SELECTOR, 0);
+        __vmwrite(GUEST_SS_SELECTOR, 0);
+        __vmwrite(GUEST_ES_SELECTOR, 0);
+        __vmwrite(GUEST_FS_SELECTOR, 0);
+        __vmwrite(GUEST_GS_SELECTOR, 0);
+    }
+
     /* Guest segment bases. */
     __vmwrite(GUEST_ES_BASE, 0);
     __vmwrite(GUEST_SS_BASE, 0);
@@ -1002,7 +1107,11 @@ static int construct_vmcs(struct vcpu *v)
     __vmwrite(GUEST_DS_AR_BYTES, 0xc093);
     __vmwrite(GUEST_FS_AR_BYTES, 0xc093);
     __vmwrite(GUEST_GS_AR_BYTES, 0xc093);
-    __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */
+    if ( is_pvh_vcpu(v) )
+        /* CS.L == 1, exec, read/write, accessed. PVH 32bitfixme. */
+        __vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+    else
+        __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */

     /* Guest IDT. */
     __vmwrite(GUEST_IDTR_BASE, 0);
@@ -1028,16 +1137,24 @@ static int construct_vmcs(struct vcpu *v)
     __vmwrite(VMCS_LINK_POINTER, ~0UL);

     v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK
-              | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
-              | (1U << TRAP_no_device);
+        | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
+        | (is_pvh_domain(d) ? (1U << TRAP_debug) | (1U << TRAP_int3) : 0)
+        | (1U << TRAP_no_device);
     vmx_update_exception_bitmap(v);

-    v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
+    v->arch.hvm_vcpu.guest_cr[0] = is_pvh_vcpu(v) ?
+        ( X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP )
+        : ( X86_CR0_PE | X86_CR0_ET );
     hvm_update_guest_cr(v, 0);

-    v->arch.hvm_vcpu.guest_cr[4] = 0;
+    v->arch.hvm_vcpu.guest_cr[4] = is_pvh_vcpu(v) ?
+        real_cr4_to_pv_guest_cr4(mmu_cr4_features)
+        : 0;
     hvm_update_guest_cr(v, 4);

+    if ( is_pvh_vcpu(v) )
+        v->arch.hvm_vmx.vmx_realmode = 0;
+
     if ( cpu_has_vmx_tpr_shadow )
     {
         __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
@@ -1294,6 +1411,9 @@ void vmx_do_resume(struct vcpu *v)
         hvm_asid_flush_vcpu(v);
     }

+    if ( is_pvh_vcpu(v) )
+        reset_stack_and_jump(vmx_asm_do_vmentry);
+
     debug_state = v->domain->debugger_attached
                   || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_INT3]
                   || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP];
@@ -1477,7 +1597,7 @@ static void vmcs_dump(unsigned char ch)

     for_each_domain ( d )
     {
-        if ( !is_hvm_domain(d) )
+        if ( is_pv_domain(d) )
             continue;
         printk("\n>>> Domain %d <<<\n", d->domain_id);
         for_each_vcpu ( d, v )
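
A note on the two one-line predicate hunks above (vmx_vmcs_exit() and
vmcs_dump()): with PVH there are now three guest types rather than two, so
a test like is_hvm_vcpu() no longer says what we actually mean --
!is_pv_vcpu() covers both HVM and PVH vcpus.  Roughly, the predicates look
like this (a sketch from memory; treat the exact enumerator and field
names as an assumption rather than what's in the tree):

    enum guest_type { guest_type_pv, guest_type_pvh, guest_type_hvm };

    /* Domain-level predicates keyed off the domain's guest type... */
    #define is_pv_domain(d)   ((d)->guest_type == guest_type_pv)
    #define is_pvh_domain(d)  ((d)->guest_type == guest_type_pvh)
    #define is_hvm_domain(d)  ((d)->guest_type == guest_type_hvm)

    /* ...and vcpu-level wrappers that simply defer to the domain. */
    #define is_pv_vcpu(v)     (is_pv_domain((v)->domain))
    #define is_pvh_vcpu(v)    (is_pvh_domain((v)->domain))
    #define is_hvm_vcpu(v)    (is_hvm_domain((v)->domain))

So testing for "not PV" rather than "is HVM" keeps PVH vcpus on the VMX
paths, which is the whole point of those two hunks.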


On Mon, Aug 12, 2013 at 11:17 AM, George Dunlap
<George.Dunlap@xxxxxxxxxxxxx> wrote:
> On Mon, Aug 12, 2013 at 11:15 AM, George Dunlap
> <George.Dunlap@xxxxxxxxxxxxx> wrote:
>> On Sat, Aug 10, 2013 at 1:23 AM, Mukesh Rathor <mukesh.rathor@xxxxxxxxxx> 
>> wrote:
>>> On Fri, 9 Aug 2013 11:25:36 +0100
>>> George Dunlap <dunlapg@xxxxxxxxx> wrote:
>>>
>>>> On Wed, Jul 24, 2013 at 2:59 AM, Mukesh Rathor
>>>> <mukesh.rathor@xxxxxxxxxx> wrote:
>>>> > This patch contains vmcs changes related for PVH, mainly creating a
>>>> > VMCS for PVH guest.
>>> .....
>>>> > +     v->arch.hvm_vmx.vmx_realmode = 0;
>>>> > +
>>>> > +    ept->asr  = pagetable_get_pfn(p2m_get_pagetable(p2m));
>>>> > +    __vmwrite(EPT_POINTER, ept_get_eptp(ept));
>>>> > +
>>>> > +    rdmsrl(MSR_IA32_CR_PAT, host_pat);
>>>> > +    __vmwrite(HOST_PAT, host_pat);
>>>> > +    __vmwrite(GUEST_PAT, MSR_IA32_CR_PAT_RESET);
>>>> > +
>>>> > +    /* The paging mode is updated for PVH by arch_set_info_guest(). */
>>>> > +
>>>> > +    return 0;
>>>> > +}
>>>>
>>>> The majority of this function seems to be duplicating code in
>>>> construct_vmcs(), but in a different order so that it's very difficult
>>>> to tell which is which.  Wouldn't it be better to just sprinkle
>>>> if(is_pvh_domain()) around construct_vmcs?
>>>
>>>
>>> Nah, just makes the function extremely messy! Other maintainers I
>>> consulted with were OK with making it a separate function. The function
>>> is mostly ordered by vmx sections in the Intel SDM.
>>
>> Does it?  Messier than the domain building functions where we also do
>> a lot of if(is_pvh_domain())'s?
>>
>> From my analysis, most of the differences are because the HVM code
>> allows two ways of doing things (e.g., either HAP or shadow) while you
>> only allow one.  These include:
>>  - disabling TSC exiting (if in the correct mode, the HVM code would
>> also do this)
>>  - Disabling invlpg and cr3 load/store exiting (same for HVM in HAP mode)
>>  - Unconditionally enabling secondary controls (HVM will enable if present)
>>  - Enabling the MSR bitmap (HVM will enable if present)
>>  - Updating cpu_based_exec_control directly (HVM has a function that
>> switches between this and something required for nested virt)
>>  - Unconditionally enabling VPID (HVM will enable it somewhere else if
>> appropriate)
>>  - &c &c
>
> I should have also said, since you would be calling
> pvh_check_requirements() at the beginning of the shared function
> anyway, all of these are guaranteed to set things up as required by
> PVH.
>
>  -George
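
To make that last point concrete: because the requirements check runs
before any of the PVH-specific branches, the shared function never has to
re-test feature bits for PVH.  As a sketch of the shape of the argument
(hypothetical code, not lifted from the tree):

    static int construct_vmcs(struct vcpu *v)
    {
        if ( is_pvh_vcpu(v) )
        {
            int rc = pvh_check_requirements(v);

            /* No EPT/VPID/MSR bitmap/etc. means no PVH VMCS at all. */
            if ( rc )
                return rc;
        }

        /*
         * From here on, every is_pvh_vcpu() branch may assume the
         * features pvh_check_requirements() verified are present, so it
         * can ASSERT() them instead of re-testing cpu_has_* at each
         * site -- which is what lets the unified function stay simple.
         */

        /* ... shared HVM/PVH construction continues ... */
        return 0;
    }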

Attachment: pvh-vmcs-unified.diff
Description: Binary data
