
[Xen-devel] [PATCH 3/4] x86/vmx: Fix security issue when a guest balloons out the #VE info page

The logic in altp2m_vcpu_{en,dis}able_ve() and vmx_vcpu_update_vmfunc_ve() is
dangerous.  After #VE has been set up, the guest can balloon out and free the
nominated GFN, after which the processor may write to the freed frame.  Also,
the unlocked GFN query means the MFN may already be stale by the time it is
used.  Alternatively, a guest can race two disable calls so that one VMCS
still references the nominated GFN after the tracking information has been
dropped.
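
For illustration, the pre-patch flow boils down to the following (paraphrased
from the removed hunks below, not a verbatim quote):

    /* Old, unsafe flow (paraphrased): */
    mfn = get_gfn_query_unlocked(d, gfn_x(veinfo_gfn), &t);  /* no lock */
    if ( !mfn_eq(mfn, INVALID_MFN) )
        __vmwrite(VIRT_EXCEPTION_INFO, mfn_x(mfn) << PAGE_SHIFT);
    /*
     * Nothing holds a reference on the frame, so the guest can balloon the
     * GFN out at any later point, leaving the VMCS pointing at (and the
     * processor writing #VE state into) a page Xen may have reallocated.
     */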

Rework the logic from scratch to make it safe.

Hold an extra page reference on the underlying frame, to account for the
VMCS's reference.  This means that if the GFN gets ballooned out, it isn't
freed back to Xen until #VE is disabled, and the VMCS no longer refers to the
page.
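
Condensed from the altp2m.c hunk below, the enable path now pins the frame
along these lines:

    mfn = get_gfn(d, gfn_x(gfn), &p2mt);              /* locked query */
    if ( mfn_eq(mfn, INVALID_MFN) || !p2m_is_ram(p2mt) ||
         p2m_is_readonly(p2mt) || !get_page(pg = mfn_to_page(mfn), d) )
        rc = -EINVAL;                     /* not plain writeable RAM */
    /*
     * The get_page() reference lives for as long as the VMCS points at
     * the frame, and is only dropped by altp2m_vcpu_disable_ve().
     */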

A consequence of this is that arch_vcpu_unmap_resources() now needs to call
altp2m_vcpu_disable_ve() to drop the reference during domain_kill(), to allow
all of the memory to be freed.
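
The common-code side of the hook is not part of this patch; the expected
shape is something like the following (hypothetical sketch only):

    /* Hypothetical fallback for architectures without the hook: */
    #ifndef arch_vcpu_unmap_resources
    static inline void arch_vcpu_unmap_resources(struct vcpu *v) {}
    #endif

    /* ... and in domain_kill(), before relinquishing memory: */
    for_each_vcpu ( d, v )
        arch_vcpu_unmap_resources(v);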

For domains using altp2m, we expect a single enable call and no disable for
the remaining lifetime of the domain.  However, to avoid problems with
concurrent calls, use cmpxchg() to locklessly maintain safety.
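
The claim/release pair condenses to this pattern (taken from the hunks
below):

    /* Enable: the first caller claims the slot; losers back off. */
    if ( cmpxchg(&a->veinfo_pg, NULL, pg) != NULL )
    {
        put_page(pg);            /* Lost the race; drop our reference. */
        rc = -EEXIST;
    }

    /* Disable: the xchg() winner clears the VMCS, then drops the ref. */
    if ( (pg = xchg(&a->veinfo_pg, NULL)) )
    {
        altp2m_vcpu_update_vmfunc_ve(v);
        put_page(pg);
    }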

This doesn't have an XSA because altp2m is not yet a security-supported
feature.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
CC: Razvan Cojocaru <rcojocaru@xxxxxxxxxxxxxxx>
CC: Tamas K Lengyel <tamas@xxxxxxxxxxxxx>
CC: Jun Nakajima <jun.nakajima@xxxxxxxxx>
CC: Kevin Tian <kevin.tian@xxxxxxxxx>
CC: Juergen Gross <jgross@xxxxxxxx>
---
 xen/arch/x86/domain.c          |  7 +++++
 xen/arch/x86/hvm/vmx/vmx.c     | 33 ++++++++++++-----------
 xen/arch/x86/mm/altp2m.c       | 59 +++++++++++++++++++++++++++++++++++-------
 xen/include/asm-x86/domain.h   |  3 +++
 xen/include/asm-x86/hvm/vcpu.h |  7 ++++-
 5 files changed, 82 insertions(+), 27 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 32dc4253..198fa14 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -38,6 +38,7 @@
 #include <xen/livepatch.h>
 #include <public/sysctl.h>
 #include <public/hvm/hvm_vcpu.h>
+#include <asm/altp2m.h>
 #include <asm/regs.h>
 #include <asm/mc146818rtc.h>
 #include <asm/system.h>
@@ -2013,6 +2014,12 @@ static int relinquish_memory(
     return ret;
 }
 
+void arch_vcpu_unmap_resources(struct vcpu *v)
+{
+    if ( altp2m_active(v->domain) )
+        altp2m_vcpu_disable_ve(v);
+}
+
 int domain_relinquish_resources(struct domain *d)
 {
     int ret;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 24def93..395bccd 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2196,14 +2196,11 @@ static void vmx_vcpu_update_vmfunc_ve(struct vcpu *v)
 
         if ( cpu_has_vmx_virt_exceptions )
         {
-            p2m_type_t t;
-            mfn_t mfn;
+            const struct page_info *pg = vcpu_altp2m(v).veinfo_pg;
 
-            mfn = get_gfn_query_unlocked(d, gfn_x(vcpu_altp2m(v).veinfo_gfn), &t);
-
-            if ( !mfn_eq(mfn, INVALID_MFN) )
+            if ( pg )
             {
-                __vmwrite(VIRT_EXCEPTION_INFO, mfn_x(mfn) << PAGE_SHIFT);
+                __vmwrite(VIRT_EXCEPTION_INFO, page_to_maddr(pg));
                 /*
                  * Make sure we have an up-to-date EPTP_INDEX when
                  * setting SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS.
@@ -2237,21 +2234,19 @@ static int vmx_vcpu_emulate_vmfunc(const struct cpu_user_regs *regs)
 
 static bool_t vmx_vcpu_emulate_ve(struct vcpu *v)
 {
-    bool_t rc = 0, writable;
-    gfn_t gfn = vcpu_altp2m(v).veinfo_gfn;
+    const struct page_info *pg = vcpu_altp2m(v).veinfo_pg;
     ve_info_t *veinfo;
+    bool rc = false;
 
-    if ( gfn_eq(gfn, INVALID_GFN) )
-        return 0;
+    if ( !pg )
+        return rc;
 
-    veinfo = hvm_map_guest_frame_rw(gfn_x(gfn), 0, &writable);
-    if ( !veinfo )
-        return 0;
-    if ( !writable || veinfo->semaphore != 0 )
-        goto out;
+    veinfo = __map_domain_page(pg);
 
-    rc = 1;
+    if ( veinfo->semaphore != 0 )
+        goto out;
 
+    rc = true;
     veinfo->exit_reason = EXIT_REASON_EPT_VIOLATION;
     veinfo->semaphore = ~0;
     veinfo->eptp_index = vcpu_altp2m(v).p2midx;
@@ -2266,7 +2261,11 @@ static bool_t vmx_vcpu_emulate_ve(struct vcpu *v)
                             X86_EVENT_NO_EC);
 
  out:
-    hvm_unmap_guest_frame(veinfo, 0);
+    unmap_domain_page(veinfo);
+
+    if ( rc )
+        paging_mark_dirty(v->domain, page_to_mfn(pg));
+
     return rc;
 }
 
diff --git a/xen/arch/x86/mm/altp2m.c b/xen/arch/x86/mm/altp2m.c
index 8bdefb0..7412635 100644
--- a/xen/arch/x86/mm/altp2m.c
+++ b/xen/arch/x86/mm/altp2m.c
@@ -27,7 +27,6 @@ altp2m_vcpu_initialise(struct vcpu *v)
         vcpu_pause(v);
 
     vcpu_altp2m(v).p2midx = 0;
-    vcpu_altp2m(v).veinfo_gfn = INVALID_GFN;
     atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
 
     altp2m_vcpu_update_p2m(v);
@@ -58,25 +57,67 @@ altp2m_vcpu_destroy(struct vcpu *v)
 
 int altp2m_vcpu_enable_ve(struct vcpu *v, gfn_t gfn)
 {
+    struct domain *d = v->domain;
+    struct altp2mvcpu *a = &vcpu_altp2m(v);
     p2m_type_t p2mt;
+    mfn_t mfn;
+    struct page_info *pg;
+    int rc;
+
+    /* Early exit path if #VE is already configured. */
+    if ( a->veinfo_pg )
+        return -EEXIST;
+
+    mfn = get_gfn(d, gfn_x(gfn), &p2mt);
+
+    /*
+     * Looking for a plain piece of guest writeable RAM.  Take an extra page
+     * reference to reflect our intent to point the VMCS at it.
+     */
+    if ( mfn_eq(mfn, INVALID_MFN) || !p2m_is_ram(p2mt) ||
+         p2m_is_readonly(p2mt) || !get_page(pg = mfn_to_page(mfn), d) )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
 
-    if ( !gfn_eq(vcpu_altp2m(v).veinfo_gfn, INVALID_GFN) ||
-         mfn_eq(get_gfn_query_unlocked(v->domain, gfn_x(gfn), &p2mt),
-                INVALID_MFN) )
-        return -EINVAL;
+    /*
+     * Update veinfo_pg, making sure to be safe with concurrent hypercalls.
+     * The first caller to make veinfo_pg become non-NULL will program its MFN
+     * into the VMCS, so must not be clobbered.  Callers which lose the race
+     * back off with -EEXIST.
+     */
+    if ( cmpxchg(&a->veinfo_pg, NULL, pg) != NULL )
+    {
+        put_page(pg);
+        rc = -EEXIST;
+        goto out;
+    }
 
-    vcpu_altp2m(v).veinfo_gfn = gfn;
+    rc = 0;
     altp2m_vcpu_update_vmfunc_ve(v);
 
-    return 0;
+ out:
+    put_gfn(d, gfn_x(gfn));
+
+    return rc;
 }
 
 void altp2m_vcpu_disable_ve(struct vcpu *v)
 {
-    if ( !gfn_eq(vcpu_altp2m(v).veinfo_gfn, INVALID_GFN) )
+    struct altp2mvcpu *a = &vcpu_altp2m(v);
+    struct page_info *pg;
+
+    /*
+     * Update veinfo_pg, making sure to be safe with concurrent hypercalls.
+     * The winner of this race is responsible to update the VMCS to no longer
+     * point at the page, then drop the associated ref.
+     */
+    if ( (pg = xchg(&a->veinfo_pg, NULL)) )
     {
-        vcpu_altp2m(v).veinfo_gfn = INVALID_GFN;
         altp2m_vcpu_update_vmfunc_ve(v);
+
+        put_page(pg);
     }
 }
 
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 277f99f..5f742d1 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -653,6 +653,9 @@ bool update_secondary_system_time(struct vcpu *,
 void vcpu_show_execution_state(struct vcpu *);
 void vcpu_show_registers(const struct vcpu *);
 
+#define arch_vcpu_unmap_resources arch_vcpu_unmap_resources
+void arch_vcpu_unmap_resources(struct vcpu *v);
+
 /* Clean up CR4 bits that are not under guest control. */
 unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
 
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index c8a40f6..6c84d5a 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -137,8 +137,13 @@ struct nestedvcpu {
 #define vcpu_nestedhvm(v) ((v)->arch.hvm.nvcpu)
 
 struct altp2mvcpu {
+    /*
+     * #VE information page.  This pointer being non-NULL indicates that a
+     * VMCS's VIRT_EXCEPTION_INFO field is pointing to the page, and an extra
+     * page reference is held.
+     */
+    struct page_info *veinfo_pg;
     uint16_t    p2midx;         /* alternate p2m index */
-    gfn_t       veinfo_gfn;     /* #VE information page gfn */
 };
 
 #define vcpu_altp2m(v) ((v)->arch.hvm.avcpu)
-- 
2.1.4

