[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] x86/domctl: Adjust size calculations for XEN_DOMCTL_get{_ext_vcpucontext, vcpuextstate}



When teaching valgrind about XEN_DOMCTL_getvcpuextstate, I discovered that
once the hypercall has happened, there is no indication in the structure as to
whether Xen wrote to the buffer or not, as Xen unconditionally clobbers the
size field.  While this can be argued as an issue in valgrind (the post-action
handler does not have the pre-action state to hand), it is contrary to the
prevailing style in Xen of using a NULL guest handle as a request for size.

This patch explicitly adds support in Xen for a NULL guest to indicate a
request for size.  None of the in-tree callers have their behaviour changed by
this, and no toolstack should reasonably expect to have a usable guest handle
at 0.

XEN_DOMCTL_get_ext_vcpucontext suffers from the same issue but while trying to
fix that in similar way, I discovered that it had a genuine bug when returning
the count of MSRs to the toolstack.  When running the hypercall on an active
vcpu, the vcpu can arbitrarily alter the count returned to the toolstack by
clearing and setting relevant MSRs.

As this behaviour is little use in practice and, because at the time of
writing this patch the implementation of this part of the hypercall is still
in staging without any toolstack users, I am taking this opportunity to fix
the behaviour.

The behaviour is now as follows:
 * A NULL guest handle for 'msrs' is a request for msr_count.
 * A hypercall with insufficient size will fail with -ENOBUFS.  The previous
   behaviour of being able to fill a partial buffer with some of the MSRs
   serves no practical purpose given the unordered nature of the contents.
 * After writing MSRs to the buffer, Xen will update the size field with the
   number of MSRs written, which may be fewer than the maximum for empty
   MSRs.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Keir Fraser <keir@xxxxxxx>
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
---
 xen/arch/x86/domctl.c       |   58 +++++++++++++++++++++++++++----------------
 xen/include/public/domctl.h |   16 ++++++------
 2 files changed, 46 insertions(+), 28 deletions(-)

diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index ae29a56..cdf5c2b 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -830,6 +830,8 @@ long arch_do_domctl(
 
         if ( domctl->cmd == XEN_DOMCTL_get_ext_vcpucontext )
         {
+            uint16_t nr_msrs = 0;
+
             if ( v == current ) /* no vcpu_pause() */
                 break;
 
@@ -865,41 +867,54 @@ long arch_do_domctl(
             evc->vmce.mci_ctl2_bank0 = v->arch.vmce.bank[0].mci_ctl2;
             evc->vmce.mci_ctl2_bank1 = v->arch.vmce.bank[1].mci_ctl2;
 
-            i = ret = 0;
+            /* Count maximum number of optional msrs */
             if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+                nr_msrs += 4;
+
+            if ( guest_handle_is_null(evc->msrs) ||
+                 (evc->msr_count < nr_msrs) )
+            {
+                evc->msr_count = nr_msrs;
+                ret = guest_handle_is_null(evc->msrs) ? 0 : -ENOBUFS;
+            }
+            else
             {
-                unsigned int j;
+                i = 0;
 
-                if ( v->arch.pv_vcpu.dr_mask[0] )
+                if ( boot_cpu_has(X86_FEATURE_DBEXT) )
                 {
-                    if ( i < evc->msr_count && !ret )
+                    unsigned int j;
+
+                    if ( v->arch.pv_vcpu.dr_mask[0] )
                     {
                         msr.index = MSR_AMD64_DR0_ADDRESS_MASK;
                         msr.reserved = 0;
                         msr.value = v->arch.pv_vcpu.dr_mask[0];
                         if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )
                             ret = -EFAULT;
+                        else
+                            ++i;
                     }
-                    ++i;
-                }
-                for ( j = 0; j < 3; ++j )
-                {
-                    if ( !v->arch.pv_vcpu.dr_mask[1 + j] )
-                        continue;
-                    if ( i < evc->msr_count && !ret )
+                    for ( j = 0; j < 3; ++j )
                     {
-                        msr.index = MSR_AMD64_DR1_ADDRESS_MASK + j;
-                        msr.reserved = 0;
-                        msr.value = v->arch.pv_vcpu.dr_mask[1 + j];
-                        if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )
-                            ret = -EFAULT;
+                        if ( !v->arch.pv_vcpu.dr_mask[1 + j] )
+                            continue;
+                        if ( !ret )
+                        {
+                            msr.index = MSR_AMD64_DR1_ADDRESS_MASK + j;
+                            msr.reserved = 0;
+                            msr.value = v->arch.pv_vcpu.dr_mask[1 + j];
+                            if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )
+                                ret = -EFAULT;
+                            else
+                                ++i;
+                        }
                     }
-                    ++i;
                 }
+                evc->msr_count = i;
+                /* Check we didn't lie to userspace then overflow their buffer 
*/
+                BUG_ON(i > nr_msrs);
             }
-            if ( i > evc->msr_count && !ret )
-                ret = -ENOBUFS;
-            evc->msr_count = i;
 
             vcpu_unpause(v);
             copyback = 1;
@@ -1177,7 +1192,8 @@ long arch_do_domctl(
         {
             unsigned int size = PV_XSAVE_SIZE(v->arch.xcr0_accum);
 
-            if ( !evc->size && !evc->xfeature_mask )
+            if ( (evc->size == 0 && evc->xfeature_mask == 0) ||
+                 guest_handle_is_null(domctl->u.vcpuextstate.buffer) )
             {
                 evc->xfeature_mask = xfeature_mask;
                 evc->size = size;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 1b75ab2..e39e436 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -594,11 +594,13 @@ struct xen_domctl_ext_vcpucontext {
     uint8_t          syscall32_disables_events;
     uint8_t          sysenter_disables_events;
     /*
-     * When, for the "get" version, msr_count is too small to cover all MSRs
-     * the hypervisor needs to be saved, the call will return -ENOBUFS and
-     * set msr_count to the required (minimum) value. Furthermore, for both
-     * "get" and "set", that field as well as the msrs one only get looked at
-     * if the size field above covers the structure up to the entire msrs one.
+     * Number of msr entries pointed to by the 'msrs' guest handle.
+     *
+     * For the "get" subop, if the guest handle is NULL, Xen shall write back
+     * the maximum number of msrs it might save.  If msr_count is fewer than
+     * the maximum, Xen shall return -ENOBUFS.  When Xen actually writes into
+     * the buffer, this field shall reflect the actual number of msrs written
+     * which might be fewer than the maximum, if some MSRs are not in use.
      */
     uint16_t         msr_count;
 #if defined(__GNUC__)
@@ -868,8 +870,8 @@ struct xen_domctl_vcpuextstate {
      */
     uint64_aligned_t         xfeature_mask;
     /*
-     * SET: Size of struct (IN)
-     * GET: Size of struct (IN/OUT)
+     * Size (in bytes) of 'buffer'. For the "get" subop, if the guest handle
+     * is NULL, Xen shall write back the maximum size required.
      */
     uint64_aligned_t         size;
     XEN_GUEST_HANDLE_64(uint64) buffer;
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.