[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 2/5] EPT: split super pages upon mismatching memory types



... between constituent pages. To indicate such, the page order is
being passed down to the vMTRR routines, with a negative return value
(possible only on order-non-zero pages) indicating such collisions.

Some redundant code in ept_set_entry() is being consolidated along the
way, allowing the new handling to be centralized in a single place
there.

In order to keep ept_set_entry() fast and simple, the actual splitting
is being deferred to the EPT_MISCONFIG VM exit handler.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Reviewed-by: Tim Deegan <tim@xxxxxxx>
---
v3: Adjust for code movement in preceding patch.

--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -222,30 +222,40 @@ void hvm_vcpu_cacheattr_destroy(struct v
 
 /*
  * Get MTRR memory type for physical address pa.
+ *
+ * May return a negative value when order > 0, indicating to the caller
+ * that the respective mapping needs splitting.
  */
-static uint8_t get_mtrr_type(struct mtrr_state *m, paddr_t pa)
+static int get_mtrr_type(const struct mtrr_state *m,
+                         paddr_t pa, unsigned int order)
 {
-   int32_t     addr, seg, index;
    uint8_t     overlap_mtrr = 0;
    uint8_t     overlap_mtrr_pos = 0;
-   uint64_t    phys_base;
-   uint64_t    phys_mask;
-   uint8_t     num_var_ranges = m->mtrr_cap & 0xff;
+   uint64_t    mask = -(uint64_t)PAGE_SIZE << order;
+   unsigned int seg, num_var_ranges = m->mtrr_cap & 0xff;
 
    if ( unlikely(!(m->enabled & 0x2)) )
        return MTRR_TYPE_UNCACHABLE;
 
+   pa &= mask;
    if ( (pa < 0x100000) && (m->enabled & 1) )
    {
-       /* Fixed range MTRR takes effective */
-       addr = (uint32_t) pa;
+       /* Fixed range MTRR takes effect. */
+       uint32_t addr = (uint32_t)pa, index;
+
        if ( addr < 0x80000 )
        {
+           /* 0x00000 ... 0x7FFFF in 64k steps */
+           if ( order > 4 )
+               return -1;
            seg = (addr >> 16);
            return m->fixed_ranges[seg];
        }
        else if ( addr < 0xc0000 )
        {
+           /* 0x80000 ... 0xBFFFF in 16k steps */
+           if ( order > 2 )
+               return -1;
            seg = (addr - 0x80000) >> 14;
            index = (seg >> 3) + 1;
            seg &= 7;            /* select 0-7 segments */
@@ -253,7 +263,9 @@ static uint8_t get_mtrr_type(struct mtrr
        }
        else
        {
-           /* 0xC0000 --- 0x100000 */
+           /* 0xC0000 ... 0xFFFFF in 4k steps */
+           if ( order )
+               return -1;
            seg = (addr - 0xc0000) >> 12;
            index = (seg >> 3) + 3;
            seg &= 7;            /* select 0-7 segments */
@@ -264,14 +276,15 @@ static uint8_t get_mtrr_type(struct mtrr
    /* Match with variable MTRRs. */
    for ( seg = 0; seg < num_var_ranges; seg++ )
    {
-       phys_base = ((uint64_t*)m->var_ranges)[seg*2];
-       phys_mask = ((uint64_t*)m->var_ranges)[seg*2 + 1];
+       uint64_t phys_base = m->var_ranges[seg].base;
+       uint64_t phys_mask = m->var_ranges[seg].mask;
+
        if ( phys_mask & MTRR_PHYSMASK_VALID )
        {
-           if ( ((uint64_t) pa & phys_mask) >> MTRR_PHYSMASK_SHIFT ==
-                (phys_base & phys_mask) >> MTRR_PHYSMASK_SHIFT )
+           phys_mask &= mask;
+           if ( (pa & phys_mask) == (phys_base & phys_mask) )
            {
-               if ( unlikely(m->overlapped) )
+               if ( unlikely(m->overlapped) || order )
                {
                     overlap_mtrr |= 1 << (phys_base & MTRR_PHYSBASE_TYPE_MASK);
                     overlap_mtrr_pos = phys_base & MTRR_PHYSBASE_TYPE_MASK;
@@ -285,23 +298,24 @@ static uint8_t get_mtrr_type(struct mtrr
        }
    }
 
-   /* Overlapped or not found. */
+   /* Not found? */
    if ( unlikely(overlap_mtrr == 0) )
        return m->def_type;
 
-   if ( likely(!(overlap_mtrr & ~( ((uint8_t)1) << overlap_mtrr_pos ))) )
-       /* Covers both one variable memory range matches and
-        * two or more identical match.
-        */
+   /* One match, or multiple identical ones? */
+   if ( likely(overlap_mtrr == (1 << overlap_mtrr_pos)) )
        return overlap_mtrr_pos;
 
+   if ( order )
+       return -1;
+
+   /* Two or more matches, one being UC? */
    if ( overlap_mtrr & (1 << MTRR_TYPE_UNCACHABLE) )
-       /* Two or more match, one is UC. */
        return MTRR_TYPE_UNCACHABLE;
 
-   if ( !(overlap_mtrr &
-          ~((1 << MTRR_TYPE_WRTHROUGH) | (1 << MTRR_TYPE_WRBACK))) )
-       /* Two or more match, WT and WB. */
+   /* Two or more matches, all of them WT and WB? */
+   if ( overlap_mtrr ==
+        ((1 << MTRR_TYPE_WRTHROUGH) | (1 << MTRR_TYPE_WRBACK)) )
        return MTRR_TYPE_WRTHROUGH;
 
    /* Behaviour is undefined, but return the last overlapped type. */
@@ -341,7 +355,7 @@ static uint8_t effective_mm_type(struct 
      * just use it
      */ 
     if ( gmtrr_mtype == NO_HARDCODE_MEM_TYPE )
-        mtrr_mtype = get_mtrr_type(m, gpa);
+        mtrr_mtype = get_mtrr_type(m, gpa, 0);
     else
         mtrr_mtype = gmtrr_mtype;
 
@@ -370,7 +384,7 @@ uint32_t get_pat_flags(struct vcpu *v,
     guest_eff_mm_type = effective_mm_type(g, pat, gpaddr, 
                                           gl1e_flags, gmtrr_mtype);
     /* 2. Get the memory type of host physical address, with MTRR */
-    shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr);
+    shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr, 0);
 
     /* 3. Find the memory type in PAT, with host MTRR memory type
      * and guest effective memory type.
@@ -703,10 +717,10 @@ void memory_type_changed(struct domain *
         p2m_memory_type_changed(d);
 }
 
-uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
-                           uint8_t *ipat, bool_t direct_mmio)
+int epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
+                       unsigned int order, uint8_t *ipat, bool_t direct_mmio)
 {
-    uint8_t gmtrr_mtype, hmtrr_mtype;
+    int gmtrr_mtype, hmtrr_mtype;
     uint32_t type;
     struct vcpu *v = current;
 
@@ -747,10 +761,12 @@ uint8_t epte_get_entry_emt(struct domain
     }
 
     gmtrr_mtype = is_hvm_domain(d) && v ?
-                  get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)) :
+                  get_mtrr_type(&v->arch.hvm_vcpu.mtrr,
+                                gfn << PAGE_SHIFT, order) :
                   MTRR_TYPE_WRBACK;
-
-    hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn_x(mfn) << PAGE_SHIFT));
+    hmtrr_mtype = get_mtrr_type(&mtrr_state, mfn_x(mfn) << PAGE_SHIFT, order);
+    if ( gmtrr_mtype < 0 || hmtrr_mtype < 0 )
+        return -1;
 
     /* If both types match we're fine. */
     if ( likely(gmtrr_mtype == hmtrr_mtype) )
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -337,7 +337,7 @@ bool_t ept_handle_misconfig(uint64_t gpa
                     if ( !is_epte_valid(&e) || !is_epte_present(&e) )
                         continue;
                     e.emt = epte_get_entry_emt(p2m->domain, gfn + i,
-                                               _mfn(e.mfn), &ipat,
+                                               _mfn(e.mfn), 0, &ipat,
                                                e.sa_p2mt == p2m_mmio_direct);
                     e.ipat = ipat;
                     atomic_write_ept_entry(&epte[i], e);
@@ -345,9 +345,22 @@ bool_t ept_handle_misconfig(uint64_t gpa
             }
             else
             {
-                e.emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
-                                           &ipat,
-                                           e.sa_p2mt == p2m_mmio_direct);
+                int emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
+                                             level * EPT_TABLE_ORDER, &ipat,
+                                             e.sa_p2mt == p2m_mmio_direct);
+                if ( unlikely(emt < 0) )
+                {
+                    unmap_domain_page(epte);
+                    if ( ept_split_super_page(p2m, &e, level, level - 1) )
+                    {
+                        mfn = e.mfn;
+                        continue;
+                    }
+                    ept_free_entry(p2m, &e, level);
+                    okay = 0;
+                    break;
+                }
+                e.emt = emt;
                 e.ipat = ipat;
                 atomic_write_ept_entry(&epte[i], e);
             }
@@ -407,6 +420,7 @@ ept_set_entry(struct p2m_domain *p2m, un
     int vtd_pte_present = 0;
     int needs_sync = 1;
     ept_entry_t old_entry = { .epte = 0 };
+    ept_entry_t new_entry = { .epte = 0 };
     struct ept_data *ept = &p2m->ept;
     struct domain *d = p2m->domain;
 
@@ -456,7 +470,6 @@ ept_set_entry(struct p2m_domain *p2m, un
     if ( i == target )
     {
         /* We reached the target level. */
-        ept_entry_t new_entry = { .epte = 0 };
 
         /* No need to flush if the old entry wasn't valid */
         if ( !is_epte_present(ept_entry) )
@@ -467,35 +480,11 @@ ept_set_entry(struct p2m_domain *p2m, un
          *
          * Read-then-write is OK because we hold the p2m lock. */
         old_entry = *ept_entry;
-
-        if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
-             (p2mt == p2m_ram_paging_in) )
-        {
-            /* Construct the new entry, and then write it once */
-            new_entry.emt = epte_get_entry_emt(p2m->domain, gfn, mfn, &ipat,
-                                                direct_mmio);
-
-            new_entry.ipat = ipat;
-            new_entry.sp = !!order;
-            new_entry.sa_p2mt = p2mt;
-            new_entry.access = p2ma;
-            new_entry.snp = (iommu_enabled && iommu_snoop);
-
-            new_entry.mfn = mfn_x(mfn);
-
-            if ( old_entry.mfn == new_entry.mfn )
-                need_modify_vtd_table = 0;
-
-            ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
-        }
-
-        atomic_write_ept_entry(ept_entry, new_entry);
     }
     else
     {
         /* We need to split the original page. */
         ept_entry_t split_ept_entry;
-        ept_entry_t new_entry = { .epte = 0 };
 
         ASSERT(is_epte_superpage(ept_entry));
 
@@ -519,8 +508,19 @@ ept_set_entry(struct p2m_domain *p2m, un
         ASSERT(i == target);
 
         ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
+    }
+
+    if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
+         (p2mt == p2m_ram_paging_in) )
+    {
+        int emt = epte_get_entry_emt(p2m->domain, gfn, mfn,
+                                     i * EPT_TABLE_ORDER, &ipat, direct_mmio);
+
+        if ( emt >= 0 )
+            new_entry.emt = emt;
+        else /* ept_handle_misconfig() will need to take care of this. */
+            new_entry.emt = MTRR_NUM_TYPES;
 
-        new_entry.emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio);
         new_entry.ipat = ipat;
         new_entry.sp = !!i;
         new_entry.sa_p2mt = p2mt;
@@ -535,10 +535,10 @@ ept_set_entry(struct p2m_domain *p2m, un
              need_modify_vtd_table = 0;
 
         ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
-
-        atomic_write_ept_entry(ept_entry, new_entry);
     }
 
+    atomic_write_ept_entry(ept_entry, new_entry);
+
     /* Track the highest gfn for which we have ever had a valid mapping */
     if ( p2mt != p2m_invalid &&
          (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) )
--- a/xen/include/asm-x86/mtrr.h
+++ b/xen/include/asm-x86/mtrr.h
@@ -72,8 +72,9 @@ extern int mtrr_del_page(int reg, unsign
 extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
 extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr,
                   paddr_t spaddr, uint8_t gmtrr_mtype);
-extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn,
-                                  mfn_t mfn, uint8_t *ipat, bool_t direct_mmio);
+extern int epte_get_entry_emt(struct domain *, unsigned long gfn, mfn_t mfn,
+                              unsigned int order, uint8_t *ipat,
+                              bool_t direct_mmio);
 extern void ept_change_entry_emt_with_range(
     struct domain *d, unsigned long start_gfn, unsigned long end_gfn);
 extern unsigned char pat_type_2_pte_flags(unsigned char pat_type);


Attachment: EPT-split-on-mixed-memory-types.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.