[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 3/4 RFC] x86/p2m: use large pages for MMIO mappings



When mapping large BARs (e.g. the frame buffer of a graphics card) the
overhead or establishing such mappings using onle 4k pages has,
particularly after the XSA-125 fix, become unacceptable. Alter the
XEN_DOMCTL_memory_mapping semantics once again, so that there's no
longer a fixed amount of guest frames that represents the upper limit
of what a single invocation can map. Instead bound execution time by
limiting the number of iterations (regardless of page size).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
RFC reasons:
- ARM side unimplemented (and hence libxc for now made cope with both
  models), the main issue (besides my inability to test any change
  there) being the many internal uses of map_mmio_regions())
- error unmapping in map_mmio_regions() and error propagation to caller
  from unmap_mmio_regions() are not satisfactory (for the latter a
  possible model might be to have the function - and hence the domctl -
  return the [non-zero] number of completed entries upon error,
  requiring the caller to re-invoke the hypercall to then obtain the
  actual error for the failed slot)
- iommu_{,un}map_page() interfaces don't support "order" (hence
  mmio_order() for now returns zero when !iommu_hap_pt_share, which in
  particular means the AMD side isn't being take care of just yet)

--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -2215,7 +2215,7 @@ int xc_domain_memory_mapping(
 {
     DECLARE_DOMCTL;
     xc_dominfo_t info;
-    int ret = 0, err;
+    int ret = 0, rc;
     unsigned long done = 0, nr, max_batch_sz;
 
     if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 ||
@@ -2240,19 +2240,24 @@ int xc_domain_memory_mapping(
         domctl.u.memory_mapping.nr_mfns = nr;
         domctl.u.memory_mapping.first_gfn = first_gfn + done;
         domctl.u.memory_mapping.first_mfn = first_mfn + done;
-        err = do_domctl(xch, &domctl);
-        if ( err && errno == E2BIG )
+        rc = do_domctl(xch, &domctl);
+        if ( rc < 0 && errno == E2BIG )
         {
             if ( max_batch_sz <= 1 )
                 break;
             max_batch_sz >>= 1;
             continue;
         }
+        if ( rc > 0 )
+        {
+            done += rc;
+            continue;
+        }
         /* Save the first error... */
         if ( !ret )
-            ret = err;
+            ret = rc;
         /* .. and ignore the rest of them when removing. */
-        if ( err && add_mapping != DPCI_REMOVE_MAPPING )
+        if ( rc && add_mapping != DPCI_REMOVE_MAPPING )
             break;
 
         done += nr;
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -436,7 +436,7 @@ static __init void pvh_add_mem_mapping(s
         else
             a = p2m_access_rwx;
 
-        if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), a)) )
+        if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), 0, a)) )
             panic("pvh_add_mem_mapping: gfn:%lx mfn:%lx i:%ld rc:%d\n",
                   gfn, mfn, i, rc);
         if ( !(i & 0xfffff) )
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2396,7 +2396,8 @@ static int vmx_alloc_vlapic_mapping(stru
     share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
     d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
     set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
-        _mfn(virt_to_mfn(apic_va)), p2m_get_hostp2m(d)->default_access);
+                       _mfn(virt_to_mfn(apic_va)), 0,
+                       p2m_get_hostp2m(d)->default_access);
 
     return 0;
 }
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -897,39 +897,47 @@ void p2m_change_type_range(struct domain
 
 /* Returns: 0 for success, -errno for failure */
 static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
-                               p2m_type_t gfn_p2mt, p2m_access_t access)
+                               unsigned int order, p2m_type_t gfn_p2mt,
+                               p2m_access_t access)
 {
     int rc = 0;
     p2m_access_t a;
     p2m_type_t ot;
     mfn_t omfn;
+    unsigned int cur_order = 0;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     if ( !paging_mode_translate(d) )
         return -EIO;
 
-    gfn_lock(p2m, gfn, 0);
-    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
+    gfn_lock(p2m, gfn, order);
+    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
+    if ( cur_order < order )
+    {
+        gfn_unlock(p2m, gfn, order);
+        return cur_order + 1;
+    }
     if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
     {
-        gfn_unlock(p2m, gfn, 0);
+        gfn_unlock(p2m, gfn, order);
         domain_crash(d);
         return -ENOENT;
     }
     else if ( p2m_is_ram(ot) )
     {
+        unsigned long i;
+
         ASSERT(mfn_valid(omfn));
-        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+        for ( i = 0; i < (1UL << order); ++i )
+            set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
     }
 
     P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn, mfn_x(mfn));
-    rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, gfn_p2mt,
-                       access);
-    gfn_unlock(p2m, gfn, 0);
+    rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
+    gfn_unlock(p2m, gfn, order);
     if ( rc )
-        gdprintk(XENLOG_ERR,
-                 "p2m_set_entry failed! mfn=%08lx rc:%d\n",
-                 mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot)), rc);
+        gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (%#lx)\n",
+                 gfn, order, rc, mfn_x(mfn));
     return rc;
 }
 
@@ -937,14 +945,14 @@ static int set_typed_p2m_entry(struct do
 static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
                                  mfn_t mfn)
 {
-    return set_typed_p2m_entry(d, gfn, mfn, p2m_map_foreign,
+    return set_typed_p2m_entry(d, gfn, mfn, 0, p2m_map_foreign,
                                p2m_get_hostp2m(d)->default_access);
 }
 
 int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
-                       p2m_access_t access)
+                       unsigned int order, p2m_access_t access)
 {
-    return set_typed_p2m_entry(d, gfn, mfn, p2m_mmio_direct, access);
+    return set_typed_p2m_entry(d, gfn, mfn, order, p2m_mmio_direct, access);
 }
 
 int set_identity_p2m_entry(struct domain *d, unsigned long gfn,
@@ -989,19 +997,26 @@ int set_identity_p2m_entry(struct domain
 }
 
 /* Returns: 0 for success, -errno for failure */
-int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+                         unsigned int order)
 {
     int rc = -EINVAL;
     mfn_t actual_mfn;
     p2m_access_t a;
     p2m_type_t t;
+    unsigned int cur_order = 0;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     if ( !paging_mode_translate(d) )
         return -EIO;
 
-    gfn_lock(p2m, gfn, 0);
-    actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, NULL, NULL);
+    gfn_lock(p2m, gfn, order);
+    actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, &cur_order, NULL);
+    if ( cur_order < order )
+    {
+        rc = cur_order + 1;
+        goto out;
+    }
 
     /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
     if ( (INVALID_MFN == mfn_x(actual_mfn)) || (t != p2m_mmio_direct) )
@@ -1014,11 +1029,11 @@ int clear_mmio_p2m_entry(struct domain *
         gdprintk(XENLOG_WARNING,
                  "no mapping between mfn %08lx and gfn %08lx\n",
                  mfn_x(mfn), gfn);
-    rc = p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_4K, p2m_invalid,
+    rc = p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), order, p2m_invalid,
                        p2m->default_access);
 
  out:
-    gfn_unlock(p2m, gfn, 0);
+    gfn_unlock(p2m, gfn, order);
 
     return rc;
 }
@@ -2037,6 +2052,25 @@ unsigned long paging_gva_to_gfn(struct v
     return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
 }
 
+static unsigned int mmio_order(const struct domain *d,
+                               unsigned long start_fn, unsigned long nr)
+{
+    if ( !hap_enabled(d) || !need_iommu(d) || !iommu_hap_pt_share ||
+         (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
+        return 0;
+
+    if ( !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) &&
+         opt_hap_1gb && hvm_hap_has_1gb(d) )
+        return PAGE_ORDER_1G;
+
+    if ( opt_hap_2mb && hvm_hap_has_2mb(d) )
+        return PAGE_ORDER_2M;
+
+    return 0;
+}
+
+#define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */
+
 int map_mmio_regions(struct domain *d,
                      unsigned long start_gfn,
                      unsigned long nr,
@@ -2044,22 +2078,45 @@ int map_mmio_regions(struct domain *d,
 {
     int ret = 0;
     unsigned long i;
+    unsigned int iter, order;
 
     if ( !paging_mode_translate(d) )
         return 0;
 
-    for ( i = 0; !ret && i < nr; i++ )
+    for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
+          i += 1UL << order, ++iter )
     {
-        ret = set_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i),
-                                 p2m_get_hostp2m(d)->default_access);
-        if ( ret )
+        for ( order = mmio_order(d, (start_gfn + i) | (mfn + i), nr - i); ;
+              order = ret - 1 )
         {
-            unmap_mmio_regions(d, start_gfn, i, mfn);
+            ret = set_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i), order,
+                                     p2m_get_hostp2m(d)->default_access);
+            if ( ret <= 0 )
+                break;
+        }
+        if ( ret < 0 )
+        {
+            for ( nr = i, iter = i = 0; i < nr ; i += 1UL << order, ++iter )
+            {
+                int rc;
+
+                WARN_ON(iter == MAP_MMIO_MAX_ITER);
+                for ( order = mmio_order(d, (start_gfn + i) | (mfn + i),
+                                         nr - i); ; order = rc - 1 )
+                {
+                    rc = clear_mmio_p2m_entry(d, start_gfn + i,
+                                              _mfn(mfn + i), order);
+                    if ( rc <= 0 )
+                        break;
+                }
+                if ( rc < 0 )
+                    order = 0;
+            }
             break;
         }
     }
 
-    return ret;
+    return ret < 0 ? ret : i == nr ? 0 : i;
 }
 
 int unmap_mmio_regions(struct domain *d,
@@ -2069,18 +2126,31 @@ int unmap_mmio_regions(struct domain *d,
 {
     int err = 0;
     unsigned long i;
+    unsigned int iter, order;
 
     if ( !paging_mode_translate(d) )
         return 0;
 
-    for ( i = 0; i < nr; i++ )
+    for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
+          i += 1UL << order, ++iter )
     {
-        int ret = clear_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i));
-        if ( ret )
+        int ret;
+
+        for ( order = mmio_order(d, (start_gfn + i) | (mfn + i), nr - i); ;
+              order = ret - 1 )
+        {
+            ret = clear_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i), order);
+            if ( ret <= 0 )
+                break;
+        }
+        if ( ret < 0 )
+        {
             err = ret;
+            order = 0;
+        }
     }
 
-    return err;
+    return err ?: i == nr ? 0 : i;
 }
 
 unsigned int p2m_find_altp2m_by_eptp(struct domain *d, uint64_t eptp)
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -1042,10 +1042,12 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
              (gfn + nr_mfns - 1) < gfn ) /* wrap? */
             break;
 
+#ifndef CONFIG_X86 /* XXX ARM!? */
         ret = -E2BIG;
         /* Must break hypercall up as this could take a while. */
         if ( nr_mfns > 64 )
             break;
+#endif
 
         ret = -EPERM;
         if ( !iomem_access_permitted(current->domain, mfn, mfn_end) ||
@@ -1063,7 +1065,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
                    d->domain_id, gfn, mfn, nr_mfns);
 
             ret = map_mmio_regions(d, gfn, nr_mfns, mfn);
-            if ( ret )
+            if ( ret < 0 )
                 printk(XENLOG_G_WARNING
                        "memory_map:fail: dom%d gfn=%lx mfn=%lx nr=%lx 
ret:%ld\n",
                        d->domain_id, gfn, mfn, nr_mfns, ret);
@@ -1075,7 +1077,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
                    d->domain_id, gfn, mfn, nr_mfns);
 
             ret = unmap_mmio_regions(d, gfn, nr_mfns, mfn);
-            if ( ret && is_hardware_domain(current->domain) )
+            if ( ret < 0 && is_hardware_domain(current->domain) )
                 printk(XENLOG_ERR
                        "memory_map: error %ld removing dom%d access to 
[%lx,%lx]\n",
                        ret, d->domain_id, mfn, mfn_end);
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -209,7 +209,7 @@ int guest_remove_page(struct domain *d, 
     }
     if ( p2mt == p2m_mmio_direct )
     {
-        clear_mmio_p2m_entry(d, gmfn, _mfn(mfn));
+        clear_mmio_p2m_entry(d, gmfn, _mfn(mfn), 0);
         put_gfn(d, gmfn);
         return 1;
     }
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -557,8 +557,9 @@ int p2m_is_logdirty_range(struct p2m_dom
 
 /* Set mmio addresses in the p2m table (for pass-through) */
 int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
-                       p2m_access_t access);
-int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
+                       unsigned int order, p2m_access_t access);
+int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+                         unsigned int order);
 
 /* Set identity addresses in the p2m table (for pass-through) */
 int set_identity_p2m_entry(struct domain *d, unsigned long gfn,


Attachment: x86-p2m-mmio-large.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.