
[Xen-devel] [PATCHv7 2/2] xen/privcmd: improve performance of MMAPBATCH_V2



Make IOCTL_PRIVCMD_MMAPBATCH_V2 (and the older V1 version) map
multiple frames at a time rather than one at a time, even when the
frames are non-consecutive GFNs.
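
For reference, here is a minimal userspace sketch of driving the V2 ioctl.  It
assumes the UAPI definitions from xen/privcmd.h, a file descriptor for
/dev/xen/privcmd, and address space already reserved by mmap()ing that fd; the
helper name and error handling are illustrative only:

  #include <string.h>
  #include <sys/ioctl.h>
  #include <xen/privcmd.h> /* privcmd_mmapbatch_v2, IOCTL_PRIVCMD_MMAPBATCH_V2 */

  /*
   * Illustrative helper: ask privcmd to map 'num' (possibly scattered) GFNs
   * from domain 'dom' at 'addr'.  Per-frame status is written to err[]; the
   * ioctl returns -ENOENT if any frame was paged out.
   */
  static int map_foreign_batch(int privcmd_fd, domid_t dom, void *addr,
                               const xen_pfn_t *gfns, int *err,
                               unsigned int num)
  {
          struct privcmd_mmapbatch_v2 batch;

          memset(&batch, 0, sizeof(batch));
          batch.num  = num;
          batch.dom  = dom;
          batch.addr = (unsigned long)addr;
          batch.arr  = gfns;
          batch.err  = err;

          return ioctl(privcmd_fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch);
  }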

xen_remap_domain_mfn_array() is added, which maps an array of GFNs
(instead of a consecutive range of GFNs).
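
An in-kernel caller with a scatter list of GFNs can now map the whole batch in
one call.  A sketch (the wrapper below is hypothetical; only
xen_remap_domain_mfn_array() comes from this patch).  The return value is the
number of frames successfully mapped, and per-frame errors land in err[],
which may alias the GFN array:

  static int example_map_scattered(struct vm_area_struct *vma, unsigned long va,
                                   xen_pfn_t *gfns, int *err, int nr,
                                   unsigned int domid, struct page **pages)
  {
          int mapped;

          mapped = xen_remap_domain_mfn_array(vma, va, gfns, nr, err,
                                              vma->vm_page_prot, domid, pages);
          if (mapped < 0)
                  return mapped;          /* nothing was mapped */

          /* Some frames may have failed; per-frame codes are in err[]. */
          return mapped == nr ? 0 : -EFAULT;
  }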

Since per-frame errors are returned in an array, privcmd must set the
MMAPBATCH_V1 error bits as part of the "report errors" phase, after
all the frames are mapped.
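
For V1 callers the per-frame status therefore ends up in the top nibble of the
32-bit MFN value that is written back.  A decoding sketch using the
PRIVCMD_MMAPBATCH_* constants from xen/privcmd.h (with the known caveat that
this encoding can collide with large 64-bit MFNs):

  /* Returns 0 if the frame was mapped, -ENOENT if it was paged out,
   * or -EINVAL for any other failure (V1 cannot distinguish further). */
  static int v1_frame_status(xen_pfn_t mfn)
  {
          if (!(mfn & PRIVCMD_MMAPBATCH_MFN_ERROR))
                  return 0;
          if ((mfn & PRIVCMD_MMAPBATCH_MFN_ERROR) == PRIVCMD_MMAPBATCH_PAGED_ERROR)
                  return -ENOENT;
          return -EINVAL;
  }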

Migration times are significantly improved (when using a PV toolstack
domain).  For example, for an idle 12 GiB PV guest:

        Before     After
  real  0m38.179s  0m26.868s
  user  0m15.096s  0m13.652s
  sys   0m28.988s  0m18.732s

Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
 arch/arm/xen/enlighten.c |   20 ++++++--
 arch/x86/xen/mmu.c       |  102 ++++++++++++++++++++++++++++++++--------
 drivers/xen/privcmd.c    |  117 ++++++++++++++++++++++++++++++++--------------
 drivers/xen/xlate_mmu.c  |   50 ++++++++++++--------
 include/xen/xen-ops.h    |   47 +++++++++++++++++--
 5 files changed, 253 insertions(+), 83 deletions(-)

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 5c04389..224081c 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -53,15 +53,27 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 
 static __read_mostly int xen_events_irq = -1;
 
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
                               unsigned long addr,
-                              xen_pfn_t mfn, int nr,
-                              pgprot_t prot, unsigned domid,
+                              xen_pfn_t *mfn, int nr,
+                              int *err_ptr, pgprot_t prot,
+                              unsigned domid,
                               struct page **pages)
 {
-       return xen_xlate_remap_gfn_range(vma, addr, mfn, nr,
+       return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
                                         prot, domid, pages);
 }
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+
+/* Not used by XENFEAT_auto_translated guests. */
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+                              unsigned long addr,
+                              xen_pfn_t mfn, int nr,
+                              pgprot_t prot, unsigned domid,
+                              struct page **pages)
+{
+       return -ENOSYS;
+}
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3d536a5..78d352f 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2439,7 +2439,8 @@ void __init xen_hvm_init_mmu_ops(void)
 #define REMAP_BATCH_SIZE 16
 
 struct remap_data {
-       unsigned long mfn;
+       xen_pfn_t *mfn;
+       bool contiguous;
        pgprot_t prot;
        struct mmu_update *mmu_update;
 };
@@ -2448,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
                                 unsigned long addr, void *data)
 {
        struct remap_data *rmd = data;
-       pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
+       pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));
+
+       /* If we have a contiguous range, just update the mfn itself,
+          else update the pointer to the next mfn. */
+       if (rmd->contiguous)
+               (*rmd->mfn)++;
+       else
+               rmd->mfn++;
 
        rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
        rmd->mmu_update->val = pte_val_ma(pte);
@@ -2457,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
        return 0;
 }
 
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
-                              unsigned long addr,
-                              xen_pfn_t mfn, int nr,
-                              pgprot_t prot, unsigned domid,
-                              struct page **pages)
-
+static int do_remap_mfn(struct vm_area_struct *vma,
+                       unsigned long addr,
+                       xen_pfn_t *mfn, int nr,
+                       int *err_ptr, pgprot_t prot,
+                       unsigned domid,
+                       struct page **pages)
 {
+       int err = 0;
        struct remap_data rmd;
        struct mmu_update mmu_update[REMAP_BATCH_SIZE];
-       int batch;
        unsigned long range;
-       int err = 0;
+       int mapped = 0;
 
        BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
 
        if (xen_feature(XENFEAT_auto_translated_physmap)) {
 #ifdef CONFIG_XEN_PVH
                /* We need to update the local page tables and the xen HAP */
-               return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
-                                                domid, pages);
+               return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
+                                                prot, domid, pages);
 #else
                return -EINVAL;
 #endif
@@ -2484,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 
        rmd.mfn = mfn;
        rmd.prot = prot;
+       /* We use err_ptr to indicate whether we are doing a contiguous
+        * mapping or a discontiguous mapping. */
+       rmd.contiguous = !err_ptr;
 
        while (nr) {
-               batch = min(REMAP_BATCH_SIZE, nr);
+               int index = 0;
+               int done = 0;
+               int batch = min(REMAP_BATCH_SIZE, nr);
+               int batch_left = batch;
                range = (unsigned long)batch << PAGE_SHIFT;
 
                rmd.mmu_update = mmu_update;
@@ -2495,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
                if (err)
                        goto out;
 
-               err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
-               if (err < 0)
-                       goto out;
+               /* We record the error for each frame that fails, but
+                * continue mapping until the whole batch is done. */
+               do {
+                       int i;
+
+                       err = HYPERVISOR_mmu_update(&mmu_update[index],
+                                                   batch_left, &done, domid);
+
+                       /*
+                        * @err_ptr may be the same buffer as @mfn, so
+                        * only clear it after each chunk of @mfn is
+                        * used.
+                        */
+                       if (err_ptr) {
+                               for (i = index; i < index + done; i++)
+                                       err_ptr[i] = 0;
+                       }
+                       if (err < 0) {
+                               if (!err_ptr)
+                                       goto out;
+                               err_ptr[i] = err;
+                               done++; /* Skip failed frame. */
+                       } else
+                               mapped += done;
+                       batch_left -= done;
+                       index += done;
+               } while (batch_left);
 
                nr -= batch;
                addr += range;
+               if (err_ptr)
+                       err_ptr += batch;
        }
-
-       err = 0;
 out:
 
        xen_flush_tlb_all();
 
-       return err;
+       return err < 0 ? err : mapped;
+}
+
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+                              unsigned long addr,
+                              xen_pfn_t mfn, int nr,
+                              pgprot_t prot, unsigned domid,
+                              struct page **pages)
+{
+       return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages);
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+                              unsigned long addr,
+                              xen_pfn_t *mfn, int nr,
+                              int *err_ptr, pgprot_t prot,
+                              unsigned domid, struct page **pages)
+{
+       /* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
+        * and it is otherwise very hard to work out later that "wrong
+        * memory was mapped in" was the consequence.
+        */
+       BUG_ON(err_ptr == NULL);
+       return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages);
+}
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+
+
 /* Returns: 0 success */
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
                               int numpgs, struct page **pages)
@@ -2526,3 +2589,4 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
 #endif
 }
 EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
+
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 59ac71c..ec64b43 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size,
        return ret;
 }
 
+/*
+ * Similar to traverse_pages, but uses each page as a "block" of
+ * data to be processed as one unit.
+ */
+static int traverse_pages_block(unsigned nelem, size_t size,
+                               struct list_head *pos,
+                               int (*fn)(void *data, int nr, void *state),
+                               void *state)
+{
+       void *pagedata;
+       unsigned pageidx;
+       int ret = 0;
+
+       BUG_ON(size > PAGE_SIZE);
+
+       pageidx = PAGE_SIZE;
+
+       while (nelem) {
+               int nr = (PAGE_SIZE/size);
+               struct page *page;
+               if (nr > nelem)
+                       nr = nelem;
+               pos = pos->next;
+               page = list_entry(pos, struct page, lru);
+               pagedata = page_address(page);
+               ret = (*fn)(pagedata, nr, state);
+               if (ret)
+                       break;
+               nelem -= nr;
+       }
+
+       return ret;
+}
+
 struct mmap_mfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
@@ -274,39 +308,25 @@ struct mmap_batch_state {
 /* auto translated dom0 note: if domU being created is PV, then mfn is
  * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
  */
-static int mmap_batch_fn(void *data, void *state)
+static int mmap_batch_fn(void *data, int nr, void *state)
 {
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;
        struct vm_area_struct *vma = st->vma;
        struct page **pages = vma->vm_private_data;
-       struct page *cur_page = NULL;
+       struct page **cur_pages = NULL;
        int ret;
 
        if (xen_feature(XENFEAT_auto_translated_physmap))
-               cur_page = pages[st->index++];
+               cur_pages = &pages[st->index];
 
-       ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-                                        st->vma->vm_page_prot, st->domain,
-                                        &cur_page);
+       BUG_ON(nr < 0);
+       ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr,
+                                        (int *)mfnp, st->vma->vm_page_prot, 
+                                        st->domain, cur_pages);
 
-       /* Store error code for second pass. */
-       if (st->version == 1) {
-               if (ret < 0) {
-                       /*
-                        * V1 encodes the error codes in the 32bit top nibble of the
-                        * mfn (with its known limitations vis-a-vis 64 bit callers).
-                        */
-                       *mfnp |= (ret == -ENOENT) ?
-                                               PRIVCMD_MMAPBATCH_PAGED_ERROR :
-                                               PRIVCMD_MMAPBATCH_MFN_ERROR;
-               }
-       } else { /* st->version == 2 */
-               *((int *) mfnp) = ret;
-       }
-
-       /* And see if it affects the global_error. */
-       if (ret < 0) {
+       /* Adjust the global_error? */
+       if (ret != nr) {
                if (ret == -ENOENT)
                        st->global_error = -ENOENT;
                else {
@@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state)
                                st->global_error = 1;
                }
        }
-       st->va += PAGE_SIZE;
+       st->va += PAGE_SIZE * nr;
+       st->index += nr;
 
        return 0;
 }
 
-static int mmap_return_errors(void *data, void *state)
+static int mmap_return_error(int err, struct mmap_batch_state *st)
 {
-       struct mmap_batch_state *st = state;
+       int ret;
 
        if (st->version == 1) {
-               xen_pfn_t mfnp = *((xen_pfn_t *) data);
-               if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
-                       return __put_user(mfnp, st->user_mfn++);
-               else
+               if (err) {
+                       xen_pfn_t mfn;
+
+                       ret = get_user(mfn, st->user_mfn);
+                       if (ret < 0)
+                               return ret;
+                       /*
+                        * V1 encodes the error codes in the 32bit top
+                        * nibble of the mfn (with its known
+                        * limitations vis-a-vis 64 bit callers).
+                        */
+                       mfn |= (err == -ENOENT) ?
+                               PRIVCMD_MMAPBATCH_PAGED_ERROR :
+                               PRIVCMD_MMAPBATCH_MFN_ERROR;
+                       return __put_user(mfn, st->user_mfn++);
+               } else
                        st->user_mfn++;
        } else { /* st->version == 2 */
-               int err = *((int *) data);
                if (err)
                        return __put_user(err, st->user_err++);
                else
@@ -341,6 +373,21 @@ static int mmap_return_errors(void *data, void *state)
        return 0;
 }
 
+static int mmap_return_errors(void *data, int nr, void *state)
+{
+       struct mmap_batch_state *st = state;
+       int *errs = data;
+       int i;
+       int ret;
+
+       for (i = 0; i < nr; i++) {
+               ret = mmap_return_error(errs[i], st);
+               if (ret < 0)
+                       return ret;
+       }
+       return 0;
+}
+
 /* Allocate pfns that are then mapped with gmfns from foreign domid. Update
  * the vma with the page info to use later.
  * Returns: 0 if success, otherwise -errno
@@ -472,8 +519,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
        state.version       = version;
 
        /* mmap_batch_fn guarantees ret == 0 */
-       BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
-                            &pagelist, mmap_batch_fn, &state));
+       BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
+                                   &pagelist, mmap_batch_fn, &state));
 
        up_write(&mm->mmap_sem);
 
@@ -481,8 +528,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
                /* Write back errors in second pass. */
                state.user_mfn = (xen_pfn_t *)m.arr;
                state.user_err = m.err;
-               ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-                                                        &pagelist, mmap_return_errors, &state);
+               ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
+                                          &pagelist, mmap_return_errors, &state);
        } else
                ret = 0;
 
diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
index 7724d90..581db0f 100644
--- a/drivers/xen/xlate_mmu.c
+++ b/drivers/xen/xlate_mmu.c
@@ -62,13 +62,15 @@ static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
 }
 
 struct remap_data {
-       xen_pfn_t fgmfn; /* foreign domain's gmfn */
+       xen_pfn_t *fgmfn; /* foreign domain's gmfn */
        pgprot_t prot;
        domid_t  domid;
        struct vm_area_struct *vma;
        int index;
        struct page **pages;
        struct xen_remap_mfn_info *info;
+       int *err_ptr;
+       int mapped;
 };
 
 static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
@@ -80,38 +82,46 @@ static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
        pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot));
        int rc;
 
-       rc = map_foreign_page(pfn, info->fgmfn, info->domid);
-       if (rc < 0)
-               return rc;
-       set_pte_at(info->vma->vm_mm, addr, ptep, pte);
+       rc = map_foreign_page(pfn, *info->fgmfn, info->domid);
+       *info->err_ptr++ = rc;
+       if (!rc) {
+               set_pte_at(info->vma->vm_mm, addr, ptep, pte);
+               info->mapped++;
+       }
+       info->fgmfn++;
 
        return 0;
 }
 
-int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
+int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
                              unsigned long addr,
-                             xen_pfn_t gfn, int nr,
-                             pgprot_t prot, unsigned domid,
+                             xen_pfn_t *mfn, int nr,
+                             int *err_ptr, pgprot_t prot,
+                             unsigned domid,
                              struct page **pages)
 {
        int err;
        struct remap_data data;
-
-       /* TBD: Batching, current sole caller only does page at a time */
-       if (nr > 1)
-               return -EINVAL;
-
-       data.fgmfn = gfn;
-       data.prot = prot;
+       unsigned long range = nr << PAGE_SHIFT;
+ 
+       /* Kept here to make sure this code doesn't break the
+          x86 PVOPS path. */
+       BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
+ 
+       data.fgmfn = mfn;
+       data.prot  = prot;
        data.domid = domid;
-       data.vma = vma;
-       data.index = 0;
+       data.vma   = vma;
        data.pages = pages;
-       err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+       data.index = 0;
+       data.err_ptr = err_ptr;
+       data.mapped = 0;
+
+       err = apply_to_page_range(vma->vm_mm, addr, range,
                                  remap_pte_fn, &data);
-       return err;
+       return err < 0 ? err : data.mapped;
 }
-EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_range);
+EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array);
 
 int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
                              int nr, struct page **pages)
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 72fcb11..a1a1674 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -27,18 +27,55 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
 
 struct vm_area_struct;
+
+/*
+ * xen_remap_domain_mfn_array() - map an array of foreign frames
+ * @vma:     VMA to map the pages into
+ * @addr:    Address at which to map the pages
+ * @gfn:     Array of GFNs to map
+ * @nr:      Number of entries in the GFN array
+ * @err_ptr: Returns per-GFN error status.
+ * @prot:    page protection mask
+ * @domid:   Domain owning the pages
+ * @pages:   Array of pages if this domain has an auto-translated physmap
+ *
+ * @gfn and @err_ptr may point to the same buffer, the GFNs will be
+ * overwritten by the error codes after they are mapped.
+ *
+ * Returns the number of successfully mapped frames, or a -ve error
+ * code.
+ */
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+                              unsigned long addr,
+                              xen_pfn_t *gfn, int nr,
+                              int *err_ptr, pgprot_t prot,
+                              unsigned domid, 
+                              struct page **pages);
+
+/* xen_remap_domain_mfn_range() - map a range of foreign frames
+ * @vma:     VMA to map the pages into
+ * @addr:    Address at which to map the pages
+ * @gfn:     First GFN to map.
+ * @nr:      Number of frames to map
+ * @prot:    page protection mask
+ * @domid:   Domain owning the pages
+ * @pages:   Array of pages if this domain has an auto-translated physmap
+ *
+ * Returns the number of successfully mapped frames, or a -ve error
+ * code.
+ */
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
                               unsigned long addr,
-                              xen_pfn_t mfn, int nr,
+                              xen_pfn_t gfn, int nr,
                               pgprot_t prot, unsigned domid,
                               struct page **pages);
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
                               int numpgs, struct page **pages);
-int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
+int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
                              unsigned long addr,
-                             xen_pfn_t gfn, int nr,
-                             pgprot_t prot,
-                             unsigned domid, 
+                             xen_pfn_t *gfn, int nr,
+                             int *err_ptr, pgprot_t prot,
+                             unsigned domid,
                              struct page **pages);
 int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
                              int nr, struct page **pages);
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
