[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] Improve performance of IOCTL_PRIVCMD_MMAPBATCH_V2



This patch makes the IOCTL_PRIVCMD_MMAPBATCH_V2 (and older V1 version)
map multiple (typically 1024) pages at a time rather than one page at a
time, despite the pages being non-consecutive MFNs. The main change is
to pass a pointer to an array of mfns, rather than one mfn. To support
error reporting, we also pass an err_ptr. If err_ptr is NULL, it indicates
we want the contiguous pages behaviour, so the mfn value is incremented
rather than the pointer itself. Performance of mapping guest memory into
Dom0 is improved by a factor of around 6 with this change.

Signed-off-by: Mats Petersson <mats.petersson@xxxxxxxxxx>
---
 arch/x86/xen/mmu.c    |   47 ++++++++++++++++++++++++++-------
 drivers/xen/privcmd.c |   70 ++++++++++++++++++++++++++++++++++++++++++-------
 include/xen/xen-ops.h |    5 ++--
 3 files changed, 100 insertions(+), 22 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dcf5f2d..c5e23ba 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2477,7 +2477,8 @@ void __init xen_hvm_init_mmu_ops(void)
 #define REMAP_BATCH_SIZE 16
 
 struct remap_data {
-       unsigned long mfn;
+       unsigned long *mfn;
+       int contiguous;
        pgprot_t prot;
        struct mmu_update *mmu_update;
 };
@@ -2486,7 +2487,13 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t 
token,
                                 unsigned long addr, void *data)
 {
        struct remap_data *rmd = data;
-       pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
+       pte_t pte = pte_mkspecial(pfn_pte(*rmd->mfn, rmd->prot));
+       /* If we have a contigious range, just update the mfn itself,
+          else update pointer to be "next mfn". */
+       if (rmd->contiguous)
+               (*rmd->mfn)++;
+       else
+               rmd->mfn++;
 
        rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
        rmd->mmu_update->val = pte_val_ma(pte);
@@ -2495,16 +2502,17 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t 
token,
        return 0;
 }
 
+
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
                               unsigned long addr,
-                              unsigned long mfn, int nr,
-                              pgprot_t prot, unsigned domid)
+                              unsigned long *mfn, int nr,
+                              int *err_ptr, pgprot_t prot,
+                              unsigned domid)
 {
+       int err;
        struct remap_data rmd;
        struct mmu_update mmu_update[REMAP_BATCH_SIZE];
-       int batch;
        unsigned long range;
-       int err = 0;
 
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return -EINVAL;
@@ -2515,9 +2523,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct 
*vma,
 
        rmd.mfn = mfn;
        rmd.prot = prot;
+       /* We use the err_ptr to indicate if there we are doing a contigious
+        * mapping or a discontigious mapping. */
+       rmd.contiguous = !err_ptr;
 
        while (nr) {
-               batch = min(REMAP_BATCH_SIZE, nr);
+               int index = 0;
+               int done = 0;
+               int batch = min(REMAP_BATCH_SIZE, nr);
+               int batch_left = batch;
                range = (unsigned long)batch << PAGE_SHIFT;
 
                rmd.mmu_update = mmu_update;
@@ -2526,12 +2540,25 @@ int xen_remap_domain_mfn_range(struct vm_area_struct 
*vma,
                if (err)
                        goto out;
 
-               err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
-               if (err < 0)
-                       goto out;
+               /* We record the error for each page that gives an error, but
+                * continue mapping until the whole set is done */
+               do {
+                       err = HYPERVISOR_mmu_update(&mmu_update[index],
+                                                   batch_left, &done, domid);
+                       if (err < 0) {
+                               if (err_ptr)
+                                       err_ptr[index] = err;
+                               else /* exit if error and no err_ptr */
+                                       goto out;
+                       }
+                       batch_left -= done;
+                       index += done;
+               } while (batch_left);
 
                nr -= batch;
                addr += range;
+               if (err_ptr)
+                       err_ptr += batch;
        }
 
        err = 0;
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 24aec2f..68bc619 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -151,6 +151,41 @@ static int traverse_pages(unsigned nelem, size_t size,
        return ret;
 }
 
+
+/*
+ * Similar to traverse_pages, but use each page as a "block" of
+ * data to be processed as one unit.
+ */
+static int traverse_pages_block(unsigned nelem, size_t size,
+                               struct list_head *pos,
+                               int (*fn)(void *data, int nr, void *state),
+                               void *state)
+{
+       void *pagedata;
+       unsigned pageidx;
+       int ret = 0;
+
+       BUG_ON(size > PAGE_SIZE);
+
+       pageidx = PAGE_SIZE;
+
+       while (nelem) {
+               int nr = (PAGE_SIZE/size);
+               struct page *page;
+               if (nr > nelem)
+                       nr = nelem;
+               pos = pos->next;
+               page = list_entry(pos, struct page, lru);
+               pagedata = page_address(page);
+               ret = (*fn)(pagedata, nr, state);
+               if (ret)
+                       break;
+               nelem -= nr;
+       }
+
+       return ret;
+}
+
 struct mmap_mfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
@@ -163,6 +198,7 @@ static int mmap_mfn_range(void *data, void *state)
        struct mmap_mfn_state *st = state;
        struct vm_area_struct *vma = st->vma;
        int rc;
+       xen_pfn_t mfn;
 
        /* Do not allow range to wrap the address space. */
        if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
@@ -174,9 +210,15 @@ static int mmap_mfn_range(void *data, void *state)
            ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
                return -EINVAL;
 
+       /* We need to pass a xen_pfn_t. Check that it fits, or pass EINVAL! */
+       mfn = (xen_pfn_t)msg->mfn;
+       if (mfn != msg->mfn)
+               return -EINVAL;
+
        rc = xen_remap_domain_mfn_range(vma,
                                        msg->va & PAGE_MASK,
-                                       msg->mfn, msg->npages,
+                                       &mfn, msg->npages,
+                                       NULL,
                                        vma->vm_page_prot,
                                        st->domain);
        if (rc < 0)
@@ -250,7 +292,7 @@ struct mmap_batch_state {
         *      0 for no errors
         *      1 if at least one error has happened (and no
         *          -ENOENT errors have happened)
-        *      -ENOENT if at least 1 -ENOENT has happened.
+        *      -ENOENT if at least one -ENOENT has happened.
         */
        int global_error;
        /* An array for individual errors */
@@ -260,17 +302,21 @@ struct mmap_batch_state {
        xen_pfn_t __user *user_mfn;
 };
 
-static int mmap_batch_fn(void *data, void *state)
+static int mmap_batch_fn(void *data, int nr, void *state)
 {
        xen_pfn_t *mfnp = data;
+
        struct mmap_batch_state *st = state;
        int ret;
 
-       ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-                                        st->vma->vm_page_prot, st->domain);
+       BUG_ON(nr < 0);
 
-       /* Store error code for second pass. */
-       *(st->err++) = ret;
+       ret = xen_remap_domain_mfn_range(st->vma,
+                                        st->va & PAGE_MASK,
+                                        mfnp, nr,
+                                        st->err,
+                                        st->vma->vm_page_prot,
+                                        st->domain);
 
        /* And see if it affects the global_error. */
        if (ret < 0) {
@@ -282,7 +328,7 @@ static int mmap_batch_fn(void *data, void *state)
                                st->global_error = 1;
                }
        }
-       st->va += PAGE_SIZE;
+       st->va += PAGE_SIZE * nr;
 
        return 0;
 }
@@ -303,6 +349,8 @@ static int mmap_return_errors_v1(void *data, void *state)
        return __put_user(*mfnp, st->user_mfn++);
 }
 
+
+
 static struct vm_operations_struct privcmd_vm_ops;
 
 static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
@@ -319,6 +367,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, 
int version)
        if (!xen_initial_domain())
                return -EPERM;
 
+
+
        switch (version) {
        case 1:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
@@ -378,8 +428,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, 
int version)
        state.err           = err_array;
 
        /* mmap_batch_fn guarantees ret == 0 */
-       BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
-                            &pagelist, mmap_batch_fn, &state));
+       BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
+                                   &pagelist, mmap_batch_fn, &state));
 
        up_write(&mm->mmap_sem);
 
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 6a198e4..1bcda39 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -26,7 +26,8 @@ void xen_destroy_contiguous_region(unsigned long vstart, 
unsigned int order);
 struct vm_area_struct;
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
                               unsigned long addr,
-                              unsigned long mfn, int nr,
-                              pgprot_t prot, unsigned domid);
+                              unsigned long *mfn, int nr,
+                              int *err_ptr, pgprot_t prot,
+                              unsigned domid);
 
 #endif /* INCLUDE_XEN_OPS_H */
-- 
1.7.9.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.