|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCHv7 2/2] xen/privcmd: improve performance of MMAPBATCH_V2
On Wed, 11 Mar 2015, David Vrabel wrote:
> Make the IOCTL_PRIVCMD_MMAPBATCH_V2 (and older V1 version) map
> multiple frames at a time rather than one at a time, despite the pages
> being non-consecutive GFNs.
>
> xen_remap_foreign_mfn_array() is added which maps an array of GFNs
> (instead of a consecutive range of GFNs).
>
> Since per-frame errors are returned in an array, privcmd must set the
> MMAPBATCH_V1 error bits as part of the "report errors" phase, after
> all the frames are mapped.
>
> Migrate times are significantly improved (when using a PV toolstack
> domain). For example, for an idle 12 GiB PV guest:
>
> Before After
> real 0m38.179s 0m26.868s
> user 0m15.096s 0m13.652s
> sys 0m28.988s 0m18.732s
>
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
for the common and arm bits:
Reviewed-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
> arch/arm/xen/enlighten.c | 20 ++++++--
> arch/x86/xen/mmu.c | 102 ++++++++++++++++++++++++++++++++--------
> drivers/xen/privcmd.c | 117
> ++++++++++++++++++++++++++++++++--------------
> drivers/xen/xlate_mmu.c | 50 ++++++++++++--------
> include/xen/xen-ops.h | 47 +++++++++++++++++--
> 5 files changed, 253 insertions(+), 83 deletions(-)
>
> diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
> index 5c04389..224081c 100644
> --- a/arch/arm/xen/enlighten.c
> +++ b/arch/arm/xen/enlighten.c
> @@ -53,15 +53,27 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
>
> static __read_mostly int xen_events_irq = -1;
>
> -int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
> +int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
> unsigned long addr,
> - xen_pfn_t mfn, int nr,
> - pgprot_t prot, unsigned domid,
> + xen_pfn_t *mfn, int nr,
> + int *err_ptr, pgprot_t prot,
> + unsigned domid,
> struct page **pages)
> {
> - return xen_xlate_remap_gfn_range(vma, addr, mfn, nr,
> + return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
> prot, domid, pages);
> }
> +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
> +
> +/* Not used by XENFEAT_auto_translated guests. */
> +int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
> + unsigned long addr,
> + xen_pfn_t mfn, int nr,
> + pgprot_t prot, unsigned domid,
> + struct page **pages)
> +{
> + return -ENOSYS;
> +}
> EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
>
> int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index 3d536a5..78d352f 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -2439,7 +2439,8 @@ void __init xen_hvm_init_mmu_ops(void)
> #define REMAP_BATCH_SIZE 16
>
> struct remap_data {
> - unsigned long mfn;
> + xen_pfn_t *mfn;
> + bool contiguous;
> pgprot_t prot;
> struct mmu_update *mmu_update;
> };
> @@ -2448,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep,
> pgtable_t token,
> unsigned long addr, void *data)
> {
> struct remap_data *rmd = data;
> - pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
> + pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));
> +
> + /* If we have a contiguous range, just update the mfn itself,
> + else update pointer to be "next mfn". */
> + if (rmd->contiguous)
> + (*rmd->mfn)++;
> + else
> + rmd->mfn++;
>
> rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
> rmd->mmu_update->val = pte_val_ma(pte);
> @@ -2457,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep,
> pgtable_t token,
> return 0;
> }
>
> -int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
> - unsigned long addr,
> - xen_pfn_t mfn, int nr,
> - pgprot_t prot, unsigned domid,
> - struct page **pages)
> -
> +static int do_remap_mfn(struct vm_area_struct *vma,
> + unsigned long addr,
> + xen_pfn_t *mfn, int nr,
> + int *err_ptr, pgprot_t prot,
> + unsigned domid,
> + struct page **pages)
> {
> + int err = 0;
> struct remap_data rmd;
> struct mmu_update mmu_update[REMAP_BATCH_SIZE];
> - int batch;
> unsigned long range;
> - int err = 0;
> + int mapped = 0;
>
> BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
>
> if (xen_feature(XENFEAT_auto_translated_physmap)) {
> #ifdef CONFIG_XEN_PVH
> /* We need to update the local page tables and the xen HAP */
> - return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
> - domid, pages);
> + return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
> + prot, domid, pages);
> #else
> return -EINVAL;
> #endif
> @@ -2484,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct
> *vma,
>
> rmd.mfn = mfn;
> rmd.prot = prot;
> + /* We use err_ptr to indicate whether we are doing a contiguous
> + * mapping or a discontiguous mapping. */
> + rmd.contiguous = !err_ptr;
>
> while (nr) {
> - batch = min(REMAP_BATCH_SIZE, nr);
> + int index = 0;
> + int done = 0;
> + int batch = min(REMAP_BATCH_SIZE, nr);
> + int batch_left = batch;
> range = (unsigned long)batch << PAGE_SHIFT;
>
> rmd.mmu_update = mmu_update;
> @@ -2495,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct
> *vma,
> if (err)
> goto out;
>
> - err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
> - if (err < 0)
> - goto out;
> + /* We record the error for each page that gives an error, but
> + * continue mapping until the whole set is done */
> + do {
> + int i;
> +
> + err = HYPERVISOR_mmu_update(&mmu_update[index],
> + batch_left, &done, domid);
> +
> + /*
> + * @err_ptr may be the same buffer as @mfn, so
> + * only clear it after each chunk of @mfn is
> + * used.
> + */
> + if (err_ptr) {
> + for (i = index; i < index + done; i++)
> + err_ptr[i] = 0;
> + }
> + if (err < 0) {
> + if (!err_ptr)
> + goto out;
> + err_ptr[i] = err;
> + done++; /* Skip failed frame. */
> + } else
> + mapped += done;
> + batch_left -= done;
> + index += done;
> + } while (batch_left);
>
> nr -= batch;
> addr += range;
> + if (err_ptr)
> + err_ptr += batch;
> }
> -
> - err = 0;
> out:
>
> xen_flush_tlb_all();
>
> - return err;
> + return err < 0 ? err : mapped;
> +}
> +
> +int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
> + unsigned long addr,
> + xen_pfn_t mfn, int nr,
> + pgprot_t prot, unsigned domid,
> + struct page **pages)
> +{
> + return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages);
> }
> EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
>
> +int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
> + unsigned long addr,
> + xen_pfn_t *mfn, int nr,
> + int *err_ptr, pgprot_t prot,
> + unsigned domid, struct page **pages)
> +{
> + /* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
> + * and the consequences later are quite hard to diagnose -- it would
> + * be unclear why the "wrong memory was mapped in".
> + */
> + BUG_ON(err_ptr == NULL);
> + return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages);
> +}
> +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
> +
> +
> /* Returns: 0 success */
> int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
> int numpgs, struct page **pages)
> @@ -2526,3 +2589,4 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct
> *vma,
> #endif
> }
> EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
> +
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index 59ac71c..ec64b43 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size,
> return ret;
> }
>
> +/*
> + * Similar to traverse_pages, but use each page as a "block" of
> + * data to be processed as one unit.
> + */
> +static int traverse_pages_block(unsigned nelem, size_t size,
> + struct list_head *pos,
> + int (*fn)(void *data, int nr, void *state),
> + void *state)
> +{
> + void *pagedata;
> + unsigned pageidx;
> + int ret = 0;
> +
> + BUG_ON(size > PAGE_SIZE);
> +
> + pageidx = PAGE_SIZE;
> +
> + while (nelem) {
> + int nr = (PAGE_SIZE/size);
> + struct page *page;
> + if (nr > nelem)
> + nr = nelem;
> + pos = pos->next;
> + page = list_entry(pos, struct page, lru);
> + pagedata = page_address(page);
> + ret = (*fn)(pagedata, nr, state);
> + if (ret)
> + break;
> + nelem -= nr;
> + }
> +
> + return ret;
> +}
> +
> struct mmap_mfn_state {
> unsigned long va;
> struct vm_area_struct *vma;
> @@ -274,39 +308,25 @@ struct mmap_batch_state {
> /* auto translated dom0 note: if domU being created is PV, then mfn is
> * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
> */
> -static int mmap_batch_fn(void *data, void *state)
> +static int mmap_batch_fn(void *data, int nr, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> struct vm_area_struct *vma = st->vma;
> struct page **pages = vma->vm_private_data;
> - struct page *cur_page = NULL;
> + struct page **cur_pages = NULL;
> int ret;
>
> if (xen_feature(XENFEAT_auto_translated_physmap))
> - cur_page = pages[st->index++];
> + cur_pages = &pages[st->index];
>
> - ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> - st->vma->vm_page_prot, st->domain,
> - &cur_page);
> + BUG_ON(nr < 0);
> + ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr,
> + (int *)mfnp, st->vma->vm_page_prot,
> + st->domain, cur_pages);
>
> - /* Store error code for second pass. */
> - if (st->version == 1) {
> - if (ret < 0) {
> - /*
> - * V1 encodes the error codes in the 32bit top nibble
> of the
> - * mfn (with its known limitations vis-a-vis 64 bit
> callers).
> - */
> - *mfnp |= (ret == -ENOENT) ?
> - PRIVCMD_MMAPBATCH_PAGED_ERROR :
> - PRIVCMD_MMAPBATCH_MFN_ERROR;
> - }
> - } else { /* st->version == 2 */
> - *((int *) mfnp) = ret;
> - }
> -
> - /* And see if it affects the global_error. */
> - if (ret < 0) {
> + /* Adjust the global_error? */
> + if (ret != nr) {
> if (ret == -ENOENT)
> st->global_error = -ENOENT;
> else {
> @@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state)
> st->global_error = 1;
> }
> }
> - st->va += PAGE_SIZE;
> + st->va += PAGE_SIZE * nr;
> + st->index += nr;
>
> return 0;
> }
>
> -static int mmap_return_errors(void *data, void *state)
> +static int mmap_return_error(int err, struct mmap_batch_state *st)
> {
> - struct mmap_batch_state *st = state;
> + int ret;
>
> if (st->version == 1) {
> - xen_pfn_t mfnp = *((xen_pfn_t *) data);
> - if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
> - return __put_user(mfnp, st->user_mfn++);
> - else
> + if (err) {
> + xen_pfn_t mfn;
> +
> + ret = get_user(mfn, st->user_mfn);
> + if (ret < 0)
> + return ret;
> +
> + /*
> + * V1 encodes the error codes in the 32bit top
> + * nibble of the mfn (with its known
> + * limitations vis-a-vis 64 bit callers).
> + */
> + mfn |= (err == -ENOENT) ?
> + PRIVCMD_MMAPBATCH_PAGED_ERROR :
> + PRIVCMD_MMAPBATCH_MFN_ERROR;
> + return __put_user(mfn, st->user_mfn++);
> + } else
> st->user_mfn++;
> } else { /* st->version == 2 */
> - int err = *((int *) data);
> if (err)
> return __put_user(err, st->user_err++);
> else
> @@ -341,6 +373,21 @@ static int mmap_return_errors(void *data, void *state)
> return 0;
> }
>
> +static int mmap_return_errors(void *data, int nr, void *state)
> +{
> + struct mmap_batch_state *st = state;
> + int *errs = data;
> + int i;
> + int ret;
> +
> + for (i = 0; i < nr; i++) {
> + ret = mmap_return_error(errs[i], st);
> + if (ret < 0)
> + return ret;
> + }
> + return 0;
> +}
> +
> /* Allocate pfns that are then mapped with gmfns from foreign domid. Update
> * the vma with the page info to use later.
> * Returns: 0 if success, otherwise -errno
> @@ -472,8 +519,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata,
> int version)
> state.version = version;
>
> /* mmap_batch_fn guarantees ret == 0 */
> - BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
> - &pagelist, mmap_batch_fn, &state));
> + BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
> + &pagelist, mmap_batch_fn, &state));
>
> up_write(&mm->mmap_sem);
>
> @@ -481,8 +528,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata,
> int version)
> /* Write back errors in second pass. */
> state.user_mfn = (xen_pfn_t *)m.arr;
> state.user_err = m.err;
> - ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> - &pagelist,
> mmap_return_errors, &state);
> + ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
> + &pagelist, mmap_return_errors,
> &state);
> } else
> ret = 0;
>
> diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
> index 7724d90..581db0f 100644
> --- a/drivers/xen/xlate_mmu.c
> +++ b/drivers/xen/xlate_mmu.c
> @@ -62,13 +62,15 @@ static int map_foreign_page(unsigned long lpfn, unsigned
> long fgmfn,
> }
>
> struct remap_data {
> - xen_pfn_t fgmfn; /* foreign domain's gmfn */
> + xen_pfn_t *fgmfn; /* foreign domain's gmfn */
> pgprot_t prot;
> domid_t domid;
> struct vm_area_struct *vma;
> int index;
> struct page **pages;
> struct xen_remap_mfn_info *info;
> + int *err_ptr;
> + int mapped;
> };
>
> static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
> @@ -80,38 +82,46 @@ static int remap_pte_fn(pte_t *ptep, pgtable_t token,
> unsigned long addr,
> pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot));
> int rc;
>
> - rc = map_foreign_page(pfn, info->fgmfn, info->domid);
> - if (rc < 0)
> - return rc;
> - set_pte_at(info->vma->vm_mm, addr, ptep, pte);
> + rc = map_foreign_page(pfn, *info->fgmfn, info->domid);
> + *info->err_ptr++ = rc;
> + if (!rc) {
> + set_pte_at(info->vma->vm_mm, addr, ptep, pte);
> + info->mapped++;
> + }
> + info->fgmfn++;
>
> return 0;
> }
>
> -int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
> +int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
> unsigned long addr,
> - xen_pfn_t gfn, int nr,
> - pgprot_t prot, unsigned domid,
> + xen_pfn_t *mfn, int nr,
> + int *err_ptr, pgprot_t prot,
> + unsigned domid,
> struct page **pages)
> {
> int err;
> struct remap_data data;
> -
> - /* TBD: Batching, current sole caller only does page at a time */
> - if (nr > 1)
> - return -EINVAL;
> -
> - data.fgmfn = gfn;
> - data.prot = prot;
> + unsigned long range = nr << PAGE_SHIFT;
> +
> + /* Kept here for the purpose of making sure code doesn't break
> + x86 PVOPS */
> + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
> +
> + data.fgmfn = mfn;
> + data.prot = prot;
> data.domid = domid;
> - data.vma = vma;
> - data.index = 0;
> + data.vma = vma;
> data.pages = pages;
> - err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
> + data.index = 0;
> + data.err_ptr = err_ptr;
> + data.mapped = 0;
> +
> + err = apply_to_page_range(vma->vm_mm, addr, range,
> remap_pte_fn, &data);
> - return err;
> + return err < 0 ? err : data.mapped;
> }
> -EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_range);
> +EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array);
>
> int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
> int nr, struct page **pages)
> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> index 72fcb11..a1a1674 100644
> --- a/include/xen/xen-ops.h
> +++ b/include/xen/xen-ops.h
> @@ -27,18 +27,55 @@ int xen_create_contiguous_region(phys_addr_t pstart,
> unsigned int order,
> void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
>
> struct vm_area_struct;
> +
> +/*
> + * xen_remap_domain_mfn_array() - map an array of foreign frames
> + * @vma: VMA to map the pages into
> + * @addr: Address at which to map the pages
> + * @gfn: Array of GFNs to map
> + * @nr: Number of entries in the GFN array
> + * @err_ptr: Returns per-GFN error status.
> + * @prot: page protection mask
> + * @domid: Domain owning the pages
> + * @pages: Array of pages if this domain has an auto-translated physmap
> + *
> + * @gfn and @err_ptr may point to the same buffer, the GFNs will be
> + * overwritten by the error codes after they are mapped.
> + *
> + * Returns the number of successfully mapped frames, or a -ve error
> + * code.
> + */
> +int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
> + unsigned long addr,
> + xen_pfn_t *gfn, int nr,
> + int *err_ptr, pgprot_t prot,
> + unsigned domid,
> + struct page **pages);
> +
> +/* xen_remap_domain_mfn_range() - map a range of foreign frames
> + * @vma: VMA to map the pages into
> + * @addr: Address at which to map the pages
> + * @gfn: First GFN to map.
> + * @nr: Number of frames to map
> + * @prot: page protection mask
> + * @domid: Domain owning the pages
> + * @pages: Array of pages if this domain has an auto-translated physmap
> + *
> + * Returns the number of successfully mapped frames, or a -ve error
> + * code.
> + */
> int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
> unsigned long addr,
> - xen_pfn_t mfn, int nr,
> + xen_pfn_t gfn, int nr,
> pgprot_t prot, unsigned domid,
> struct page **pages);
> int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
> int numpgs, struct page **pages);
> -int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
> +int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
> unsigned long addr,
> - xen_pfn_t gfn, int nr,
> - pgprot_t prot,
> - unsigned domid,
> + xen_pfn_t *gfn, int nr,
> + int *err_ptr, pgprot_t prot,
> + unsigned domid,
> struct page **pages);
> int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
> int nr, struct page **pages);
> --
> 1.7.10.4
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |