
Re: [Xen-devel] [PATCH v5 4/4] xen: introduce XENMEM_exchange_and_pin and XENMEM_unpin



On Mon, 2013-09-09 at 17:06 +0100, Stefano Stabellini wrote:
> Introduce two new hypercalls XENMEM_exchange_and_pin and XENMEM_unpin.
> 
> XENMEM_exchange_and_pin is like XENMEM_exchange, but it also pins the
> new pages: their p2m mappings are guaranteed not to change until
> XENMEM_unpin is called.  XENMEM_exchange_and_pin returns the DMA frame
> numbers of the new pages to the caller, even for an autotranslate
> guest.
> 
> The only effect of XENMEM_unpin is to "unpin" the previously
> pinned pages. Afterwards the p2m mappings can be transparently changed by
> the hypervisor as normal. The memory remains accessible from the guest.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>

This needs input from the x86 and common code maintainers, whom you've
not CCed. I've added them.
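
For the benefit of the added reviewers, my understanding of the
intended guest-side usage is roughly the following. This is an
untested sketch assuming Linux-style guest headers and the usual
HYPERVISOR_memory_op() wrapper; the helper name and the 32-bit
address mask are made up for illustration:

    /* Swap the page at gpfn for a fresh, pinned, DMA-reachable one. */
    static int pin_page_for_dma(xen_pfn_t gpfn, xen_pfn_t *dma_frame)
    {
        xen_pfn_t in_frame = gpfn, out_frame = gpfn;
        struct xen_memory_exchange exchange = {
            .in = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
            },
            .out = {
                .nr_extents   = 1,
                .extent_order = 0,
                .address_bits = 32,  /* address mask for the new page */
                .domid        = DOMID_SELF
            }
        };
        int rc;

        set_xen_guest_handle(exchange.in.extent_start, &in_frame);
        set_xen_guest_handle(exchange.out.extent_start, &out_frame);

        rc = HYPERVISOR_memory_op(XENMEM_exchange_and_pin, &exchange);
        if ( rc == 0 )
            /* The DMA frame number, even for an autotranslated guest. */
            *dma_frame = out_frame;
        return rc;
    }

with a later XENMEM_unpin, passing a struct xen_unpin whose
in.extent_start points at the gpfn, to release the pinning again.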

> 
> 
> Changes in v5:
> - memory_exchange: handle guest_physmap_pin_range failures;
> - make i an unsigned long in unpin;
> - add nr_unpinned to xen_unpin to report partial success.
> 
> Changes in v4:
> - rename XENMEM_get_dma_buf to XENMEM_exchange_and_pin;
> - rename XENMEM_get_dma_buf to XENMEM_unpin;
> - move the pinning before we copy back the mfn to the guest;
> - propagate errors returned by guest_physmap_pin_range;
> - use xen_memory_exchange_t as parameter for XENMEM_exchange_and_pin;
> - use an unsigned iterator in unpin;
> - improve the documentation of the new hypercalls;
> - add a note about out.address_bits for XENMEM_exchange.
> ---
>  xen/common/memory.c         |  141 +++++++++++++++++++++++++++++++++----------
>  xen/include/public/memory.h |   47 ++++++++++++++
>  2 files changed, 156 insertions(+), 32 deletions(-)
> 
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 4b2f311..34169c1 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -364,14 +364,14 @@ static void decrease_reservation(struct memop_args *a)
>      a->nr_done = i;
>  }
>  
> -static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
> +static long memory_exchange(int op, XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
>  {
>      struct xen_memory_exchange exch;
>      PAGE_LIST_HEAD(in_chunk_list);
>      PAGE_LIST_HEAD(out_chunk_list);
>      unsigned long in_chunk_order, out_chunk_order;
>      xen_pfn_t     gpfn, gmfn, mfn;
> -    unsigned long i, j, k = 0; /* gcc ... */
> +    unsigned long i = 0, j = 0, k = 0; /* gcc ... */
>      unsigned int  memflags = 0;
>      long          rc = 0;
>      struct domain *d;
> @@ -542,54 +542,72 @@ static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
>          /* Assign each output page to the domain. */
>          for ( j = 0; (page = page_list_remove_head(&out_chunk_list)); ++j )
>          {
> -            if ( assign_pages(d, page, exch.out.extent_order,
> -                              MEMF_no_refcount) )
> -            {
> -                unsigned long dec_count;
> -                bool_t drop_dom_ref;
> -
> -                /*
> -                 * Pages in in_chunk_list is stolen without
> -                 * decreasing the tot_pages. If the domain is dying when
> -                 * assign pages, we need decrease the count. For those pages
> -                 * that has been assigned, it should be covered by
> -                 * domain_relinquish_resources().
> -                 */
> -                dec_count = (((1UL << exch.in.extent_order) *
> -                              (1UL << in_chunk_order)) -
> -                             (j * (1UL << exch.out.extent_order)));
> -
> -                spin_lock(&d->page_alloc_lock);
> -                domain_adjust_tot_pages(d, -dec_count);
> -                drop_dom_ref = (dec_count && !d->tot_pages);
> -                spin_unlock(&d->page_alloc_lock);
> -
> -                if ( drop_dom_ref )
> -                    put_domain(d);
> -
> -                free_domheap_pages(page, exch.out.extent_order);
> -                goto dying;
> -            }
> +            unsigned long dec_count;
> +            bool_t drop_dom_ref;
>  
>              if ( __copy_from_guest_offset(&gpfn, exch.out.extent_start,
>                                            (i << out_chunk_order) + j, 1) )
>              {
>                  rc = -EFAULT;
> -                continue;
> +                goto extent_error;
>              }
>  
>              mfn = page_to_mfn(page);
>              guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
>  
> +            if ( op == XENMEM_exchange_and_pin )
> +            {
> +                if ( guest_physmap_pin_range(d, gpfn, exch.out.extent_order) )
> +                {
> +                    rc = -EFAULT;
> +                    goto extent_error_physmap;
> +                }
> +            }
> +
>              if ( !paging_mode_translate(d) )
>              {
>                  for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
>                      set_gpfn_from_mfn(mfn + k, gpfn + k);
> +            }
> +
> +            rc = assign_pages(d, page, exch.out.extent_order, MEMF_no_refcount);

This has moved after set_gpfn_from_mfn in the exchange case? Why?

Aren't we now pinning and setting the gpfn for pages which don't yet
belong to the guest?

I think any refactoring of error paths etc. would be best done in a
separate patch from the one adding the new hypercall.
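
Concretely, I'd have expected something closer to the original
ordering, along these lines (an untested sketch, not a tested patch):

            /* Give the pages to the domain first... */
            rc = assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount);
            if ( rc )
                goto extent_error;

            /* ...then establish the p2m entries and pin them, so we
             * never pin or set the gpfn of pages the guest doesn't
             * yet own. */
            mfn = page_to_mfn(page);
            guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);

            if ( op == XENMEM_exchange_and_pin &&
                 guest_physmap_pin_range(d, gpfn, exch.out.extent_order) )
            {
                rc = -EFAULT;
                goto extent_error_physmap;
            }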

> +            if ( rc )
> +                goto extent_error_physmap;
> +
> +            if ( op == XENMEM_exchange_and_pin || !paging_mode_translate(d) )
> +            {
>                  if ( __copy_to_guest_offset(exch.out.extent_start,
>                                              (i << out_chunk_order) + j,
>                                              &mfn, 1) )
>                      rc = -EFAULT;

After this we continue rather than erroring?
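
i.e. I'd have expected something like the below, although what
unwinding an already-assigned page needs here is exactly the
question (sketch only):

                if ( __copy_to_guest_offset(exch.out.extent_start,
                                            (i << out_chunk_order) + j,
                                            &mfn, 1) )
                {
                    rc = -EFAULT;
                    goto extent_error_physmap; /* or a dedicated unwind */
                }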

>              }
> +
> +            continue;
> +
> +extent_error_physmap:
> +            guest_physmap_remove_page(d, gpfn, mfn, exch.out.extent_order);
> +extent_error:
> +            /*
> +             * Pages in in_chunk_list is stolen without
> +             * decreasing the tot_pages. If the domain is dying when
> +             * assign pages, we need decrease the count. For those pages
> +             * that has been assigned, it should be covered by
> +             * domain_relinquish_resources().
> +             */
> +            dec_count = (((1UL << exch.in.extent_order) *
> +                        (1UL << in_chunk_order)) -
> +                    (j * (1UL << exch.out.extent_order)));
> +
> +            spin_lock(&d->page_alloc_lock);
> +            domain_adjust_tot_pages(d, -dec_count);
> +            drop_dom_ref = (dec_count && !d->tot_pages);
> +            spin_unlock(&d->page_alloc_lock);
> +
> +            if ( drop_dom_ref )
> +                put_domain(d);
> +
> +            free_domheap_pages(page, exch.out.extent_order);
> +            goto dying;
>          }
>          BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) );
>      }
> @@ -627,6 +645,60 @@ static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
>      return rc;
>  }
>  
> +static long unpin(XEN_GUEST_HANDLE_PARAM(xen_unpin_t) arg)
> +{
> +    int rc;
> +    unsigned long i;
> +    struct xen_unpin unpin;
> +    xen_pfn_t gpfn;
> +    struct domain *d;
> +
> +    if ( copy_from_guest(&unpin, arg, 1) )
> +        return -EFAULT;
> +
> +    /* Various sanity checks. */
> +    if ( /* Extent orders are sensible? */
> +         (unpin.in.extent_order > MAX_ORDER) ||
> +         /* Sizes of input list do not overflow a long? */
> +         ((~0UL >> unpin.in.extent_order) < unpin.in.nr_extents) )
> +        return -EFAULT;
> +
> +    if ( !guest_handle_okay(unpin.in.extent_start, unpin.in.nr_extents) )
> +        return -EFAULT;
> +
> +    d = rcu_lock_domain_by_any_id(unpin.in.domid);
> +    if ( d == NULL )
> +    {
> +        rc = -ESRCH;
> +        goto fail;
> +    }
> +
> +    for ( i = 0; i < unpin.in.nr_extents; i++ )
> +    {
> +        if ( unlikely(__copy_from_guest_offset(
> +                      &gpfn, unpin.in.extent_start, i, 1)) )
> +        {
> +            rc = -EFAULT;
> +            goto partial;
> +        }
> +
> +        rc = guest_physmap_unpin_range(d, gpfn, unpin.in.extent_order);
> +        if ( rc )
> +            goto partial;
> +    }
> +
> +    rc = 0;
> +
> +partial:
> +    unpin.nr_unpinned = i;
> +    if ( __copy_field_to_guest(arg, &unpin, nr_unpinned) )
> +        rc = -EFAULT;
> +
> + fail:
> +    rcu_unlock_domain(d);
> +    return rc;
> +}
> +
>  long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>  {
>      struct domain *d;
> @@ -715,8 +787,13 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>  
>          break;
>  
> +    case XENMEM_exchange_and_pin:
>      case XENMEM_exchange:
> -        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
> +        rc = memory_exchange(op, guest_handle_cast(arg, xen_memory_exchange_t));
> +        break;
> +
> +    case XENMEM_unpin:
> +        rc = unpin(guest_handle_cast(arg, xen_unpin_t));
>          break;
>  
>      case XENMEM_maximum_ram_page:
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 7a26dee..73fdc31 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -105,6 +105,7 @@ struct xen_memory_exchange {
>      /*
>       * [IN] Details of memory extents to be exchanged (GMFN bases).
>       * Note that @in.address_bits is ignored and unused.
> +     * @out.address_bits should contain the address mask for the new pages.
>       */
>      struct xen_memory_reservation in;
>  
> @@ -459,6 +460,52 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
>   * The zero value is appropiate.
>   */
>  
> +#define XENMEM_exchange_and_pin             26

All the other doc comments in this file precede their #define.
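
i.e. the existing convention in this header is (illustrative shape,
not an actual excerpt):

    /*
     * Doc comment describing the operation goes here, above the
     * definition it documents.
     */
    #define XENMEM_exchange_and_pin             26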

> +/*
> + * This hypercall is similar to XENMEM_exchange: it takes the same
> + * struct as an argument and it exchanges the pages passed in with a new
> + * set of pages. The new pages are going to be "pinned": it's guaranteed
> + * that their p2m mapping won't be changed until explicitly "unpinned".
> + * The content of the exchanged pages is lost.
> + * Only normal guest r/w memory can be pinned: no granted pages or
> + * ballooned pages.
> + * If return code is zero then @out.extent_list provides the DMA frame
> + * numbers of the newly-allocated memory.
> + * Returns zero on complete success, otherwise a negative error code:
> + *   -ENOSYS if not implemented
> + *   -EINVAL if the page is already pinned

Not -EBUSY?

> + *   -EFAULT if an internal error occurs

> + * On complete success then always @nr_exchanged == @in.nr_extents.  On
> + * partial success @nr_exchanged indicates how much work was done and a
> + * negative error code is returned.
> + */
> +
> +#define XENMEM_unpin             27
> +/*
> + * XENMEM_unpin unpins a set of pages, previously pinned by
> + * XENMEM_exchange_and_pin. After this call the p2m mapping of the pages can
> + * be transparently changed by the hypervisor, as usual. The pages are
> + * still accessible from the guest.
> + */
> +struct xen_unpin {
> +    /*
> +     * [IN] Details of memory extents to be unpinned (GMFN bases).
> +     * Note that @in.address_bits is ignored and unused.
> +     */
> +    struct xen_memory_reservation in;
> +    /*
> +     * [OUT] Number of input extents that were successfully unpinned.
> +     *  1. The first @nr_unpinned input extents were successfully
> +     *     unpinned.

Except if the code fails before doing any unpinning (e.g. the domain
lookup fails), then nr_unpinned is arbitrarily whatever the guest set
it to, or uninitialised (from the fail label in unpin()). There are
error codes (e.g. EFAULT) which are common to both cases, so the two
are indistinguishable.
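
One possible shape for a fix (untested sketch): zero the field
straight after copying the arguments in, and do the copy-back at the
common exit, so the failure paths that reach it report well-defined
progress:

    if ( copy_from_guest(&unpin, arg, 1) )
        return -EFAULT;

    /* Don't trust the guest-supplied value. */
    unpin.nr_unpinned = 0;

    ...

 partial:
    unpin.nr_unpinned = i;
 fail:
    /* Copy back on all paths, not just partial success. */
    if ( __copy_field_to_guest(arg, &unpin, nr_unpinned) )
        rc = -EFAULT;
    if ( d != NULL )            /* the failed lookup arrives here */
        rcu_unlock_domain(d);
    return rc;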

> +     *  2. All other input extents are untouched.
> +     *  3. If not all input exents are unpinned then the return code of this

"extents"

> +     *     command will be non-zero.
> +     */
> +    xen_ulong_t nr_unpinned;
> +};
> +typedef struct xen_unpin xen_unpin_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_unpin_t);
> +
>  #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>  
>  #endif /* __XEN_PUBLIC_MEMORY_H__ */


