[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl



On Fri, Aug 31, 2012 at 09:59:30AM -0400, Andres Lagar-Cavilla wrote:
> Re-spin of alternative patch after David's feedback.
> Thanks
> Andres

applied. fixed some whitespace issues.
> 
> commit ab351a5cef1797935b083c2f6e72800a8949c515
> Author: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
> Date:   Thu Aug 30 12:23:33 2012 -0400
> 
>     xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
>     
>     PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
>     field for reporting the error code for every frame that could not be
>     mapped.  libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>     
>     Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top 
> nibble
>     in the mfn array.
>     
>     Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
>     Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
> 
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index 85226cb..5386f20 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
>   */
>  static int gather_array(struct list_head *pagelist,
>                       unsigned nelem, size_t size,
> -                     void __user *data)
> +                     const void __user *data)
>  {
>       unsigned pageidx;
>       void *pagedata;
> @@ -246,61 +246,117 @@ struct mmap_batch_state {
>       domid_t domain;
>       unsigned long va;
>       struct vm_area_struct *vma;
> -     int err;
> -
> -     xen_pfn_t __user *user;
> +     /* A tristate: 
> +      *      0 for no errors
> +      *      1 if at least one error has happened (and no
> +      *          -ENOENT errors have happened)
> +      *      -ENOENT if at least 1 -ENOENT has happened.
> +      */
> +     int global_error;
> +     /* An array for individual errors */
> +     int *err;
> +
> +     /* User-space mfn array to store errors in the second pass for V1. */
> +     xen_pfn_t __user *user_mfn;
>  };
>  
>  static int mmap_batch_fn(void *data, void *state)
>  {
>       xen_pfn_t *mfnp = data;
>       struct mmap_batch_state *st = state;
> +     int ret;
>  
> -     if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> -                                    st->vma->vm_page_prot, st->domain) < 0) {
> -             *mfnp |= 0xf0000000U;
> -             st->err++;
> +     ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> +                                      st->vma->vm_page_prot, st->domain);
> +
> +     /* Store error code for second pass. */
> +     *(st->err++) = ret;
> +
> +     /* And see if it affects the global_error. */
> +     if (ret < 0) {
> +             if (ret == -ENOENT)
> +                     st->global_error = -ENOENT;
> +             else {
> +                     /* Record that at least one error has happened. */
> +                     if (st->global_error == 0)
> +                             st->global_error = 1;
> +             }
>       }
>       st->va += PAGE_SIZE;
>  
>       return 0;
>  }
>  
> -static int mmap_return_errors(void *data, void *state)
> +static int mmap_return_errors_v1(void *data, void *state)
>  {
>       xen_pfn_t *mfnp = data;
>       struct mmap_batch_state *st = state;
> -
> -     return put_user(*mfnp, st->user++);
> +     int err = *(st->err++);
> +
> +     /*
> +      * V1 encodes the error codes in the 32bit top nibble of the 
> +      * mfn (with its known limitations vis-a-vis 64 bit callers).
> +      */
> +     *mfnp |= (err == -ENOENT) ?
> +                             PRIVCMD_MMAPBATCH_PAGED_ERROR :
> +                             PRIVCMD_MMAPBATCH_MFN_ERROR;
> +     return __put_user(*mfnp, st->user_mfn++);
>  }
>  
>  static struct vm_operations_struct privcmd_vm_ops;
>  
> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
>  {
>       int ret;
> -     struct privcmd_mmapbatch m;
> +     struct privcmd_mmapbatch_v2 m;
>       struct mm_struct *mm = current->mm;
>       struct vm_area_struct *vma;
>       unsigned long nr_pages;
>       LIST_HEAD(pagelist);
> +     int *err_array = NULL;
>       struct mmap_batch_state state;
>  
>       if (!xen_initial_domain())
>               return -EPERM;
>  
> -     if (copy_from_user(&m, udata, sizeof(m)))
> -             return -EFAULT;
> +     switch (version) {
> +     case 1:
> +             if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> +                     return -EFAULT;
> +             /* Returns per-frame error in m.arr. */
> +             m.err = NULL;
> +             if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> +                     return -EFAULT;
> +             break;
> +     case 2:
> +             if (copy_from_user(&m, udata, sizeof(struct 
> privcmd_mmapbatch_v2)))
> +                     return -EFAULT;
> +             /* Returns per-frame error code in m.err. */
> +             if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> +                     return -EFAULT;
> +             break;
> +     default:
> +             return -EINVAL;
> +     }
>  
>       nr_pages = m.num;
>       if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
>               return -EINVAL;
>  
> -     ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> -                        m.arr);
> +     ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
> +
> +     if (ret)
> +             goto out;
> +     if (list_empty(&pagelist)) {
> +             ret = -EINVAL;
> +             goto out;
> +    }
>  
> -     if (ret || list_empty(&pagelist))
> +     err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
> +     if (err_array == NULL) {
> +             ret = -ENOMEM;
>               goto out;
> +     }
>  
>       down_write(&mm->mmap_sem);
>  
> @@ -315,24 +371,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>               goto out;
>       }
>  
> -     state.domain = m.dom;
> -     state.vma = vma;
> -     state.va = m.addr;
> -     state.err = 0;
> +     state.domain        = m.dom;
> +     state.vma           = vma;
> +     state.va            = m.addr;
> +     state.global_error  = 0;
> +     state.err           = err_array;
>  
> -     ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> -                          &pagelist, mmap_batch_fn, &state);
> +     /* mmap_batch_fn guarantees ret == 0 */
> +     BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
> +                          &pagelist, mmap_batch_fn, &state));
>  
>       up_write(&mm->mmap_sem);
>  
> -     if (state.err > 0) {
> -             state.user = m.arr;
> +     if (state.global_error && (version == 1)) {
> +             /* Write back errors in second pass. */
> +             state.user_mfn = (xen_pfn_t *)m.arr;
> +             state.err      = err_array;
>               ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> -                            &pagelist,
> -                            mmap_return_errors, &state);
> -     }
> +                                      &pagelist, mmap_return_errors_v1, 
> &state);
> +     } else
> +             ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
> +
> +    /* If we have not had any EFAULT-like global errors then set the global
> +     * error to -ENOENT if necessary. */
> +    if ((ret == 0) && (state.global_error == -ENOENT))
> +        ret = -ENOENT;
>  
>  out:
> +     kfree(err_array);
>       free_page_list(&pagelist);
>  
>       return ret;
> @@ -354,7 +420,11 @@ static long privcmd_ioctl(struct file *file,
>               break;
>  
>       case IOCTL_PRIVCMD_MMAPBATCH:
> -             ret = privcmd_ioctl_mmap_batch(udata);
> +             ret = privcmd_ioctl_mmap_batch(udata, 1);
> +             break;
> +
> +     case IOCTL_PRIVCMD_MMAPBATCH_V2:
> +             ret = privcmd_ioctl_mmap_batch(udata, 2);
>               break;
>  
>       default:
> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> index 45c1aa1..a853168 100644
> --- a/include/xen/privcmd.h
> +++ b/include/xen/privcmd.h
> @@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
>       int num;     /* number of pages to populate */
>       domid_t dom; /* target domain */
>       __u64 addr;  /* virtual address */
> -     xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> +     xen_pfn_t __user *arr; /* array of mfns - or'd with
> +                               PRIVCMD_MMAPBATCH_*_ERROR on err */
> +};
> +
> +#define PRIVCMD_MMAPBATCH_MFN_ERROR     0xf0000000U
> +#define PRIVCMD_MMAPBATCH_PAGED_ERROR   0x80000000U
> +
> +struct privcmd_mmapbatch_v2 {
> +     unsigned int num; /* number of pages to populate */
> +     domid_t dom;      /* target domain */
> +     __u64 addr;       /* virtual address */
> +     const xen_pfn_t __user *arr; /* array of mfns */
> +     int __user *err;  /* array of error codes */
>  };
>  
>  /*
>   * @cmd: IOCTL_PRIVCMD_HYPERCALL
>   * @arg: &privcmd_hypercall_t
>   * Return: Value returned from execution of the specified hypercall.
> + *
> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> + * @arg: &struct privcmd_mmapbatch_v2
> + * Return: 0 on success (i.e., arg->err contains valid error codes for
> + * each frame).  On an error other than a failed frame remap, -1 is
> + * returned and errno is set to EINVAL, EFAULT etc.  As an exception,
> + * if the operation was otherwise successful but any frame failed with
> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
>   */
>  #define IOCTL_PRIVCMD_HYPERCALL                                      \
>       _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> @@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
>       _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
>  #define IOCTL_PRIVCMD_MMAPBATCH                                      \
>       _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> +#define IOCTL_PRIVCMD_MMAPBATCH_V2                           \
> +     _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>  
>  #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> 
> On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:
> 
> > From: David Vrabel <david.vrabel@xxxxxxxxxx>
> > 
> > PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> > field for reporting the error code for every frame that could not be
> > mapped.  libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
> > 
> > Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> > ---
> > drivers/xen/privcmd.c |   99 
> > +++++++++++++++++++++++++++++++++++++++---------
> > include/xen/privcmd.h |   23 +++++++++++-
> > 2 files changed, 102 insertions(+), 20 deletions(-)
> > 
> > diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> > index ccee0f1..c0e89e7 100644
> > --- a/drivers/xen/privcmd.c
> > +++ b/drivers/xen/privcmd.c
> > @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
> >  */
> > static int gather_array(struct list_head *pagelist,
> >                     unsigned nelem, size_t size,
> > -                   void __user *data)
> > +                   const void __user *data)
> > {
> >     unsigned pageidx;
> >     void *pagedata;
> > @@ -248,18 +248,37 @@ struct mmap_batch_state {
> >     struct vm_area_struct *vma;
> >     int err;
> > 
> > -   xen_pfn_t __user *user;
> > +   xen_pfn_t __user *user_mfn;
> > +   int __user *user_err;
> > };
> > 
> > static int mmap_batch_fn(void *data, void *state)
> > {
> >     xen_pfn_t *mfnp = data;
> >     struct mmap_batch_state *st = state;
> > +   int ret;
> > 
> > -   if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> > -                                  st->vma->vm_page_prot, st->domain) < 0) {
> > -           *mfnp |= 0xf0000000U;
> > -           st->err++;
> > +   ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> > +                                    st->vma->vm_page_prot, st->domain);
> > +   if (ret < 0) {
> > +           /*
> > +            * Error reporting is a mess but userspace relies on
> > +            * it behaving this way.
> > +            *
> > +            * V2 needs to a) return the result of each frame's
> > +            * remap; and b) return -ENOENT if any frame failed
> > +            * with -ENOENT.
> > +            *
> > +            * In this first pass the error code is saved by
> > +            * overwriting the mfn and an error is indicated in
> > +            * st->err.
> > +            *
> > +            * The second pass by mmap_return_errors() will write
> > +            * the error codes to user space and get the right
> > +            * ioctl return value.
> > +            */
> > +           *(int *)mfnp = ret;
> > +           st->err = ret;
> >     }
> >     st->va += PAGE_SIZE;
> > 
> > @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
> > {
> >     xen_pfn_t *mfnp = data;
> >     struct mmap_batch_state *st = state;
> > +   int ret;
> > +
> > +   if (st->user_err) {
> > +           int err = *(int *)mfnp;
> > +
> > +           if (err == -ENOENT)
> > +                   st->err = err;
> > 
> > -   return put_user(*mfnp, st->user++);
> > +           return __put_user(err, st->user_err++);
> > +   } else {
> > +           xen_pfn_t mfn;
> > +
> > +           ret = __get_user(mfn, st->user_mfn);
> > +           if (ret < 0)
> > +                   return ret;
> > +
> > +           mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
> > +           return __put_user(mfn, st->user_mfn++);
> > +   }
> > }
> > 
> > static struct vm_operations_struct privcmd_vm_ops;
> > 
> > -static long privcmd_ioctl_mmap_batch(void __user *udata)
> > +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> > {
> >     int ret;
> > -   struct privcmd_mmapbatch m;
> > +   struct privcmd_mmapbatch_v2 m;
> >     struct mm_struct *mm = current->mm;
> >     struct vm_area_struct *vma;
> >     unsigned long nr_pages;
> > @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user 
> > *udata)
> >     if (!xen_initial_domain())
> >             return -EPERM;
> > 
> > -   if (copy_from_user(&m, udata, sizeof(m)))
> > -           return -EFAULT;
> > +   switch (version) {
> > +   case 1:
> > +           if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> > +                   return -EFAULT;
> > +           /* Returns per-frame error in m.arr. */
> > +           m.err = NULL;
> > +           if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> > +                   return -EFAULT;
> > +           break;
> > +   case 2:
> > +           if (copy_from_user(&m, udata, sizeof(struct 
> > privcmd_mmapbatch_v2)))
> > +                   return -EFAULT;
> > +           /* Returns per-frame error code in m.err. */
> > +           if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> > +                   return -EFAULT;
> > +           break;
> > +   default:
> > +           return -EINVAL;
> > +   }
> > 
> >     nr_pages = m.num;
> >     if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> >             return -EINVAL;
> > 
> > -   ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> > -                      m.arr);
> > +   ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
> > 
> >     if (ret || list_empty(&pagelist))
> >             goto out;
> > @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user 
> > *udata)
> > 
> >     up_write(&mm->mmap_sem);
> > 
> > -   if (state.err > 0) {
> > -           state.user = m.arr;
> > +   if (state.err) {
> > +           state.err = 0;
> > +           state.user_mfn = (xen_pfn_t *)m.arr;
> > +           state.user_err = m.err;
> >             ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> > -                          &pagelist,
> > -                          mmap_return_errors, &state);
> > -   }
> > +                                &pagelist,
> > +                                mmap_return_errors, &state);
> > +           if (ret >= 0)
> > +                   ret = state.err;
> > +   } else if (m.err)
> > +           __clear_user(m.err, m.num * sizeof(*m.err));
> > 
> > out:
> >     free_page_list(&pagelist);
> > @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
> >             break;
> > 
> >     case IOCTL_PRIVCMD_MMAPBATCH:
> > -           ret = privcmd_ioctl_mmap_batch(udata);
> > +           ret = privcmd_ioctl_mmap_batch(udata, 1);
> > +           break;
> > +
> > +   case IOCTL_PRIVCMD_MMAPBATCH_V2:
> > +           ret = privcmd_ioctl_mmap_batch(udata, 2);
> >             break;
> > 
> >     default:
> > diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> > index 17857fb..f60d75c 100644
> > --- a/include/xen/privcmd.h
> > +++ b/include/xen/privcmd.h
> > @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
> >     int num;     /* number of pages to populate */
> >     domid_t dom; /* target domain */
> >     __u64 addr;  /* virtual address */
> > -   xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> > +   xen_pfn_t __user *arr; /* array of mfns - or'd with
> > +                             PRIVCMD_MMAPBATCH_MFN_ERROR on err */
> > +};
> > +
> > +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> > +
> > +struct privcmd_mmapbatch_v2 {
> > +   unsigned int num; /* number of pages to populate */
> > +   domid_t dom;      /* target domain */
> > +   __u64 addr;       /* virtual address */
> > +   const xen_pfn_t __user *arr; /* array of mfns */
> > +   int __user *err;  /* array of error codes */
> > };
> > 
> > /*
> >  * @cmd: IOCTL_PRIVCMD_HYPERCALL
> >  * @arg: &privcmd_hypercall_t
> >  * Return: Value returned from execution of the specified hypercall.
> > + *
> > + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> > + * @arg: &struct privcmd_mmapbatch_v2
> > + * Return: 0 on success (i.e., arg->err contains valid error codes for
> > + * each frame).  On an error other than a failed frame remap, -1 is
> > + * returned and errno is set to EINVAL, EFAULT etc.  As an exception,
> > + * if the operation was otherwise successful but any frame failed with
> > + * -ENOENT, then -1 is returned and errno is set to ENOENT.
> >  */
> > #define IOCTL_PRIVCMD_HYPERCALL                                     \
> >     _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> > @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
> >     _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> > #define IOCTL_PRIVCMD_MMAPBATCH                                     \
> >     _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> > +#define IOCTL_PRIVCMD_MMAPBATCH_V2                         \
> > +   _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
> > 
> > #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> > -- 
> > 1.7.2.5
> > 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.