[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl



David,
The patch looks functionally ok, but I still have two lingering concerns:
- the hideous casting of mfn into err
- why not signal paged out frames for V1

Rather than keep writing English, I wrote some C :)

I also took the liberty of including your Signed-off-by. David & Konrad, let me 
know what you think; once we settle on either version we can move on to unit 
testing this.

Thanks
Andres

commit 3c0c619f11a26b7bc3f12a1c477cf969c25de231
Author: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Date:   Thu Aug 30 12:23:33 2012 -0400

    xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
    
    PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
    field for reporting the error code for every frame that could not be
    mapped.  libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
    
    Also expand PRIVCMD_MMAPBATCH to return the appropriate error-encoding
    top nibble in the mfn array.
    
    Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
    Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 85226cb..6562e29 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
  */
 static int gather_array(struct list_head *pagelist,
                        unsigned nelem, size_t size,
-                       void __user *data)
+                       const void __user *data)
 {
        unsigned pageidx;
        void *pagedata;
@@ -246,20 +246,54 @@ struct mmap_batch_state {
        domid_t domain;
        unsigned long va;
        struct vm_area_struct *vma;
+       /* A tristate: 
+        *      0 for no errors
+        *      1 if at least one error has happened (and no
+        *          -ENOENT errors have happened)
+        *      -ENOENT if at least 1 -ENOENT has happened.
+        */
        int err;
 
-       xen_pfn_t __user *user;
+       xen_pfn_t __user *user_mfn;
+       int __user *user_err;
 };
 
 static int mmap_batch_fn(void *data, void *state)
 {
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;
+       int ret;
+
+       ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
+                                        st->vma->vm_page_prot, st->domain);
+       if (ret < 0) {
+               /*
+                * V2 provides a user-space (pre-checked for access) user_err
+                * pointer, in which we store the individual map error codes.
+                * 
+                * V1 encodes the error codes in the 32bit top nibble of the 
+                * mfn (with its known limitations vis-a-vis 64 bit callers).
+                * 
+                * In either case, global state.err is zero unless one or more
+                * individual maps fail with -ENOENT, in which case it is
+                * -ENOENT.
+                *
+                */
+               if (st->user_err)
+                       BUG_ON(__put_user(ret, st->user_err++));
+               else {
+                       xen_pfn_t nibble = (ret == -ENOENT) ?
+                                       PRIVCMD_MMAPBATCH_PAGED_ERROR :
+                                       PRIVCMD_MMAPBATCH_MFN_ERROR;
+                       *mfnp |= nibble;
+               }
 
-       if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-                                      st->vma->vm_page_prot, st->domain) < 0) {
-               *mfnp |= 0xf0000000U;
-               st->err++;
+               if (ret == -ENOENT)
+                       st->err = -ENOENT;
+               else {
+                       /* Record that at least one error has happened. */
+                       if (st->err == 0)
+                               st->err = 1;
+               }
        }
        st->va += PAGE_SIZE;
 
@@ -271,15 +305,18 @@ static int mmap_return_errors(void *data, void *state)
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;
 
-       return put_user(*mfnp, st->user++);
+       if (st->user_err == NULL)
+               return __put_user(*mfnp, st->user_mfn++);
+
+       return 0;
 }
 
 static struct vm_operations_struct privcmd_vm_ops;
 
-static long privcmd_ioctl_mmap_batch(void __user *udata)
+static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 {
        int ret;
-       struct privcmd_mmapbatch m;
+       struct privcmd_mmapbatch_v2 m;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long nr_pages;
@@ -289,15 +326,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
        if (!xen_initial_domain())
                return -EPERM;
 
-       if (copy_from_user(&m, udata, sizeof(m)))
-               return -EFAULT;
+       switch (version) {
+       case 1:
+               if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
+                       return -EFAULT;
+               /* Returns per-frame error in m.arr. */
+               m.err = NULL;
+               if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
+                       return -EFAULT;
+               break;
+       case 2:
+               if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
+                       return -EFAULT;
+               /* Returns per-frame error code in m.err. */
+               if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
+                       return -EFAULT;
+               break;
+       default:
+               return -EINVAL;
+       }
 
        nr_pages = m.num;
        if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
                return -EINVAL;
 
-       ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
-                          m.arr);
+       ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
 
        if (ret || list_empty(&pagelist))
                goto out;
@@ -315,22 +368,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
                goto out;
        }
 
-       state.domain = m.dom;
-       state.vma = vma;
-       state.va = m.addr;
-       state.err = 0;
+       state.domain    = m.dom;
+       state.vma       = vma;
+       state.va        = m.addr;
+       state.err       = 0;
+       state.user_err  = m.err;
 
-       ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-                            &pagelist, mmap_batch_fn, &state);
+       /* mmap_batch_fn guarantees ret == 0 */
+       BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
+                            &pagelist, mmap_batch_fn, &state));
 
        up_write(&mm->mmap_sem);
 
-       if (state.err > 0) {
-               state.user = m.arr;
-               ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-                              &pagelist,
-                              mmap_return_errors, &state);
-       }
+       if (state.err) {
+               if (state.err == -ENOENT)
+                       ret = -ENOENT;
+               /* V1 still needs to write back nibbles. */
+               if (m.err == NULL)
+               {
+                       int efault;
+                       state.user_mfn = (xen_pfn_t *)m.arr;
+                       efault = traverse_pages(m.num, sizeof(xen_pfn_t),
+                                                &pagelist,
+                                                mmap_return_errors, &state);
+                       if (efault)
+                               ret = efault;
+               }
+       } else if (m.err)
+               __clear_user(m.err, m.num * sizeof(*m.err));
 
 out:
        free_page_list(&pagelist);
@@ -354,7 +419,11 @@ static long privcmd_ioctl(struct file *file,
                break;
 
        case IOCTL_PRIVCMD_MMAPBATCH:
-               ret = privcmd_ioctl_mmap_batch(udata);
+               ret = privcmd_ioctl_mmap_batch(udata, 1);
+               break;
+
+       case IOCTL_PRIVCMD_MMAPBATCH_V2:
+               ret = privcmd_ioctl_mmap_batch(udata, 2);
                break;
 
        default:
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
index 45c1aa1..a853168 100644
--- a/include/xen/privcmd.h
+++ b/include/xen/privcmd.h
@@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
        int num;     /* number of pages to populate */
        domid_t dom; /* target domain */
        __u64 addr;  /* virtual address */
-       xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
+       xen_pfn_t __user *arr; /* array of mfns - or'd with
+                                 PRIVCMD_MMAPBATCH_*_ERROR on err */
+};
+
+#define PRIVCMD_MMAPBATCH_MFN_ERROR     0xf0000000U
+#define PRIVCMD_MMAPBATCH_PAGED_ERROR   0x80000000U
+
+struct privcmd_mmapbatch_v2 {
+       unsigned int num; /* number of pages to populate */
+       domid_t dom;      /* target domain */
+       __u64 addr;       /* virtual address */
+       const xen_pfn_t __user *arr; /* array of mfns */
+       int __user *err;  /* array of error codes */
 };
 
 /*
  * @cmd: IOCTL_PRIVCMD_HYPERCALL
  * @arg: &privcmd_hypercall_t
  * Return: Value returned from execution of the specified hypercall.
+ *
+ * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
+ * @arg: &struct privcmd_mmapbatch_v2
+ * Return: 0 on success (i.e., arg->err contains valid error codes for
+ * each frame).  On an error other than a failed frame remap, -1 is
+ * returned and errno is set to EINVAL, EFAULT etc.  As an exception,
+ * if the operation was otherwise successful but any frame failed with
+ * -ENOENT, then -1 is returned and errno is set to ENOENT.
  */
 #define IOCTL_PRIVCMD_HYPERCALL                                        \
        _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
@@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
        _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
 #define IOCTL_PRIVCMD_MMAPBATCH                                        \
        _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
+#define IOCTL_PRIVCMD_MMAPBATCH_V2                             \
+       _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
 
 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
 
On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:

> From: David Vrabel <david.vrabel@xxxxxxxxxx>
> 
> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> field for reporting the error code for every frame that could not be
> mapped.  libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
> 
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
> drivers/xen/privcmd.c |   99 +++++++++++++++++++++++++++++++++++++++---------
> include/xen/privcmd.h |   23 +++++++++++-
> 2 files changed, 102 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index ccee0f1..c0e89e7 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
>  */
> static int gather_array(struct list_head *pagelist,
>                       unsigned nelem, size_t size,
> -                     void __user *data)
> +                     const void __user *data)
> {
>       unsigned pageidx;
>       void *pagedata;
> @@ -248,18 +248,37 @@ struct mmap_batch_state {
>       struct vm_area_struct *vma;
>       int err;
> 
> -     xen_pfn_t __user *user;
> +     xen_pfn_t __user *user_mfn;
> +     int __user *user_err;
> };
> 
> static int mmap_batch_fn(void *data, void *state)
> {
>       xen_pfn_t *mfnp = data;
>       struct mmap_batch_state *st = state;
> +     int ret;
> 
> -     if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> -                                    st->vma->vm_page_prot, st->domain) < 0) {
> -             *mfnp |= 0xf0000000U;
> -             st->err++;
> +     ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> +                                      st->vma->vm_page_prot, st->domain);
> +     if (ret < 0) {
> +             /*
> +              * Error reporting is a mess but userspace relies on
> +              * it behaving this way.
> +              *
> +              * V2 needs to a) return the result of each frame's
> +              * remap; and b) return -ENOENT if any frame failed
> +              * with -ENOENT.
> +              *
> +              * In this first pass the error code is saved by
> +              * overwriting the mfn and an error is indicated in
> +              * st->err.
> +              *
> +              * The second pass by mmap_return_errors() will write
> +              * the error codes to user space and get the right
> +              * ioctl return value.
> +              */
> +             *(int *)mfnp = ret;
> +             st->err = ret;
>       }
>       st->va += PAGE_SIZE;
> 
> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
> {
>       xen_pfn_t *mfnp = data;
>       struct mmap_batch_state *st = state;
> +     int ret;
> +
> +     if (st->user_err) {
> +             int err = *(int *)mfnp;
> +
> +             if (err == -ENOENT)
> +                     st->err = err;
> 
> -     return put_user(*mfnp, st->user++);
> +             return __put_user(err, st->user_err++);
> +     } else {
> +             xen_pfn_t mfn;
> +
> +             ret = __get_user(mfn, st->user_mfn);
> +             if (ret < 0)
> +                     return ret;
> +
> +             mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
> +             return __put_user(mfn, st->user_mfn++);
> +     }
> }
> 
> static struct vm_operations_struct privcmd_vm_ops;
> 
> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> {
>       int ret;
> -     struct privcmd_mmapbatch m;
> +     struct privcmd_mmapbatch_v2 m;
>       struct mm_struct *mm = current->mm;
>       struct vm_area_struct *vma;
>       unsigned long nr_pages;
> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>       if (!xen_initial_domain())
>               return -EPERM;
> 
> -     if (copy_from_user(&m, udata, sizeof(m)))
> -             return -EFAULT;
> +     switch (version) {
> +     case 1:
> +             if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> +                     return -EFAULT;
> +             /* Returns per-frame error in m.arr. */
> +             m.err = NULL;
> +             if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> +                     return -EFAULT;
> +             break;
> +     case 2:
> +             if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> +                     return -EFAULT;
> +             /* Returns per-frame error code in m.err. */
> +             if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> +                     return -EFAULT;
> +             break;
> +     default:
> +             return -EINVAL;
> +     }
> 
>       nr_pages = m.num;
>       if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
>               return -EINVAL;
> 
> -     ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> -                        m.arr);
> +     ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
> 
>       if (ret || list_empty(&pagelist))
>               goto out;
> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> 
>       up_write(&mm->mmap_sem);
> 
> -     if (state.err > 0) {
> -             state.user = m.arr;
> +     if (state.err) {
> +             state.err = 0;
> +             state.user_mfn = (xen_pfn_t *)m.arr;
> +             state.user_err = m.err;
>               ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> -                            &pagelist,
> -                            mmap_return_errors, &state);
> -     }
> +                                  &pagelist,
> +                                  mmap_return_errors, &state);
> +             if (ret >= 0)
> +                     ret = state.err;
> +     } else if (m.err)
> +             __clear_user(m.err, m.num * sizeof(*m.err));
> 
> out:
>       free_page_list(&pagelist);
> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
>               break;
> 
>       case IOCTL_PRIVCMD_MMAPBATCH:
> -             ret = privcmd_ioctl_mmap_batch(udata);
> +             ret = privcmd_ioctl_mmap_batch(udata, 1);
> +             break;
> +
> +     case IOCTL_PRIVCMD_MMAPBATCH_V2:
> +             ret = privcmd_ioctl_mmap_batch(udata, 2);
>               break;
> 
>       default:
> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> index 17857fb..f60d75c 100644
> --- a/include/xen/privcmd.h
> +++ b/include/xen/privcmd.h
> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
>       int num;     /* number of pages to populate */
>       domid_t dom; /* target domain */
>       __u64 addr;  /* virtual address */
> -     xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> +     xen_pfn_t __user *arr; /* array of mfns - or'd with
> +                               PRIVCMD_MMAPBATCH_MFN_ERROR on err */
> +};
> +
> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> +
> +struct privcmd_mmapbatch_v2 {
> +     unsigned int num; /* number of pages to populate */
> +     domid_t dom;      /* target domain */
> +     __u64 addr;       /* virtual address */
> +     const xen_pfn_t __user *arr; /* array of mfns */
> +     int __user *err;  /* array of error codes */
> };
> 
> /*
>  * @cmd: IOCTL_PRIVCMD_HYPERCALL
>  * @arg: &privcmd_hypercall_t
>  * Return: Value returned from execution of the specified hypercall.
> + *
> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> + * @arg: &struct privcmd_mmapbatch_v2
> + * Return: 0 on success (i.e., arg->err contains valid error codes for
> + * each frame).  On an error other than a failed frame remap, -1 is
> + * returned and errno is set to EINVAL, EFAULT etc.  As an exception,
> + * if the operation was otherwise successful but any frame failed with
> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
>  */
> #define IOCTL_PRIVCMD_HYPERCALL                                       \
>       _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
>       _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> #define IOCTL_PRIVCMD_MMAPBATCH                                       \
>       _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> +#define IOCTL_PRIVCMD_MMAPBATCH_V2                           \
> +     _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
> 
> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> -- 
> 1.7.2.5
> 


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.