Re: [Xen-devel] [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
David,

The patch looks functionally ok, but I still have two lingering concerns:
- the hideous casting of mfn into err
- why not signal paged out frames for V1

Rather than keep writing English, I wrote some C :) And took the liberty to
include your signed-off. David & Konrad, let me know what you think, and once
we settle on either version we can move into unit testing this.

Thanks
Andres

commit 3c0c619f11a26b7bc3f12a1c477cf969c25de231
Author: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Date:   Thu Aug 30 12:23:33 2012 -0400

    xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl

    PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional field
    for reporting the error code for every frame that could not be mapped.
    libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.

    Also expand PRIVCMD_MMAPBATCH to return the appropriate error-encoding
    top nibble in the mfn array.

    Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
    Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 85226cb..6562e29 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
  */
 static int gather_array(struct list_head *pagelist,
 			unsigned nelem, size_t size,
-			void __user *data)
+			const void __user *data)
 {
 	unsigned pageidx;
 	void *pagedata;
@@ -246,20 +246,54 @@ struct mmap_batch_state {
 	domid_t domain;
 	unsigned long va;
 	struct vm_area_struct *vma;
+	/* A tristate:
+	 *      0 for no errors
+	 *      1 if at least one error has happened (and no
+	 *          -ENOENT errors have happened)
+	 *      -ENOENT if at least 1 -ENOENT has happened.
+	 */
 	int err;

-	xen_pfn_t __user *user;
+	xen_pfn_t __user *user_mfn;
+	int __user *user_err;
 };

 static int mmap_batch_fn(void *data, void *state)
 {
 	xen_pfn_t *mfnp = data;
 	struct mmap_batch_state *st = state;
+	int ret;
+
+	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
+					 st->vma->vm_page_prot, st->domain);
+	if (ret < 0) {
+		/*
+		 * V2 provides a user-space (pre-checked for access) user_err
+		 * pointer, in which we store the individual map error codes.
+		 *
+		 * V1 encodes the error codes in the 32bit top nibble of the
+		 * mfn (with its known limitations vis-a-vis 64 bit callers).
+		 *
+		 * In either case, global state.err is zero unless one or more
+		 * individual maps fail with -ENOENT, in which case it is -ENOENT.
+		 *
+		 */
+		if (st->user_err)
+			BUG_ON(__put_user(ret, st->user_err++));
+		else {
+			xen_pfn_t nibble = (ret == -ENOENT) ?
+					   PRIVCMD_MMAPBATCH_PAGED_ERROR :
+					   PRIVCMD_MMAPBATCH_MFN_ERROR;
+			*mfnp |= nibble;
+		}

-	if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-				       st->vma->vm_page_prot, st->domain) < 0) {
-		*mfnp |= 0xf0000000U;
-		st->err++;
+		if (ret == -ENOENT)
+			st->err = -ENOENT;
+		else {
+			/* Record that at least one error has happened. */
+			if (st->err == 0)
+				st->err = 1;
+		}
 	}
 	st->va += PAGE_SIZE;

@@ -271,15 +305,18 @@ static int mmap_return_errors(void *data, void *state)
 	xen_pfn_t *mfnp = data;
 	struct mmap_batch_state *st = state;

-	return put_user(*mfnp, st->user++);
+	if (st->user_err == NULL)
+		return __put_user(*mfnp, st->user_mfn++);
+
+	return 0;
 }

 static struct vm_operations_struct privcmd_vm_ops;

-static long privcmd_ioctl_mmap_batch(void __user *udata)
+static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 {
 	int ret;
-	struct privcmd_mmapbatch m;
+	struct privcmd_mmapbatch_v2 m;
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long nr_pages;
@@ -289,15 +326,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
 	if (!xen_initial_domain())
 		return -EPERM;

-	if (copy_from_user(&m, udata, sizeof(m)))
-		return -EFAULT;
+	switch (version) {
+	case 1:
+		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
+			return -EFAULT;
+		/* Returns per-frame error in m.arr. */
+		m.err = NULL;
+		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
+			return -EFAULT;
+		break;
+	case 2:
+		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
+			return -EFAULT;
+		/* Returns per-frame error code in m.err. */
+		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
+			return -EFAULT;
+		break;
+	default:
+		return -EINVAL;
+	}

 	nr_pages = m.num;
 	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
 		return -EINVAL;

-	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
-			   m.arr);
+	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

 	if (ret || list_empty(&pagelist))
 		goto out;

@@ -315,22 +368,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
 		goto out;
 	}

-	state.domain = m.dom;
-	state.vma = vma;
-	state.va = m.addr;
-	state.err = 0;
+	state.domain   = m.dom;
+	state.vma      = vma;
+	state.va       = m.addr;
+	state.err      = 0;
+	state.user_err = m.err;

-	ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-			     &pagelist, mmap_batch_fn, &state);
+	/* mmap_batch_fn guarantees ret == 0 */
+	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
+			      &pagelist, mmap_batch_fn, &state));

 	up_write(&mm->mmap_sem);

-	if (state.err > 0) {
-		state.user = m.arr;
-		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-				     &pagelist,
-				     mmap_return_errors, &state);
-	}
+	if (state.err) {
+		if (state.err == -ENOENT)
+			ret = -ENOENT;
+		/* V1 still needs to write back nibbles. */
+		if (m.err == NULL)
+		{
+			int efault;
+			state.user_mfn = (xen_pfn_t *)m.arr;
+			efault = traverse_pages(m.num, sizeof(xen_pfn_t),
+						&pagelist,
+						mmap_return_errors, &state);
+			if (efault)
+				ret = efault;
+		}
+	} else if (m.err)
+		__clear_user(m.err, m.num * sizeof(*m.err));

 out:
 	free_page_list(&pagelist);

@@ -354,7 +419,11 @@ static long privcmd_ioctl(struct file *file,
 		break;

 	case IOCTL_PRIVCMD_MMAPBATCH:
-		ret = privcmd_ioctl_mmap_batch(udata);
+		ret = privcmd_ioctl_mmap_batch(udata, 1);
+		break;
+
+	case IOCTL_PRIVCMD_MMAPBATCH_V2:
+		ret = privcmd_ioctl_mmap_batch(udata, 2);
 		break;

 	default:
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
index 45c1aa1..a853168 100644
--- a/include/xen/privcmd.h
+++ b/include/xen/privcmd.h
@@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
 	int num;     /* number of pages to populate */
 	domid_t dom; /* target domain */
 	__u64 addr;  /* virtual address */
-	xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
+	xen_pfn_t __user *arr; /* array of mfns - or'd with
+				  PRIVCMD_MMAPBATCH_*_ERROR on err */
+};
+
+#define PRIVCMD_MMAPBATCH_MFN_ERROR	0xf0000000U
+#define PRIVCMD_MMAPBATCH_PAGED_ERROR	0x80000000U
+
+struct privcmd_mmapbatch_v2 {
+	unsigned int num; /* number of pages to populate */
+	domid_t dom;      /* target domain */
+	__u64 addr;       /* virtual address */
+	const xen_pfn_t __user *arr; /* array of mfns */
+	int __user *err;  /* array of error codes */
 };

 /*
  * @cmd: IOCTL_PRIVCMD_HYPERCALL
  * @arg: &privcmd_hypercall_t
  * Return: Value returned from execution of the specified hypercall.
+ *
+ * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
+ * @arg: &struct privcmd_mmapbatch_v2
+ * Return: 0 on success (i.e., arg->err contains valid error codes for
+ * each frame). On an error other than a failed frame remap, -1 is
+ * returned and errno is set to EINVAL, EFAULT etc. As an exception,
+ * if the operation was otherwise successful but any frame failed with
+ * -ENOENT, then -1 is returned and errno is set to ENOENT.
  */
 #define IOCTL_PRIVCMD_HYPERCALL					\
 	_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
@@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
 	_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
 #define IOCTL_PRIVCMD_MMAPBATCH					\
 	_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
+#define IOCTL_PRIVCMD_MMAPBATCH_V2				\
+	_IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))

 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */

On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:

> From: David Vrabel <david.vrabel@xxxxxxxxxx>
>
> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> field for reporting the error code for every frame that could not be
> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
>  drivers/xen/privcmd.c |   99 +++++++++++++++++++++++++++++++++++++++---------
>  include/xen/privcmd.h |   23 +++++++++++-
>  2 files changed, 102 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index ccee0f1..c0e89e7 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
>   */
>  static int gather_array(struct list_head *pagelist,
>  			unsigned nelem, size_t size,
> -			void __user *data)
> +			const void __user *data)
>  {
>  	unsigned pageidx;
>  	void *pagedata;
> @@ -248,18 +248,37 @@ struct mmap_batch_state {
>  	struct vm_area_struct *vma;
>  	int err;
>
> -	xen_pfn_t __user *user;
> +	xen_pfn_t __user *user_mfn;
> +	int __user *user_err;
>  };
>
>  static int mmap_batch_fn(void *data, void *state)
>  {
>  	xen_pfn_t *mfnp = data;
>  	struct mmap_batch_state *st = state;
> +	int ret;
>
> -	if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> -				       st->vma->vm_page_prot, st->domain) < 0) {
> -		*mfnp |= 0xf0000000U;
> -		st->err++;
> +	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> +					 st->vma->vm_page_prot, st->domain);
> +	if (ret < 0) {
> +		/*
> +		 * Error reporting is a mess but userspace relies on
> +		 * it behaving this way.
> +		 *
> +		 * V2 needs to a) return the result of each frame's
> +		 * remap; and b) return -ENOENT if any frame failed
> +		 * with -ENOENT.
> +		 *
> +		 * In this first pass the error code is saved by
> +		 * overwriting the mfn and an error is indicated in
> +		 * st->err.
> +		 *
> +		 * The second pass by mmap_return_errors() will write
> +		 * the error codes to user space and get the right
> +		 * ioctl return value.
> +		 */
> +		*(int *)mfnp = ret;
> +		st->err = ret;
>  	}
>  	st->va += PAGE_SIZE;
>
> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
>  {
>  	xen_pfn_t *mfnp = data;
>  	struct mmap_batch_state *st = state;
> +	int ret;
> +
> +	if (st->user_err) {
> +		int err = *(int *)mfnp;
> +
> +		if (err == -ENOENT)
> +			st->err = err;
>
> -	return put_user(*mfnp, st->user++);
> +		return __put_user(err, st->user_err++);
> +	} else {
> +		xen_pfn_t mfn;
> +
> +		ret = __get_user(mfn, st->user_mfn);
> +		if (ret < 0)
> +			return ret;
> +
> +		mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
> +		return __put_user(mfn, st->user_mfn++);
> +	}
>  }
>
>  static struct vm_operations_struct privcmd_vm_ops;
>
> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
>  {
>  	int ret;
> -	struct privcmd_mmapbatch m;
> +	struct privcmd_mmapbatch_v2 m;
>  	struct mm_struct *mm = current->mm;
>  	struct vm_area_struct *vma;
>  	unsigned long nr_pages;
> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>  	if (!xen_initial_domain())
>  		return -EPERM;
>
> -	if (copy_from_user(&m, udata, sizeof(m)))
> -		return -EFAULT;
> +	switch (version) {
> +	case 1:
> +		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> +			return -EFAULT;
> +		/* Returns per-frame error in m.arr. */
> +		m.err = NULL;
> +		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> +			return -EFAULT;
> +		break;
> +	case 2:
> +		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> +			return -EFAULT;
> +		/* Returns per-frame error code in m.err. */
> +		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> +			return -EFAULT;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
>
>  	nr_pages = m.num;
>  	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
>  		return -EINVAL;
>
> -	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> -			   m.arr);
> +	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>
>  	if (ret || list_empty(&pagelist))
>  		goto out;
> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>
>  	up_write(&mm->mmap_sem);
>
> -	if (state.err > 0) {
> -		state.user = m.arr;
> +	if (state.err) {
> +		state.err = 0;
> +		state.user_mfn = (xen_pfn_t *)m.arr;
> +		state.user_err = m.err;
>  		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> -				     &pagelist,
> -				     mmap_return_errors, &state);
> -	}
> +				     &pagelist,
> +				     mmap_return_errors, &state);
> +		if (ret >= 0)
> +			ret = state.err;
> +	} else if (m.err)
> +		__clear_user(m.err, m.num * sizeof(*m.err));
>
>  out:
>  	free_page_list(&pagelist);
> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
>  		break;
>
>  	case IOCTL_PRIVCMD_MMAPBATCH:
> -		ret = privcmd_ioctl_mmap_batch(udata);
> +		ret = privcmd_ioctl_mmap_batch(udata, 1);
> +		break;
> +
> +	case IOCTL_PRIVCMD_MMAPBATCH_V2:
> +		ret = privcmd_ioctl_mmap_batch(udata, 2);
>  		break;
>
>  	default:
> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> index 17857fb..f60d75c 100644
> --- a/include/xen/privcmd.h
> +++ b/include/xen/privcmd.h
> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
>  	int num;     /* number of pages to populate */
>  	domid_t dom; /* target domain */
>  	__u64 addr;  /* virtual address */
> -	xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> +	xen_pfn_t __user *arr; /* array of mfns - or'd with
> +				  PRIVCMD_MMAPBATCH_MFN_ERROR on err */
> +};
> +
> +#define PRIVCMD_MMAPBATCH_MFN_ERROR	0xf0000000U
> +
> +struct privcmd_mmapbatch_v2 {
> +	unsigned int num; /* number of pages to populate */
> +	domid_t dom;      /* target domain */
> +	__u64 addr;       /* virtual address */
> +	const xen_pfn_t __user *arr; /* array of mfns */
> +	int __user *err;  /* array of error codes */
>  };
>
>  /*
>   * @cmd: IOCTL_PRIVCMD_HYPERCALL
>   * @arg: &privcmd_hypercall_t
>   * Return: Value returned from execution of the specified hypercall.
> + *
> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> + * @arg: &struct privcmd_mmapbatch_v2
> + * Return: 0 on success (i.e., arg->err contains valid error codes for
> + * each frame). On an error other than a failed frame remap, -1 is
> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
> + * if the operation was otherwise successful but any frame failed with
> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
>   */
>  #define IOCTL_PRIVCMD_HYPERCALL					\
>  	_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
>  	_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
>  #define IOCTL_PRIVCMD_MMAPBATCH				\
>  	_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> +#define IOCTL_PRIVCMD_MMAPBATCH_V2				\
> +	_IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>
>  #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> --
> 1.7.2.5
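As a rough illustration of how the V2 interface discussed above would be consumed, here is a user-space sketch of a caller driving IOCTL_PRIVCMD_MMAPBATCH_V2 and reading back the per-frame error array. It is not part of either patch: the map_foreign_frames() helper, the libxc-style pattern of mmap()ing an already-open privcmd fd to reserve the destination range, and the 4 KiB page-size constant are assumptions made for the example; the structure layout and the ENOENT return convention come from the privcmd.h hunks quoted above.

/*
 * Illustrative only -- not part of either patch.  Assumes a 4 KiB page
 * size, an already-open fd for the privcmd character device, and that
 * the struct privcmd_mmapbatch_v2 / IOCTL_PRIVCMD_MMAPBATCH_V2
 * definitions from the patched include/xen/privcmd.h (plus xen_pfn_t
 * and domid_t from the Xen public headers) are visible to user space.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <xen/privcmd.h>

#define XC_PAGE_SIZE 4096	/* assumed page size */

/*
 * Map 'num' foreign frames of domain 'dom' through the privcmd fd.
 * 'errs' must have room for one int per frame.  Returns the mapped
 * virtual address, or NULL if the batch as a whole failed.
 */
static void *map_foreign_frames(int fd, domid_t dom,
				const xen_pfn_t *mfns, int *errs,
				unsigned int num)
{
	struct privcmd_mmapbatch_v2 batch;
	void *addr;
	unsigned int i;

	/* Reserve a destination VA range backed by the privcmd device. */
	addr = mmap(NULL, (size_t)num * XC_PAGE_SIZE,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED)
		return NULL;

	batch.num  = num;
	batch.dom  = dom;
	batch.addr = (__u64)(unsigned long)addr;
	batch.arr  = mfns;	/* with V2 the mfn array is input only */
	batch.err  = errs;	/* one error code per frame on return */

	if (ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch) < 0 &&
	    errno != ENOENT) {
		/* EINVAL, EFAULT, ...: the batch itself failed. */
		munmap(addr, (size_t)num * XC_PAGE_SIZE);
		return NULL;
	}

	/*
	 * Return value 0, or -1 with errno == ENOENT (some frames paged
	 * out): either way errs[] now holds a per-frame result, so the
	 * caller can page in and retry exactly the frames that need it.
	 */
	for (i = 0; i < num; i++)
		if (errs[i])
			fprintf(stderr, "frame %u not mapped: %d\n",
				i, errs[i]);

	return addr;
}

With V1, by contrast, a caller has to recover the same information by testing the top nibble of each returned mfn (PRIVCMD_MMAPBATCH_PAGED_ERROR vs. PRIVCMD_MMAPBATCH_MFN_ERROR in the variant above), which is exactly the 32-bit encoding limitation the cover note raises for 64-bit callers.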