
[Xen-devel] [PATCH, RFC 1/4] linux: add new (replacement) mmap-batch ioctl



While the error indicator of IOCTL_PRIVCMD_MMAPBATCH is supposed to live
in the top nibble of each array entry (it is documented that way in
include/xen/public/privcmd.h and include/xen/compat_ioctl.h), that was
never actually the case in the 64-bit implementation. With MFNs now
possibly being 32 or more bits wide on x86-64, using bits 28-31 as the
failure indicator (and bit 31 as the paged-out indicator) is no longer
acceptable. Instead, introduce a new ioctl that reports errors through a
separate error array.
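
For illustration only (not part of the patch), a minimal user-space
caller of the new interface could look like the sketch below. The header
path, the way the privcmd file descriptor is obtained, and the
map_batch() helper are assumptions, and error handling is reduced to the
essentials.

/*
 * Hypothetical usage sketch -- not part of this patch. Assumes the new
 * definitions are reachable via <xen/public/privcmd.h> and that the
 * caller already holds an open privcmd file descriptor.
 */
#include <errno.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/public/privcmd.h>

/* Map nr frames of domain dom; per-frame status is returned in err[]. */
static void *map_batch(int fd, domid_t dom, const xen_pfn_t *mfns,
                       int *err, unsigned int nr)
{
        size_t len = (size_t)nr * sysconf(_SC_PAGESIZE);
        privcmd_mmap_batch_t m;
        void *addr;

        /* Reserve the virtual address range to be populated. */
        addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (addr == MAP_FAILED)
                return NULL;

        m.num  = nr;
        m.dom  = dom;
        m.addr = (unsigned long)addr;
        m.arr  = mfns; /* MFN array stays untouched - no error bits folded in */
        m.err  = err;  /* per-frame error codes are delivered here instead */

        /*
         * The ioctl fails with errno == ENOENT (err[] still filled in)
         * when at least one frame was paged out; other failures are fatal.
         */
        if (ioctl(fd, IOCTL_PRIVCMD_MMAP_BATCH, &m) < 0 && errno != ENOENT) {
                munmap(addr, len);
                return NULL;
        }

        return addr; /* caller checks err[i] for the individual frames */
}

Compared with the existing IOCTL_PRIVCMD_MMAPBATCH, the caller no longer
has to mask error bits out of the returned MFN array; it simply inspects
the separate err[] entries (e.g. -ENOENT for a paged-out frame).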

As usual, written against 2.6.32.3 and made to apply to the 2.6.18
tree without further testing.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

--- head-2010-01-04.orig/drivers/xen/privcmd/compat_privcmd.c   2010-01-04 13:50:00.000000000 +0100
+++ head-2010-01-04/drivers/xen/privcmd/compat_privcmd.c        2010-01-04 15:13:52.000000000 +0100
@@ -96,6 +96,56 @@ int privcmd_ioctl_32(int fd, unsigned in
 #endif
        }
                break;
+       case IOCTL_PRIVCMD_MMAP_BATCH_32: {
+               struct privcmd_mmap_batch *p;
+               struct privcmd_mmap_batch_32 *p32;
+               struct privcmd_mmap_batch_32 n32;
+#ifdef xen_pfn32_t
+               xen_pfn_t __user *arr;
+               xen_pfn32_t __user *arr32;
+               unsigned int i;
+#endif
+
+               p32 = compat_ptr(arg);
+               p = compat_alloc_user_space(sizeof(*p));
+               if (copy_from_user(&n32, p32, sizeof(n32)) ||
+                   put_user(n32.num, &p->num) ||
+                   put_user(n32.dom, &p->dom) ||
+                   put_user(n32.addr, &p->addr) ||
+                   put_user(compat_ptr(n32.err), &p->err))
+                       return -EFAULT;
+#ifdef xen_pfn32_t
+               arr = compat_alloc_user_space(n32.num * sizeof(*arr)
+                                             + sizeof(*p));
+               arr32 = compat_ptr(n32.arr);
+               for (i = 0; i < n32.num; ++i) {
+                       xen_pfn32_t mfn;
+
+                       if (get_user(mfn, arr32 + i) || put_user(mfn, arr + i))
+                               return -EFAULT;
+               }
+
+               if (put_user(arr, &p->arr))
+                       return -EFAULT;
+#else
+               if (put_user(compat_ptr(n32.arr), &p->arr))
+                       return -EFAULT;
+#endif
+
+               ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAP_BATCH, (unsigned long)p);
+
+#ifdef xen_pfn32_t
+               for (i = 0; !ret && i < n32.num; ++i) {
+                       xen_pfn_t mfn;
+
+                       if (get_user(mfn, arr + i) || put_user(mfn, arr32 + i))
+                               ret = -EFAULT;
+                       else if (mfn != (xen_pfn32_t)mfn)
+                               ret = -ERANGE;
+               }
+#endif
+       }
+               break;
        default:
                ret = -EINVAL;
                break;
--- head-2010-01-04.orig/drivers/xen/privcmd/privcmd.c  2010-01-05 11:21:42.000000000 +0100
+++ head-2010-01-04/drivers/xen/privcmd/privcmd.c       2010-01-05 11:22:28.000000000 +0100
@@ -299,6 +299,108 @@ static long privcmd_ioctl(struct file *f
        mmapbatch_out:
                list_for_each_safe(l,l2,&pagelist)
                        free_page((unsigned long)l);
+       }
+       break;
+
+       case IOCTL_PRIVCMD_MMAP_BATCH: {
+               privcmd_mmap_batch_t m;
+               struct mm_struct *mm = current->mm;
+               struct vm_area_struct *vma;
+               const xen_pfn_t __user *p;
+               xen_pfn_t *mfn;
+               unsigned long addr, nr_pages;
+               unsigned int i, nr;
+               LIST_HEAD(pagelist);
+               struct list_head *l, *l2;
+               int *err, paged_out;
+
+               if (!is_initial_xendomain())
+                       return -EPERM;
+
+               if (copy_from_user(&m, udata, sizeof(m)))
+                       return -EFAULT;
+
+               nr_pages = m.num;
+               addr = m.addr;
+               if (m.num <= 0 || nr_pages > (ULONG_MAX >> PAGE_SHIFT) ||
+                   addr != m.addr || nr_pages > (-addr >> PAGE_SHIFT))
+                       return -EINVAL;
+
+               p = m.arr;
+               for (i = 0; i < nr_pages; i += nr, p += nr) {
+                       nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+
+                       ret = -ENOMEM;
+                       l = (struct list_head *)__get_free_page(GFP_KERNEL);
+                       if (l == NULL)
+                               goto mmap_batch_out;
+
+                       INIT_LIST_HEAD(l);
+                       list_add_tail(l, &pagelist);
+
+                       mfn = (void *)(l + 1);
+                       ret = -EFAULT;
+                       if (copy_from_user(mfn, p, nr * sizeof(*mfn)))
+                               goto mmap_batch_out;
+               }
+
+               down_write(&mm->mmap_sem);
+
+               vma = find_vma(mm, addr);
+               ret = -EINVAL;
+               if (!vma ||
+                   addr < vma->vm_start ||
+                   addr + (nr_pages << PAGE_SHIFT) > vma->vm_end ||
+                   !enforce_singleshot_mapping(vma, addr, nr_pages)) {
+                       up_write(&mm->mmap_sem);
+                       goto mmap_batch_out;
+               }
+
+               i = 0;
+               ret = 0;
+               paged_out = 0;
+               list_for_each(l, &pagelist) {
+                       int rc;
+
+                       nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+                       mfn = (void *)(l + 1);
+                       err = (void *)(l + 1);
+                       BUILD_BUG_ON(sizeof(*err) > sizeof(*mfn));
+
+                       while (i < nr) {
+                               rc = direct_remap_pfn_range(vma, addr & PAGE_MASK,
+                                                           *mfn, PAGE_SIZE,
+                                                           vma->vm_page_prot, m.dom);
+                               if (rc < 0) {
+                                       if (rc == -ENOENT)
+                                               paged_out = 1;
+                                       ret++;
+                               } else
+                                       BUG_ON(rc > 0);
+                               *err++ = rc;
+                               mfn++; i++; addr += PAGE_SIZE;
+                       }
+               }
+
+               up_write(&mm->mmap_sem);
+
+               if (ret > 0) {
+                       int __user *p = m.err;
+
+                       ret = paged_out ? -ENOENT : 0;
+                       i = 0;
+                       list_for_each(l, &pagelist) {
+                               nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+                               err = (void *)(l + 1);
+                               if (copy_to_user(p, err, nr * sizeof(*err)))
+                                       ret = -EFAULT;
+                               i += nr; p += nr;
+                       }
+               }
+
+       mmap_batch_out:
+               list_for_each_safe(l, l2, &pagelist)
+                       free_page((unsigned long)l);
 #undef MMAPBATCH_NR_PER_PAGE
        }
        break;
--- head-2010-01-04.orig/fs/compat_ioctl.c      2009-12-17 16:11:48.000000000 +0100
+++ head-2010-01-04/fs/compat_ioctl.c   2010-01-05 11:00:04.000000000 +0100
@@ -2937,6 +2937,7 @@ IGNORE_IOCTL(FBIOGCURSOR32)
 #ifdef CONFIG_XEN
 HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32)
 HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32)
+HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_BATCH_32, privcmd_ioctl_32)
 COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL)
 COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ)
 COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN)
--- head-2010-01-04.orig/include/xen/compat_ioctl.h     2009-12-17 15:40:40.000000000 +0100
+++ head-2010-01-04/include/xen/compat_ioctl.h  2010-01-04 14:01:56.000000000 +0100
@@ -49,9 +49,27 @@ struct privcmd_mmapbatch_32 {
 #endif
        compat_uptr_t arr; /* array of mfns - top nibble set on err */
 };
+
+struct privcmd_mmap_batch_32 {
+       unsigned int num; /* number of pages to populate */
+       domid_t dom;      /* target domain */
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
+       union {      /* virtual address */
+               __u64 addr __packed;
+               __u32 va;
+       };
+#else
+       __u64 addr;  /* virtual address */
+#endif
+       compat_uptr_t arr; /* array of mfns */
+       compat_uptr_t err; /* array of error codes */
+};
+
 #define IOCTL_PRIVCMD_MMAP_32                   \
        _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap_32))
-#define IOCTL_PRIVCMD_MMAPBATCH_32                  \
+#define IOCTL_PRIVCMD_MMAPBATCH_32              \
        _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch_32))
+#define IOCTL_PRIVCMD_MMAP_BATCH_32             \
+       _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmap_batch_32))
 
 #endif /* __LINUX_XEN_COMPAT_H__ */
--- head-2010-01-04.orig/include/xen/public/privcmd.h   2009-12-18 10:38:09.000000000 +0100
+++ head-2010-01-04/include/xen/public/privcmd.h        2010-01-04 14:02:06.000000000 +0100
@@ -60,6 +60,14 @@ typedef struct privcmd_mmapbatch {
        xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
 } privcmd_mmapbatch_t; 
 
+typedef struct privcmd_mmap_batch {
+       unsigned int num; /* number of pages to populate */
+       domid_t dom;      /* target domain */
+       __u64 addr;       /* virtual address */
+       const xen_pfn_t __user *arr; /* array of mfns */
+       int __user *err;  /* array of error codes */
+} privcmd_mmap_batch_t;
+
 /*
  * @cmd: IOCTL_PRIVCMD_HYPERCALL
  * @arg: &privcmd_hypercall_t
@@ -71,5 +79,7 @@ typedef struct privcmd_mmapbatch {
        _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t))
 #define IOCTL_PRIVCMD_MMAPBATCH                                        \
        _IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t))
+#define IOCTL_PRIVCMD_MMAP_BATCH                               \
+       _IOC(_IOC_NONE, 'P', 4, sizeof(privcmd_mmap_batch_t))
 
 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 

