[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] xen: add gntdev
This is a basic (and experimental) gntdev implementation for NetBSD. The gnt device allows usermode applications to map grant references in userspace. It is mainly used by Qemu to implement a Xen backend (that runs in userspace). Due to the fact that qemu-upstream is not yet functional in NetBSD, the only way to try this gntdev is to use the old qemu (qemu-traditional). Performance is not that bad (given that we are using qemu-traditional and running a backend in userspace): the throughput of write operations is 64.7 MB/s, while in the Dom0 it is 104.6 MB/s. Regarding read operations, the throughput inside the DomU is 76.0 MB/s, while on the Dom0 it is 108.8 MB/s. Patches to libxc and libxl are also coming soon. Cc: xen-devel@xxxxxxxxxxxxx --- etc/MAKEDEV.tmpl | 5 + etc/etc.amd64/MAKEDEV.conf | 2 +- etc/etc.i386/MAKEDEV.conf | 2 +- sys/arch/amd64/conf/XEN3_DOM0 | 1 + sys/arch/amd64/conf/majors.amd64 | 1 + sys/arch/i386/conf/XEN3_DOM0 | 1 + sys/arch/i386/conf/majors.i386 | 1 + sys/arch/xen/conf/files.xen | 2 + sys/arch/xen/include/xen_shm.h | 2 + sys/arch/xen/include/xenio.h | 76 +++++ sys/arch/xen/x86/xen_shm_machdep.c | 18 +- sys/arch/xen/xen/gntdev.c | 618 ++++++++++++++++++++++++++++++++++++ sys/dev/DEVNAMES | 1 + sys/rump/librump/rumpkern/devsw.c | 1 + 14 files changed, 728 insertions(+), 3 deletions(-) create mode 100644 sys/arch/xen/xen/gntdev.c diff --git a/etc/MAKEDEV.tmpl b/etc/MAKEDEV.tmpl index 21b0568..00029c6 100644 --- a/etc/MAKEDEV.tmpl +++ b/etc/MAKEDEV.tmpl @@ -289,6 +289,7 @@ # wsfont* console font control # wsmux* wscons event multiplexor # xenevt Xen event interface +# gntdev Xen grant table interface # # iSCSI communication devices # iscsi* iSCSI driver and /sbin/iscsid communication @@ -1020,6 +1021,10 @@ xsd_kva) mkdev xsd_kva c %xenevt_chr% 1 ;; +gntdev) + mkdev gntdev c %gntdev_chr% 0 + ;; + xencons) mkdev xencons c %xencons_chr% 0 ;; diff --git a/etc/etc.amd64/MAKEDEV.conf b/etc/etc.amd64/MAKEDEV.conf index a4a831c..5e2098c 100644 --- 
a/etc/etc.amd64/MAKEDEV.conf +++ b/etc/etc.amd64/MAKEDEV.conf @@ -44,5 +44,5 @@ all_md) ;; xen) - makedev xenevt xencons xsd_kva + makedev xenevt xencons xsd_kva gntdev ;; diff --git a/etc/etc.i386/MAKEDEV.conf b/etc/etc.i386/MAKEDEV.conf index ba3e2cc..bd38673 100644 --- a/etc/etc.i386/MAKEDEV.conf +++ b/etc/etc.i386/MAKEDEV.conf @@ -48,7 +48,7 @@ all_md) ;; xen) - makedev xenevt xencons xsd_kva + makedev xenevt xencons xsd_kva gntdev ;; floppy) diff --git a/sys/arch/amd64/conf/XEN3_DOM0 b/sys/arch/amd64/conf/XEN3_DOM0 index e5f9f1f..1807dd2 100644 --- a/sys/arch/amd64/conf/XEN3_DOM0 +++ b/sys/arch/amd64/conf/XEN3_DOM0 @@ -838,6 +838,7 @@ pseudo-device wsfont pseudo-device drvctl # xen pseudo-devices +pseudo-device gntdev pseudo-device xenevt pseudo-device xvif pseudo-device xbdback diff --git a/sys/arch/amd64/conf/majors.amd64 b/sys/arch/amd64/conf/majors.amd64 index 9e6b1ac..cf15f7d 100644 --- a/sys/arch/amd64/conf/majors.amd64 +++ b/sys/arch/amd64/conf/majors.amd64 @@ -96,6 +96,7 @@ device-major nsmb char 98 nsmb # - they appear in the i386 MAKEDEV # +device-major gntdev char 140 gntdev device-major xenevt char 141 xenevt device-major xbd char 142 block 142 xbd device-major xencons char 143 xencons diff --git a/sys/arch/i386/conf/XEN3_DOM0 b/sys/arch/i386/conf/XEN3_DOM0 index 8b5cf99..be28bbc 100644 --- a/sys/arch/i386/conf/XEN3_DOM0 +++ b/sys/arch/i386/conf/XEN3_DOM0 @@ -820,6 +820,7 @@ pseudo-device wsfont pseudo-device drvctl # xen pseudo-devices +pseudo-device gntdev pseudo-device xenevt pseudo-device xvif pseudo-device xbdback diff --git a/sys/arch/i386/conf/majors.i386 b/sys/arch/i386/conf/majors.i386 index 38c043f..9aab728 100644 --- a/sys/arch/i386/conf/majors.i386 +++ b/sys/arch/i386/conf/majors.i386 @@ -111,6 +111,7 @@ device-major mt char 107 block 24 mt # - they appear in the i386 MAKEDEV # +device-major gntdev char 140 gntdev device-major xenevt char 141 xenevt device-major xbd char 142 block 142 xbd device-major xencons char 143 xencons diff --git 
a/sys/arch/xen/conf/files.xen b/sys/arch/xen/conf/files.xen index e022db5..91ff858 100644 --- a/sys/arch/xen/conf/files.xen +++ b/sys/arch/xen/conf/files.xen @@ -198,6 +198,7 @@ attach xencons at xendevbus file arch/xen/xen/xencons.c xencons needs-flag # Xen event peudo-device +defpseudo gntdev defpseudo xenevt defpseudo xvif defpseudo xbdback @@ -390,6 +391,7 @@ include "dev/pcmcia/files.pcmcia" # Domain-0 operations defflag opt_xen.h DOM0OPS file arch/xen/xen/privcmd.c dom0ops +file arch/xen/xen/gntdev.c dom0ops file arch/xen/x86/xen_shm_machdep.c dom0ops file arch/x86/pci/pci_machdep.c hypervisor & pci & dom0ops file arch/xen/xen/pci_intr_machdep.c hypervisor & pci diff --git a/sys/arch/xen/include/xen_shm.h b/sys/arch/xen/include/xen_shm.h index e2d89d0..a796572 100644 --- a/sys/arch/xen/include/xen_shm.h +++ b/sys/arch/xen/include/xen_shm.h @@ -37,6 +37,8 @@ */ int xen_shm_map(int, int, grant_ref_t *, vaddr_t *, grant_handle_t *, int); +int xen_shm_map_multidom(int , int *, grant_ref_t *, vaddr_t *, + grant_handle_t *, int); void xen_shm_unmap(vaddr_t, int, grant_handle_t *); int xen_shm_callback(int (*)(void *), void *); diff --git a/sys/arch/xen/include/xenio.h b/sys/arch/xen/include/xenio.h index 6b25733..87cd376 100644 --- a/sys/arch/xen/include/xenio.h +++ b/sys/arch/xen/include/xenio.h @@ -122,4 +122,80 @@ typedef struct oprivcmd_hypercall /* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ #define EVTCHN_UNBIND _IOW('E', 3, unsigned long) +/* Interface to /dev/gntdev */ + +typedef struct ioctl_gntdev_grant_ref { + /* The domain ID of the grant to be mapped. */ + uint32_t domid; + /* The grant reference of the grant to be mapped. */ + uint32_t ref; +} ioctl_gntdev_grant_ref; + +typedef struct ioctl_gntdev_map_grant_ref { + /* IN parameters */ + /* The number of grants to be mapped. */ + uint32_t count; + uint32_t pad; + uint64_t vaddr; + /* OUT parameters */ + /* The offset to be used on a subsequent call to mmap(). 
*/ + uint64_t index; + /* Variable IN parameter. */ + /* Array of grant references, of size @count. */ + ioctl_gntdev_grant_ref *refs; +} ioctl_gntdev_map_grant_ref; + +typedef struct ioctl_gntdev_unmap_grant_ref { + /* IN parameters */ + /* The offset was returned by the corresponding map operation. */ + uint64_t index; + /* The number of pages to be unmapped. */ + uint32_t count; + uint32_t pad; +} ioctl_gntdev_unmap_grant_ref; + +typedef struct ioctl_gntdev_get_offset_for_vaddr { + /* IN parameters */ + /* The virtual address of the first mapped page in a range. */ + uint64_t vaddr; + /* OUT parameters */ + /* The offset that was used in the initial mmap() operation. */ + uint64_t offset; + /* The number of pages mapped in the VM area that begins at @vaddr. */ + uint32_t count; + uint32_t pad; +} ioctl_gntdev_get_offset_for_vaddr; + +/* + * Inserts the grant references into the mapping table of an instance + * of gntdev. N.B. This does not perform the mapping, which is deferred + * until mmap() is called with @index as the offset. + */ +#define IOCTL_GNTDEV_MAP_GRANT_REF \ + _IOWR('G', 0, ioctl_gntdev_map_grant_ref) + +/* + * Removes the grant references from the mapping table of an instance + * of gntdev. N.B. munmap() must be called on the relevant virtual address(es) + * before this ioctl is called, or an error will result. + */ +#define IOCTL_GNTDEV_UNMAP_GRANT_REF \ + _IOW('G', 1, ioctl_gntdev_unmap_grant_ref) + +/* + * Returns the offset in the driver's address space that corresponds + * to @vaddr. This can be used to perform a munmap(), followed by an + * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by + * the caller. The number of pages that were allocated at the same time as + * @vaddr is returned in @count. + * + * N.B. Where more than one page has been mapped into a contiguous range, the + * supplied @vaddr must correspond to the start of the range; otherwise + * an error will result. 
It is only possible to munmap() the entire + * contiguously-allocated range at once, and not any subrange thereof. + */ +#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \ + _IOWR('G', 2, ioctl_gntdev_get_offset_for_vaddr) + + #endif /* __XEN_XENIO_H__ */ diff --git a/sys/arch/xen/x86/xen_shm_machdep.c b/sys/arch/xen/x86/xen_shm_machdep.c index d47745c..b47cc54 100644 --- a/sys/arch/xen/x86/xen_shm_machdep.c +++ b/sys/arch/xen/x86/xen_shm_machdep.c @@ -35,6 +35,7 @@ __KERNEL_RCSID(0, "$NetBSD: xen_shm_machdep.c,v 1.10 2011/09/02 22:25:08 dyoung #include <sys/queue.h> #include <sys/vmem.h> #include <sys/kernel.h> +#include <sys/malloc.h> #include <uvm/uvm.h> #include <machine/pmap.h> @@ -120,6 +121,21 @@ int xen_shm_map(int nentries, int domid, grant_ref_t *grefp, vaddr_t *vap, grant_handle_t *handlep, int flags) { + int i, rc; + int *domids = malloc(sizeof(domid) * nentries, M_DEVBUF, + M_WAITOK | M_ZERO); + for(i = 0; i < nentries; i++) { + domids[i] = domid; + } + rc = xen_shm_map_multidom(nentries, domids, grefp, vap, handlep, flags); + free(domids, M_DEVBUF); + return rc; +} + +int +xen_shm_map_multidom(int nentries, int *domid, grant_ref_t *grefp, + vaddr_t *vap, grant_handle_t *handlep, int flags) +{ int s, i; vaddr_t new_va; vmem_addr_t new_va_pg; @@ -168,7 +184,7 @@ xen_shm_map(int nentries, int domid, grant_ref_t *grefp, vaddr_t *vap, new_va = new_va_pg << PAGE_SHIFT; for (i = 0; i < nentries; i++) { op[i].host_addr = new_va + i * PAGE_SIZE; - op[i].dom = domid; + op[i].dom = domid[i]; op[i].ref = grefp[i]; op[i].flags = GNTMAP_host_map | ((flags & XSHM_RO) ? GNTMAP_readonly : 0); diff --git a/sys/arch/xen/xen/gntdev.c b/sys/arch/xen/xen/gntdev.c new file mode 100644 index 0000000..85dd8ec --- /dev/null +++ b/sys/arch/xen/xen/gntdev.c @@ -0,0 +1,618 @@ +/* + * Copyright (c) 2012 Roger Pau Monné. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#include <sys/cdefs.h> + +#include "opt_xen.h" + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/conf.h> + +#include <uvm/uvm.h> + +#include <xen/xen_shm.h> +#include <xen/xenio.h> + +void gntdevattach(int n); + +#define freem(va) \ + if (va) free(va, M_DEVBUF) + +#undef GNTDEBUG +#ifdef GNTDEBUG + #define debug(M, ...) \ + printf("gntdev:%d: " M "\n", __LINE__, ##__VA_ARGS__) +#else + #define debug(M, ...) 
+#endif + +#define VA_FREE 0 + +static int gntdev_fioctl(struct file *, u_long, void *); +static int gntdev_fclose(struct file *); + +static const struct fileops gntdev_fileops = { + .fo_read = fbadop_read, + .fo_write = fbadop_write, + .fo_ioctl = gntdev_fioctl, + .fo_fcntl = fnullop_fcntl, + .fo_poll = fnullop_poll, + .fo_stat = fbadop_stat, + .fo_close = gntdev_fclose, + .fo_kqfilter = fnullop_kqfilter, + .fo_restart = fnullop_restart, +}; + +dev_type_open(gntdev_open); + +const struct cdevsw gntdev_cdevsw = { + gntdev_open, nullclose, noread, nowrite, noioctl, + nostop, notty, nopoll, nommap, nokqfilter, D_OTHER +}; + +struct gntmap { + struct uvm_object uobj; + pmap_t pmap; + LIST_ENTRY(gntmap) next_map; + int index; + int count; + grant_ref_t *grants; + int *domids; + vaddr_t va; + vaddr_t kernel_va; + grant_handle_t *handles; + bool ro; +}; + +struct gntproc { + LIST_HEAD(,gntmap) maps; + kmutex_t lock; + struct lwp *lwp; + unsigned int num_maps; +}; + +struct gntdev { + kcondvar_t wait_mem; + kmutex_t lock; + bool callback_set; +}; + +struct gntdev priv; + +/* --- UVM handlers prototypes --- */ + +static int +gntmap_fault(struct uvm_faultinfo *, vaddr_t, struct vm_page **, + int, int, vm_prot_t, int); +static void +gntmap_reference(struct uvm_object *); +static void +gntmap_detach(struct uvm_object *); +static int +map_grant_ref(struct gntmap *map); + +static struct uvm_pagerops gntops = { + .pgo_reference = gntmap_reference, + .pgo_detach = gntmap_detach, + .pgo_fault = gntmap_fault, +}; + +/* --- Helper prototypes --- */ + +static int +gntdev_mem_callback(void *arg); +static void +insert_map(struct gntproc *proc, struct gntmap *map); +static struct gntmap * +find_map(struct gntproc *proc, int index, int count); +static struct gntmap * +find_vaddr(struct gntproc *proc, vaddr_t va); +static void +remove_map(struct gntproc *proc, struct gntmap *map); + +/* --- UVM handlers --- */ + +static int +gntdev_install_handler(struct vm_map *vmap, vaddr_t start, + 
struct gntmap *map) +{ + int rc; + uvm_flag_t uvmflag; + vaddr_t newstart = start; + vm_prot_t prot; + off_t size = ((off_t)map->count << PGSHIFT); + + /* remove current entries */ + uvm_unmap(vmap, start, start + size); + + uvm_obj_init(&map->uobj, &gntops, true, 1); + prot = map->ro ? VM_PROT_READ : VM_PROT_READ | VM_PROT_WRITE; + uvmflag = UVM_MAPFLAG(prot, prot, UVM_INH_NONE, UVM_ADV_NORMAL, + UVM_FLAG_FIXED | UVM_FLAG_NOMERGE); + rc = uvm_map(vmap, &newstart, size, &map->uobj, 0, 0, uvmflag); + if (rc) { + debug("uvm_map failed"); + map->uobj.pgops->pgo_detach(&map->uobj); + return rc; + } + if (newstart != start) { + debug("uvm_map didn't give us back our vm space"); + return EINVAL; + } + map->pmap = vm_map_pmap(vmap); + return 0; +} + +static int +gntmap_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps, + int npages, int centeridx, vm_prot_t access_type, int flags) +{ + struct vm_map_entry *entry = ufi->entry; + struct uvm_object *uobj = entry->object.uvm_obj; + struct gntmap *map = (struct gntmap*)entry->object.uvm_obj; + pmap_t pmap = ufi->orig_map->pmap; + int index, i, rc = 0; + vaddr_t u_va, k_va; + vm_prot_t prot; + paddr_t ma; + + /* compute offset from start of map */ + index = (entry->offset + (vaddr - entry->start)) >> PAGE_SHIFT; + if (index + npages > map->count) { + return EINVAL; + } + + for (i = 0; i < npages; i++, index++) { + if ((flags & PGO_ALLPAGES) == 0 && i != centeridx) + continue; + if (pps[i] == PGO_DONTCARE) + continue; + + u_va = map->va + (index * PAGE_SIZE); + k_va = map->kernel_va + (index * PAGE_SIZE); + if (pmap_extract_ma(pmap_kernel(), k_va, &ma) == false) { + debug("unable to extract kernel MA"); + return EFAULT; + } + prot = map->ro ? 
VM_PROT_READ : (VM_PROT_READ | VM_PROT_WRITE); + rc = pmap_enter_ma(pmap, u_va, ma, 0, prot, PMAP_CANFAIL, + map->domids[index]); + if (rc == ENOMEM) { + debug("pmap_enter_ma: ENOMEM"); + rc = ERESTART; + break; + } + if (rc) { + /* XXX for proper ptp accountings */ + debug("pmap_enter_ma: failed"); + pmap_remove(pmap, u_va, u_va + PAGE_SIZE); + } + } + pmap_update(pmap); + uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj); + + if (rc == ERESTART) { + uvm_wait("privpgop_fault"); + } + return rc; +} + +static void +gntmap_reference(struct uvm_object *uobj) +{ + mutex_enter(uobj->vmobjlock); + uobj->uo_refs++; + mutex_exit(uobj->vmobjlock); +} + +static void +gntmap_detach(struct uvm_object *uobj) +{ + struct gntmap *map = (struct gntmap *)uobj; + + mutex_enter(uobj->vmobjlock); + uobj->uo_refs--; + if (uobj->uo_refs > 0) { + mutex_exit(uobj->vmobjlock); + return; + } + mutex_exit(uobj->vmobjlock); + pmap_remove(map->pmap, map->va, map->va + (map->count * PAGE_SIZE)); + pmap_update(map->pmap); +} + +/* --- Internal Helpers --- */ + +static int +gntdev_mem_callback(void *arg) +{ + mutex_enter(&priv.lock); + cv_broadcast(&priv.wait_mem); + priv.callback_set = false; + mutex_exit(&priv.lock); + return 0; +} + +static void +insert_map(struct gntproc *proc, struct gntmap *map) +{ + struct gntmap *tmap; + + mutex_enter(&proc->lock); + proc->num_maps++; + if (LIST_EMPTY(&proc->maps)) { + LIST_INSERT_HEAD(&proc->maps, map, next_map); + goto out; + } + LIST_FOREACH(tmap, &proc->maps, next_map) { + if (map->index + map->count < tmap->index) { + LIST_INSERT_BEFORE(tmap, map, next_map); + goto out; + } + map->index = tmap->index + tmap->count; + if (LIST_NEXT(tmap, next_map) == NULL) { + LIST_INSERT_AFTER(tmap, map, next_map); + goto out; + } + } + +out: + mutex_exit(&proc->lock); + return; +} + +static struct gntmap * +find_map(struct gntproc *proc, int index, int count) +{ + struct gntmap *map = NULL; + + mutex_enter(&proc->lock); + if (LIST_EMPTY(&proc->maps)) + goto 
out; + + LIST_FOREACH(map, &proc->maps, next_map) { + if (index != map->index) { + continue; + } + if (count && count != map->count) { + continue; + } + goto out; + } + map = NULL; + +out: + mutex_exit(&proc->lock); + return map; +} + +static struct gntmap * +find_vaddr(struct gntproc *proc, vaddr_t va) +{ + struct gntmap *map = NULL; + + mutex_enter(&proc->lock); + if (LIST_EMPTY(&proc->maps)) + goto out; + + LIST_FOREACH(map, &proc->maps, next_map) { + if (va >= map->va && va < (map->va + (map->count * PAGE_SIZE))) + goto out; + } + map = NULL; + +out: + mutex_exit(&proc->lock); + return map; +} + +static void +remove_map(struct gntproc *proc, struct gntmap *map) +{ + mutex_enter(&proc->lock); + LIST_REMOVE(map, next_map); + proc->num_maps--; + mutex_exit(&proc->lock); + if (map->kernel_va != VA_FREE) { + xen_shm_unmap(map->kernel_va, map->count, map->handles); + map->kernel_va = VA_FREE; + uvm_obj_destroy(&map->uobj, true); + } + free(map->grants, M_DEVBUF); + free(map->handles, M_DEVBUF); + free(map->domids, M_DEVBUF); + free(map, M_DEVBUF); +} + +static int +map_grant_ref(struct gntmap *map) +{ + int rc; + vaddr_t k_va; + + do { + debug("mapping grefs with index %d", map->index); + rc = xen_shm_map_multidom(map->count, map->domids, map->grants, &k_va, + map->handles, map->ro ? 
XSHM_RO : 0); + switch (rc) { + case 0: + /* All good */ + map->kernel_va = k_va; + break; + case ENOMEM: + mutex_enter(&priv.lock); + debug("xen_shm_map_multidom out of memory"); + if (!priv.callback_set) { + /* Register callback */ + if (xen_shm_callback(gntdev_mem_callback, NULL) != 0) { + mutex_exit(&priv.lock); + panic("ioctl_map_grant_ref: xen_shm_callback failed"); + } + priv.callback_set = true; + } + cv_wait(&priv.wait_mem, &priv.lock); + mutex_exit(&priv.lock); + default: + debug("xen_shm_map_multidom failed"); + goto error; + } + } while (rc == ENOMEM); + + return 0; + +error: + return rc; +} + +/* --- ioctl handlers --- */ + +static int +ioctl_map_grant_ref(struct gntproc *proc, + ioctl_gntdev_map_grant_ref *map_grants) +{ + grant_ref_t *refs = NULL; + grant_handle_t *handles = NULL; + int *domids = NULL; + struct gntmap *map = NULL; + struct vm_map *vmm; + ioctl_gntdev_grant_ref ioctl_map; + int i, rc; + vaddr_t va0; + + if (find_vaddr(proc, map_grants->vaddr)) { + debug("memory area already in use"); + rc = EINVAL; + goto error; + } + + debug("mapping %d refs", map_grants->count); + + refs = malloc(sizeof(*refs) * map_grants->count, M_DEVBUF, + M_WAITOK | M_ZERO); + handles = malloc(sizeof(*handles) * map_grants->count, M_DEVBUF, + M_WAITOK | M_ZERO); + domids = malloc(sizeof(*domids) * map_grants->count, M_DEVBUF, + M_WAITOK | M_ZERO); + + for (i = 0; i < map_grants->count; i++) { + rc = copyin(&map_grants->refs[i], &ioctl_map, sizeof(ioctl_map)); + if (rc != 0) { + debug("unable to copyin grant ref info %d", i); + goto error; + } + debug("mapping ref: %u Dom: %u", ioctl_map.ref, ioctl_map.domid); + refs[i] = ioctl_map.ref; + domids[i] = ioctl_map.domid; + } + map = malloc(sizeof(*map), M_DEVBUF, + M_WAITOK | M_ZERO); + vmm = &proc->lwp->l_proc->p_vmspace->vm_map; + va0 = map_grants->vaddr & ~PAGE_MASK; + vm_map_lock_read(vmm); + if (uvm_map_checkprot(vmm, va0, va0 + (map_grants->count << PGSHIFT) - 1, + VM_PROT_WRITE)) { + map->ro = false; + 
debug("map RW"); + } else if (uvm_map_checkprot(vmm, va0, + va0 + (map_grants->count << PGSHIFT) - 1, VM_PROT_READ)) { + map->ro = true; + debug("map RO"); + } else { + debug("unable check protection"); + vm_map_unlock_read(vmm); + goto error; + } + vm_map_unlock_read(vmm); + map->grants = refs; + map->handles = handles; + map->domids = domids; + map->va = map_grants->vaddr; + map->kernel_va = VA_FREE; + map->count = map_grants->count; + map->index = 0; + + rc = map_grant_ref(map); + if (rc) { + debug("map_grant_ref failed"); + goto error; + } + + rc = gntdev_install_handler(vmm, map->va, map); + if (rc) { + debug("gntdev_install_handler failed"); + xen_shm_unmap(map->kernel_va, map->count, map->handles); + map->kernel_va = VA_FREE; + goto error; + } + insert_map(proc, map); + map_grants->index = map->index << PAGE_SHIFT; + debug("gntrefs mapped at index %" PRIu64 "", map_grants->index); + return 0; + +error: + freem(refs); + freem(handles); + freem(domids); + freem(map); + debug("unable to map grant refs"); + return rc; +} + +static int +ioctl_unmap_grant_ref(struct gntproc *proc, + ioctl_gntdev_unmap_grant_ref *unmap_grants) +{ + struct gntmap *map; + uint64_t index = unmap_grants->index >> PAGE_SHIFT; + int rc = 0; + + debug("unmapping index %" PRIu64, index); + + map = find_map(proc, index, unmap_grants->count); + if (map == NULL) { + debug("unable to find index %" PRIu64, index); + rc = EINVAL; + goto out; + } + mutex_enter(map->uobj.vmobjlock); + if (map->uobj.uo_refs) { + debug("trying to remove a referenced map"); + mutex_exit(map->uobj.vmobjlock); + return EINVAL; + } + mutex_exit(map->uobj.vmobjlock); + remove_map(proc, map); +out: + return rc; +} + +static int +ioctl_get_offset_vaddr(struct gntproc *proc, + ioctl_gntdev_get_offset_for_vaddr *offset_vaddr) +{ + struct gntmap *map; + int rc = 0; + + debug("find offset va: %p", (void *)offset_vaddr->vaddr); + + map = find_vaddr(proc, offset_vaddr->vaddr); + if (map == NULL) { + debug("unable to find 
vaddr"); + rc = EINVAL; + goto out; + } + + offset_vaddr->offset = map->index << PAGE_SHIFT; + offset_vaddr->count = map->count; + +out: + return rc; +} + +/* --- Device ops handlers --- */ + +static int +gntdev_fioctl(struct file *fp, u_long cmd, void *addr) +{ + struct gntproc *proc = fp->f_data; + ioctl_gntdev_map_grant_ref *map_grants; + ioctl_gntdev_unmap_grant_ref *unmap_grants; + ioctl_gntdev_get_offset_for_vaddr *offset_vaddr; + int rc; + + switch (cmd) { + case IOCTL_GNTDEV_MAP_GRANT_REF: + map_grants = addr; + rc = ioctl_map_grant_ref(proc, map_grants); + break; + case IOCTL_GNTDEV_UNMAP_GRANT_REF: + unmap_grants = addr; + rc = ioctl_unmap_grant_ref(proc, unmap_grants); + break; + case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: + offset_vaddr = addr; + rc = ioctl_get_offset_vaddr(proc, offset_vaddr); + break; + default: + rc = EINVAL; + } + return rc; +} + +int +gntdev_open(dev_t dev, int flags, int mode, struct lwp *l) +{ + struct gntproc *proc; + struct file *fp; + int fd, rc; + + rc = fd_allocfile(&fp, &fd); + if (rc) + return rc; + + proc = malloc(sizeof(*proc), M_DEVBUF, M_WAITOK | M_ZERO); + mutex_init(&proc->lock, MUTEX_DEFAULT, IPL_NONE); + LIST_INIT(&proc->maps); + proc->lwp = l; + proc->num_maps = 0; + debug("opened for proc %p", l); + return fd_clone(fp, fd, flags, &gntdev_fileops, proc); +} + +static int +gntdev_fclose(struct file *fp) +{ + struct gntproc *proc = fp->f_data; + struct gntmap *map; + + mutex_enter(&proc->lock); + while (LIST_FIRST(&proc->maps) != NULL) { + map = LIST_FIRST(&proc->maps); + mutex_exit(&proc->lock); + remove_map(proc, map); + mutex_enter(&proc->lock); + } + KASSERT(proc->num_maps == 0); + mutex_exit(&proc->lock); + mutex_destroy(&proc->lock); + debug("closed device for proc %p", proc->lwp); + free(proc, M_DEVBUF); + return 0; +} + +void +gntdevattach(int n) +{ + mutex_init(&priv.lock, MUTEX_DEFAULT, IPL_VM); + cv_init(&priv.wait_mem, "gntdev"); + priv.callback_set = false; + debug("attached"); + return; +} diff --git 
a/sys/dev/DEVNAMES b/sys/dev/DEVNAMES index 45cf018..765fe45 100644 --- a/sys/dev/DEVNAMES +++ b/sys/dev/DEVNAMES @@ -1517,6 +1517,7 @@ xdc MI xdc sun3 xe next68k xel x68k +gntdev xen xencons xen xenevt xen xennet xen diff --git a/sys/rump/librump/rumpkern/devsw.c b/sys/rump/librump/rumpkern/devsw.c index 5a1af01..e513885 100644 --- a/sys/rump/librump/rumpkern/devsw.c +++ b/sys/rump/librump/rumpkern/devsw.c @@ -134,6 +134,7 @@ struct devsw_conv devsw_conv0[] = { { "rd", 22, 105, DEVNODE_DONTBOTHER, 0, { 0, 0 }}, { "ct", 23, 106, DEVNODE_DONTBOTHER, 0, { 0, 0 }}, { "mt", 24, 107, DEVNODE_DONTBOTHER, 0, { 0, 0 }}, + { "gntdev", -1, 140, DEVNODE_DONTBOTHER, 0, { 0, 0 }}, { "xenevt", -1, 141, DEVNODE_DONTBOTHER, 0, { 0, 0 }}, { "xbd", 142, 142, DEVNODE_DONTBOTHER, 0, { 0, 0 }}, { "xencons", -1, 143, DEVNODE_DONTBOTHER, 0, { 0, 0 }}, -- 1.7.7.5 (Apple Git-26) _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |