[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 1/2] Have the blktap allocate devices dynamically.
This patch allocates the blktap devices on demand. I used my previous patch as a starting point (but this patch is still against xen-unstable). The previous patch kept the tapfds in a linked list, but Andrew and I have agreed that the O(n) search is not efficient. This patch keeps the array, which now holds 256 pointers (MAX_TAP_DEV is raised from 100 to 256); the tap_blkif structures those pointers refer to are what is now allocated on demand. -- Steve Signed-off-by: Steven Rostedt <srostedt@xxxxxxxxxx> diff -r f0302ed1ac62 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Mon Oct 02 11:47:55 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Mon Oct 02 17:37:01 2006 -0400 @@ -9,6 +9,9 @@ * Based on the blkback driver code. * * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield + * + * Clean ups and fix ups: + * Copyright (c) 2006, Steven Rostedt - Red Hat, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -51,7 +54,7 @@ #include <asm/tlbflush.h> #include <linux/devfs_fs_kernel.h> -#define MAX_TAP_DEV 100 /*the maximum number of tapdisk ring devices */ +#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */ #define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */ @@ -104,6 +107,12 @@ static int mmap_pages = MMAP_PAGES; * have a bunch of pages reserved for shared * memory rings. */ + +/*Data struct handed back to userspace for tapdisk device to VBD mapping*/ +typedef struct domid_translate { + unsigned short domid; + unsigned short busid; +} domid_translate_t ; /*Data struct associated with each of the tapdisk devices*/ typedef struct tap_blkif { @@ -123,17 +132,11 @@ typedef struct tap_blkif { unsigned long *idx_map; /*Record the user ring id to kern [req id, idx] tuple */ blkif_t *blkif; /*Associate blkif with tapdev */ - int sysfs_set; /*Set if it has a class device. 
*/ + struct domid_translate trans; /*Translation from domid to bus. */ } tap_blkif_t; -/*Data struct handed back to userspace for tapdisk device to VBD mapping*/ -typedef struct domid_translate { - unsigned short domid; - unsigned short busid; -} domid_translate_t ; - -static domid_translate_t translate_domid[MAX_TAP_DEV]; -static tap_blkif_t *tapfds[MAX_TAP_DEV]; +static struct tap_blkif *tapfds[MAX_TAP_DEV]; +static int blktap_next_minor; static int __init set_blkif_reqs(char *str) { @@ -320,7 +323,7 @@ struct vm_operations_struct blktap_vm_op */ /*Function Declarations*/ -static int get_next_free_dev(void); +static tap_blkif_t *get_next_free_dev(void); static int blktap_open(struct inode *inode, struct file *filp); static int blktap_release(struct inode *inode, struct file *filp); static int blktap_mmap(struct file *filp, struct vm_area_struct *vma); @@ -338,51 +341,94 @@ static struct file_operations blktap_fop }; -static int get_next_free_dev(void) +static tap_blkif_t *get_next_free_dev(void) { tap_blkif_t *info; - int i = 0, ret = -1; - unsigned long flags; - - spin_lock_irqsave(&pending_free_lock, flags); - - while (i < MAX_TAP_DEV) { + int minor; + + /* + * This is called only from the ioctl, which + * means we should always have interrupts enabled. + */ + BUG_ON(irqs_disabled()); + + spin_lock_irq(&pending_free_lock); + + for (minor = 1; minor < blktap_next_minor; minor++) { + info = tapfds[minor]; + /* we could have failed a previous attempt. */ + if (!info || + ((info->dev_inuse == 0) && + (info->dev_pending == 0)) ) { + info->dev_pending = 1; + goto found; + } + } + info = NULL; + minor = -1; + + /* + * We didn't find free device. If we can still allocate + * more, then we grab the next device minor that is + * available. This is done while we are still under + * the protection of the pending_free_lock. 
+ */ + if (blktap_next_minor < MAX_TAP_DEV) + minor = blktap_next_minor++; +found: + spin_unlock_irq(&pending_free_lock); + + if (!info && minor > 0) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (unlikely(!info)) { + /* + * If we failed here, try to put back + * the next minor number. But if one + * was just taken, then we just lose this + * minor. We can try to allocate this + * minor again later. + */ + spin_lock_irq(&pending_free_lock); + if (blktap_next_minor == minor+1) + blktap_next_minor--; + spin_unlock_irq(&pending_free_lock); + goto out; + } + + info->minor = minor; + /* + * Make sure that we have a minor before others can + * see us. + */ + wmb(); + tapfds[minor] = info; + + class_device_create(xen_class, NULL, + MKDEV(blktap_major, minor), NULL, + "blktap%d", minor); + devfs_mk_cdev(MKDEV(blktap_major, minor), + S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", minor); + } + +out: + return info; +} + +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) +{ + tap_blkif_t *info; + int i; + + for (i = 0; i < blktap_next_minor; i++) { info = tapfds[i]; - if ( (tapfds[i] != NULL) && (info->dev_inuse == 0) - && (info->dev_pending == 0) ) { - info->dev_pending = 1; - ret = i; - goto done; - } - i++; - } - -done: - spin_unlock_irqrestore(&pending_free_lock, flags); - - /* - * We are protected by having the dev_pending set. 
- */ - if (!tapfds[i]->sysfs_set && xen_class) { - class_device_create(xen_class, NULL, - MKDEV(blktap_major, ret), NULL, - "blktap%d", ret); - tapfds[i]->sysfs_set = 1; - } - return ret; -} - -int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) -{ - int i; - - for (i = 0; i < MAX_TAP_DEV; i++) - if ( (translate_domid[i].domid == domid) - && (translate_domid[i].busid == xenbus_id) ) { - tapfds[i]->blkif = blkif; - tapfds[i]->status = RUNNING; + if ( info && + (info->trans.domid == domid) && + (info->trans.busid == xenbus_id) ) { + info->blkif = blkif; + info->status = RUNNING; return i; } + } return -1; } @@ -392,12 +438,16 @@ void signal_tapdisk(int idx) struct task_struct *ptask; info = tapfds[idx]; - if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) { + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) + return; + + if (info->pid > 0) { ptask = find_task_by_pid(info->pid); if (ptask) info->status = CLEANSHUTDOWN; } info->blkif = NULL; + return; } @@ -408,14 +458,15 @@ static int blktap_open(struct inode *ino tap_blkif_t *info; int i; - if (tapfds[idx] == NULL) { + info = tapfds[idx]; + + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) { WPRINTK("Unable to open device /dev/xen/blktap%d\n", - idx); - return -ENOMEM; - } + idx); + return -ENODEV; + } + DPRINTK("Opening device /dev/xen/blktap%d\n",idx); - - info = tapfds[idx]; /*Only one process can access device at a time*/ if (test_and_set_bit(0, &info->dev_inuse)) @@ -617,33 +668,31 @@ static int blktap_ioctl(struct inode *in { uint64_t val = (uint64_t)arg; domid_translate_t *tr = (domid_translate_t *)&val; - int newdev; DPRINTK("NEWINTF Req for domid %d and bus id %d\n", tr->domid, tr->busid); - newdev = get_next_free_dev(); - if (newdev < 1) { + info = get_next_free_dev(); + if (!info) { WPRINTK("Error initialising /dev/xen/blktap - " "No more devices\n"); return -1; } - translate_domid[newdev].domid = tr->domid; - translate_domid[newdev].busid = tr->busid; - return newdev; + info->trans.domid = 
tr->domid; + info->trans.busid = tr->busid; + return info->minor; } case BLKTAP_IOCTL_FREEINTF: { unsigned long dev = arg; unsigned long flags; - /* Looking at another device */ - info = NULL; - - if ( (dev > 0) && (dev < MAX_TAP_DEV) ) - info = tapfds[dev]; + info = tapfds[dev]; + + if ((dev > MAX_TAP_DEV) || !info) + return 0; /* should this be an error? */ spin_lock_irqsave(&pending_free_lock, flags); - if ( (info != NULL) && (info->dev_pending) ) + if (info->dev_pending) info->dev_pending = 0; spin_unlock_irqrestore(&pending_free_lock, flags); @@ -653,16 +702,12 @@ static int blktap_ioctl(struct inode *in { unsigned long dev = arg; - /* Looking at another device */ - info = NULL; - - if ( (dev > 0) && (dev < MAX_TAP_DEV) ) - info = tapfds[dev]; - - if (info != NULL) - return info->minor; - else - return -1; + info = tapfds[dev]; + + if (!dev || (dev > MAX_TAP_DEV) || !info) + return -EINVAL; + + return info->minor; } case BLKTAP_IOCTL_MAJOR: return blktap_major; @@ -702,13 +747,13 @@ void blktap_kick_user(int idx) { tap_blkif_t *info; - if (idx == 0) + info = tapfds[idx]; + + /* Don't kick control device minor==0 */ + if ((idx <= 0) || (idx > MAX_TAP_DEV) || !info) return; - - info = tapfds[idx]; - - if (info != NULL) - wake_up_interruptible(&info->wait); + + wake_up_interruptible(&info->wait); return; } @@ -868,8 +913,8 @@ static void free_req(pending_req_t *req) wake_up(&pending_free_wq); } -static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int - tapidx) +static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, + int tapidx) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; unsigned int i, invcount = 0; @@ -877,13 +922,16 @@ static void fast_flush_area(pending_req_ uint64_t ptep; int ret, mmap_idx; unsigned long kvaddr, uvaddr; - - tap_blkif_t *info = tapfds[tapidx]; - - if (info == NULL) { + tap_blkif_t *info; + + + info = tapfds[tapidx]; + + if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) { 
WPRINTK("fast_flush: Couldn't get info!\n"); return; } + mmap_idx = req->mem_idx; for (i = 0; i < req->nr_pages; i++) { @@ -1088,7 +1136,7 @@ static int do_block_io_op(blkif_t *blkif rmb(); /* Ensure we see queued requests up to 'rp'. */ /*Check blkif has corresponding UE ring*/ - if (blkif->dev_num == -1) { + if (blkif->dev_num < 0) { /*oops*/ if (print_dbug) { WPRINTK("Corresponding UE " @@ -1099,7 +1147,8 @@ static int do_block_io_op(blkif_t *blkif } info = tapfds[blkif->dev_num]; - if (info == NULL || !info->dev_inuse) { + + if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) { if (print_dbug) { WPRINTK("Can't get UE info!\n"); print_dbug = 0; @@ -1167,16 +1216,24 @@ static void dispatch_rw_block_io(blkif_t struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; unsigned int nseg; int ret, i; - tap_blkif_t *info = tapfds[blkif->dev_num]; + tap_blkif_t *info; uint64_t sector; blkif_request_t *target; int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx); - int usr_idx = GET_NEXT_REQ(info->idx_map); + int usr_idx; uint16_t mmap_idx = pending_req->mem_idx; + info = tapfds[blkif->dev_num]; + + if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV || !info) + goto fail_response; + + usr_idx = GET_NEXT_REQ(info->idx_map); + /*Check we have space on user ring - should never fail*/ - if(usr_idx == INVALID_REQ) goto fail_flush; + if (usr_idx == INVALID_REQ) + goto fail_flush; /* Check that number of segments is sane. 
*/ nseg = req->nr_segments; @@ -1390,9 +1447,6 @@ static int __init blkif_init(void) tap_blkif_xenbus_init(); - /*Create the blktap devices, but do not map memory or waitqueue*/ - for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF; - /* Dynamically allocate a major for this device */ ret = register_chrdev(0, "blktap", &blktap_fops); blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL); @@ -1404,24 +1458,22 @@ static int __init blkif_init(void) blktap_major = ret; - for(i = 0; i < MAX_TAP_DEV; i++ ) { - info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL); - if(tapfds[i] == NULL) - return -ENOMEM; - info->minor = i; - info->pid = 0; - info->blkif = NULL; - - ret = devfs_mk_cdev(MKDEV(blktap_major, i), - S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i); - - if(ret != 0) - return -ENOMEM; - info->dev_pending = info->dev_inuse = 0; - - DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i); - } - + info = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL); + if (!info) + return -ENOMEM; + + blktap_next_minor++; + + ret = devfs_mk_cdev(MKDEV(blktap_major, i), + S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i); + + if(ret != 0) + return -ENOMEM; + + DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i); + + tapfds[0] = info; + /* Make sure the xen class exists */ if (!setup_xen_class()) { /* @@ -1434,7 +1486,6 @@ static int __init blkif_init(void) class_device_create(xen_class, NULL, MKDEV(blktap_major, 0), NULL, "blktap0"); - tapfds[0]->sysfs_set = 1; } else { /* this is bad, but not fatal */ WPRINTK("blktap: sysfs xen_class not created\n"); _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |