[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1/2] Have the blktap allocate devices dynamically.



This patch allocates the blktap devices on demand. I used my previous patch as a starting point (but this patch is still against xen-unstable). The previous patch kept the tapfds in a linked list, but Andrew and I have agreed that the O(n) search is not efficient.

This patch keeps the array, which now holds 256 pointers. The array itself is still static; it is the structures the pointers refer to that are now allocated on demand.

-- Steve

Signed-off-by: Steven Rostedt <srostedt@xxxxxxxxxx>
diff -r f0302ed1ac62 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Mon Oct 02 11:47:55 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Mon Oct 02 17:37:01 2006 -0400
@@ -9,6 +9,9 @@
  * Based on the blkback driver code.
  * 
  * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
+ *
+ * Clean ups and fix ups:
+ *    Copyright (c) 2006, Steven Rostedt - Red Hat, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version 2
@@ -51,7 +54,7 @@
 #include <asm/tlbflush.h>
 #include <linux/devfs_fs_kernel.h>
 
-#define MAX_TAP_DEV 100     /*the maximum number of tapdisk ring devices    */
+#define MAX_TAP_DEV 256     /*the maximum number of tapdisk ring devices    */
 #define MAX_DEV_NAME 100    /*the max tapdisk ring device name e.g. blktap0 */
 
 
@@ -104,6 +107,12 @@ static int mmap_pages = MMAP_PAGES;
                      * have a bunch of pages reserved for shared
                      * memory rings.
                      */
+
+/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
+typedef struct domid_translate {
+       unsigned short domid;
+       unsigned short busid;
+} domid_translate_t ;
 
 /*Data struct associated with each of the tapdisk devices*/
 typedef struct tap_blkif {
@@ -123,17 +132,11 @@ typedef struct tap_blkif {
        unsigned long *idx_map;       /*Record the user ring id to kern 
                                        [req id, idx] tuple                  */
        blkif_t *blkif;               /*Associate blkif with tapdev          */
-       int sysfs_set;                /*Set if it has a class device.        */
+       struct domid_translate trans; /*Translation from domid to bus.       */
 } tap_blkif_t;
 
-/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
-typedef struct domid_translate {
-       unsigned short domid;
-       unsigned short busid;
-} domid_translate_t ;
-
-static domid_translate_t  translate_domid[MAX_TAP_DEV];
-static tap_blkif_t *tapfds[MAX_TAP_DEV];
+static struct tap_blkif *tapfds[MAX_TAP_DEV];
+static int blktap_next_minor;
 
 static int __init set_blkif_reqs(char *str)
 {
@@ -320,7 +323,7 @@ struct vm_operations_struct blktap_vm_op
  */
  
 /*Function Declarations*/
-static int get_next_free_dev(void);
+static tap_blkif_t *get_next_free_dev(void);
 static int blktap_open(struct inode *inode, struct file *filp);
 static int blktap_release(struct inode *inode, struct file *filp);
 static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
@@ -338,51 +341,94 @@ static struct file_operations blktap_fop
 };
 
 
-static int get_next_free_dev(void)
+static tap_blkif_t *get_next_free_dev(void)
 {
        tap_blkif_t *info;
-       int i = 0, ret = -1;
-       unsigned long flags;
-
-       spin_lock_irqsave(&pending_free_lock, flags);
-       
-       while (i < MAX_TAP_DEV) {
+       int minor;
+
+       /*
+        * This is called only from the ioctl, which
+        * means we should always have interrupts enabled.
+        */
+       BUG_ON(irqs_disabled());
+
+       spin_lock_irq(&pending_free_lock);
+
+       for (minor = 1; minor < blktap_next_minor; minor++) {
+               info = tapfds[minor];
+               /* we could have failed a previous attempt. */
+               if (!info ||
+                   ((info->dev_inuse == 0) &&
+                    (info->dev_pending == 0)) ) {
+                       info->dev_pending = 1;
+                       goto found;
+               }
+       }
+       info = NULL;
+       minor = -1;
+
+       /*
+        * We didn't find free device. If we can still allocate
+        * more, then we grab the next device minor that is
+        * available.  This is done while we are still under
+        * the protection of the pending_free_lock.
+        */
+       if (blktap_next_minor < MAX_TAP_DEV)
+               minor = blktap_next_minor++;
+found:
+       spin_unlock_irq(&pending_free_lock);
+
+       if (!info && minor > 0) {
+               info = kzalloc(sizeof(*info), GFP_KERNEL);
+               if (unlikely(!info)) {
+                       /*
+                        * If we failed here, try to put back
+                        * the next minor number. But if one
+                        * was just taken, then we just lose this
+                        * minor.  We can try to allocate this
+                        * minor again later.
+                        */
+                       spin_lock_irq(&pending_free_lock);
+                       if (blktap_next_minor == minor+1)
+                               blktap_next_minor--;
+                       spin_unlock_irq(&pending_free_lock);
+                       goto out;
+               }
+
+               info->minor = minor;
+               /*
+                * Make sure that we have a minor before others can
+                * see us.
+                */
+               wmb();
+               tapfds[minor] = info;
+
+               class_device_create(xen_class, NULL,
+                                   MKDEV(blktap_major, minor), NULL,
+                                   "blktap%d", minor);
+               devfs_mk_cdev(MKDEV(blktap_major, minor),
+                       S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", minor);
+       }
+
+out:
+       return info;
+}
+
+int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) 
+{
+       tap_blkif_t *info;
+       int i;
+
+       for (i = 0; i < blktap_next_minor; i++) {
                info = tapfds[i];
-               if ( (tapfds[i] != NULL) && (info->dev_inuse == 0)
-                       && (info->dev_pending == 0) ) {
-                       info->dev_pending = 1;
-                       ret = i;
-                       goto done;
-               }
-               i++;
-       }
-       
-done:
-       spin_unlock_irqrestore(&pending_free_lock, flags);
-
-       /*
-        * We are protected by having the dev_pending set.
-        */
-       if (!tapfds[i]->sysfs_set && xen_class) {
-               class_device_create(xen_class, NULL,
-                                   MKDEV(blktap_major, ret), NULL,
-                                   "blktap%d", ret);
-               tapfds[i]->sysfs_set = 1;
-       }
-       return ret;
-}
-
-int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) 
-{
-       int i;
-               
-       for (i = 0; i < MAX_TAP_DEV; i++)
-               if ( (translate_domid[i].domid == domid)
-                   && (translate_domid[i].busid == xenbus_id) ) {
-                       tapfds[i]->blkif = blkif;
-                       tapfds[i]->status = RUNNING;
+               if ( info &&
+                    (info->trans.domid == domid) &&
+                    (info->trans.busid == xenbus_id) ) {
+                       info->blkif = blkif;
+                       info->status = RUNNING;
                        return i;
                }
+       }
        return -1;
 }
 
@@ -392,12 +438,16 @@ void signal_tapdisk(int idx)
        struct task_struct *ptask;
 
        info = tapfds[idx];
-       if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) {
+       if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
+               return;
+
+       if (info->pid > 0) {
                ptask = find_task_by_pid(info->pid);
                if (ptask)
                        info->status = CLEANSHUTDOWN;
        }
        info->blkif = NULL;
+
        return;
 }
 
@@ -408,14 +458,15 @@ static int blktap_open(struct inode *ino
        tap_blkif_t *info;
        int i;
        
-       if (tapfds[idx] == NULL) {
+       info = tapfds[idx];
+
+       if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) {
                WPRINTK("Unable to open device /dev/xen/blktap%d\n",
-                      idx);
-               return -ENOMEM;
-       }
+                       idx);
+               return -ENODEV;
+       }
+
        DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
-       
-       info = tapfds[idx];
        
        /*Only one process can access device at a time*/
        if (test_and_set_bit(0, &info->dev_inuse))
@@ -617,33 +668,31 @@ static int blktap_ioctl(struct inode *in
        {               
                uint64_t val = (uint64_t)arg;
                domid_translate_t *tr = (domid_translate_t *)&val;
-               int newdev;
 
                DPRINTK("NEWINTF Req for domid %d and bus id %d\n", 
                       tr->domid, tr->busid);
-               newdev = get_next_free_dev();
-               if (newdev < 1) {
+               info = get_next_free_dev();
+               if (!info) {
                        WPRINTK("Error initialising /dev/xen/blktap - "
                                "No more devices\n");
                        return -1;
                }
-               translate_domid[newdev].domid = tr->domid;
-               translate_domid[newdev].busid = tr->busid;
-               return newdev;
+               info->trans.domid = tr->domid;
+               info->trans.busid = tr->busid;
+               return info->minor;
        }
        case BLKTAP_IOCTL_FREEINTF:
        {
                unsigned long dev = arg;
                unsigned long flags;
 
-               /* Looking at another device */
-               info = NULL;
-
-               if ( (dev > 0) && (dev < MAX_TAP_DEV) )
-                       info = tapfds[dev];
+               info = tapfds[dev];
+
+               if ((dev > MAX_TAP_DEV) || !info)
+                       return 0; /* should this be an error? */
 
                spin_lock_irqsave(&pending_free_lock, flags);
-               if ( (info != NULL) && (info->dev_pending) )
+               if (info->dev_pending)
                        info->dev_pending = 0;
                spin_unlock_irqrestore(&pending_free_lock, flags);
 
@@ -653,16 +702,12 @@ static int blktap_ioctl(struct inode *in
        {
                unsigned long dev = arg;
 
-               /* Looking at another device */
-               info = NULL;
-               
-               if ( (dev > 0) && (dev < MAX_TAP_DEV) )
-                       info = tapfds[dev];
-               
-               if (info != NULL)
-                       return info->minor;
-               else
-                       return -1;
+               info = tapfds[dev];
+
+               if (!dev || (dev > MAX_TAP_DEV) || !info)
+                       return -EINVAL;
+
+               return info->minor;
        }
        case BLKTAP_IOCTL_MAJOR:
                return blktap_major;
@@ -702,13 +747,13 @@ void blktap_kick_user(int idx)
 {
        tap_blkif_t *info;
 
-       if (idx == 0)
+       info = tapfds[idx];
+
+       /* Don't kick control device minor==0 */
+       if ((idx <= 0) || (idx > MAX_TAP_DEV) || !info)
                return;
-       
-       info = tapfds[idx];
-       
-       if (info != NULL)
-               wake_up_interruptible(&info->wait);
+
+       wake_up_interruptible(&info->wait);
 
        return;
 }
@@ -868,8 +913,8 @@ static void free_req(pending_req_t *req)
                wake_up(&pending_free_wq);
 }
 
-static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int 
-                           tapidx)
+static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx,
+                           int tapidx)
 {
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
        unsigned int i, invcount = 0;
@@ -877,13 +922,16 @@ static void fast_flush_area(pending_req_
        uint64_t ptep;
        int ret, mmap_idx;
        unsigned long kvaddr, uvaddr;
-
-       tap_blkif_t *info = tapfds[tapidx];
-       
-       if (info == NULL) {
+       tap_blkif_t *info;
+       
+
+       info = tapfds[tapidx];
+
+       if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) {
                WPRINTK("fast_flush: Couldn't get info!\n");
                return;
        }
+
        mmap_idx = req->mem_idx;
 
        for (i = 0; i < req->nr_pages; i++) {
@@ -1088,7 +1136,7 @@ static int do_block_io_op(blkif_t *blkif
        rmb(); /* Ensure we see queued requests up to 'rp'. */
 
        /*Check blkif has corresponding UE ring*/
-       if (blkif->dev_num == -1) {
+       if (blkif->dev_num < 0) {
                /*oops*/
                if (print_dbug) {
                        WPRINTK("Corresponding UE " 
@@ -1099,7 +1147,8 @@ static int do_block_io_op(blkif_t *blkif
        }
 
        info = tapfds[blkif->dev_num];
-       if (info == NULL || !info->dev_inuse) {
+
+       if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) {
                if (print_dbug) {
                        WPRINTK("Can't get UE info!\n");
                        print_dbug = 0;
@@ -1167,16 +1216,24 @@ static void dispatch_rw_block_io(blkif_t
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
        unsigned int nseg;
        int ret, i;
-       tap_blkif_t *info = tapfds[blkif->dev_num];
+       tap_blkif_t *info;
        uint64_t sector;
        
        blkif_request_t *target;
        int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
-       int usr_idx = GET_NEXT_REQ(info->idx_map);
+       int usr_idx;
        uint16_t mmap_idx = pending_req->mem_idx;
 
+       info = tapfds[blkif->dev_num];
+
+       if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV || !info)
+               goto fail_response;
+
+       usr_idx = GET_NEXT_REQ(info->idx_map);
+
        /*Check we have space on user ring - should never fail*/
-       if(usr_idx == INVALID_REQ) goto fail_flush;
+       if (usr_idx == INVALID_REQ)
+               goto fail_flush;
        
        /* Check that number of segments is sane. */
        nseg = req->nr_segments;
@@ -1390,9 +1447,6 @@ static int __init blkif_init(void)
 
        tap_blkif_xenbus_init();
 
-       /*Create the blktap devices, but do not map memory or waitqueue*/
-       for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF;
-
        /* Dynamically allocate a major for this device */
        ret = register_chrdev(0, "blktap", &blktap_fops);
        blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL);
@@ -1404,24 +1458,22 @@ static int __init blkif_init(void)
        
        blktap_major = ret;
 
-       for(i = 0; i < MAX_TAP_DEV; i++ ) {
-               info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL);
-               if(tapfds[i] == NULL)
-                       return -ENOMEM;
-               info->minor = i;
-               info->pid = 0;
-               info->blkif = NULL;
-
-               ret = devfs_mk_cdev(MKDEV(blktap_major, i),
-                       S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);
-
-               if(ret != 0)
-                       return -ENOMEM;
-               info->dev_pending = info->dev_inuse = 0;
-
-               DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
-       }
-       
+       info = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       blktap_next_minor++;
+
+       ret = devfs_mk_cdev(MKDEV(blktap_major, i),
+                           S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);
+
+       if(ret != 0)
+               return -ENOMEM;
+
+       DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
+
+       tapfds[0] = info;
+
        /* Make sure the xen class exists */
        if (!setup_xen_class()) {
                /*
@@ -1434,7 +1486,6 @@ static int __init blkif_init(void)
                class_device_create(xen_class, NULL,
                                    MKDEV(blktap_major, 0), NULL,
                                    "blktap0");
-               tapfds[0]->sysfs_set = 1;
        } else {
                /* this is bad, but not fatal */
                WPRINTK("blktap: sysfs xen_class not created\n");
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.