[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] linux/blkfront: fixes for 'xm block-detach ... --force'



Hi Jan.

I think this one resulted in blkfront_remove (from xenbus) racing
against blkif_release (run from close(bdev)). 

Both running for their respective kfree(info) vs. subsequent
dereferences. Or just to leak info entirely.

We only had blkfront_closing() on either thread before, which serializes
around the bd_mutex.

I'm currently thinking that info now rather wants a refcount of its own.
Ideally replacing is_ready entirely, maybe.

Any thoughts?

Daniel

On Mon, 2010-01-18 at 05:19 -0500, Jan Beulich wrote:
> Prevent prematurely freeing 'struct blkfront_info' instances (when the
> xenbus data structures are gone, but the Linux ones are still needed).
> 
> Prevent adding a disk with the same (major, minor) [and hence the same
> name and sysfs entries, which leads to oopses] when the previous
> instance wasn't fully de-allocated yet.
> 
> This still doesn't address all issues resulting from forced detach:
> I/O submitted after the detach still blocks forever, likely preventing
> subsequent un-mounting from completing. It's not clear to me (not
> knowing much about the block layer) how this can be avoided.
> 
> This also doesn't address issues with duplicate device creation caused
> by re-using the hdXX and sdXX name spaces - this would require
> synchronization with the respective native code.
> 
> As usual, written against 2.6.32.3 and made apply to the 2.6.18
> tree without further testing.
> 
> As I believe that pv-ops Xen has the same issue, I'm also attaching
> a respective (compile tested only) patch against 2.6.33-rc4.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
> 
> --- sle11-2010-01-12.orig/drivers/xen/blkfront/blkfront.c     2009-06-04 
> 10:47:04.000000000 +0200
> +++ sle11-2010-01-12/drivers/xen/blkfront/blkfront.c  2010-01-15 
> 16:47:42.000000000 +0100
> @@ -426,7 +426,10 @@ static int blkfront_remove(struct xenbus
>  
>       blkif_free(info, 0);
>  
> -     kfree(info);
> +     if(info->users == 0)
> +             kfree(info);
> +     else
> +             info->is_ready = -1;
>  
>       return 0;
>  }
> @@ -488,6 +491,9 @@ static void blkif_restart_queue_callback
>  int blkif_open(struct inode *inode, struct file *filep)
>  {
>       struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
> +
> +     if(info->is_ready < 0)
> +             return -ENODEV;
>       info->users++;
>       return 0;
>  }
> @@ -504,7 +510,10 @@ int blkif_release(struct inode *inode, s
>               struct xenbus_device * dev = info->xbdev;
>               enum xenbus_state state = 
> xenbus_read_driver_state(dev->otherend);
>  
> -             if (state == XenbusStateClosing && info->is_ready)
> +             if(info->is_ready < 0) {
> +                     blkfront_closing(dev);
> +                     kfree(info);
> +             } else if (state == XenbusStateClosing && info->is_ready)
>                       blkfront_closing(dev);
>       }
>       return 0;
> @@ -894,7 +903,7 @@ int blkfront_is_ready(struct xenbus_devi
>  {
>       struct blkfront_info *info = dev->dev.driver_data;
>  
> -     return info->is_ready;
> +     return info->is_ready > 0;
>  }
>  
> 
> --- sle11-2010-01-12.orig/drivers/xen/blkfront/block.h        2009-06-04 
> 10:47:04.000000000 +0200
> +++ sle11-2010-01-12/drivers/xen/blkfront/block.h     2010-01-18 
> 08:56:41.000000000 +0100
> @@ -78,6 +78,7 @@ struct xlbd_major_info
>       int index;
>       int usage;
>       struct xlbd_type_info *type;
> +     struct xlbd_minor_state *minors;
>  };
>  
>  struct blk_shadow {
> --- sle11-2010-01-12.orig/drivers/xen/blkfront/vbd.c  2009-06-04 
> 10:47:04.000000000 +0200
> +++ sle11-2010-01-12/drivers/xen/blkfront/vbd.c       2010-01-18 
> 08:56:32.000000000 +0100
> @@ -48,6 +48,12 @@
>  #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
>  #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
>  
> +struct xlbd_minor_state {
> +     unsigned int nr;
> +     unsigned long *bitmap;
> +     spinlock_t lock;
> +};
> +
>  /*
>   * For convenience we distinguish between ide, scsi and 'other' (i.e.,
>   * potentially combinations of the two) in the naming scheme and in a few 
> other
> @@ -97,6 +103,8 @@ static struct xlbd_major_info *major_inf
>  #define XLBD_MAJOR_SCSI_RANGE        XLBD_MAJOR_SCSI_START ... 
> XLBD_MAJOR_VBD_START - 1
>  #define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + 
> NUM_VBD_MAJORS - 1
>  
> +#define XLBD_MAJOR_VBD_ALT(idx) ((idx) ^ XLBD_MAJOR_VBD_START ^ 
> (XLBD_MAJOR_VBD_START + 1))
> +
>  static struct block_device_operations xlvbd_block_fops =
>  {
>       .owner = THIS_MODULE,
> @@ -114,6 +122,7 @@ static struct xlbd_major_info *
>  xlbd_alloc_major_info(int major, int minor, int index)
>  {
>       struct xlbd_major_info *ptr;
> +     struct xlbd_minor_state *minors;
>       int do_register;
>  
>       ptr = kzalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
> @@ -121,6 +130,22 @@ xlbd_alloc_major_info(int major, int min
>               return NULL;
>  
>       ptr->major = major;
> +     minors = kmalloc(sizeof(*minors), GFP_KERNEL);
> +     if (minors == NULL) {
> +             kfree(ptr);
> +             return NULL;
> +     }
> +
> +     minors->bitmap = kzalloc(BITS_TO_LONGS(256) * sizeof(*minors->bitmap),
> +                              GFP_KERNEL);
> +     if (minors->bitmap == NULL) {
> +             kfree(minors);
> +             kfree(ptr);
> +             return NULL;
> +     }
> +
> +     spin_lock_init(&minors->lock);
> +     minors->nr = 256;
>       do_register = 1;
>  
>       switch (index) {
> @@ -143,13 +168,19 @@ xlbd_alloc_major_info(int major, int min
>                * if someone already registered block major 202,
>                * don't try to register it again
>                */
> -             if (major_info[XLBD_MAJOR_VBD_START] != NULL)
> +             if (major_info[XLBD_MAJOR_VBD_ALT(index)] != NULL) {
> +                     kfree(minors->bitmap);
> +                     kfree(minors);
> +                     minors = major_info[XLBD_MAJOR_VBD_ALT(index)]->minors;
>                       do_register = 0;
> +             }
>               break;
>       }
>  
>       if (do_register) {
>               if (register_blkdev(ptr->major, ptr->type->devname)) {
> +                     kfree(minors->bitmap);
> +                     kfree(minors);
>                       kfree(ptr);
>                       return NULL;
>               }
> @@ -157,6 +188,7 @@ xlbd_alloc_major_info(int major, int min
>               printk("xen-vbd: registered block device major %i\n", 
> ptr->major);
>       }
>  
> +     ptr->minors = minors;
>       major_info[index] = ptr;
>       return ptr;
>  }
> @@ -209,6 +241,61 @@ xlbd_put_major_info(struct xlbd_major_in
>  }
>  
>  static int
> +xlbd_reserve_minors(struct xlbd_major_info *mi, unsigned int minor,
> +                 unsigned int nr_minors)
> +{
> +     struct xlbd_minor_state *ms = mi->minors;
> +     unsigned int end = minor + nr_minors;
> +     int rc;
> +
> +     if (end > ms->nr) {
> +             unsigned long *bitmap, *old;
> +
> +             bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
> +                              GFP_KERNEL);
> +             if (bitmap == NULL)
> +                     return -ENOMEM;
> +
> +             spin_lock(&ms->lock);
> +             if (end > ms->nr) {
> +                     old = ms->bitmap;
> +                     memcpy(bitmap, ms->bitmap,
> +                            BITS_TO_LONGS(ms->nr) * sizeof(*bitmap));
> +                     ms->bitmap = bitmap;
> +                     ms->nr = BITS_TO_LONGS(end) * BITS_PER_LONG;
> +             } else
> +                     old = bitmap;
> +             spin_unlock(&ms->lock);
> +             kfree(old);
> +     }
> +
> +     spin_lock(&ms->lock);
> +     if (find_next_bit(ms->bitmap, end, minor) >= end) {
> +             for (; minor < end; ++minor)
> +                     __set_bit(minor, ms->bitmap);
> +             rc = 0;
> +     } else
> +             rc = -EBUSY;
> +     spin_unlock(&ms->lock);
> +
> +     return rc;
> +}
> +
> +static void
> +xlbd_release_minors(struct xlbd_major_info *mi, unsigned int minor,
> +                 unsigned int nr_minors)
> +{
> +     struct xlbd_minor_state *ms = mi->minors;
> +     unsigned int end = minor + nr_minors;
> +
> +     BUG_ON(end > ms->nr);
> +     spin_lock(&ms->lock);
> +     for (; minor < end; ++minor)
> +             __clear_bit(minor, ms->bitmap);
> +     spin_unlock(&ms->lock);
> +}
> +
> +static int
>  xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
>  {
>       request_queue_t *rq;
> @@ -285,9 +372,14 @@ xlvbd_add(blkif_sector_t capacity, int v
>       if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
>               nr_minors = 1 << mi->type->partn_shift;
>  
> +     err = xlbd_reserve_minors(mi, minor, nr_minors);
> +     if (err)
> +             goto out;
> +     err = -ENODEV;
> +
>       gd = alloc_disk(nr_minors);
>       if (gd == NULL)
> -             goto out;
> +             goto release;
>  
>       offset =  mi->index * mi->type->disks_per_major +
>                       (minor >> mi->type->partn_shift);
> @@ -326,7 +418,7 @@ xlvbd_add(blkif_sector_t capacity, int v
>  
>       if (xlvbd_init_blk_queue(gd, sector_size)) {
>               del_gendisk(gd);
> -             goto out;
> +             goto release;
>       }
>  
>       info->rq = gd->queue;
> @@ -346,6 +438,8 @@ xlvbd_add(blkif_sector_t capacity, int v
>  
>       return 0;
>  
> + release:
> +     xlbd_release_minors(mi, minor, nr_minors);
>   out:
>       if (mi)
>               xlbd_put_major_info(mi);
> @@ -356,14 +450,19 @@ xlvbd_add(blkif_sector_t capacity, int v
>  void
>  xlvbd_del(struct blkfront_info *info)
>  {
> +     unsigned int minor, nr_minors;
> +
>       if (info->mi == NULL)
>               return;
>  
>       BUG_ON(info->gd == NULL);
> +     minor = info->gd->first_minor;
> +     nr_minors = info->gd->minors;
>       del_gendisk(info->gd);
>       put_disk(info->gd);
>       info->gd = NULL;
>  
> +     xlbd_release_minors(info->mi, minor, nr_minors);
>       xlbd_put_major_info(info->mi);
>       info->mi = NULL;
>  
> 
> 



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.