[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v3] xen-blkback: defer freeing blkif to avoid blocking xenwatch



I tried combining this patch with the previous one, but after 5-7 days of
use I again end up with devices that cannot be disconnected.
My patch for 3.10.x: https://gist.github.com/raw/3d7009a18dc920ba64fe

2014-05-21 0:28 GMT+04:00 Valentin Priescu <vali.priescu@xxxxxxxxx>:
> From: Valentin Priescu <priescuv@xxxxxxxxxx>
>
> Currently xenwatch blocks in VBD disconnect, waiting for all pending I/O
> requests to finish. If the VBD is attached to a hot-swappable disk, then
> xenwatch can hang for a long period of time, stalling other watches.
>
>  INFO: task xenwatch:39 blocked for more than 120 seconds.
>  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>  ffff880057f01bd0 0000000000000246 ffff880057f01ac0 ffffffff810b0782
>  ffff880057f01ad0 00000000000131c0 0000000000000004 ffff880057edb040
>  ffff8800344c6080 0000000000000000 ffff880058c00ba0 ffff880057edb040
>  Call Trace:
>  [<ffffffff810b0782>] ? irq_to_desc+0x12/0x20
>  [<ffffffff8128f761>] ? list_del+0x11/0x40
>  [<ffffffff8147a080>] ? wait_for_common+0x60/0x160
>  [<ffffffff8147bcef>] ? _raw_spin_lock_irqsave+0x2f/0x50
>  [<ffffffff8147bd49>] ? _raw_spin_unlock_irqrestore+0x19/0x20
>  [<ffffffff8147a26a>] schedule+0x3a/0x60
>  [<ffffffffa018fe6a>] xen_blkif_disconnect+0x8a/0x100 [xen_blkback]
>  [<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
>  [<ffffffffa018ffce>] xen_blkbk_remove+0xae/0x1e0 [xen_blkback]
>  [<ffffffff8130b254>] xenbus_dev_remove+0x44/0x90
>  [<ffffffff81345cb7>] __device_release_driver+0x77/0xd0
>  [<ffffffff81346488>] device_release_driver+0x28/0x40
>  [<ffffffff813456e8>] bus_remove_device+0x78/0xe0
>  [<ffffffff81342c9f>] device_del+0x12f/0x1a0
>  [<ffffffff81342d2d>] device_unregister+0x1d/0x60
>  [<ffffffffa0190826>] frontend_changed+0xa6/0x4d0 [xen_blkback]
>  [<ffffffffa019c252>] ? frontend_changed+0x192/0x650 [xen_netback]
>  [<ffffffff8130ae50>] ? cmp_dev+0x60/0x60
>  [<ffffffff81344fe4>] ? bus_for_each_dev+0x94/0xa0
>  [<ffffffff8130b06e>] xenbus_otherend_changed+0xbe/0x120
>  [<ffffffff8130b4cb>] frontend_changed+0xb/0x10
>  [<ffffffff81309c82>] xenwatch_thread+0xf2/0x130
>  [<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
>  [<ffffffff81309b90>] ? xenbus_directory+0x80/0x80
>  [<ffffffff810799d6>] kthread+0x96/0xa0
>  [<ffffffff81485934>] kernel_thread_helper+0x4/0x10
>  [<ffffffff814839f3>] ? int_ret_from_sys_call+0x7/0x1b
>  [<ffffffff8147c17c>] ? retint_restore_args+0x5/0x6
>  [<ffffffff81485930>] ? gs_change+0x13/0x13
>
> With this patch, when there is still pending I/O, the actual disconnect
> is done by the last reference holder (last pending I/O request). In this
> case, xenwatch doesn't block indefinitely.
>
> Signed-off-by: Valentin Priescu <priescuv@xxxxxxxxxx>
> Reviewed-by: Steven Kady <stevkady@xxxxxxxxxx>
> Reviewed-by: Steven Noonan <snoonan@xxxxxxxxxx>
> Reviewed-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
> v2: Reworked an 'if' statement in xen_blkif_disconnect(), as
>     suggested by David Vrabel <david.vrabel@xxxxxxxxxx>
>     Handle the case when xen_blkif_disconnect() returns -EBUSY on
>     frontend_changed(), as noted by David Vrabel <david.vrabel@xxxxxxxxxx>
> v3: Update reviewers list.
>
>  drivers/block/xen-blkback/common.h |  4 ++--
>  drivers/block/xen-blkback/xenbus.c | 46 
> ++++++++++++++++++++++++++++----------
>  2 files changed, 36 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/block/xen-blkback/common.h 
> b/drivers/block/xen-blkback/common.h
> index be05277..f65b807 100644
> --- a/drivers/block/xen-blkback/common.h
> +++ b/drivers/block/xen-blkback/common.h
> @@ -314,7 +314,7 @@ struct xen_blkif {
>         unsigned long long                      st_rd_sect;
>         unsigned long long                      st_wr_sect;
>
> -       wait_queue_head_t       waiting_to_free;
> +       struct work_struct      free_work;
>         /* Thread shutdown wait queue. */
>         wait_queue_head_t       shutdown_wq;
>  };
> @@ -361,7 +361,7 @@ struct pending_req {
>  #define xen_blkif_put(_b)                              \
>         do {                                            \
>                 if (atomic_dec_and_test(&(_b)->refcnt)) \
> -                       wake_up(&(_b)->waiting_to_free);\
> +                       schedule_work(&(_b)->free_work);\
>         } while (0)
>
>  struct phys_req {
> diff --git a/drivers/block/xen-blkback/xenbus.c 
> b/drivers/block/xen-blkback/xenbus.c
> index 9a547e6..e911d28 100644
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -35,12 +35,26 @@ static void connect(struct backend_info *);
>  static int connect_ring(struct backend_info *);
>  static void backend_changed(struct xenbus_watch *, const char **,
>                             unsigned int);
> +static void xen_blkif_free(struct xen_blkif *blkif);
> +static void xen_vbd_free(struct xen_vbd *vbd);
>
>  struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
>  {
>         return be->dev;
>  }
>
> +/*
> + * The last request could free the device from softirq context and
> + * xen_blkif_free() can sleep.
> + */
> +static void xen_blkif_deferred_free(struct work_struct *work)
> +{
> +       struct xen_blkif *blkif;
> +
> +       blkif = container_of(work, struct xen_blkif, free_work);
> +       xen_blkif_free(blkif);
> +}
> +
>  static int blkback_name(struct xen_blkif *blkif, char *buf)
>  {
>         char *devpath, *devname;
> @@ -121,7 +135,6 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>         init_completion(&blkif->drain_complete);
>         atomic_set(&blkif->drain, 0);
>         blkif->st_print = jiffies;
> -       init_waitqueue_head(&blkif->waiting_to_free);
>         blkif->persistent_gnts.rb_node = NULL;
>         spin_lock_init(&blkif->free_pages_lock);
>         INIT_LIST_HEAD(&blkif->free_pages);
> @@ -132,6 +145,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>         INIT_WORK(&blkif->persistent_purge_work, 
> xen_blkbk_unmap_purged_grants);
>
>         INIT_LIST_HEAD(&blkif->pending_free);
> +       INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
>
>         for (i = 0; i < XEN_BLKIF_REQS; i++) {
>                 req = kzalloc(sizeof(*req), GFP_KERNEL);
> @@ -231,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, 
> unsigned long shared_page,
>         return 0;
>  }
>
> -static void xen_blkif_disconnect(struct xen_blkif *blkif)
> +static int xen_blkif_disconnect(struct xen_blkif *blkif)
>  {
>         if (blkif->xenblkd) {
>                 kthread_stop(blkif->xenblkd);
> @@ -239,9 +253,12 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
>                 blkif->xenblkd = NULL;
>         }
>
> -       atomic_dec(&blkif->refcnt);
> -       wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
> -       atomic_inc(&blkif->refcnt);
> +       /* The above kthread_stop() guarantees that at this point we
> +        * don't have any discard_io or other_io requests. So, checking
> +        * for inflight IO is enough.
> +        */
> +       if (atomic_read(&blkif->inflight) > 0)
> +               return -EBUSY;
>
>         if (blkif->irq) {
>                 unbind_from_irqhandler(blkif->irq, blkif);
> @@ -252,6 +269,8 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
>                 xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
>                 blkif->blk_rings.common.sring = NULL;
>         }
> +
> +       return 0;
>  }
>
>  static void xen_blkif_free(struct xen_blkif *blkif)
> @@ -259,8 +278,8 @@ static void xen_blkif_free(struct xen_blkif *blkif)
>         struct pending_req *req, *n;
>         int i = 0, j;
>
> -       if (!atomic_dec_and_test(&blkif->refcnt))
> -               BUG();
> +       xen_blkif_disconnect(blkif);
> +       xen_vbd_free(&blkif->vbd);
>
>         /* Remove all persistent grants and the cache of ballooned pages. */
>         xen_blkbk_free_caches(blkif);
> @@ -449,16 +468,15 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
>                 be->backend_watch.node = NULL;
>         }
>
> +       dev_set_drvdata(&dev->dev, NULL);
> +
>         if (be->blkif) {
>                 xen_blkif_disconnect(be->blkif);
> -               xen_vbd_free(&be->blkif->vbd);
> -               xen_blkif_free(be->blkif);
> -               be->blkif = NULL;
> +               xen_blkif_put(be->blkif);
>         }
>
>         kfree(be->mode);
>         kfree(be);
> -       dev_set_drvdata(&dev->dev, NULL);
>         return 0;
>  }
>
> @@ -700,7 +718,11 @@ static void frontend_changed(struct xenbus_device *dev,
>                  * Enforce precondition before potential leak point.
>                  * xen_blkif_disconnect() is idempotent.
>                  */
> -               xen_blkif_disconnect(be->blkif);
> +               err = xen_blkif_disconnect(be->blkif);
> +               if (err) {
> +                       xenbus_dev_fatal(dev, err, "pending I/O");
> +                       break;
> +               }
>
>                 err = connect_ring(be);
>                 if (err)
> --
> 1.9.1
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel



-- 
Vasiliy Tolstov,
e-mail: v.tolstov@xxxxxxxxx
jabber: vase@xxxxxxxxx

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.