[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v4 07/10] xen/blkback: pseudo support for multi hardware queues/rings



On November 4, 2015 10:02:26 PM EST, Bob Liu <bob.liu@xxxxxxxxxx> wrote:
>
>On 11/05/2015 10:30 AM, Konrad Rzeszutek Wilk wrote:
>> On Mon, Nov 02, 2015 at 12:21:43PM +0800, Bob Liu wrote:
>>> Preparatory patch for multiple hardware queues (rings). The number
>of
>>> rings is unconditionally set to 1, larger number will be enabled in
>next
>>> patch so as to make every single patch small and readable.
>> 
>> Instead of saying 'next patch' - spell out the title of the patch.
>> 
>> 
>>>
>>> Signed-off-by: Arianna Avanzini <avanzini.arianna@xxxxxxxxx>
>>> Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
>>> ---
>>>  drivers/block/xen-blkback/common.h |   3 +-
>>>  drivers/block/xen-blkback/xenbus.c | 292
>+++++++++++++++++++++++--------------
>>>  2 files changed, 185 insertions(+), 110 deletions(-)
>>>
>>> diff --git a/drivers/block/xen-blkback/common.h
>b/drivers/block/xen-blkback/common.h
>>> index f0dd69a..4de1326 100644
>>> --- a/drivers/block/xen-blkback/common.h
>>> +++ b/drivers/block/xen-blkback/common.h
>>> @@ -341,7 +341,8 @@ struct xen_blkif {
>>>     struct work_struct      free_work;
>>>     unsigned int nr_ring_pages;
>>>     /* All rings for this device */
>>> -   struct xen_blkif_ring ring;
>>> +   struct xen_blkif_ring *rings;
>>> +   unsigned int nr_rings;
>>>  };
>>>  
>>>  struct seg_buf {
>>> diff --git a/drivers/block/xen-blkback/xenbus.c
>b/drivers/block/xen-blkback/xenbus.c
>>> index 7bdd5fd..ac4b458 100644
>>> --- a/drivers/block/xen-blkback/xenbus.c
>>> +++ b/drivers/block/xen-blkback/xenbus.c
>>> @@ -84,11 +84,12 @@ static int blkback_name(struct xen_blkif *blkif,
>char *buf)
>>>  
>>>  static void xen_update_blkif_status(struct xen_blkif *blkif)
>>>  {
>>> -   int err;
>>> +   int err, i;
>> 
>> unsigned int.
>>>     char name[BLKBACK_NAME_LEN];
>>> +   struct xen_blkif_ring *ring;
>>>  
>>>     /* Not ready to connect? */
>>> -   if (!blkif->ring.irq || !blkif->vbd.bdev)
>>> +   if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
>>>             return;
>>>  
>>>     /* Already connected? */
>>> @@ -113,19 +114,57 @@ static void xen_update_blkif_status(struct
>xen_blkif *blkif)
>>>     }
>>>     invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
>>>  
>>> -   blkif->ring.xenblkd = kthread_run(xen_blkif_schedule,
>&blkif->ring, "%s", name);
>>> -   if (IS_ERR(blkif->ring.xenblkd)) {
>>> -           err = PTR_ERR(blkif->ring.xenblkd);
>>> -           blkif->ring.xenblkd = NULL;
>>> -           xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
>>> -           return;
>>> +   if (blkif->nr_rings == 1) {
>>> +           blkif->rings[0].xenblkd = kthread_run(xen_blkif_schedule,
>&blkif->rings[0], "%s", name);
>>> +           if (IS_ERR(blkif->rings[0].xenblkd)) {
>>> +                   err = PTR_ERR(blkif->rings[0].xenblkd);
>>> +                   blkif->rings[0].xenblkd = NULL;
>>> +                   xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
>>> +                   return;
>>> +           }
>>> +   } else {
>>> +           for (i = 0; i < blkif->nr_rings; i++) {
>>> +                   ring = &blkif->rings[i];
>>> +                   ring->xenblkd = kthread_run(xen_blkif_schedule, ring, 
>>> "%s-%d",
>name, i);
>>> +                   if (IS_ERR(ring->xenblkd)) {
>>> +                           err = PTR_ERR(ring->xenblkd);
>>> +                           ring->xenblkd = NULL;
>>> +                           xenbus_dev_error(blkif->be->dev, err,
>>> +                                           "start %s-%d xenblkd", name, i);
>>> +                           return;
>>> +                   }
>>> +           }
>> 
>> Please collapse this together and just have one loop.
>> 
>> And just use the same name throughout ('%s-%d')
>> 
>> Also this does not take care of actually freeing the allocated
>> ring->xenblkd if one of them fails to start.
>> 
>> Hmm, looking at this function .. we seem to forget to change the
>> state if something fails.
>> 
>> As in, connect switches the state to Connected.. And if anything
>blows
>> up after the connect() we don't switch the state. We do report an
>error
>> in the XenBus, but the state is the same.
>> 
>> We should be using xenbus_dev_fatal I believe - so at least the state
>> changes to Closing (and then the frontend can switch itself to
>> Closing->Closed) - and we will call 'xen_blkif_disconnect' on Closed.
>
>> 
>
>Will update..
>
>>> +   }
>>> +}
>>> +
>>> +static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
>>> +{
>>> +   int r;
>> 
>> unsigned int i;
>> 
>>> +
>>> +   blkif->rings = kzalloc(blkif->nr_rings * sizeof(struct
>xen_blkif_ring), GFP_KERNEL);
>>> +   if (!blkif->rings)
>>> +           return -ENOMEM;
>>> +
>>> +   for (r = 0; r < blkif->nr_rings; r++) {
>>> +           struct xen_blkif_ring *ring = &blkif->rings[r];
>>> +
>>> +           spin_lock_init(&ring->blk_ring_lock);
>>> +           init_waitqueue_head(&ring->wq);
>>> +           INIT_LIST_HEAD(&ring->pending_free);
>>> +
>>> +           spin_lock_init(&ring->pending_free_lock);
>>> +           init_waitqueue_head(&ring->pending_free_wq);
>>> +           init_waitqueue_head(&ring->shutdown_wq);
>>> +           ring->blkif = blkif;
>>> +           xen_blkif_get(blkif);
>>>     }
>>> +
>>> +   return 0;
>>>  }
>>>  
>>>  static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>>>  {
>>>     struct xen_blkif *blkif;
>>> -   struct xen_blkif_ring *ring;
>>>  
>>>     BUILD_BUG_ON(MAX_INDIRECT_PAGES >
>BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
>>>  
>>> @@ -136,27 +175,17 @@ static struct xen_blkif
>*xen_blkif_alloc(domid_t domid)
>>>     blkif->domid = domid;
>>>     atomic_set(&blkif->refcnt, 1);
>>>     init_completion(&blkif->drain_complete);
>>> -   atomic_set(&blkif->drain, 0);
>>>     INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
>>>     spin_lock_init(&blkif->free_pages_lock);
>>>     INIT_LIST_HEAD(&blkif->free_pages);
>>> -   blkif->free_pages_num = 0;
>>> -   blkif->persistent_gnts.rb_node = NULL;
>>>     INIT_LIST_HEAD(&blkif->persistent_purge_list);
>>> -   atomic_set(&blkif->persistent_gnt_in_use, 0);
>>>     INIT_WORK(&blkif->persistent_purge_work,
>xen_blkbk_unmap_purged_grants);
>>>  
>>> -   ring = &blkif->ring;
>>> -   ring->blkif = blkif;
>>> -   spin_lock_init(&ring->blk_ring_lock);
>>> -   init_waitqueue_head(&ring->wq);
>>> -   ring->st_print = jiffies;
>>> -   atomic_set(&ring->inflight, 0);
>>> -
>>> -   INIT_LIST_HEAD(&ring->pending_free);
>>> -   spin_lock_init(&ring->pending_free_lock);
>>> -   init_waitqueue_head(&ring->pending_free_wq);
>>> -   init_waitqueue_head(&ring->shutdown_wq);
>>> +   blkif->nr_rings = 1;
>>> +   if (xen_blkif_alloc_rings(blkif)) {
>>> +           kmem_cache_free(xen_blkif_cachep, blkif);
>>> +           return ERR_PTR(-ENOMEM);
>>> +   }
>>>  
>>>     return blkif;
>>>  }
>>> @@ -218,50 +247,54 @@ static int xen_blkif_map(struct xen_blkif_ring
>*ring, grant_ref_t *gref,
>>>  static int xen_blkif_disconnect(struct xen_blkif *blkif)
>>>  {
>>>     struct pending_req *req, *n;
>>> -   int i = 0, j;
>>> -   struct xen_blkif_ring *ring = &blkif->ring;
>>> +   int j, r;
>>>  
>> 
>> unsigned int i;
>>> -   if (ring->xenblkd) {
>>> -           kthread_stop(ring->xenblkd);
>>> -           wake_up(&ring->shutdown_wq);
>>> -           ring->xenblkd = NULL;
>>> -   }
>>> +   for (r = 0; r < blkif->nr_rings; r++) {
>>> +           struct xen_blkif_ring *ring = &blkif->rings[r];
>>> +           int i = 0;
>> 
>> unsigned int
>>>  
>>> -   /* The above kthread_stop() guarantees that at this point we
>>> -    * don't have any discard_io or other_io requests. So, checking
>>> -    * for inflight IO is enough.
>>> -    */
>>> -   if (atomic_read(&ring->inflight) > 0)
>>> -           return -EBUSY;
>>> +           if (ring->xenblkd) {
>>> +                   kthread_stop(ring->xenblkd);
>>> +                   wake_up(&ring->shutdown_wq);
>>> +                   ring->xenblkd = NULL;
>>> +           }
>>>  
>>> -   if (ring->irq) {
>>> -           unbind_from_irqhandler(ring->irq, ring);
>>> -           ring->irq = 0;
>>> -   }
>>> +           /* The above kthread_stop() guarantees that at this point we
>>> +            * don't have any discard_io or other_io requests. So, checking
>>> +            * for inflight IO is enough.
>>> +            */
>>> +           if (atomic_read(&ring->inflight) > 0)
>>> +                   return -EBUSY;
>>>  
>>> -   if (ring->blk_rings.common.sring) {
>>> -           xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
>>> -           ring->blk_rings.common.sring = NULL;
>>> -   }
>>> +           if (ring->irq) {
>>> +                   unbind_from_irqhandler(ring->irq, ring);
>>> +                   ring->irq = 0;
>>> +           }
>>>  
>>> -   /* Remove all persistent grants and the cache of ballooned pages.
>*/
>>> -   xen_blkbk_free_caches(ring);
>>> +           if (ring->blk_rings.common.sring) {
>>> +                   xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
>>> +                   ring->blk_rings.common.sring = NULL;
>>> +           }
>>>  
>>> -   /* Check that there is no request in use */
>>> -   list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
>>> -           list_del(&req->free_list);
>>> +           /* Remove all persistent grants and the cache of ballooned 
>>> pages.
>*/
>>> +           xen_blkbk_free_caches(ring);
>>>  
>>> -           for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
>>> -                   kfree(req->segments[j]);
>>> +           /* Check that there is no request in use */
>>> +           list_for_each_entry_safe(req, n, &ring->pending_free, free_list)
>{
>>> +                   list_del(&req->free_list);
>>>  
>>> -           for (j = 0; j < MAX_INDIRECT_PAGES; j++)
>>> -                   kfree(req->indirect_pages[j]);
>>> +                   for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
>>> +                           kfree(req->segments[j]);
>>>  
>>> -           kfree(req);
>>> -           i++;
>>> -   }
>>> +                   for (j = 0; j < MAX_INDIRECT_PAGES; j++)
>>> +                           kfree(req->indirect_pages[j]);
>>>  
>>> -   WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
>>> +                   kfree(req);
>>> +                   i++;
>>> +           }
>>> +
>>> +           WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
>>> +   }
>>>     blkif->nr_ring_pages = 0;
>>>  
>>>     return 0;
>>> @@ -281,6 +314,7 @@ static void xen_blkif_free(struct xen_blkif
>*blkif)
>>>     BUG_ON(!list_empty(&blkif->free_pages));
>>>     BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
>>>  
>>> +   kfree(blkif->rings);
>>>     kmem_cache_free(xen_blkif_cachep, blkif);
>>>  }
>>>  
>>> @@ -307,15 +341,19 @@ int __init xen_blkif_interface_init(void)
>>>             struct xenbus_device *dev = to_xenbus_device(_dev);     \
>>>             struct backend_info *be = dev_get_drvdata(&dev->dev);   \
>>>             struct xen_blkif *blkif = be->blkif;                    \
>>> -           struct xen_blkif_ring *ring = &blkif->ring;             \
>>> +           int i;                                                  \
>>> +                                                                   \
>>> +           for (i = 0; i < blkif->nr_rings; i++) {                 \
>>> +                   struct xen_blkif_ring *ring = &blkif->rings[i]; \
>>>                                                                     \
>>> -           blkif->st_oo_req = ring->st_oo_req;                     \
>>> -           blkif->st_rd_req = ring->st_rd_req;                     \
>>> -           blkif->st_wr_req = ring->st_wr_req;                     \
>>> -           blkif->st_f_req = ring->st_f_req;                       \
>>> -           blkif->st_ds_req = ring->st_ds_req;                     \
>>> -           blkif->st_rd_sect = ring->st_rd_sect;                   \
>>> -           blkif->st_wr_sect = ring->st_wr_sect;                   \
>>> +                   blkif->st_oo_req += ring->st_oo_req;            \
>>> +                   blkif->st_rd_req += ring->st_rd_req;            \
>>> +                   blkif->st_wr_req += ring->st_wr_req;            \
>>> +                   blkif->st_f_req += ring->st_f_req;              \
>>> +                   blkif->st_ds_req += ring->st_ds_req;            \
>>> +                   blkif->st_rd_sect += ring->st_rd_sect;          \
>>> +                   blkif->st_wr_sect += ring->st_wr_sect;          \
>>> +           }                                                       \
>> 
>> That needs fixing. You could alter the macro to only read the values
>> from the proper member.
>
>Do you think we still need these debug values for per-ring?
>I'm thinking of just leave them for per-device(blkif), but that
>requires extra lock to protect?

It should be per device. However I think you are OK with locking - as long as 
the sysfs is torn down before we deallocate the rings.




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.