Re: [Xen-devel] [PATCH v4 03/10] xen/blkfront: pseudo support for multi hardware queues/rings
On Mon, Nov 02, 2015 at 12:21:39PM +0800, Bob Liu wrote:
> Preparatory patch for multiple hardware queues (rings). The number of
> rings is unconditionally set to 1, larger number will be enabled in next
> patch so as to make every single patch small and readable.
s/next patch/"xen/blkfront: negotiate number of queues/rings to be used
with backend"/
>
> Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
> ---
> drivers/block/xen-blkfront.c | 327 +++++++++++++++++++++++++------------------
> 1 file changed, 188 insertions(+), 139 deletions(-)
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 2a557e4..eab78e7 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -145,6 +145,7 @@ struct blkfront_info
> int vdevice;
> blkif_vdev_t handle;
> enum blkif_state connected;
> + /* Number of pages per ring buffer */
Missing full stop, aka '.'.
> unsigned int nr_ring_pages;
> struct request_queue *rq;
> struct list_head grants;
> @@ -158,7 +159,8 @@ struct blkfront_info
> unsigned int max_indirect_segments;
> int is_ready;
> struct blk_mq_tag_set tag_set;
> - struct blkfront_ring_info rinfo;
> + struct blkfront_ring_info *rinfo;
> + unsigned int nr_rings;
> };
>
> static unsigned int nr_minors;
> @@ -190,7 +192,7 @@ static DEFINE_SPINLOCK(minor_lock);
> ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
>
> static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
> -static int blkfront_gather_backend_features(struct blkfront_info *info);
> +static void blkfront_gather_backend_features(struct blkfront_info *info);
>
> static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
> {
> @@ -443,12 +445,13 @@ static int blkif_queue_request(struct request *req, struct blkfront_ring_info *r
> */
> max_grefs += INDIRECT_GREFS(req->nr_phys_segments);
>
> - /* Check if we have enough grants to allocate a requests */
> - if (info->persistent_gnts_c < max_grefs) {
> + /* Check if we have enough grants to allocate a requests, we have to
> + * reserve 'max_grefs' grants because persistent grants are shared by all
> + * rings */
Missing full stop.
> + if (0 < max_grefs) {
<blinks> ? 0!?
max_grefs will be at least BLKIF_MAX_SEGMENTS_PER_REQUEST,
so this will always be true.
In which case why not just ..
> new_persistent_gnts = 1;
> if (gnttab_alloc_grant_references(
> - max_grefs - info->persistent_gnts_c,
> - &gref_head) < 0) {
> + max_grefs, &gref_head) < 0) {
> gnttab_request_free_callback(
> &rinfo->callback,
> blkif_restart_queue_callback,
.. move this whole code down? And get rid of 'new_persistent_gnts'
since it will always be true?
But more importantly, why do we not check for 'info->persistent_gnts_c' anymore?
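
Something like this, perhaps (untested sketch; it just drops the
always-true check and the 'new_persistent_gnts' flag - the
'info->persistent_gnts_c' question above still stands):

	/* Grant references are shared by all rings, so always reserve
	 * a full set for this request. */
	if (gnttab_alloc_grant_references(max_grefs, &gref_head) < 0) {
		gnttab_request_free_callback(
			&rinfo->callback,
			blkif_restart_queue_callback,
			rinfo,
			max_grefs);
		return 1;
	}

(and then the later 'if (new_persistent_gnts)' guard around
gnttab_free_grant_references can go away too, since the references are
now always allocated.)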
> @@ -665,7 +668,7 @@ static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
> {
> struct blkfront_info *info = (struct blkfront_info *)data;
>
> - hctx->driver_data = &info->rinfo;
> + hctx->driver_data = &info->rinfo[index];
Please add an ASSERT here to check to make sure
that index < info->nr_rings.
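
Something along these lines (hypothetical; BUG_ON because that is what
blkfront already uses for such invariants, but a WARN_ON plus an error
return would also do):

	static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
				    unsigned int index)
	{
		struct blkfront_info *info = (struct blkfront_info *)data;

		/* 'index' must name one of the rings we allocated. */
		BUG_ON(info->nr_rings <= index);
		hctx->driver_data = &info->rinfo[index];
		return 0;
	}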
> return 0;
> }
>
> @@ -924,8 +927,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
>
> static void xlvbd_release_gendisk(struct blkfront_info *info)
> {
> - unsigned int minor, nr_minors;
> - struct blkfront_ring_info *rinfo = &info->rinfo;
> + unsigned int minor, nr_minors, i;
>
> if (info->rq == NULL)
> return;
> @@ -933,11 +935,15 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
> /* No more blkif_request(). */
> blk_mq_stop_hw_queues(info->rq);
>
> - /* No more gnttab callback work. */
> - gnttab_cancel_free_callback(&rinfo->callback);
> + for (i = 0; i < info->nr_rings; i++) {
> + struct blkfront_ring_info *rinfo = &info->rinfo[i];
>
> - /* Flush gnttab callback work. Must be done with no locks held. */
> - flush_work(&rinfo->work);
> + /* No more gnttab callback work. */
> + gnttab_cancel_free_callback(&rinfo->callback);
> +
> + /* Flush gnttab callback work. Must be done with no locks held. */
> + flush_work(&rinfo->work);
> + }
>
> del_gendisk(info->gd);
>
> @@ -970,37 +976,11 @@ static void blkif_restart_queue(struct work_struct *work)
> spin_unlock_irq(&rinfo->dev_info->io_lock);
> }
>
> -static void blkif_free(struct blkfront_info *info, int suspend)
> +static void blkif_free_ring(struct blkfront_ring_info *rinfo)
> {
> struct grant *persistent_gnt;
> - struct grant *n;
> + struct blkfront_info *info = rinfo->dev_info;
> int i, j, segs;
> - struct blkfront_ring_info *rinfo = &info->rinfo;
> -
> - /* Prevent new requests being issued until we fix things up. */
> - spin_lock_irq(&info->io_lock);
> - info->connected = suspend ?
> - BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
> - /* No more blkif_request(). */
> - if (info->rq)
> - blk_mq_stop_hw_queues(info->rq);
> -
> - /* Remove all persistent grants */
> - if (!list_empty(&info->grants)) {
> - list_for_each_entry_safe(persistent_gnt, n,
> - &info->grants, node) {
> - list_del(&persistent_gnt->node);
> - if (persistent_gnt->gref != GRANT_INVALID_REF) {
> - gnttab_end_foreign_access(persistent_gnt->gref,
> - 0, 0UL);
> - info->persistent_gnts_c--;
> - }
> - if (info->feature_persistent)
> - __free_page(pfn_to_page(persistent_gnt->pfn));
> - kfree(persistent_gnt);
> - }
> - }
> - BUG_ON(info->persistent_gnts_c != 0);
>
> /*
> * Remove indirect pages, this only happens when using indirect
> @@ -1060,7 +1040,6 @@ free_shadow:
>
> /* No more gnttab callback work. */
> gnttab_cancel_free_callback(&rinfo->callback);
> - spin_unlock_irq(&info->io_lock);
>
> /* Flush gnttab callback work. Must be done with no locks held. */
> flush_work(&rinfo->work);
> @@ -1078,7 +1057,41 @@ free_shadow:
> if (rinfo->irq)
> unbind_from_irqhandler(rinfo->irq, rinfo);
> rinfo->evtchn = rinfo->irq = 0;
> +}
>
> +static void blkif_free(struct blkfront_info *info, int suspend)
> +{
> + struct grant *persistent_gnt, *n;
> + int i;
> +
> + /* Prevent new requests being issued until we fix things up. */
> + spin_lock_irq(&info->io_lock);
> + info->connected = suspend ?
> + BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
> + /* No more blkif_request(). */
> + if (info->rq)
> + blk_mq_stop_hw_queues(info->rq);
> +
> + /* Remove all persistent grants */
> + if (!list_empty(&info->grants)) {
> + list_for_each_entry_safe(persistent_gnt, n,
> + &info->grants, node) {
> + list_del(&persistent_gnt->node);
> + if (persistent_gnt->gref != GRANT_INVALID_REF) {
> + gnttab_end_foreign_access(persistent_gnt->gref,
> + 0, 0UL);
> + info->persistent_gnts_c--;
> + }
> + if (info->feature_persistent)
> + __free_page(pfn_to_page(persistent_gnt->pfn));
> + kfree(persistent_gnt);
> + }
> + }
> + BUG_ON(info->persistent_gnts_c != 0);
> +
> + for (i = 0; i < info->nr_rings; i++)
> + blkif_free_ring(&info->rinfo[i]);
Shouldn't you also kfree info->rinfo here, set it to NULL, and reset
nr_rings to zero?
Otherwise I can see:
blkback_changed
 \- talk_to_blkback (and failing)
 |   \-> blkif_free
 |        \-> blkif_free_ring
 |
 \-> kfree(info)
and a memory leak of info->rinfo[0..nr_rings-1].
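
I.e. something like this at the end of blkif_free() (untested):

	for (i = 0; i < info->nr_rings; i++)
		blkif_free_ring(&info->rinfo[i]);

	/* The ring array itself is gone now; make that visible. */
	kfree(info->rinfo);
	info->rinfo = NULL;
	info->nr_rings = 0;
	spin_unlock_irq(&info->io_lock);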
> + spin_unlock_irq(&info->io_lock);
> }
>
> static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info *rinfo,
> @@ -1334,7 +1347,7 @@ static int talk_to_blkback(struct xenbus_device *dev,
> int err, i;
> unsigned int max_page_order = 0;
> unsigned int ring_page_order = 0;
> - struct blkfront_ring_info *rinfo = &info->rinfo;
> + struct blkfront_ring_info *rinfo;
>
> err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
> "max-ring-page-order", "%u", &max_page_order);
> @@ -1345,10 +1358,13 @@ static int talk_to_blkback(struct xenbus_device *dev,
> info->nr_ring_pages = 1 << ring_page_order;
> }
>
> - /* Create shared ring, alloc event channel. */
> - err = setup_blkring(dev, rinfo);
> - if (err)
> - goto out;
> + for (i = 0; i < info->nr_rings; i++) {
> + rinfo = &info->rinfo[i];
> + /* Create shared ring, alloc event channel. */
> + err = setup_blkring(dev, rinfo);
> + if (err)
> + goto destroy_blkring;
> + }
>
> again:
> err = xenbus_transaction_start(&xbt);
> @@ -1357,37 +1373,43 @@ again:
> goto destroy_blkring;
> }
>
> - if (info->nr_ring_pages == 1) {
> - err = xenbus_printf(xbt, dev->nodename,
> - "ring-ref", "%u", rinfo->ring_ref[0]);
> - if (err) {
> - message = "writing ring-ref";
> - goto abort_transaction;
> - }
> - } else {
> - err = xenbus_printf(xbt, dev->nodename,
> - "ring-page-order", "%u", ring_page_order);
> - if (err) {
> - message = "writing ring-page-order";
> - goto abort_transaction;
> - }
> -
> - for (i = 0; i < info->nr_ring_pages; i++) {
> - char ring_ref_name[RINGREF_NAME_LEN];
> -
> - snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
> - err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
> - "%u", rinfo->ring_ref[i]);
> + if (info->nr_rings == 1) {
> + rinfo = &info->rinfo[0];
> + if (info->nr_ring_pages == 1) {
> + err = xenbus_printf(xbt, dev->nodename,
> + "ring-ref", "%u", rinfo->ring_ref[0]);
> if (err) {
> message = "writing ring-ref";
> goto abort_transaction;
> }
> + } else {
> + err = xenbus_printf(xbt, dev->nodename,
> + "ring-page-order", "%u", ring_page_order);
> + if (err) {
> + message = "writing ring-page-order";
> + goto abort_transaction;
> + }
> +
> + for (i = 0; i < info->nr_ring_pages; i++) {
> + char ring_ref_name[RINGREF_NAME_LEN];
> +
> + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
> + err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
> + "%u", rinfo->ring_ref[i]);
> + if (err) {
> + message = "writing ring-ref";
> + goto abort_transaction;
> + }
> + }
> }
> - }
> - err = xenbus_printf(xbt, dev->nodename,
> - "event-channel", "%u", rinfo->evtchn);
> - if (err) {
> - message = "writing event-channel";
> + err = xenbus_printf(xbt, dev->nodename,
> + "event-channel", "%u", rinfo->evtchn);
> + if (err) {
> + message = "writing event-channel";
> + goto abort_transaction;
> + }
> + } else {
> + /* Not supported at this stage */
Missing full stop.
> goto abort_transaction;
> }
> err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
> @@ -1410,9 +1432,14 @@ again:
> goto destroy_blkring;
> }
>
> - for (i = 0; i < BLK_RING_SIZE(info); i++)
> - rinfo->shadow[i].req.u.rw.id = i+1;
> - rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
> + for (i = 0; i < info->nr_rings; i++) {
> + int j;
> + rinfo = &info->rinfo[i];
> +
> + for (j = 0; j < BLK_RING_SIZE(info); j++)
> + rinfo->shadow[j].req.u.rw.id = j + 1;
> + rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
> + }
> xenbus_switch_state(dev, XenbusStateInitialised);
>
> return 0;
> @@ -1423,7 +1450,7 @@ again:
> xenbus_dev_fatal(dev, err, "%s", message);
> destroy_blkring:
> blkif_free(info, 0);
> - out:
> +
> return err;
> }
>
> @@ -1436,9 +1463,8 @@ again:
> static int blkfront_probe(struct xenbus_device *dev,
> const struct xenbus_device_id *id)
> {
> - int err, vdevice;
> + int err, vdevice, r_index;
> struct blkfront_info *info;
> - struct blkfront_ring_info *rinfo;
>
> /* FIXME: Use dynamic device id if this is not set. */
> err = xenbus_scanf(XBT_NIL, dev->nodename,
> @@ -1488,10 +1514,22 @@ static int blkfront_probe(struct xenbus_device *dev,
> return -ENOMEM;
> }
>
> - rinfo = &info->rinfo;
> - INIT_LIST_HEAD(&rinfo->indirect_pages);
> - rinfo->dev_info = info;
> - INIT_WORK(&rinfo->work, blkif_restart_queue);
> + info->nr_rings = 1;
> + info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL);
> + if (!info->rinfo) {
> + xenbus_dev_fatal(dev, -ENOMEM, "allocating ring_info structure");
> + kfree(info);
> + return -ENOMEM;
> + }
> +
> + for (r_index = 0; r_index < info->nr_rings; r_index++) {
> + struct blkfront_ring_info *rinfo;
> +
> + rinfo = &info->rinfo[r_index];
> + INIT_LIST_HEAD(&rinfo->indirect_pages);
> + rinfo->dev_info = info;
> + INIT_WORK(&rinfo->work, blkif_restart_queue);
> + }
>
> mutex_init(&info->mutex);
> spin_lock_init(&info->io_lock);
> @@ -1523,7 +1561,7 @@ static void split_bio_end(struct bio *bio)
>
> static int blkif_recover(struct blkfront_info *info)
> {
> - int i;
> + int i, r_index;
> struct request *req, *n;
> struct blk_shadow *copy;
> int rc;
> @@ -1533,57 +1571,62 @@ static int blkif_recover(struct blkfront_info *info)
> int pending, size;
> struct split_bio *split_bio;
> struct list_head requests;
> - struct blkfront_ring_info *rinfo = &info->rinfo;
> -
> - /* Stage 1: Make a safe copy of the shadow state. */
> - copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
> - GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
> - if (!copy)
> - return -ENOMEM;
> -
> - /* Stage 2: Set up free list. */
> - memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
> - for (i = 0; i < BLK_RING_SIZE(info); i++)
> - rinfo->shadow[i].req.u.rw.id = i+1;
> - rinfo->shadow_free = rinfo->ring.req_prod_pvt;
> - rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
> -
> - rc = blkfront_gather_backend_features(info);
> - if (rc) {
> - kfree(copy);
> - return rc;
> - }
>
> + blkfront_gather_backend_features(info);
> segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
> blk_queue_max_segments(info->rq, segs);
> bio_list_init(&bio_list);
> INIT_LIST_HEAD(&requests);
> - for (i = 0; i < BLK_RING_SIZE(info); i++) {
> - /* Not in use? */
> - if (!copy[i].request)
> - continue;
>
> - /*
> - * Get the bios in the request so we can re-queue them.
> - */
> - if (copy[i].request->cmd_flags &
> - (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
> + for (r_index = 0; r_index < info->nr_rings; r_index++) {
This could have been moved to a separate function; then you would only
need to call that function within the loop.
It is up to you however.
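
Roughly (untested sketch; 'blkif_recover_ring' is a made-up name):

	static int blkif_recover_ring(struct blkfront_ring_info *rinfo,
				      struct bio_list *bio_list,
				      struct list_head *requests)
	{
		/* Stage 1, stage 2, blkfront_setup_indirect() and the bio
		 * re-queueing for a single ring - i.e. exactly the loop
		 * body below, with 'copy' kfree'd before returning. */
	}

and then blkif_recover() would just do:

	for (r_index = 0; r_index < info->nr_rings; r_index++) {
		rc = blkif_recover_ring(&info->rinfo[r_index],
					&bio_list, &requests);
		if (rc)
			return rc;
	}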
> + struct blkfront_ring_info *rinfo;
> +
> + rinfo = &info->rinfo[r_index];
> + /* Stage 1: Make a safe copy of the shadow state. */
> + copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
> + GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
> + if (!copy)
> + return -ENOMEM;
> +
> + /* Stage 2: Set up free list. */
> + memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
> + for (i = 0; i < BLK_RING_SIZE(info); i++)
> + rinfo->shadow[i].req.u.rw.id = i+1;
> + rinfo->shadow_free = rinfo->ring.req_prod_pvt;
> + rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
> +
> + rc = blkfront_setup_indirect(rinfo);
> + if (rc) {
> + kfree(copy);
> + return rc;
> + }
> +
> + for (i = 0; i < BLK_RING_SIZE(info); i++) {
> + /* Not in use? */
> + if (!copy[i].request)
> + continue;
> +
> /*
> - * Flush operations don't contain bios, so
> - * we need to requeue the whole request
> + * Get the bios in the request so we can re-queue them.
> */
> - list_add(&copy[i].request->queuelist, &requests);
> - continue;
> + if (copy[i].request->cmd_flags &
> + (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
> + /*
> + * Flush operations don't contain bios, so
> + * we need to requeue the whole request
> + */
> + list_add(&copy[i].request->queuelist, &requests);
> + continue;
> + }
> + merge_bio.head = copy[i].request->bio;
> + merge_bio.tail = copy[i].request->biotail;
> + bio_list_merge(&bio_list, &merge_bio);
> + copy[i].request->bio = NULL;
> + blk_end_request_all(copy[i].request, 0);
> }
> - merge_bio.head = copy[i].request->bio;
> - merge_bio.tail = copy[i].request->biotail;
> - bio_list_merge(&bio_list, &merge_bio);
> - copy[i].request->bio = NULL;
> - blk_end_request_all(copy[i].request, 0);
> - }
> -
> - kfree(copy);
>
> + kfree(copy);
> + }
> xenbus_switch_state(info->xbdev, XenbusStateConnected);
>
> spin_lock_irq(&info->io_lock);
> @@ -1591,8 +1634,13 @@ static int blkif_recover(struct blkfront_info *info)
> /* Now safe for us to use the shared ring */
> info->connected = BLKIF_STATE_CONNECTED;
>
> - /* Kick any other new requests queued since we resumed */
> - kick_pending_request_queues(rinfo);
> + for (r_index = 0; r_index < info->nr_rings; r_index++) {
> + struct blkfront_ring_info *rinfo;
> +
> + rinfo = &info->rinfo[r_index];
> + /* Kick any other new requests queued since we resumed */
> + kick_pending_request_queues(rinfo);
> + }
>
> list_for_each_entry_safe(req, n, &requests, queuelist) {
> /* Requeue pending requests (flush or discard) */
> @@ -1801,7 +1849,7 @@ out_of_memory:
> /*
> * Gather all backend feature-*
> */
> -static int blkfront_gather_backend_features(struct blkfront_info *info)
> +static void blkfront_gather_backend_features(struct blkfront_info *info)
> {
> int err;
> int barrier, flush, discard, persistent;
> @@ -1856,8 +1904,6 @@ static int blkfront_gather_backend_features(struct blkfront_info *info)
> else
> info->max_indirect_segments = min(indirect_segments,
> xen_blkif_max_segments);
> -
> - return blkfront_setup_indirect(&info->rinfo);
Aaah nice. That was always bothering me in the code that is supposed
to just gather data.
> }
>
> /*
> @@ -1870,8 +1916,7 @@ static void blkfront_connect(struct blkfront_info *info)
> unsigned long sector_size;
> unsigned int physical_sector_size;
> unsigned int binfo;
> - int err;
> - struct blkfront_ring_info *rinfo = &info->rinfo;
> + int err, i;
>
> switch (info->connected) {
> case BLKIF_STATE_CONNECTED:
> @@ -1928,11 +1973,14 @@ static void blkfront_connect(struct blkfront_info *info)
> if (err != 1)
> physical_sector_size = sector_size;
>
> - err = blkfront_gather_backend_features(info);
> - if (err) {
> - xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
> - info->xbdev->otherend);
> - return;
> + blkfront_gather_backend_features(info);
> + for (i = 0; i < info->nr_rings; i++) {
> + err = blkfront_setup_indirect(&info->rinfo[i]);
> + if (err) {
> + xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
> + info->xbdev->otherend);
> + blkif_free(info, 0);
Missing 'break;'.
And you may want to set info->nr_rings = i temporarily so that blkif_free
does not needlessly iterate over all the rings (even those that haven't
been initialized)... But it does not matter that much, as blkif_free_ring
should be able to handle an uninitialized ring.
And naturally you should also kfree info->rinfo.. but if you
add that in blkif_free then that will be taken care of.
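
I.e. (untested, and assuming blkif_free also ends up kfree'ing
info->rinfo as suggested earlier):

	for (i = 0; i < info->nr_rings; i++) {
		err = blkfront_setup_indirect(&info->rinfo[i]);
		if (err) {
			xenbus_dev_fatal(info->xbdev, err,
					 "setup_indirect at %s",
					 info->xbdev->otherend);
			blkif_free(info, 0);
			break;
		}
	}

(or 'return' instead of 'break' - carrying on to xlvbd_alloc_gendisk
after blkif_free() probably makes no sense either.)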
> + }
> }
>
> err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
> @@ -1948,7 +1996,8 @@ static void blkfront_connect(struct blkfront_info *info)
> /* Kick pending requests. */
> spin_lock_irq(&info->io_lock);
> info->connected = BLKIF_STATE_CONNECTED;
> - kick_pending_request_queues(rinfo);
> + for (i = 0; i < info->nr_rings; i++)
> + kick_pending_request_queues(&info->rinfo[i]);
> spin_unlock_irq(&info->io_lock);
>
> add_disk(info->gd);
> @@ -2111,7 +2160,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode)
> dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
> xlvbd_release_gendisk(info);
> xenbus_frontend_closed(info->xbdev);
> - }
> + }
??? This looks like a spurious whitespace-only change.
>
> mutex_unlock(&info->mutex);
>
> --
> 1.8.3.1
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel