Re: [Xen-devel] [PATCH v4 03/10] xen/blkfront: pseudo support for multi hardware queues/rings
On Mon, Nov 02, 2015 at 12:21:39PM +0800, Bob Liu wrote:
> Preparatory patch for multiple hardware queues (rings). The number of
> rings is unconditionally set to 1, larger number will be enabled in next
> patch so as to make every single patch small and readable.
s/next patch/"xen/blkfront: negotiate number of queues/rings to be used
with backend"/
>
> Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
> ---
> drivers/block/xen-blkfront.c | 327 +++++++++++++++++++++++++------------------
> 1 file changed, 188 insertions(+), 139 deletions(-)
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 2a557e4..eab78e7 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -145,6 +145,7 @@ struct blkfront_info
> int vdevice;
> blkif_vdev_t handle;
> enum blkif_state connected;
> + /* Number of pages per ring buffer */
Missing full stop, aka '.'.
> unsigned int nr_ring_pages;
> struct request_queue *rq;
> struct list_head grants;
> @@ -158,7 +159,8 @@ struct blkfront_info
> unsigned int max_indirect_segments;
> int is_ready;
> struct blk_mq_tag_set tag_set;
> - struct blkfront_ring_info rinfo;
> + struct blkfront_ring_info *rinfo;
> + unsigned int nr_rings;
> };
>
> static unsigned int nr_minors;
> @@ -190,7 +192,7 @@ static DEFINE_SPINLOCK(minor_lock);
> ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
>
> static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
> -static int blkfront_gather_backend_features(struct blkfront_info *info);
> +static void blkfront_gather_backend_features(struct blkfront_info *info);
>
> static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
> {
> @@ -443,12 +445,13 @@ static int blkif_queue_request(struct request *req, struct blkfront_ring_info *r
> */
> max_grefs += INDIRECT_GREFS(req->nr_phys_segments);
>
> - /* Check if we have enough grants to allocate a requests */
> - if (info->persistent_gnts_c < max_grefs) {
> + /* Check if we have enough grants to allocate a requests, we have to
> + * reserve 'max_grefs' grants because persistent grants are shared by all
> + * rings */
Missing full stop.
> + if (0 < max_grefs) {
<blinks> ? 0!?
max_grefs will be at least BLKIF_MAX_SEGMENTS_PER_REQUEST,
so this will always be true.
In which case why not just ..
> new_persistent_gnts = 1;
> if (gnttab_alloc_grant_references(
> - max_grefs - info->persistent_gnts_c,
> - &gref_head) < 0) {
> + max_grefs, &gref_head) < 0) {
> gnttab_request_free_callback(
> &rinfo->callback,
> blkif_restart_queue_callback,
.. move this whole code down? And get rid of 'new_persistent_gnts'
since it will always be true?
But more importantly, why do we not check for 'info->persistent_gnts_c' anymore?
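
Something like this, perhaps (untested sketch; it just drops the
always-true check and the 'new_persistent_gnts' flag - the
'info->persistent_gnts_c' question above still stands):

	/* Grant references are shared by all rings, so always reserve
	 * a full set for this request. */
	if (gnttab_alloc_grant_references(max_grefs, &gref_head) < 0) {
		gnttab_request_free_callback(
			&rinfo->callback,
			blkif_restart_queue_callback,
			rinfo,
			max_grefs);
		return 1;
	}

(and then the later 'if (new_persistent_gnts)' guard around
gnttab_free_grant_references can go away too, since the references are
now always allocated.)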
> @@ -665,7 +668,7 @@ static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
> {
> struct blkfront_info *info = (struct blkfront_info *)data;
>
> - hctx->driver_data = &info->rinfo;
> + hctx->driver_data = &info->rinfo[index];
Please add an ASSERT here to check to make sure
that index < info->nr_rings.
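
Something along these lines (hypothetical; BUG_ON because that is what
blkfront already uses for such invariants, but a WARN_ON plus an error
return would also do):

	static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
				    unsigned int index)
	{
		struct blkfront_info *info = (struct blkfront_info *)data;

		/* 'index' must name one of the rings we allocated. */
		BUG_ON(info->nr_rings <= index);
		hctx->driver_data = &info->rinfo[index];
		return 0;
	}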
> return 0;
> }
>
> @@ -924,8 +927,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
>
> static void xlvbd_release_gendisk(struct blkfront_info *info)
> {
> - unsigned int minor, nr_minors;
> - struct blkfront_ring_info *rinfo = &info->rinfo;
> + unsigned int minor, nr_minors, i;
>
> if (info->rq == NULL)
> return;
> @@ -933,11 +935,15 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
> /* No more blkif_request(). */
> blk_mq_stop_hw_queues(info->rq);
>
> - /* No more gnttab callback work. */
> - gnttab_cancel_free_callback(&rinfo->callback);
> + for (i = 0; i < info->nr_rings; i++) {
> + struct blkfront_ring_info *rinfo = &info->rinfo[i];
>
> - /* Flush gnttab callback work. Must be done with no locks held. */
> - flush_work(&rinfo->work);
> + /* No more gnttab callback work. */
> + gnttab_cancel_free_callback(&rinfo->callback);
> +
> + /* Flush gnttab callback work. Must be done with no locks held. */
> + flush_work(&rinfo->work);
> + }
>
> del_gendisk(info->gd);
>
> @@ -970,37 +976,11 @@ static void blkif_restart_queue(struct work_struct *work)
> spin_unlock_irq(&rinfo->dev_info->io_lock);
> }
>
> -static void blkif_free(struct blkfront_info *info, int suspend)
> +static void blkif_free_ring(struct blkfront_ring_info *rinfo)
> {
> struct grant *persistent_gnt;
> - struct grant *n;
> + struct blkfront_info *info = rinfo->dev_info;
> int i, j, segs;
> - struct blkfront_ring_info *rinfo = &info->rinfo;
> -
> - /* Prevent new requests being issued until we fix things up. */
> - spin_lock_irq(&info->io_lock);
> - info->connected = suspend ?
> - BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
> - /* No more blkif_request(). */
> - if (info->rq)
> - blk_mq_stop_hw_queues(info->rq);
> -
> - /* Remove all persistent grants */
> - if (!list_empty(&info->grants)) {
> - list_for_each_entry_safe(persistent_gnt, n,
> - &info->grants, node) {
> - list_del(&persistent_gnt->node);
> - if (persistent_gnt->gref != GRANT_INVALID_REF) {
> - gnttab_end_foreign_access(persistent_gnt->gref,
> - 0, 0UL);
> - info->persistent_gnts_c--;
> - }
> - if (info->feature_persistent)
> - __free_page(pfn_to_page(persistent_gnt->pfn));
> - kfree(persistent_gnt);
> - }
> - }
> - BUG_ON(info->persistent_gnts_c != 0);
>
> /*
> * Remove indirect pages, this only happens when using indirect
> @@ -1060,7 +1040,6 @@ free_shadow:
>
> /* No more gnttab callback work. */
> gnttab_cancel_free_callback(&rinfo->callback);
> - spin_unlock_irq(&info->io_lock);
>
> /* Flush gnttab callback work. Must be done with no locks held. */
> flush_work(&rinfo->work);
> @@ -1078,7 +1057,41 @@ free_shadow:
> if (rinfo->irq)
> unbind_from_irqhandler(rinfo->irq, rinfo);
> rinfo->evtchn = rinfo->irq = 0;
> +}
>
> +static void blkif_free(struct blkfront_info *info, int suspend)
> +{
> + struct grant *persistent_gnt, *n;
> + int i;
> +
> + /* Prevent new requests being issued until we fix things up. */
> + spin_lock_irq(&info->io_lock);
> + info->connected = suspend ?
> + BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
> + /* No more blkif_request(). */
> + if (info->rq)
> + blk_mq_stop_hw_queues(info->rq);
> +
> + /* Remove all persistent grants */
> + if (!list_empty(&info->grants)) {
> + list_for_each_entry_safe(persistent_gnt, n,
> + &info->grants, node) {
> + list_del(&persistent_gnt->node);
> + if (persistent_gnt->gref != GRANT_INVALID_REF) {
> + gnttab_end_foreign_access(persistent_gnt->gref,
> + 0, 0UL);
> + info->persistent_gnts_c--;
> + }
> + if (info->feature_persistent)
> + __free_page(pfn_to_page(persistent_gnt->pfn));
> + kfree(persistent_gnt);
> + }
> + }
> + BUG_ON(info->persistent_gnts_c != 0);
> +
> + for (i = 0; i < info->nr_rings; i++)
> + blkif_free_ring(&info->rinfo[i]);
Shouldn't you also kfree info->rinfo here, set it to NULL, and reset
nr_rings to zero?
Otherwise I can see:
blkback_changed
 \- talk_to_blkback (and failing)
 |   \-> blkif_free
 |        \-> blkif_free_ring
 |
 \-> kfree(info)
and a memory leak of info->rinfo[0..nr_rings-1].
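
I.e. something like this at the end of blkif_free() (untested):

	for (i = 0; i < info->nr_rings; i++)
		blkif_free_ring(&info->rinfo[i]);

	/* The ring array itself is gone now; make that visible. */
	kfree(info->rinfo);
	info->rinfo = NULL;
	info->nr_rings = 0;
	spin_unlock_irq(&info->io_lock);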
> + spin_unlock_irq(&info->io_lock);
> }
>
> static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info *rinfo,
> @@ -1334,7 +1347,7 @@ static int talk_to_blkback(struct xenbus_device *dev,
> int err, i;
> unsigned int max_page_order = 0;
> unsigned int ring_page_order = 0;
> - struct blkfront_ring_info *rinfo = &info->rinfo;
> + struct blkfront_ring_info *rinfo;
>
> err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
> "max-ring-page-order", "%u", &max_page_order);
> @@ -1345,10 +1358,13 @@ static int talk_to_blkback(struct xenbus_device *dev,
> info->nr_ring_pages = 1 << ring_page_order;
> }
>
> - /* Create shared ring, alloc event channel. */
> - err = setup_blkring(dev, rinfo);
> - if (err)
> - goto out;
> + for (i = 0; i < info->nr_rings; i++) {
> + rinfo = &info->rinfo[i];
> + /* Create shared ring, alloc event channel. */
> + err = setup_blkring(dev, rinfo);
> + if (err)
> + goto destroy_blkring;
> + }
>
> again:
> err = xenbus_transaction_start(&xbt);
> @@ -1357,37 +1373,43 @@ again:
> goto destroy_blkring;
> }
>
> - if (info->nr_ring_pages == 1) {
> - err = xenbus_printf(xbt, dev->nodename,
> - "ring-ref", "%u", rinfo->ring_ref[0]);
> - if (err) {
> - message = "writing ring-ref";
> - goto abort_transaction;
> - }
> - } else {
> - err = xenbus_printf(xbt, dev->nodename,
> - "ring-page-order", "%u", ring_page_order);
> - if (err) {
> - message = "writing ring-page-order";
> - goto abort_transaction;
> - }
> -
> - for (i = 0; i < info->nr_ring_pages; i++) {
> - char ring_ref_name[RINGREF_NAME_LEN];
> -
> - snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
> - err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
> - "%u", rinfo->ring_ref[i]);
> + if (info->nr_rings == 1) {
> + rinfo = &info->rinfo[0];
> + if (info->nr_ring_pages == 1) {
> + err = xenbus_printf(xbt, dev->nodename,
> + "ring-ref", "%u", rinfo->ring_ref[0]);
> if (err) {
> message = "writing ring-ref";
> goto abort_transaction;
> }
> + } else {
> + err = xenbus_printf(xbt, dev->nodename,
> + "ring-page-order", "%u", ring_page_order);
> + if (err) {
> + message = "writing ring-page-order";
> + goto abort_transaction;
> + }
> +
> + for (i = 0; i < info->nr_ring_pages; i++) {
> + char ring_ref_name[RINGREF_NAME_LEN];
> +
> + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
> + err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
> + "%u", rinfo->ring_ref[i]);
> + if (err) {
> + message = "writing ring-ref";
> + goto abort_transaction;
> + }
> + }
> }
> - }
> - err = xenbus_printf(xbt, dev->nodename,
> - "event-channel", "%u", rinfo->evtchn);
> - if (err) {
> - message = "writing event-channel";
> + err = xenbus_printf(xbt, dev->nodename,
> + "event-channel", "%u", rinfo->evtchn);
> + if (err) {
> + message = "writing event-channel";
> + goto abort_transaction;
> + }
> + } else {
> + /* Not supported at this stage */
Missing full stop.
> goto abort_transaction;
> }
> err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
> @@ -1410,9 +1432,14 @@ again:
> goto destroy_blkring;
> }
>
> - for (i = 0; i < BLK_RING_SIZE(info); i++)
> - rinfo->shadow[i].req.u.rw.id = i+1;
> - rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
> + for (i = 0; i < info->nr_rings; i++) {
> + int j;
> + rinfo = &info->rinfo[i];
> +
> + for (j = 0; j < BLK_RING_SIZE(info); j++)
> + rinfo->shadow[j].req.u.rw.id = j + 1;
> + rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
> + }
> xenbus_switch_state(dev, XenbusStateInitialised);
>
> return 0;
> @@ -1423,7 +1450,7 @@ again:
> xenbus_dev_fatal(dev, err, "%s", message);
> destroy_blkring:
> blkif_free(info, 0);
> - out:
> +
> return err;
> }
>
> @@ -1436,9 +1463,8 @@ again:
> static int blkfront_probe(struct xenbus_device *dev,
> const struct xenbus_device_id *id)
> {
> - int err, vdevice;
> + int err, vdevice, r_index;
> struct blkfront_info *info;
> - struct blkfront_ring_info *rinfo;
>
> /* FIXME: Use dynamic device id if this is not set. */
> err = xenbus_scanf(XBT_NIL, dev->nodename,
> @@ -1488,10 +1514,22 @@ static int blkfront_probe(struct xenbus_device *dev,
> return -ENOMEM;
> }
>
> - rinfo = &info->rinfo;
> - INIT_LIST_HEAD(&rinfo->indirect_pages);
> - rinfo->dev_info = info;
> - INIT_WORK(&rinfo->work, blkif_restart_queue);
> + info->nr_rings = 1;
> + info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL);
> + if (!info->rinfo) {
> + xenbus_dev_fatal(dev, -ENOMEM, "allocating ring_info structure");
> + kfree(info);
> + return -ENOMEM;
> + }
> +
> + for (r_index = 0; r_index < info->nr_rings; r_index++) {
> + struct blkfront_ring_info *rinfo;
> +
> + rinfo = &info->rinfo[r_index];
> + INIT_LIST_HEAD(&rinfo->indirect_pages);
> + rinfo->dev_info = info;
> + INIT_WORK(&rinfo->work, blkif_restart_queue);
> + }
>
> mutex_init(&info->mutex);
> spin_lock_init(&info->io_lock);
> @@ -1523,7 +1561,7 @@ static void split_bio_end(struct bio *bio)
>
> static int blkif_recover(struct blkfront_info *info)
> {
> - int i;
> + int i, r_index;
> struct request *req, *n;
> struct blk_shadow *copy;
> int rc;
> @@ -1533,57 +1571,62 @@ static int blkif_recover(struct blkfront_info *info)
> int pending, size;
> struct split_bio *split_bio;
> struct list_head requests;
> - struct blkfront_ring_info *rinfo = &info->rinfo;
> -
> - /* Stage 1: Make a safe copy of the shadow state. */
> - copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
> - GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
> - if (!copy)
> - return -ENOMEM;
> -
> - /* Stage 2: Set up free list. */
> - memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
> - for (i = 0; i < BLK_RING_SIZE(info); i++)
> - rinfo->shadow[i].req.u.rw.id = i+1;
> - rinfo->shadow_free = rinfo->ring.req_prod_pvt;
> - rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
> -
> - rc = blkfront_gather_backend_features(info);
> - if (rc) {
> - kfree(copy);
> - return rc;
> - }
>
> + blkfront_gather_backend_features(info);
> segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
> blk_queue_max_segments(info->rq, segs);
> bio_list_init(&bio_list);
> INIT_LIST_HEAD(&requests);
> - for (i = 0; i < BLK_RING_SIZE(info); i++) {
> - /* Not in use? */
> - if (!copy[i].request)
> - continue;
>
> - /*
> - * Get the bios in the request so we can re-queue them.
> - */
> - if (copy[i].request->cmd_flags &
> - (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
> + for (r_index = 0; r_index < info->nr_rings; r_index++) {
This could have been moved to a separate function; then you would only
need to call that function within the loop.
It is up to you however.
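
Roughly (untested sketch; 'blkif_recover_ring' is a made-up name):

	static int blkif_recover_ring(struct blkfront_ring_info *rinfo,
				      struct bio_list *bio_list,
				      struct list_head *requests)
	{
		/* Stage 1, stage 2, blkfront_setup_indirect() and the bio
		 * re-queueing for a single ring - i.e. exactly the loop
		 * body below, with 'copy' kfree'd before returning. */
	}

and then blkif_recover() would just do:

	for (r_index = 0; r_index < info->nr_rings; r_index++) {
		rc = blkif_recover_ring(&info->rinfo[r_index],
					&bio_list, &requests);
		if (rc)
			return rc;
	}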
> + struct blkfront_ring_info *rinfo;
> +
> + rinfo = &info->rinfo[r_index];
> + /* Stage 1: Make a safe copy of the shadow state. */
> + copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
> + GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
> + if (!copy)
> + return -ENOMEM;
> +
> + /* Stage 2: Set up free list. */
> + memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
> + for (i = 0; i < BLK_RING_SIZE(info); i++)
> + rinfo->shadow[i].req.u.rw.id = i+1;
> + rinfo->shadow_free = rinfo->ring.req_prod_pvt;
> + rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
> +
> + rc = blkfront_setup_indirect(rinfo);
> + if (rc) {
> + kfree(copy);
> + return rc;
> + }
> +
> + for (i = 0; i < BLK_RING_SIZE(info); i++) {
> + /* Not in use? */
> + if (!copy[i].request)
> + continue;
> +
> /*
> - * Flush operations don't contain bios, so
> - * we need to requeue the whole request
> + * Get the bios in the request so we can re-queue them.
> */
> - list_add(&copy[i].request->queuelist, &requests);
> - continue;
> + if (copy[i].request->cmd_flags &
> + (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
> + /*
> + * Flush operations don't contain bios, so
> + * we need to requeue the whole request
> + */
> + list_add(&copy[i].request->queuelist, &requests);
> + continue;
> + }
> + merge_bio.head = copy[i].request->bio;
> + merge_bio.tail = copy[i].request->biotail;
> + bio_list_merge(&bio_list, &merge_bio);
> + copy[i].request->bio = NULL;
> + blk_end_request_all(copy[i].request, 0);
> }
> - merge_bio.head = copy[i].request->bio;
> - merge_bio.tail = copy[i].request->biotail;
> - bio_list_merge(&bio_list, &merge_bio);
> - copy[i].request->bio = NULL;
> - blk_end_request_all(copy[i].request, 0);
> - }
> -
> - kfree(copy);
>
> + kfree(copy);
> + }
> xenbus_switch_state(info->xbdev, XenbusStateConnected);
>
> spin_lock_irq(&info->io_lock);
> @@ -1591,8 +1634,13 @@ static int blkif_recover(struct blkfront_info *info)
> /* Now safe for us to use the shared ring */
> info->connected = BLKIF_STATE_CONNECTED;
>
> - /* Kick any other new requests queued since we resumed */
> - kick_pending_request_queues(rinfo);
> + for (r_index = 0; r_index < info->nr_rings; r_index++) {
> + struct blkfront_ring_info *rinfo;
> +
> + rinfo = &info->rinfo[r_index];
> + /* Kick any other new requests queued since we resumed */
> + kick_pending_request_queues(rinfo);
> + }
>
> list_for_each_entry_safe(req, n, &requests, queuelist) {
> /* Requeue pending requests (flush or discard) */
> @@ -1801,7 +1849,7 @@ out_of_memory:
> /*
> * Gather all backend feature-*
> */
> -static int blkfront_gather_backend_features(struct blkfront_info *info)
> +static void blkfront_gather_backend_features(struct blkfront_info *info)
> {
> int err;
> int barrier, flush, discard, persistent;
> @@ -1856,8 +1904,6 @@ static int blkfront_gather_backend_features(struct blkfront_info *info)
> else
> info->max_indirect_segments = min(indirect_segments,
> xen_blkif_max_segments);
> -
> - return blkfront_setup_indirect(&info->rinfo);
Aaah nice. That was always bothering me in the code that is supposed
to just gather data.
> }
>
> /*
> @@ -1870,8 +1916,7 @@ static void blkfront_connect(struct blkfront_info *info)
> unsigned long sector_size;
> unsigned int physical_sector_size;
> unsigned int binfo;
> - int err;
> - struct blkfront_ring_info *rinfo = &info->rinfo;
> + int err, i;
>
> switch (info->connected) {
> case BLKIF_STATE_CONNECTED:
> @@ -1928,11 +1973,14 @@ static void blkfront_connect(struct blkfront_info *info)
> if (err != 1)
> physical_sector_size = sector_size;
>
> - err = blkfront_gather_backend_features(info);
> - if (err) {
> - xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
> - info->xbdev->otherend);
> - return;
> + blkfront_gather_backend_features(info);
> + for (i = 0; i < info->nr_rings; i++) {
> + err = blkfront_setup_indirect(&info->rinfo[i]);
> + if (err) {
> + xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
> + info->xbdev->otherend);
> + blkif_free(info, 0);
Missing 'break;'.
And you may want to set info->nr_rings = i temporarily so that blkif_free
does not needlessly iterate over all the rings (even those that haven't
been initialized)... But it does not matter that much, as blkif_free_ring
should be able to handle an uninitialized ring.
And naturally you should also kfree info->rinfo.. but if you
add that in blkif_free then that will be taken care of.
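
I.e. (untested, and assuming blkif_free also ends up kfree'ing
info->rinfo as suggested earlier):

	for (i = 0; i < info->nr_rings; i++) {
		err = blkfront_setup_indirect(&info->rinfo[i]);
		if (err) {
			xenbus_dev_fatal(info->xbdev, err,
					 "setup_indirect at %s",
					 info->xbdev->otherend);
			blkif_free(info, 0);
			break;
		}
	}

(or 'return' instead of 'break' - carrying on to xlvbd_alloc_gendisk
after blkif_free() probably makes no sense either.)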
> + }
> }
>
> err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
> @@ -1948,7 +1996,8 @@ static void blkfront_connect(struct blkfront_info *info)
> /* Kick pending requests. */
> spin_lock_irq(&info->io_lock);
> info->connected = BLKIF_STATE_CONNECTED;
> - kick_pending_request_queues(rinfo);
> + for (i = 0; i < info->nr_rings; i++)
> + kick_pending_request_queues(&info->rinfo[i]);
> spin_unlock_irq(&info->io_lock);
>
> add_disk(info->gd);
> @@ -2111,7 +2160,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode)
> dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
> xlvbd_release_gendisk(info);
> xenbus_frontend_closed(info->xbdev);
> - }
> + }
??? This looks like a spurious whitespace-only change.
>
> mutex_unlock(&info->mutex);
>
> --
> 1.8.3.1
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel