[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 1/1] drivers/block/xen-blkback: Limit blkback i/o
From: Vasiliy Tolstov <vase@xxxxxxxx> This patch provides the ability to limit I/O for each domU block device. With this patch, the dom0 administrator can specify the maximum IOPS for each block device. Changes apply dynamically and the domU does not need to be shut down (unlike with dm-ioband). Another benefit is that dom0 does not have to use the CFQ scheduler. After applying this patch we can control domU disk speed by writing the desired IOPS maximum for a specific block device. via sysfs: echo 1500 > /sys/devices/xen-backend/vbd-1-51712/qos/reqrate via xenstore: xenstore write /local/domain/1/device/vbd/51712/reqrate 1500 Current Xen I/O limiting solutions have the following disadvantages: 1) dm-ioband It needs another dm layer to be created on top of the block device. It lacks the ability to change the weight on the fly (the layer has to be recreated). It is not in the kernel yet. Patches need to be backported/forward-ported to the specific kernel version. Under our heavy load, the dm-ioband layer sometimes crashes dom0. If we use dm-ioband on an srp->lvm->raid1 setup and the srp target disconnects, dm-ioband may corrupt data, leaving the domU filesystem with many errors. 2) cgroups A very good thing, but we can't use it in our setup. cgroups needs the CFQ scheduler, but CFQ does not apply to bio devices; see the device-mapper list: http://goo.gl/YHiyI Our setup contains 2 storage nodes that export disks via srp. On each storage node we have lvm (not clvm). Each domU has its disk on lvm. Before starting a domain on a Xen node we construct a raid1 from two lvm vgs. In this case the CFQ scheduler can be applied only to the srp disks (/dev/sd*), but then we can only limit all domUs on this Xen node at the same time. 
Signed-off-by: Vasiliy Tolstov <vase@xxxxxxxx> --- drivers/block/xen-blkback/blkback.c | 35 +++++++- drivers/block/xen-blkback/common.h | 5 ++ drivers/block/xen-blkback/xenbus.c | 152 +++++++++++++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 1 deletion(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 74374fb..0672ab0 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -387,10 +387,18 @@ static void print_stats(struct xen_blkif *blkif) blkif->st_ds_req = 0; } +static void refill_iops(blkif_t *blkif) +{ + blkif->reqtime = jiffies + msecs_to_jiffies(1000); + blkif->reqcount = 0; +} + int xen_blkif_schedule(void *arg) { struct xen_blkif *blkif = arg; struct xen_vbd *vbd = &blkif->vbd; + int ret = 0; + struct timeval cur_time; xen_blkif_get(blkif); @@ -411,9 +419,20 @@ int xen_blkif_schedule(void *arg) blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - if (do_block_io_op(blkif)) + ret = do_block_io_op(blkif); + if (ret) blkif->waiting_reqs = 1; + if (blkif->reqrate) { + if (2 == ret && (blkif->reqtime > jiffies)) { + jiffies_to_timeval(jiffies, &cur_time); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(blkif->reqtime - jiffies); + } + if (time_after(jiffies, blkif->reqtime)) + refill_iops(blkif); + } + if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); } @@ -760,6 +779,10 @@ __do_block_io_op(struct xen_blkif *blkif) rp = blk_rings->common.sring->req_prod; rmb(); /* Ensure we see queued requests up to 'rp'. */ + if (blkif->reqrate && (blkif->reqcount >= blkif->reqrate)) { + return (rc != rp) ? 
2 : 0; + } + while (rc != rp) { if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) @@ -770,6 +793,13 @@ __do_block_io_op(struct xen_blkif *blkif) break; } + if (blkif->reqrate) { + if (blkif->reqcount >= blkif->reqrate) { + more_to_do = 2; + break; + } + } + pending_req = alloc_req(); if (NULL == pending_req) { blkif->st_oo_req++; @@ -792,6 +822,7 @@ __do_block_io_op(struct xen_blkif *blkif) } blk_rings->common.req_cons = ++rc; /* before make_response() */ + blkif->reqcount++; /* Apply all sanity checks to /private copy/ of request. */ barrier(); if (unlikely(req.operation == BLKIF_OP_DISCARD)) { @@ -842,6 +873,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, struct blk_plug plug; bool drain = false; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct timeval cur_time; switch (req->operation) { case BLKIF_OP_READ: @@ -992,6 +1024,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, else if (operation & WRITE) blkif->st_wr_sect += preq.nr_sects; + jiffies_to_timeval(jiffies, &cur_time); return 0; fail_flush: diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 6072390..0552ce3 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -206,6 +206,11 @@ struct xen_blkif { struct rb_root persistent_gnts; unsigned int persistent_gnt_c; + /* qos information */ + unsigned long reqtime; + int reqcount; + int reqrate; + /* statistics */ unsigned long st_print; int st_rd_req; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 6398072..f8afe76 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -25,6 +25,7 @@ struct backend_info { struct xenbus_device *dev; struct xen_blkif *blkif; struct xenbus_watch backend_watch; + struct xenbus_watch reqrate_watch; unsigned major; unsigned minor; char *mode; @@ -230,6 +231,79 @@ int __init xen_blkif_interface_init(void) } \ static DEVICE_ATTR(name, 
S_IRUGO, show_##name, NULL) +static ssize_t +show_reqrate(struct device *_dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = -ENODEV; + struct xenbus_device *dev; + struct backend_info *be; + + if (!get_device(_dev)) + return ret; + + dev = to_xenbus_device(_dev); + be = dev_get_drvdata(&dev->dev); + + if (be != NULL) + ret = sprintf(buf, "%d\n", be->blkif->reqrate); + + put_device(_dev); + + return ret; +} + +static ssize_t +store_reqrate(struct device *_dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int value; + struct xenbus_device *dev; + struct backend_info *be; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!get_device(_dev)) + return -ENODEV; + + if (sscanf(buf, "%d", &value) != 1) + return -EINVAL; + + dev = to_xenbus_device(_dev); + be = dev_get_drvdata(&dev->dev); + + if (be != NULL) + be->blkif->reqrate = value; + + put_device(_dev); + + return size; +} +static DEVICE_ATTR(reqrate, S_IRUGO | S_IWUSR, show_reqrate, + store_reqrate); + +static ssize_t +show_reqcount(struct device *_dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = -ENODEV; + struct xenbus_device *dev; + struct backend_info *be; + + if (!get_device(_dev)) + return ret; + + dev = to_xenbus_device(_dev); + be = dev_get_drvdata(&dev->dev); + + if (be != NULL) + ret = sprintf(buf, "%d\n", be->blkif->reqcount); + + put_device(_dev); + + return ret; +} +static DEVICE_ATTR(reqcount, S_IRUGO | S_IWUSR, show_reqcount, NULL); + VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); @@ -254,6 +328,17 @@ static struct attribute_group xen_vbdstat_group = { .attrs = xen_vbdstat_attrs, }; +static struct attribute *vbdreq_attrs[] = { + &dev_attr_reqrate.attr, + &dev_attr_reqcount.attr, + NULL +}; + +static const struct attribute_group vbdreq_group = { + .name = "qos", + .attrs = vbdreq_attrs, +}; + VBD_SHOW(physical_device, "%x:%x\n", be->major, 
be->minor); VBD_SHOW(mode, "%s\n", be->mode); @@ -273,8 +358,13 @@ static int xenvbd_sysfs_addif(struct xenbus_device *dev) if (error) goto fail3; + error = sysfs_create_group(&dev->dev.kobj, &xen_vbdreq_group); + if (error) + goto fail4; + return 0; +fail4: sysfs_remove_group(&dev->dev.kobj, &xen_vbdreq_group); fail3: sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); fail2: device_remove_file(&dev->dev, &dev_attr_mode); fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); @@ -283,6 +373,7 @@ fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); static void xenvbd_sysfs_delif(struct xenbus_device *dev) { + sysfs_remove_group(&dev->dev.kobj, &xen_vbdreq_group); sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); device_remove_file(&dev->dev, &dev_attr_mode); device_remove_file(&dev->dev, &dev_attr_physical_device); @@ -360,6 +451,12 @@ static int xen_blkbk_remove(struct xenbus_device *dev) be->backend_watch.node = NULL; } + if (be->reqrate_watch.node) { + unregister_xenbus_watch(&be->reqrate_watch); + kfree(be->reqrate_watch.node); + be->reqrate_watch.node = NULL; + } + if (be->blkif) { xen_blkif_disconnect(be->blkif); xen_vbd_free(&be->blkif->vbd); @@ -503,6 +600,7 @@ static void backend_changed(struct xenbus_watch *watch, struct xenbus_device *dev = be->dev; int cdrom = 0; char *device_type; + char name[TASK_COMM_LEN]; DPRINTK(""); @@ -542,6 +640,21 @@ static void backend_changed(struct xenbus_watch *watch, kfree(device_type); } + /* gather information about QoS policy for this device. */ + err = blkback_name(be->blkif, name); + if (err) { + xenbus_dev_error(be->dev, err, "get blkback dev name"); + return; + } + + err = xenbus_gather(XBT_NIL, dev->otherend, + "reqrate", "%d", &be->blkif->reqrate, + NULL); + if (err) + DPRINTK("%s xenbus_gather(reqrate) error", name); + + be->blkif->reqtime = jiffies; + if (be->major == 0 && be->minor == 0) { /* Front end dir is a number, which is used as the handle. 
*/ @@ -645,6 +758,30 @@ static void frontend_changed(struct xenbus_device *dev, /* ** Connection ** */ +static void reqrate_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct backend_info *be = container_of(watch, struct backend_info, + reqrate_watch); + int err; + char name[TASK_COMM_LEN]; + + err = blkback_name(be->blkif, name); + if (err) { + xenbus_dev_error(be->dev, err, "get blkback dev name"); + return; + } + + err = xenbus_gather(XBT_NIL, be->dev->otherend, + "reqrate", "%d", + &be->blkif->reqrate, NULL); + if (err) { + DPRINTK("%s xenbus_gather(reqrate) error", name); + } else { + if (be->blkif->reqrate <= 0) + be->blkif->reqrate = 0; + } +} /* * Write the physical details regarding the block device to the store, and @@ -717,6 +854,21 @@ again: xenbus_dev_fatal(dev, err, "%s: switching to Connected state", dev->nodename); + if (be->reqrate_watch.node) { + unregister_xenbus_watch(&be->reqrate_watch); + kfree(be->reqrate_watch.node); + be->reqrate_watch.node = NULL; + } + + err = xenbus_watch_path2(dev, dev->otherend, "reqrate", + &be->reqrate_watch, + reqrate_changed); + if (err) { + xenbus_dev_fatal(dev, err, "%s: watching reqrate", + dev->nodename); + goto abort; + } + return; abort: xenbus_transaction_end(xbt, 1); -- 1.7.9.5 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |