[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] VM disk I/O limit patch
Hi all, I have added a blkback QoS patch. With this patch you can configure (dynamically or statically) a different I/O speed limit for each VM disk. ---------------------------------------------------------------------------- diff -urNp blkback/blkback.c blkback-qos/blkback.c --- blkback/blkback.c 2011-06-22 07:54:19.000000000 +0800 +++ blkback-qos/blkback.c 2011-06-22 07:53:18.000000000 +0800 @@ -44,6 +44,11 @@ #include <asm/hypervisor.h> #include "common.h" +#undef DPRINTK +#define DPRINTK(fmt, args...) \ + printk("blkback/blkback (%s:%d) " fmt ".\n", \ + __FUNCTION__, __LINE__, ##args) + /* * These are rather arbitrary. They are fairly large because adjacent requests * pulled from a communication ring are quite likely to end up being part of @@ -110,7 +115,8 @@ static inline unsigned long vaddr(pendin static int do_block_io_op(blkif_t *blkif); static int dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, - pending_req_t *pending_req); + pending_req_t *pending_req, + int *done_nr_sects); static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st); @@ -206,10 +212,20 @@ static void print_stats(blkif_t *blkif) blkif->st_pk_req = 0; } +static void refill_reqcount(blkif_t *blkif) +{ + blkif->reqtime = jiffies + msecs_to_jiffies(1000); + blkif->reqcount = blkif->reqrate; + if (blkif->reqcount < blkif->reqmin) + blkif->reqcount = blkif->reqmin; +} + int blkif_schedule(void *arg) { blkif_t *blkif = arg; struct vbd *vbd = &blkif->vbd; + int ret = 0; + struct timeval cur_time; blkif_get(blkif); @@ -232,12 +248,34 @@ int blkif_schedule(void *arg) blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - if (do_block_io_op(blkif)) + ret = do_block_io_op(blkif); + if (ret) blkif->waiting_reqs = 1; unplug_queue(blkif); + if(blkif->reqmin){ + if(2 == ret && (blkif->reqtime > jiffies)){ + jiffies_to_timeval(jiffies, &cur_time); + if(log_stats && (cur_time.tv_sec % 10 ==1 )) + printk(KERN_DEBUG "%s: going to sleep %d millsecs(rate=%d)\n", + current->comm, 
jiffies_to_msecs(blkif->reqtime - jiffies), + blkif->reqrate); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(blkif->reqtime - jiffies); + + if(log_stats && (cur_time.tv_sec % 10 ==1 )) + printk(KERN_DEBUG "%s: sleep end(rate=%d)\n", + current->comm,blkif->reqrate); + } + if (time_after(jiffies, blkif->reqtime)) + refill_reqcount(blkif); + } + if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); + } if (log_stats) @@ -306,7 +344,6 @@ irqreturn_t blkif_be_int(int irq, void * /****************************************************************** * DOWNWARD CALLS -- These interface with the block-device layer proper. */ - static int do_block_io_op(blkif_t *blkif) { blkif_back_rings_t *blk_rings = &blkif->blk_rings; @@ -314,15 +351,27 @@ static int do_block_io_op(blkif_t *blkif pending_req_t *pending_req; RING_IDX rc, rp; int more_to_do = 0, ret; + static int last_done_nr_sects = 0; rc = blk_rings->common.req_cons; rp = blk_rings->common.sring->req_prod; rmb(); /* Ensure we see queued requests up to 'rp'. */ + + if (blkif->reqmin && blkif->reqcount <= 0) + return (rc != rp) ? 
2 : 0; while ((rc != rp) || (blkif->is_suspended_req)) { if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) break; + + if(blkif->reqmin){ + blkif->reqcount -= last_done_nr_sects; + if (blkif->reqcount <= 0) { + more_to_do = 2; + break; + } + } if (kthread_should_stop()) { more_to_do = 1; @@ -367,14 +416,14 @@ handle_request: switch (req.operation) { case BLKIF_OP_READ: blkif->st_rd_req++; - ret = dispatch_rw_block_io(blkif, &req, pending_req); + ret = dispatch_rw_block_io(blkif, &req, pending_req,&last_done_nr_sects); break; case BLKIF_OP_WRITE_BARRIER: blkif->st_br_req++; /* fall through */ case BLKIF_OP_WRITE: blkif->st_wr_req++; - ret = dispatch_rw_block_io(blkif, &req, pending_req); + ret = dispatch_rw_block_io(blkif, &req, pending_req,&last_done_nr_sects); break; case BLKIF_OP_PACKET: DPRINTK("error: block operation BLKIF_OP_PACKET not implemented\n"); @@ -412,9 +461,29 @@ handle_request: return more_to_do; } +static char* operation2str(int operation) +{ + char* ret_str = NULL; + switch (operation) { + case BLKIF_OP_READ: + ret_str = "READ"; + break; + case BLKIF_OP_WRITE: + ret_str = "WRITE"; + break; + case BLKIF_OP_WRITE_BARRIER: + ret_str = "WRITE_BARRIER"; + break; + default: + ret_str = "0"; + } + return ret_str; +} + static int dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, - pending_req_t *pending_req) + pending_req_t *pending_req, + int *done_nr_sects) { extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -426,6 +495,9 @@ static int dispatch_rw_block_io(blkif_t struct bio *bio = NULL; int ret, i; int operation; + struct timeval cur_time; + + *done_nr_sects = 0; switch (req->operation) { case BLKIF_OP_READ: @@ -582,6 +654,12 @@ static int dispatch_rw_block_io(blkif_t else if (operation == WRITE || operation == WRITE_BARRIER) blkif->st_wr_sect += preq.nr_sects; + *done_nr_sects = preq.nr_sects; + jiffies_to_timeval(jiffies, &cur_time); + if ((log_stats 
== 2) && (cur_time.tv_sec % 10 ==1 )) + printk(KERN_DEBUG " operation=%s sects=%d\n", + operation2str(req->operation),preq.nr_sects); + return 0; fail_flush: @@ -695,6 +773,8 @@ static int __init blkif_init(void) blkif_xenbus_init(); + DPRINTK("blkif_inited\n"); + return 0; out_of_memory: diff -urNp blkback/cdrom.c blkback-qos/cdrom.c --- blkback/cdrom.c 2010-05-20 18:07:00.000000000 +0800 +++ blkback-qos/cdrom.c 2011-06-22 07:34:50.000000000 +0800 @@ -35,9 +35,9 @@ #include "common.h" #undef DPRINTK -#define DPRINTK(_f, _a...) \ - printk("(%s() file=%s, line=%d) " _f "\n", \ - __PRETTY_FUNCTION__, __FILE__ , __LINE__ , ##_a ) +#define DPRINTK(fmt, args...) \ + printk("blkback/cdrom (%s:%d) " fmt ".\n", \ + __FUNCTION__, __LINE__, ##args) #define MEDIA_PRESENT "media-present" diff -urNp blkback/common.h blkback-qos/common.h --- blkback/common.h 2010-05-20 18:07:00.000000000 +0800 +++ blkback-qos/common.h 2011-06-22 07:34:50.000000000 +0800 @@ -100,8 +100,17 @@ typedef struct blkif_st { grant_handle_t shmem_handle; grant_ref_t shmem_ref; + + /* qos information */ + unsigned long reqtime; + int reqcount; + int reqmin; + int reqrate; + } blkif_t; +#define VBD_QOS_MIN_RATE_LIMIT 2*1024 /* 1MBs */ + struct backend_info { struct xenbus_device *dev; @@ -111,6 +120,8 @@ struct backend_info unsigned major; unsigned minor; char *mode; + struct xenbus_watch rate_watch; + int have_rate_watch; }; blkif_t *blkif_alloc(domid_t domid); diff -urNp blkback/vbd.c blkback-qos/vbd.c --- blkback/vbd.c 2010-05-20 18:07:00.000000000 +0800 +++ blkback-qos/vbd.c 2011-06-22 07:34:50.000000000 +0800 @@ -35,6 +35,11 @@ #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk)) +#undef DPRINTK +#define DPRINTK(fmt, args...) 
\ + printk("blkback/vbd (%s:%d) " fmt ".\n", \ + __FUNCTION__, __LINE__, ##args) + unsigned long long vbd_size(struct vbd *vbd) { return vbd_sz(vbd); @@ -87,7 +92,7 @@ int vbd_create(blkif_t *blkif, blkif_vde if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) vbd->type |= VDISK_REMOVABLE; - DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + DPRINTK("Successful creation of handle=%04x (dom=%u)", handle, blkif->domid); return 0; } diff -urNp blkback/xenbus.c blkback-qos/xenbus.c --- blkback/xenbus.c 2010-05-20 18:07:00.000000000 +0800 +++ blkback-qos/xenbus.c 2011-06-22 07:34:50.000000000 +0800 @@ -25,13 +25,14 @@ #undef DPRINTK #define DPRINTK(fmt, args...) \ - pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ + printk("blkback/xenbus (%s:%d) " fmt ".\n", \ __FUNCTION__, __LINE__, ##args) static void connect(struct backend_info *); static int connect_ring(struct backend_info *); static void backend_changed(struct xenbus_watch *, const char **, unsigned int); +static void unregister_rate_watch(struct backend_info *be); static int blkback_name(blkif_t *blkif, char *buf) { @@ -59,8 +60,10 @@ static void update_blkif_status(blkif_t char name[TASK_COMM_LEN]; /* Not ready to connect? */ - if (!blkif->irq || !blkif->vbd.bdev) + if (!blkif->irq || !blkif->vbd.bdev){ + DPRINTK("Not ready to connect"); return; + } /* Already connected? 
*/ if (blkif->be->dev->state == XenbusStateConnected) @@ -193,6 +196,8 @@ static int blkback_remove(struct xenbus_ be->cdrom_watch.node = NULL; } + unregister_rate_watch(be); + if (be->blkif) { blkif_disconnect(be->blkif); vbd_free(&be->blkif->vbd); @@ -251,6 +256,10 @@ static int blkback_probe(struct xenbus_d err = xenbus_watch_path2(dev, dev->nodename, "physical-device", &be->backend_watch, backend_changed); + + DPRINTK("blkback_probe called"); + DPRINTK("dev->nodename=%s/physical-device",dev->nodename); + if (err) goto fail; @@ -266,7 +275,6 @@ fail: return err; } - /** * Callback received when the hotplug scripts have placed the physical-device * node. Read it and the mode node, and create a vbd. If the frontend is @@ -283,8 +291,9 @@ static void backend_changed(struct xenbu struct xenbus_device *dev = be->dev; int cdrom = 0; char *device_type; + char name[TASK_COMM_LEN]; - DPRINTK(""); + DPRINTK("backend_changed called"); err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", &major, &minor); @@ -322,6 +331,34 @@ static void backend_changed(struct xenbu kfree(device_type); } + /* gather information about QoS policy for this device. 
*/ + err = blkback_name(be->blkif, name); + if (err) { + xenbus_dev_error(be->dev, err, "get blkback dev name"); + return; + } + + err = xenbus_gather(XBT_NIL, dev->otherend, + "tokens-rate", "%d", &be->blkif->reqrate, + NULL); + if(err){ + DPRINTK("%s xenbus_gather(tokens-min,tokens-rate) error",name); + }else{ + if(be->blkif->reqrate <= 0){ + be->blkif->reqmin = 0 ; + DPRINTK("%s tokens-rate == 0,no limit",name); + }else{ + DPRINTK("%s xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate); + be->blkif->reqrate *= 2; + be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT; + if(be->blkif->reqmin > be->blkif->reqrate){ + be->blkif->reqrate = be->blkif->reqmin; + DPRINTK("%s reset default value(tokens-rate=%d)",name,be->blkif->reqrate); + } + } + } + be->blkif->reqtime = jiffies; + if (be->major == 0 && be->minor == 0) { /* Front end dir is a number, which is used as the handle. */ @@ -414,6 +451,49 @@ static void frontend_changed(struct xenb /* ** Connection ** */ +static void unregister_rate_watch(struct backend_info *be) +{ + if (be->have_rate_watch) { + unregister_xenbus_watch(&be->rate_watch); + kfree(be->rate_watch.node); + } + be->have_rate_watch = 0; +} + +static void rate_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + + struct backend_info *be=container_of(watch,struct backend_info, rate_watch); + int err; + char name[TASK_COMM_LEN]; + + err = blkback_name(be->blkif, name); + if (err) { + xenbus_dev_error(be->dev, err, "get blkback dev name"); + return; + } + + err = xenbus_gather(XBT_NIL,be->dev->otherend, + "tokens-rate", "%d", + &be->blkif->reqrate,NULL); + if(err){ + DPRINTK("%s xenbus_gather(tokens-rate) error",name); + }else{ + if(be->blkif->reqrate <= 0){ + be->blkif->reqmin = 0; + DPRINTK("%s tokens-rate == 0,no limit",name); + }else{ + DPRINTK("%s xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate); + be->blkif->reqrate *= 2; + be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT; + if(be->blkif->reqmin > be->blkif->reqrate){ + 
be->blkif->reqrate = be->blkif->reqmin; + DPRINTK("%s reset default value(tokens-rate=%d)",name,be->blkif->reqrate); + } + } + } +} /** * Write the physical details regarding the block device to the store, and @@ -439,6 +519,14 @@ again: if (err) goto abort; + /*add by andrew for centos pv*/ + err = xenbus_printf(xbt, dev->nodename,"feature-flush-cache", "1"); + if (err){ + xenbus_dev_fatal(dev, err, "writing %s/feature-flush-cache", + dev->nodename); + goto abort; + } + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", vbd_size(&be->blkif->vbd)); if (err) { @@ -469,11 +557,22 @@ again: if (err) xenbus_dev_fatal(dev, err, "ending transaction"); + DPRINTK("xenbus_switch_to XenbusStateConnected"); + err = xenbus_switch_state(dev, XenbusStateConnected); if (err) xenbus_dev_fatal(dev, err, "switching to Connected state", dev->nodename); + unregister_rate_watch(be); + err=xenbus_watch_path2(dev, dev->otherend, "tokens-rate", + &be->rate_watch,rate_changed); + if (!err) + be->have_rate_watch = 1; + else + xenbus_dev_fatal(dev, err, "watching tokens-rate", + dev->nodename); + return; abort: xenbus_transaction_end(xbt, 1); Attachment:
blkback-qos-20110621.diff _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |