Re: [Xen-users] Re: [Xen-devel] VM disk I/O limit patch
Hi,

relying on DM is not advisable, since it is yet another layer that breaks
write barriers, and it kills portability to non-Linux OSes.

btw: it would be cool if a block-shaping patch finally made it into the Xen
mainline, since the last one submitted in 2009 was apparently forgotten...

Regards,
Florian

2011/6/21 Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>:
> On Tue, Jun 21, 2011 at 04:29:35PM +0800, Andrew Xu wrote:
>> Hi all,
>>
>> I add a blkback QoS patch.
>
> What tree is this against? There is a xen-blkback in 3.0-rc4; can you
> rebase it against that, please?
>
> What is the patch solving? Why can't it be done with dm-ioband?
>
>> You can config (dynamic/static) different I/O speed for different VM disks
>> with this patch.
>>
>> ----------------------------------------------------------------------------
>>
>> diff -urNp blkback/blkback.c blkback-qos/blkback.c
>> --- blkback/blkback.c   2011-06-22 07:54:19.000000000 +0800
>> +++ blkback-qos/blkback.c       2011-06-22 07:53:18.000000000 +0800
>> @@ -44,6 +44,11 @@
>>  #include <asm/hypervisor.h>
>>  #include "common.h"
>>
>> +#undef DPRINTK
>> +#define DPRINTK(fmt, args...)                                  \
>> +        printk("blkback/blkback (%s:%d) " fmt ".\n",           \
>> +                __FUNCTION__, __LINE__, ##args)
>> +
>>  /*
>>   * These are rather arbitrary. They are fairly large because adjacent requests
>>   * pulled from a communication ring are quite likely to end up being part of
>> @@ -110,7 +115,8 @@ static inline unsigned long vaddr(pendin
>>  static int do_block_io_op(blkif_t *blkif);
>>  static int dispatch_rw_block_io(blkif_t *blkif,
>>                                  blkif_request_t *req,
>> -                                pending_req_t *pending_req);
>> +                                pending_req_t *pending_req,
>> +                                int *done_nr_sects);
>>  static void make_response(blkif_t *blkif, u64 id,
>>                            unsigned short op, int st);
>>
>> @@ -206,10 +212,20 @@ static void print_stats(blkif_t *blkif)
>>          blkif->st_pk_req = 0;
>>  }
>>
>> +static void refill_reqcount(blkif_t *blkif)
>> +{
>> +        blkif->reqtime = jiffies + msecs_to_jiffies(1000);
>> +        blkif->reqcount = blkif->reqrate;
>> +        if (blkif->reqcount < blkif->reqmin)
>> +                blkif->reqcount = blkif->reqmin;
>> +}
>> +
>>  int blkif_schedule(void *arg)
>>  {
>>          blkif_t *blkif = arg;
>>          struct vbd *vbd = &blkif->vbd;
>> +        int ret = 0;
>> +        struct timeval cur_time;
>>
>>          blkif_get(blkif);
>>
>> @@ -232,12 +248,34 @@ int blkif_schedule(void *arg)
>>                  blkif->waiting_reqs = 0;
>>                  smp_mb(); /* clear flag *before* checking for work */
>>
>> -                if (do_block_io_op(blkif))
>> +                ret = do_block_io_op(blkif);
>> +                if (ret)
>>                          blkif->waiting_reqs = 1;
>>                  unplug_queue(blkif);
>>
>> +                if (blkif->reqmin) {
>> +                        if (2 == ret && (blkif->reqtime > jiffies)) {
>> +                                jiffies_to_timeval(jiffies, &cur_time);
>> +                                if (log_stats && (cur_time.tv_sec % 10 == 1))
>> +                                        printk(KERN_DEBUG "%s: going to sleep %d millsecs(rate=%d)\n",
>> +                                                current->comm,
>> +                                                jiffies_to_msecs(blkif->reqtime - jiffies),
>> +                                                blkif->reqrate);
>> +
>> +                                set_current_state(TASK_INTERRUPTIBLE);
>> +                                schedule_timeout(blkif->reqtime - jiffies);
>> +
>> +                                if (log_stats && (cur_time.tv_sec % 10 == 1))
>> +                                        printk(KERN_DEBUG "%s: sleep end(rate=%d)\n",
>> +                                                current->comm, blkif->reqrate);
>> +                        }
>> +                        if (time_after(jiffies, blkif->reqtime))
>> +                                refill_reqcount(blkif);
>> +                }
>> +
>>                  if (log_stats && time_after(jiffies, blkif->st_print))
>>                          print_stats(blkif);
>> +
>>          }
>>
>>          if (log_stats)
>> @@ -306,7 +344,6 @@ irqreturn_t blkif_be_int(int irq, void *
>>  /******************************************************************
>>   * DOWNWARD CALLS -- These interface with the block-device layer proper.
>>   */
>> -
>>  static int do_block_io_op(blkif_t *blkif)
>>  {
>>          blkif_back_rings_t *blk_rings = &blkif->blk_rings;
>> @@ -314,15 +351,27 @@ static int do_block_io_op(blkif
>>          pending_req_t *pending_req;
>>          RING_IDX rc, rp;
>>          int more_to_do = 0, ret;
>> +        static int last_done_nr_sects = 0;
>>
>>          rc = blk_rings->common.req_cons;
>>          rp = blk_rings->common.sring->req_prod;
>>          rmb(); /* Ensure we see queued requests up to 'rp'. */
>> +
>> +        if (blkif->reqmin && blkif->reqcount <= 0)
>> +                return (rc != rp) ? 2 : 0;
>>
>>          while ((rc != rp) || (blkif->is_suspended_req)) {
>>
>>                  if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
>>                          break;
>> +
>> +                if (blkif->reqmin) {
>> +                        blkif->reqcount -= last_done_nr_sects;
>> +                        if (blkif->reqcount <= 0) {
>> +                                more_to_do = 2;
>> +                                break;
>> +                        }
>> +                }
>>
>>                  if (kthread_should_stop()) {
>>                          more_to_do = 1;
>> @@ -367,14 +416,14 @@ handle_request:
>>          switch (req.operation) {
>>          case BLKIF_OP_READ:
>>                  blkif->st_rd_req++;
>> -                ret = dispatch_rw_block_io(blkif, &req, pending_req);
>> +                ret = dispatch_rw_block_io(blkif, &req, pending_req, &last_done_nr_sects);
>>                  break;
>>          case BLKIF_OP_WRITE_BARRIER:
>>                  blkif->st_br_req++;
>>                  /* fall through */
>>          case BLKIF_OP_WRITE:
>>                  blkif->st_wr_req++;
>> -                ret = dispatch_rw_block_io(blkif, &req, pending_req);
>> +                ret = dispatch_rw_block_io(blkif, &req, pending_req, &last_done_nr_sects);
>>                  break;
>>          case BLKIF_OP_PACKET:
>>                  DPRINTK("error: block operation BLKIF_OP_PACKET not implemented\n");
>> @@ -412,9 +461,29 @@ handle_request:
>>          return more_to_do;
>>  }
>>
>> +static char* operation2str(int operation)
>> +{
>> +        char* ret_str = NULL;
>> +        switch (operation) {
>> +        case BLKIF_OP_READ:
>> +                ret_str = "READ";
>> +                break;
>> +        case BLKIF_OP_WRITE:
>> +                ret_str = "WRITE";
>> +                break;
>> +        case BLKIF_OP_WRITE_BARRIER:
>> +                ret_str = "WRITE_BARRIER";
>> +                break;
>> +        default:
>> +                ret_str = "0";
>> +        }
>> +        return ret_str;
>> +}
>> +
>>  static int dispatch_rw_block_io(blkif_t *blkif,
>>                                  blkif_request_t *req,
>> -                                pending_req_t *pending_req)
>> +                                pending_req_t *pending_req,
>> +                                int *done_nr_sects)
>>  {
>>          extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
>>          struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
>> @@ -426,6 +495,9 @@ static int dispatch_rw_block_io(blkif_t
>>          struct bio *bio = NULL;
>>          int ret, i;
>>          int operation;
>> +        struct timeval cur_time;
>> +
>> +        *done_nr_sects = 0;
>>
>>          switch (req->operation) {
>>          case BLKIF_OP_READ:
>> @@ -582,6 +654,12 @@ static int dispatch_rw_block_io(blkif_t
>>          else if (operation == WRITE || operation == WRITE_BARRIER)
>>                  blkif->st_wr_sect += preq.nr_sects;
>>
>> +        *done_nr_sects = preq.nr_sects;
>> +        jiffies_to_timeval(jiffies, &cur_time);
>> +        if ((log_stats == 2) && (cur_time.tv_sec % 10 == 1))
>> +                printk(KERN_DEBUG " operation=%s sects=%d\n",
>> +                        operation2str(req->operation), preq.nr_sects);
>> +
>>          return 0;
>>
>>  fail_flush:
>> @@ -695,6 +773,8 @@ static int __init blkif_init(void)
>>
>>          blkif_xenbus_init();
>>
>> +        DPRINTK("blkif_inited\n");
>> +
>>          return 0;
>>
>>  out_of_memory:
>> diff -urNp blkback/cdrom.c blkback-qos/cdrom.c
>> --- blkback/cdrom.c     2010-05-20 18:07:00.000000000 +0800
>> +++ blkback-qos/cdrom.c 2011-06-22 07:34:50.000000000 +0800
>> @@ -35,9 +35,9 @@
>>  #include "common.h"
>>
>>  #undef DPRINTK
>> -#define DPRINTK(_f, _a...) \
>> -        printk("(%s() file=%s, line=%d) " _f "\n", \
>> -                __PRETTY_FUNCTION__, __FILE__ , __LINE__ , ##_a )
>> +#define DPRINTK(fmt, args...)                                  \
>> +        printk("blkback/cdrom (%s:%d) " fmt ".\n",             \
>> +                __FUNCTION__, __LINE__, ##args)
>>
>>
>>  #define MEDIA_PRESENT "media-present"
>> diff -urNp blkback/common.h blkback-qos/common.h
>> --- blkback/common.h    2010-05-20 18:07:00.000000000 +0800
>> +++ blkback-qos/common.h        2011-06-22 07:34:50.000000000 +0800
>> @@ -100,8 +100,17 @@ typedef struct blkif_st {
>>
>>          grant_handle_t shmem_handle;
>>          grant_ref_t shmem_ref;
>> +
>> +        /* qos information */
>> +        unsigned long reqtime;
>> +        int reqcount;
>> +        int reqmin;
>> +        int reqrate;
>> +
>>  } blkif_t;
>>
>> +#define VBD_QOS_MIN_RATE_LIMIT 2*1024          /* 1MBs */
>> +
>>  struct backend_info
>>  {
>>          struct xenbus_device *dev;
>> @@ -111,6 +120,8 @@ struct backend_info
>>          unsigned major;
>>          unsigned minor;
>>          char *mode;
>> +        struct xenbus_watch rate_watch;
>> +        int have_rate_watch;
>>  };
>>
>>  blkif_t *blkif_alloc(domid_t domid);
>> diff -urNp blkback/vbd.c blkback-qos/vbd.c
>> --- blkback/vbd.c       2010-05-20 18:07:00.000000000 +0800
>> +++ blkback-qos/vbd.c   2011-06-22 07:34:50.000000000 +0800
>> @@ -35,6 +35,11 @@
>>  #define vbd_sz(_v) ((_v)->bdev->bd_part ? \
>>          (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
>>
>> +#undef DPRINTK
>> +#define DPRINTK(fmt, args...)                                  \
>> +        printk("blkback/vbd (%s:%d) " fmt ".\n",               \
>> +                __FUNCTION__, __LINE__, ##args)
>> +
>>  unsigned long long vbd_size(struct vbd *vbd)
>>  {
>>          return vbd_sz(vbd);
>> @@ -87,7 +92,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
>>          if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
>>                  vbd->type |= VDISK_REMOVABLE;
>>
>> -        DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
>> +        DPRINTK("Successful creation of handle=%04x (dom=%u)",
>>                  handle, blkif->domid);
>>          return 0;
>>  }
>> diff -urNp blkback/xenbus.c blkback-qos/xenbus.c
>> --- blkback/xenbus.c    2010-05-20 18:07:00.000000000 +0800
>> +++ blkback-qos/xenbus.c        2011-06-22 07:34:50.000000000 +0800
>> @@ -25,13 +25,14 @@
>>
>>  #undef DPRINTK
>>  #define DPRINTK(fmt, args...)                                  \
>> -        pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",          \
>> +        printk("blkback/xenbus (%s:%d) " fmt ".\n",            \
>>                  __FUNCTION__, __LINE__, ##args)
>>
>>  static void connect(struct backend_info *);
>>  static int connect_ring(struct backend_info *);
>>  static void backend_changed(struct xenbus_watch *, const char **,
>>                              unsigned int);
>> +static void unregister_rate_watch(struct backend_info *be);
>>
>>  static int blkback_name(blkif_t *blkif, char *buf)
>>  {
>> @@ -59,8 +60,10 @@ static void update_blkif_status(blkif_t
>>          char name[TASK_COMM_LEN];
>>
>>          /* Not ready to connect? */
>> -        if (!blkif->irq || !blkif->vbd.bdev)
>> +        if (!blkif->irq || !blkif->vbd.bdev) {
>> +                DPRINTK("Not ready to connect");
>>                  return;
>> +        }
>>
>>          /* Already connected? */
>>          if (blkif->be->dev->state == XenbusStateConnected)
>> @@ -193,6 +196,8 @@ static int blkback_remove(struct xenbus_
>>                  be->cdrom_watch.node = NULL;
>>          }
>>
>> +        unregister_rate_watch(be);
>> +
>>          if (be->blkif) {
>>                  blkif_disconnect(be->blkif);
>>                  vbd_free(&be->blkif->vbd);
>> @@ -251,6 +256,10 @@ static int blkback_probe(struct xenbus_d
>>
>>          err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
>>                                   &be->backend_watch, backend_changed);
>> +
>> +        DPRINTK("blkback_probe called");
>> +        DPRINTK("dev->nodename=%s/physical-device", dev->nodename);
>> +
>>          if (err)
>>                  goto fail;
>>
>> @@ -266,7 +275,6 @@ fail:
>>          return err;
>>  }
>>
>> -
>>  /**
>>   * Callback received when the hotplug scripts have placed the physical-device
>>   * node. Read it and the mode node, and create a vbd.  If the frontend is
>> @@ -283,8 +291,9 @@ static void backend_changed(struct xenbu
>>          struct xenbus_device *dev = be->dev;
>>          int cdrom = 0;
>>          char *device_type;
>> +        char name[TASK_COMM_LEN];
>>
>> -        DPRINTK("");
>> +        DPRINTK("backend_changed called");
>>
>>          err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
>>                             &major, &minor);
>> @@ -322,6 +331,34 @@ static void backend_changed(struct xenbu
>>                  kfree(device_type);
>>          }
>>
>> +        /* gather information about QoS policy for this device. */
>> +        err = blkback_name(be->blkif, name);
>> +        if (err) {
>> +                xenbus_dev_error(be->dev, err, "get blkback dev name");
>> +                return;
>> +        }
>> +
>> +        err = xenbus_gather(XBT_NIL, dev->otherend,
>> +                            "tokens-rate", "%d", &be->blkif->reqrate,
>> +                            NULL);
>> +        if (err) {
>> +                DPRINTK("%s xenbus_gather(tokens-min,tokens-rate) error", name);
>> +        } else {
>> +                if (be->blkif->reqrate <= 0) {
>> +                        be->blkif->reqmin = 0;
>> +                        DPRINTK("%s tokens-rate == 0,no limit", name);
>> +                } else {
>> +                        DPRINTK("%s xenbus_gather(tokens-rate=%d)", name, be->blkif->reqrate);
>> +                        be->blkif->reqrate *= 2;
>> +                        be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
>> +                        if (be->blkif->reqmin > be->blkif->reqrate) {
>> +                                be->blkif->reqrate = be->blkif->reqmin;
>> +                                DPRINTK("%s reset default value(tokens-rate=%d)", name, be->blkif->reqrate);
>> +                        }
>> +                }
>> +        }
>> +        be->blkif->reqtime = jiffies;
>> +
>>          if (be->major == 0 && be->minor == 0) {
>>                  /* Front end dir is a number, which is used as the handle. */
>>
>> @@ -414,6 +451,49 @@ static void frontend_changed(struct xenb
>>
>>  /* ** Connection ** */
>>
>> +static void unregister_rate_watch(struct backend_info *be)
>> +{
>> +        if (be->have_rate_watch) {
>> +                unregister_xenbus_watch(&be->rate_watch);
>> +                kfree(be->rate_watch.node);
>> +        }
>> +        be->have_rate_watch = 0;
>> +}
>> +
>> +static void rate_changed(struct xenbus_watch *watch,
>> +                         const char **vec, unsigned int len)
>> +{
>> +
>> +        struct backend_info *be = container_of(watch, struct backend_info, rate_watch);
>> +        int err;
>> +        char name[TASK_COMM_LEN];
>> +
>> +        err = blkback_name(be->blkif, name);
>> +        if (err) {
>> +                xenbus_dev_error(be->dev, err, "get blkback dev name");
>> +                return;
>> +        }
>> +
>> +        err = xenbus_gather(XBT_NIL, be->dev->otherend,
>> +                            "tokens-rate", "%d",
>> +                            &be->blkif->reqrate, NULL);
>> +        if (err) {
>> +                DPRINTK("%s xenbus_gather(tokens-rate) error", name);
>> +        } else {
>> +                if (be->blkif->reqrate <= 0) {
>> +                        be->blkif->reqmin = 0;
>> +                        DPRINTK("%s tokens-rate == 0,no limit", name);
>> +                } else {
>> +                        DPRINTK("%s xenbus_gather(tokens-rate=%d)", name, be->blkif->reqrate);
>> +                        be->blkif->reqrate *= 2;
>> +                        be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
>> +                        if (be->blkif->reqmin > be->blkif->reqrate) {
>> +                                be->blkif->reqrate = be->blkif->reqmin;
>> +                                DPRINTK("%s reset default value(tokens-rate=%d)", name, be->blkif->reqrate);
>> +                        }
>> +                }
>> +        }
>> +}
>>
>>  /**
>>   * Write the physical details regarding the block device to the store, and
>> @@ -439,6 +519,14 @@ again:
>>          if (err)
>>                  goto abort;
>>
>> +        /* add by andrew for centos pv */
>> +        err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache", "1");
>> +        if (err) {
>> +                xenbus_dev_fatal(dev, err, "writing %s/feature-flush-cache",
>> +                                 dev->nodename);
>> +                goto abort;
>> +        }
>> +
>>          err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
>>                              vbd_size(&be->blkif->vbd));
>>          if (err) {
>> @@ -469,11 +557,22 @@ again:
>>          if (err)
>>                  xenbus_dev_fatal(dev, err, "ending transaction");
>>
>> +        DPRINTK("xenbus_switch_to XenbusStateConnected");
>> +
>>          err = xenbus_switch_state(dev, XenbusStateConnected);
>>          if (err)
>>                  xenbus_dev_fatal(dev, err, "switching to Connected state",
>>                                   dev->nodename);
>>
>> +        unregister_rate_watch(be);
>> +        err = xenbus_watch_path2(dev, dev->otherend, "tokens-rate",
>> +                                 &be->rate_watch, rate_changed);
>> +        if (!err)
>> +                be->have_rate_watch = 1;
>> +        else
>> +                xenbus_dev_fatal(dev, err, "watching tokens-rate",
>> +                                 dev->nodename);
>> +
>>          return;
>>  abort:
>>          xenbus_transaction_end(xbt, 1);
>

--
the purpose of libvirt is to provide an abstraction layer hiding all
xen features added since 2006 until they were finally understood and
copied by the kvm devs.

_______________________________________________
Xen-users mailing list
Xen-users@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-users
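A note for readers trying to follow the accounting in the quoted patch: the QoS logic is a per-VBD token bucket. The backend reads "tokens-rate" from xenstore (the patch appears to treat it as KiB/s and doubles it into 512-byte sectors per second), refill_reqcount() reloads the bucket once per second and never below the 1 MB/s floor (VBD_QOS_MIN_RATE_LIMIT), each dispatched request is charged via last_done_nr_sects, and blkif_schedule() sleeps until the next refill once the bucket is empty. The user-space sketch below models only that accounting; struct vbd_bucket, refill() and charge() are made-up names for illustration, while the field names mirror the patch.

/*
 * Illustrative user-space model of the token-bucket accounting the patch
 * adds to blkback (not part of the patch itself).  Field names mirror the
 * patch (reqrate, reqcount, reqmin, reqtime); time(2) stands in for jiffies.
 */
#include <stdio.h>
#include <time.h>

#define VBD_QOS_MIN_RATE_LIMIT (2 * 1024)       /* sectors/s == 1 MB/s */

struct vbd_bucket {
        long   reqrate;         /* sectors allowed per second ("tokens-rate" * 2) */
        long   reqcount;        /* tokens left in the current one-second window */
        long   reqmin;          /* floor applied at every refill */
        time_t reqtime;         /* when the current window expires */
};

static void refill(struct vbd_bucket *b)
{
        b->reqtime = time(NULL) + 1;            /* next refill roughly 1000 ms away */
        b->reqcount = b->reqrate;
        if (b->reqcount < b->reqmin)
                b->reqcount = b->reqmin;        /* never drop below 1 MB/s */
}

/* Charge one completed request; returns 0 when the caller should throttle. */
static int charge(struct vbd_bucket *b, long nr_sects)
{
        if (time(NULL) >= b->reqtime)           /* window expired: refill first */
                refill(b);
        b->reqcount -= nr_sects;
        return b->reqcount > 0;                 /* 0 -> sleep until reqtime */
}

int main(void)
{
        /* a "tokens-rate" of 2048 (KiB/s) becomes 4096 sectors/s, i.e. 2 MB/s */
        struct vbd_bucket b = { .reqrate = 2048 * 2, .reqmin = VBD_QOS_MIN_RATE_LIMIT };

        refill(&b);
        for (int i = 0; i < 64; i++) {          /* 44 KiB requests = 88 sectors each */
                if (!charge(&b, 88)) {
                        printf("request %d: bucket empty, back off until refill\n", i);
                        break;
                }
        }
        printf("%ld sector tokens left in this window\n", b.reqcount);
        return 0;
}

As in the patch, a request is charged as a whole after it has been dispatched, so a single large request can overshoot its one-second window; the bucket then simply stays empty until the next refill rather than carrying the deficit over.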
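Because the backend also registers a xenbus watch on the frontend's tokens-rate node, the limit can be changed while the guest is running and rate_changed() will pick it up. Below is a minimal dom0 sketch using libxenstore; the xenstore path is a hypothetical example (the real node is whatever dev->otherend resolves to for the VBD in question), and it assumes a libxenstore new enough to provide xs_open() and xenstore.h (older trees use xs_daemon_open() and xs.h). The same write can be issued from a shell with the xenstore-write tool.

/*
 * Illustrative sketch: write "tokens-rate" from dom0 so that rate_changed()
 * in the patch picks up the new limit.  The path below is a hypothetical
 * example, not taken from the patch.
 */
#include <stdio.h>
#include <string.h>
#include <xenstore.h>

int main(void)
{
        const char *path = "/local/domain/3/device/vbd/51712/tokens-rate"; /* hypothetical */
        const char *rate = "4096";      /* with the patch's apparent units, roughly 4 MB/s */
        struct xs_handle *xs = xs_open(0);

        if (!xs) {
                perror("xs_open");
                return 1;
        }
        if (!xs_write(xs, XBT_NULL, path, rate, strlen(rate)))
                fprintf(stderr, "xs_write(%s) failed\n", path);
        xs_close(xs);
        return 0;
}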