[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] Refactor ioreq server for better performance
> -----Original Message----- > From: Yu Zhang [mailto:yu.c.zhang@xxxxxxxxxxxxxxx] > Sent: 26 June 2015 11:30 > To: xen-devel@xxxxxxxxxxxxxxxxxxxx; Paul Durrant; Andrew Cooper; > JBeulich@xxxxxxxx; Kevin Tian; zhiyuan.lv@xxxxxxxxx > Subject: [PATCH] Refactor ioreq server for better performance > > XenGT leverages ioreq server to track and forward the accesses to > GPU IO resources, e.g. the PPGTT(per-process graphic translation > tables). Currently, ioreq server uses rangeset to track the BDF/ > PIO/MMIO ranges to be emulated. To select an ioreq server, the > rangeset is searched to see if the IO range is recorded. However, > traversing the link list inside rangeset could be time consuming > when number of ranges is too high. On HSW platform, number of PPGTTs > for each vGPU could be several hundred. On BDW, this value could > be several thousand. > To increase the performance, a new data structure, rb_rangeset > is defined. Compared with rangeset, which is based on doubly linked > list with O(n) time complexity for searching, this rb_rangeset is > based on red-black tree with O(log(n)) time complexity. Besides the > underlying data structure difference with the rangeset, another one > is that the rb_rangeset does not provide a spinlock, instead, it > left this to users. > Besides, NR_IO_RANGE_TYPES is changed to 8192 to accommodate more > ranges. > > Signed-off-by: Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx> > --- > xen/arch/x86/hvm/hvm.c | 52 ++++----- > xen/common/Makefile | 1 + > xen/common/rb_rangeset.c | 243 > +++++++++++++++++++++++++++++++++++++++ > xen/include/asm-x86/hvm/domain.h | 4 +- > xen/include/xen/rb_rangeset.h | 45 ++++++++ > 5 files changed, 311 insertions(+), 34 deletions(-) > create mode 100644 xen/common/rb_rangeset.c > create mode 100644 xen/include/xen/rb_rangeset.h > > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c > index 535d622..be70925 100644 > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -37,6 +37,7 @@ > #include <xen/wait.h> > #include <xen/mem_access.h> > #include <xen/rangeset.h> > +#include <xen/rb_rangeset.h> > #include <asm/shadow.h> > #include <asm/hap.h> > #include <asm/current.h> > @@ -809,7 +810,7 @@ static void hvm_ioreq_server_unmap_pages(struct > hvm_ioreq_server *s, > } > } > > -static void hvm_ioreq_server_free_rangesets(struct hvm_ioreq_server *s, > +static void hvm_ioreq_server_free_rb_rangesets(struct hvm_ioreq_server > *s, Did you need to change the name of the function here? > bool_t is_default) > { > unsigned int i; > @@ -818,10 +819,10 @@ static void > hvm_ioreq_server_free_rangesets(struct hvm_ioreq_server *s, > return; > > for ( i = 0; i < NR_IO_RANGE_TYPES; i++ ) > - rangeset_destroy(s->range[i]); > + rb_rangeset_destroy(s->range[i]); > } > > -static int hvm_ioreq_server_alloc_rangesets(struct hvm_ioreq_server *s, > +static int hvm_ioreq_server_alloc_rb_rangesets(struct hvm_ioreq_server > *s, Same here. > bool_t is_default) > { > unsigned int i; > @@ -832,33 +833,20 @@ static int hvm_ioreq_server_alloc_rangesets(struct > hvm_ioreq_server *s, > > for ( i = 0; i < NR_IO_RANGE_TYPES; i++ ) > { > - char *name; > - > - rc = asprintf(&name, "ioreq_server %d %s", s->id, > - (i == HVMOP_IO_RANGE_PORT) ? "port" : > - (i == HVMOP_IO_RANGE_MEMORY) ? "memory" : > - (i == HVMOP_IO_RANGE_PCI) ? "pci" : > - ""); > - if ( rc ) > - goto fail; > - > - s->range[i] = rangeset_new(s->domain, name, > - RANGESETF_prettyprint_hex); > - > - xfree(name); > + s->range[i] = rb_rangeset_new(); > I think assigning a name to the rangeset and having a debug-key dump is useful. Can you not duplicate that in your new implementation? > rc = -ENOMEM; > if ( !s->range[i] ) > goto fail; > > - rangeset_limit(s->range[i], MAX_NR_IO_RANGES); > + s->range[i]->nr_ranges = MAX_NR_IO_RANGES; I'd add a limit function rather than just stooging into the structure fields. > } > > done: > return 0; > > fail: > - hvm_ioreq_server_free_rangesets(s, 0); > + hvm_ioreq_server_free_rb_rangesets(s, 0); > Without the name change this diff is gone. > return rc; > } > @@ -934,7 +922,7 @@ static int hvm_ioreq_server_init(struct > hvm_ioreq_server *s, struct domain *d, > INIT_LIST_HEAD(&s->ioreq_vcpu_list); > spin_lock_init(&s->bufioreq_lock); > > - rc = hvm_ioreq_server_alloc_rangesets(s, is_default); > + rc = hvm_ioreq_server_alloc_rb_rangesets(s, is_default); And this one. > if ( rc ) > return rc; > > @@ -960,7 +948,7 @@ static int hvm_ioreq_server_init(struct > hvm_ioreq_server *s, struct domain *d, > hvm_ioreq_server_unmap_pages(s, is_default); > > fail_map: > - hvm_ioreq_server_free_rangesets(s, is_default); > + hvm_ioreq_server_free_rb_rangesets(s, is_default); > And this one. > return rc; > } > @@ -971,7 +959,7 @@ static void hvm_ioreq_server_deinit(struct > hvm_ioreq_server *s, > ASSERT(!s->enabled); > hvm_ioreq_server_remove_all_vcpus(s); > hvm_ioreq_server_unmap_pages(s, is_default); > - hvm_ioreq_server_free_rangesets(s, is_default); > + hvm_ioreq_server_free_rb_rangesets(s, is_default); And this one. > } > > static ioservid_t next_ioservid(struct domain *d) > @@ -1149,7 +1137,7 @@ static int > hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id, > > if ( s->id == id ) > { > - struct rangeset *r; > + struct rb_rangeset *r; > > switch ( type ) > { > @@ -1169,10 +1157,10 @@ static int > hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id, > break; > > rc = -EEXIST; > - if ( rangeset_overlaps_range(r, start, end) ) > + if ( rb_rangeset_overlaps_range(r, start, end) ) > break; > > - rc = rangeset_add_range(r, start, end); > + rc = rb_rangeset_add_range(r, start, end); > break; > } > } > @@ -1200,7 +1188,7 @@ static int > hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id, > > if ( s->id == id ) > { > - struct rangeset *r; > + struct rb_rangeset *r; > > switch ( type ) > { > @@ -1220,10 +1208,10 @@ static int > hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id, > break; > > rc = -ENOENT; > - if ( !rangeset_contains_range(r, start, end) ) > + if ( !rb_rangeset_contains_range(r, start, end) ) > break; > > - rc = rangeset_remove_range(r, start, end); > + rc = rb_rangeset_remove_range(r, start, end); > break; > } > } > @@ -2465,7 +2453,7 @@ struct hvm_ioreq_server > *hvm_select_ioreq_server(struct domain *d, > &d->arch.hvm_domain.ioreq_server.list, > list_entry ) > { > - struct rangeset *r; > + struct rb_rangeset *r; > > if ( s == d->arch.hvm_domain.default_ioreq_server ) > continue; > @@ -2484,18 +2472,18 @@ struct hvm_ioreq_server > *hvm_select_ioreq_server(struct domain *d, > > case IOREQ_TYPE_PIO: > end = addr + p->size - 1; > - if ( rangeset_contains_range(r, addr, end) ) > + if ( rb_rangeset_contains_range(r, addr, end) ) > return s; > > break; > case IOREQ_TYPE_COPY: > end = addr + (p->size * p->count) - 1; > - if ( rangeset_contains_range(r, addr, end) ) > + if ( rb_rangeset_contains_range(r, addr, end) ) > return s; > > break; > case IOREQ_TYPE_PCI_CONFIG: > - if ( rangeset_contains_singleton(r, addr >> 32) ) > + if ( rb_rangeset_contains_range(r, addr >> 32, addr >> 32) ) > { > p->type = type; > p->addr = addr; > diff --git a/xen/common/Makefile b/xen/common/Makefile > index 1cddebc..ec9273e 100644 > --- a/xen/common/Makefile > +++ b/xen/common/Makefile > @@ -26,6 +26,7 @@ obj-$(HAS_PDX) += pdx.o > obj-y += preempt.o > obj-y += random.o > obj-y += rangeset.o > +obj-y += rb_rangeset.o > obj-y += radix-tree.o > obj-y += rbtree.o > obj-y += rcupdate.o > diff --git a/xen/common/rb_rangeset.c b/xen/common/rb_rangeset.c > new file mode 100644 > index 0000000..d3f3a08 > --- /dev/null > +++ b/xen/common/rb_rangeset.c > @@ -0,0 +1,243 @@ > +/* > + Red-black tree based rangeset > + > + This program is free software; you can redistribute it and/or modify > + it under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 2 of the License, or > + (at your option) any later version. > + > + This program is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with this program; if not, write to the Free Software > + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > +*/ > + > +#include <xen/lib.h> > +#include <xen/kernel.h> > +#include <xen/errno.h> > +#include <xen/rb_rangeset.h> > + > +static struct rb_range *alloc_and_init_rb_range( > + struct rb_rangeset *r, unsigned long s, unsigned long e) > +{ > + struct rb_range *range = NULL; > + > + if ( r->nr_ranges == 0 ) > + return NULL; > + > + range = xmalloc(struct rb_range); > + if ( range ) > + { > + --r->nr_ranges; > + range->s = s; > + range->e = e; > + } > + return range; > +} > + > +static void free_rb_range(struct rb_rangeset *r, struct rb_range *range) > +{ > + r->nr_ranges++; > + rb_erase(&range->node, &r->rbroot); > + xfree(range); > +} > + > +static struct rb_range *rb_rangeset_find_range( > + struct rb_rangeset *r, unsigned long s) > +{ > + struct rb_node *node; > + > + node = r->rbroot.rb_node; > + > + while ( node ) > + { > + struct rb_range *range = container_of(node, struct rb_range, node); > + > + if ( (s >= range->s) && (s <= range->e) ) > + return range; > + if ( s < range->s ) > + node = node->rb_left; > + else if ( s > range->s ) > + node = node->rb_right; > + } > + return NULL; > +} > + > +bool_t rb_rangeset_overlaps_range(struct rb_rangeset *r, > + unsigned long s, unsigned long e) > +{ > + struct rb_node *node; > + bool_t rc = 0; > + > + ASSERT(s <= e); > + > + node = r->rbroot.rb_node; > + while ( node ) > + { > + struct rb_range *range = container_of(node, struct rb_range, node); > + if ( (s <= range->e) && (e >= range->s) ) > + { > + rc = 1; > + break; > + } > + else if ( s < range->s ) > + node = node->rb_left; > + else if ( s > range->s ) > + node = node->rb_right; > + } > + return rc; > +} > + > +bool_t rb_rangeset_contains_range( > + struct rb_rangeset *r, unsigned long s, unsigned long e) > +{ > + struct rb_range *range; > + bool_t contains; > + > + ASSERT(s <= e); > + > + range = rb_rangeset_find_range(r, s); > + contains = (range && (range->e >= e)); > + return contains; > +} > + > +static void rb_rangeset_insert_range( > + struct rb_root *root, struct rb_range *range) > +{ > + struct rb_node **new = &(root->rb_node); > + struct rb_node *parent = NULL; > + > + /* Figure out where to put new node */ > + while ( *new ) > + { > + struct rb_range *this = container_of(*new, struct rb_range, node); > + parent = *new; > + > + if ( range->s < this->s ) > + new = &((*new)->rb_left); > + else if ( range->s > this->s ) > + new = &((*new)->rb_right); > + } > + > + /* Add new node and rebalance the range tree. */ > + rb_link_node(&range->node, parent, new); > + rb_insert_color(&range->node, root); > +} > + > +/* > + * Add a new range into the rb_rangeset, rb_rangeset_overlaps_range() > + * should be called first, to ensure nodes inside the rb_rangeset will > + * not interleave. > + */ > +int rb_rangeset_add_range(struct rb_rangeset *r, > + unsigned long s, unsigned long e) > +{ > + struct rb_range *range = NULL; > + struct rb_range *next = NULL; > + > + ASSERT(s <= e); > + > + if ( (s) && (range = rb_rangeset_find_range(r, s - 1)) ) > + { > + /* range tree overlapped */ > + if ( range->e != (s - 1) ) > + return -EEXIST; > + range->e = e; > + } > + else > + { > + range = alloc_and_init_rb_range(r, s, e); > + if ( !range ) > + return -ENOMEM; > + rb_rangeset_insert_range(&r->rbroot, range); > + } > + > + next = container_of(rb_next(&range->node), struct rb_range, node); > + > + if ( next && (next->s == (e + 1)) ) > + { > + range->e = next->e; > + free_rb_range(r, next); > + } > + > + return 0; > +} > + > +int rb_rangeset_remove_range(struct rb_rangeset *r, > + unsigned long s, unsigned long e) > +{ > + struct rb_range *range = NULL; > + struct rb_range *next = NULL; > + unsigned long start, end; > + > + ASSERT(s <= e); > + > + range = rb_rangeset_find_range(r, s); > + if ( !range ) > + return -ENOENT; > + > + start = range->s; > + end = range->e; > + > + /* the range to be removed must be contained in one rb_range */ > + if ( end < e ) > + return -ENOENT; > + > + /* value of start can only be less than or equal to s */ > + if ( start == s ) > + { > + if ( end > e ) > + range->s = e + 1; > + else > + free_rb_range(r, range); > + } > + else > + { > + if ( end > e ) > + { > + next = alloc_and_init_rb_range(r, e + 1, end); > + if ( next == NULL ) > + return -ENOMEM; > + > + rb_rangeset_insert_range(&r->rbroot, next); > + } > + range->e = s - 1; > + } > + return 0; > +} > + > +void rb_rangeset_destroy(struct rb_rangeset *r) > +{ > + struct rb_root *root; > + struct rb_node *node; > + > + if ( r == NULL ) > + return; > + > + root = &r->rbroot; > + node = rb_first(root); > + while ( node ) > + { > + struct rb_range *range = container_of(node, struct rb_range, node); > + free_rb_range(r, range); > + node = rb_first(root); > + } > + > + xfree(r); > +} > + > +struct rb_rangeset *rb_rangeset_new() > +{ > + struct rb_rangeset *r; > + > + r = xmalloc(struct rb_rangeset); > + if ( r == NULL ) > + return NULL; > + > + r->rbroot = RB_ROOT; > + return r; > +} > diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm- > x86/hvm/domain.h > index ad68fcf..a2f60a8 100644 > --- a/xen/include/asm-x86/hvm/domain.h > +++ b/xen/include/asm-x86/hvm/domain.h > @@ -49,7 +49,7 @@ struct hvm_ioreq_vcpu { > }; > > #define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1) > -#define MAX_NR_IO_RANGES 256 > +#define MAX_NR_IO_RANGES 8192 I this limit enough? I think having something that's toolstack-tunable would be more future-proof. Paul > > struct hvm_ioreq_server { > struct list_head list_entry; > @@ -68,7 +68,7 @@ struct hvm_ioreq_server { > /* Lock to serialize access to buffered ioreq ring */ > spinlock_t bufioreq_lock; > evtchn_port_t bufioreq_evtchn; > - struct rangeset *range[NR_IO_RANGE_TYPES]; > + struct rb_rangeset *range[NR_IO_RANGE_TYPES]; > bool_t enabled; > bool_t bufioreq_atomic; > }; > diff --git a/xen/include/xen/rb_rangeset.h > b/xen/include/xen/rb_rangeset.h > new file mode 100644 > index 0000000..768230c > --- /dev/null > +++ b/xen/include/xen/rb_rangeset.h > @@ -0,0 +1,45 @@ > +/* > + Red-black tree based rangeset > + > + This program is free software; you can redistribute it and/or modify > + it under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 2 of the License, or > + (at your option) any later version. > + > + This program is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with this program; if not, write to the Free Software > + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > +*/ > + > +#ifndef __RB_RANGESET_H__ > +#define __RB_RANGESET_H__ > + > +#include <xen/rbtree.h> > + > +struct rb_rangeset { > + long nr_ranges; > + struct rb_root rbroot; > +}; > + > +struct rb_range { > + struct rb_node node; > + unsigned long s, e; > +}; > + > +struct rb_rangeset *rb_rangeset_new(void); > +void rb_rangeset_destroy(struct rb_rangeset *r); > +bool_t rb_rangeset_overlaps_range(struct rb_rangeset *r, > + unsigned long s, unsigned long e); > +bool_t rb_rangeset_contains_range( > + struct rb_rangeset *r, unsigned long s, unsigned long e); > +int rb_rangeset_add_range(struct rb_rangeset *r, > + unsigned long s, unsigned long e); > +int rb_rangeset_remove_range(struct rb_rangeset *r, > + unsigned long s, unsigned long e); > + > +#endif /* __RB_RANGESET_H__ */ > -- > 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |