[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] Refactor ioreq server for better performance



> -----Original Message-----
> From: Yu Zhang [mailto:yu.c.zhang@xxxxxxxxxxxxxxx]
> Sent: 26 June 2015 11:30
> To: xen-devel@xxxxxxxxxxxxxxxxxxxx; Paul Durrant; Andrew Cooper;
> JBeulich@xxxxxxxx; Kevin Tian; zhiyuan.lv@xxxxxxxxx
> Subject: [PATCH] Refactor ioreq server for better performance
> 
>   XenGT leverages ioreq server to track and forward the accesses to
> GPU IO resources, e.g. the PPGTT(per-process graphic translation
> tables). Currently, ioreq server uses rangeset to track the BDF/
> PIO/MMIO ranges to be emulated. To select an ioreq server, the
> rangeset is searched to see if the IO range is recorded. However,
> traversing the link list inside rangeset could be time consuming
> when number of ranges is too high. On HSW platform, number of PPGTTs
> for each vGPU could be several hundred. On BDW, this value could
> be several thousand.
>   To increase the performance, a new data structure, rb_rangeset
> is defined. Compared with rangeset, which is based on doubly linked
> list with O(n) time complexity for searching, this rb_rangeset is
> based on red-black tree with O(log(n)) time complexity. Besides the
> underlying data structure difference with the rangeset, another one
> is that the rb_rangeset does not provide a spinlock, instead, it
> left this to users.
>   Besides, NR_IO_RANGE_TYPES is changed to 8192 to accommodate more
> ranges.
> 
> Signed-off-by: Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx>
> ---
>  xen/arch/x86/hvm/hvm.c           |  52 ++++-----
>  xen/common/Makefile              |   1 +
>  xen/common/rb_rangeset.c         | 243
> +++++++++++++++++++++++++++++++++++++++
>  xen/include/asm-x86/hvm/domain.h |   4 +-
>  xen/include/xen/rb_rangeset.h    |  45 ++++++++
>  5 files changed, 311 insertions(+), 34 deletions(-)
>  create mode 100644 xen/common/rb_rangeset.c
>  create mode 100644 xen/include/xen/rb_rangeset.h
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 535d622..be70925 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -37,6 +37,7 @@
>  #include <xen/wait.h>
>  #include <xen/mem_access.h>
>  #include <xen/rangeset.h>
> +#include <xen/rb_rangeset.h>
>  #include <asm/shadow.h>
>  #include <asm/hap.h>
>  #include <asm/current.h>
> @@ -809,7 +810,7 @@ static void hvm_ioreq_server_unmap_pages(struct
> hvm_ioreq_server *s,
>      }
>  }
> 
> -static void hvm_ioreq_server_free_rangesets(struct hvm_ioreq_server *s,
> +static void hvm_ioreq_server_free_rb_rangesets(struct hvm_ioreq_server
> *s,

Did you need to change the name of the function here?

>                                              bool_t is_default)
>  {
>      unsigned int i;
> @@ -818,10 +819,10 @@ static void
> hvm_ioreq_server_free_rangesets(struct hvm_ioreq_server *s,
>          return;
> 
>      for ( i = 0; i < NR_IO_RANGE_TYPES; i++ )
> -        rangeset_destroy(s->range[i]);
> +        rb_rangeset_destroy(s->range[i]);
>  }
> 
> -static int hvm_ioreq_server_alloc_rangesets(struct hvm_ioreq_server *s,
> +static int hvm_ioreq_server_alloc_rb_rangesets(struct hvm_ioreq_server
> *s,

Same here.

>                                              bool_t is_default)
>  {
>      unsigned int i;
> @@ -832,33 +833,20 @@ static int hvm_ioreq_server_alloc_rangesets(struct
> hvm_ioreq_server *s,
> 
>      for ( i = 0; i < NR_IO_RANGE_TYPES; i++ )
>      {
> -        char *name;
> -
> -        rc = asprintf(&name, "ioreq_server %d %s", s->id,
> -                      (i == HVMOP_IO_RANGE_PORT) ? "port" :
> -                      (i == HVMOP_IO_RANGE_MEMORY) ? "memory" :
> -                      (i == HVMOP_IO_RANGE_PCI) ? "pci" :
> -                      "");
> -        if ( rc )
> -            goto fail;
> -
> -        s->range[i] = rangeset_new(s->domain, name,
> -                                   RANGESETF_prettyprint_hex);
> -
> -        xfree(name);
> +        s->range[i] = rb_rangeset_new();
> 

I think assigning a name to the rangeset and having a debug-key dump is useful. 
Can you not duplicate that in your new implementation?

>          rc = -ENOMEM;
>          if ( !s->range[i] )
>              goto fail;
> 
> -        rangeset_limit(s->range[i], MAX_NR_IO_RANGES);
> +        s->range[i]->nr_ranges = MAX_NR_IO_RANGES;

I'd add a limit function rather than just stooging into the structure fields.

>      }
> 
>   done:
>      return 0;
> 
>   fail:
> -    hvm_ioreq_server_free_rangesets(s, 0);
> +    hvm_ioreq_server_free_rb_rangesets(s, 0);
> 

Without the name change this diff is gone.

>      return rc;
>  }
> @@ -934,7 +922,7 @@ static int hvm_ioreq_server_init(struct
> hvm_ioreq_server *s, struct domain *d,
>      INIT_LIST_HEAD(&s->ioreq_vcpu_list);
>      spin_lock_init(&s->bufioreq_lock);
> 
> -    rc = hvm_ioreq_server_alloc_rangesets(s, is_default);
> +    rc = hvm_ioreq_server_alloc_rb_rangesets(s, is_default);

And this one.

>      if ( rc )
>          return rc;
> 
> @@ -960,7 +948,7 @@ static int hvm_ioreq_server_init(struct
> hvm_ioreq_server *s, struct domain *d,
>      hvm_ioreq_server_unmap_pages(s, is_default);
> 
>   fail_map:
> -    hvm_ioreq_server_free_rangesets(s, is_default);
> +    hvm_ioreq_server_free_rb_rangesets(s, is_default);
> 

And this one.

>      return rc;
>  }
> @@ -971,7 +959,7 @@ static void hvm_ioreq_server_deinit(struct
> hvm_ioreq_server *s,
>      ASSERT(!s->enabled);
>      hvm_ioreq_server_remove_all_vcpus(s);
>      hvm_ioreq_server_unmap_pages(s, is_default);
> -    hvm_ioreq_server_free_rangesets(s, is_default);
> +    hvm_ioreq_server_free_rb_rangesets(s, is_default);

And this one.

>  }
> 
>  static ioservid_t next_ioservid(struct domain *d)
> @@ -1149,7 +1137,7 @@ static int
> hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id,
> 
>          if ( s->id == id )
>          {
> -            struct rangeset *r;
> +            struct rb_rangeset *r;
> 
>              switch ( type )
>              {
> @@ -1169,10 +1157,10 @@ static int
> hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id,
>                  break;
> 
>              rc = -EEXIST;
> -            if ( rangeset_overlaps_range(r, start, end) )
> +            if ( rb_rangeset_overlaps_range(r, start, end) )
>                  break;
> 
> -            rc = rangeset_add_range(r, start, end);
> +            rc = rb_rangeset_add_range(r, start, end);
>              break;
>          }
>      }
> @@ -1200,7 +1188,7 @@ static int
> hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
> 
>          if ( s->id == id )
>          {
> -            struct rangeset *r;
> +            struct rb_rangeset *r;
> 
>              switch ( type )
>              {
> @@ -1220,10 +1208,10 @@ static int
> hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
>                  break;
> 
>              rc = -ENOENT;
> -            if ( !rangeset_contains_range(r, start, end) )
> +            if ( !rb_rangeset_contains_range(r, start, end) )
>                  break;
> 
> -            rc = rangeset_remove_range(r, start, end);
> +            rc = rb_rangeset_remove_range(r, start, end);
>              break;
>          }
>      }
> @@ -2465,7 +2453,7 @@ struct hvm_ioreq_server
> *hvm_select_ioreq_server(struct domain *d,
>                            &d->arch.hvm_domain.ioreq_server.list,
>                            list_entry )
>      {
> -        struct rangeset *r;
> +        struct rb_rangeset *r;
> 
>          if ( s == d->arch.hvm_domain.default_ioreq_server )
>              continue;
> @@ -2484,18 +2472,18 @@ struct hvm_ioreq_server
> *hvm_select_ioreq_server(struct domain *d,
> 
>          case IOREQ_TYPE_PIO:
>              end = addr + p->size - 1;
> -            if ( rangeset_contains_range(r, addr, end) )
> +            if ( rb_rangeset_contains_range(r, addr, end) )
>                  return s;
> 
>              break;
>          case IOREQ_TYPE_COPY:
>              end = addr + (p->size * p->count) - 1;
> -            if ( rangeset_contains_range(r, addr, end) )
> +            if ( rb_rangeset_contains_range(r, addr, end) )
>                  return s;
> 
>              break;
>          case IOREQ_TYPE_PCI_CONFIG:
> -            if ( rangeset_contains_singleton(r, addr >> 32) )
> +            if ( rb_rangeset_contains_range(r, addr >> 32, addr >> 32) )
>              {
>                  p->type = type;
>                  p->addr = addr;
> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index 1cddebc..ec9273e 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -26,6 +26,7 @@ obj-$(HAS_PDX) += pdx.o
>  obj-y += preempt.o
>  obj-y += random.o
>  obj-y += rangeset.o
> +obj-y += rb_rangeset.o
>  obj-y += radix-tree.o
>  obj-y += rbtree.o
>  obj-y += rcupdate.o
> diff --git a/xen/common/rb_rangeset.c b/xen/common/rb_rangeset.c
> new file mode 100644
> index 0000000..d3f3a08
> --- /dev/null
> +++ b/xen/common/rb_rangeset.c
> @@ -0,0 +1,243 @@
> +/*
> +  Red-black tree based rangeset
> +
> +  This program is free software; you can redistribute it and/or modify
> +  it under the terms of the GNU General Public License as published by
> +  the Free Software Foundation; either version 2 of the License, or
> +  (at your option) any later version.
> +
> +  This program is distributed in the hope that it will be useful,
> +  but WITHOUT ANY WARRANTY; without even the implied warranty of
> +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +  GNU General Public License for more details.
> +
> +  You should have received a copy of the GNU General Public License
> +  along with this program; if not, write to the Free Software
> +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> +*/
> +
> +#include <xen/lib.h>
> +#include <xen/kernel.h>
> +#include <xen/errno.h>
> +#include <xen/rb_rangeset.h>
> +
> +static struct rb_range *alloc_and_init_rb_range(
> +    struct rb_rangeset *r, unsigned long s, unsigned long e)
> +{
> +    struct rb_range *range = NULL;
> +
> +    if ( r->nr_ranges == 0 )
> +        return NULL;
> +
> +    range =  xmalloc(struct rb_range);
> +    if ( range )
> +    {
> +        --r->nr_ranges;
> +        range->s = s;
> +        range->e = e;
> +    }
> +    return range;
> +}
> +
> +static void free_rb_range(struct rb_rangeset *r, struct rb_range *range)
> +{
> +    r->nr_ranges++;
> +    rb_erase(&range->node, &r->rbroot);
> +    xfree(range);
> +}
> +
> +static struct rb_range *rb_rangeset_find_range(
> +    struct rb_rangeset *r, unsigned long s)
> +{
> +    struct rb_node *node;
> +
> +    node = r->rbroot.rb_node;
> +
> +    while ( node )
> +    {
> +        struct rb_range *range = container_of(node, struct rb_range, node);
> +
> +        if ( (s >= range->s) && (s <= range->e) )
> +            return range;
> +        if ( s < range->s )
> +            node = node->rb_left;
> +        else if ( s > range->s )
> +            node = node->rb_right;
> +    }
> +    return NULL;
> +}
> +
> +bool_t rb_rangeset_overlaps_range(struct rb_rangeset *r,
> +    unsigned long s, unsigned long e)
> +{
> +    struct rb_node *node;
> +    bool_t rc = 0;
> +
> +    ASSERT(s <= e);
> +
> +    node = r->rbroot.rb_node;
> +    while ( node )
> +    {
> +        struct rb_range *range = container_of(node, struct rb_range, node);
> +        if ( (s <= range->e) && (e >= range->s) )
> +        {
> +            rc = 1;
> +            break;
> +        }
> +        else if ( s < range->s )
> +            node = node->rb_left;
> +        else if ( s > range->s )
> +            node = node->rb_right;
> +    }
> +    return rc;
> +}
> +
> +bool_t rb_rangeset_contains_range(
> +    struct rb_rangeset *r, unsigned long s, unsigned long e)
> +{
> +    struct rb_range *range;
> +    bool_t contains;
> +
> +    ASSERT(s <= e);
> +
> +    range = rb_rangeset_find_range(r, s);
> +    contains = (range && (range->e >= e));
> +    return contains;
> +}
> +
> +static void rb_rangeset_insert_range(
> +    struct rb_root *root, struct rb_range *range)
> +{
> +    struct rb_node **new = &(root->rb_node);
> +    struct rb_node *parent = NULL;
> +
> +    /* Figure out where to put new node */
> +    while ( *new )
> +    {
> +        struct rb_range *this = container_of(*new, struct rb_range, node);
> +        parent = *new;
> +
> +        if ( range->s < this->s )
> +            new = &((*new)->rb_left);
> +        else if ( range->s > this->s )
> +            new = &((*new)->rb_right);
> +    }
> +
> +    /* Add new node and rebalance the range tree. */
> +    rb_link_node(&range->node, parent, new);
> +    rb_insert_color(&range->node, root);
> +}
> +
> +/*
> + * Add a new range into the rb_rangeset, rb_rangeset_overlaps_range()
> + * should be called first, to ensure nodes inside the rb_rangeset will
> + * not interleave.
> + */
> +int rb_rangeset_add_range(struct rb_rangeset *r,
> +    unsigned long s, unsigned long e)
> +{
> +    struct rb_range *range = NULL;
> +    struct rb_range *next = NULL;
> +
> +    ASSERT(s <= e);
> +
> +    if ( (s) && (range = rb_rangeset_find_range(r, s - 1)) )
> +    {
> +        /* range tree overlapped */
> +        if ( range->e != (s - 1) )
> +            return -EEXIST;
> +        range->e = e;
> +    }
> +    else
> +    {
> +        range = alloc_and_init_rb_range(r, s, e);
> +        if ( !range )
> +            return -ENOMEM;
> +        rb_rangeset_insert_range(&r->rbroot, range);
> +    }
> +
> +    next = container_of(rb_next(&range->node), struct rb_range, node);
> +
> +    if ( next && (next->s == (e + 1)) )
> +    {
> +        range->e = next->e;
> +        free_rb_range(r, next);
> +    }
> +
> +    return 0;
> +}
> +
> +int rb_rangeset_remove_range(struct rb_rangeset *r,
> +    unsigned long s, unsigned long e)
> +{
> +    struct rb_range *range = NULL;
> +    struct rb_range *next = NULL;
> +    unsigned long start, end;
> +
> +    ASSERT(s <= e);
> +
> +    range = rb_rangeset_find_range(r, s);
> +    if ( !range )
> +        return -ENOENT;
> +
> +    start = range->s;
> +    end = range->e;
> +
> +    /* the range to be removed must be contained in one rb_range */
> +    if ( end < e )
> +        return -ENOENT;
> +
> +    /* value of start can only be less than or equal to s */
> +    if ( start == s )
> +    {
> +        if ( end > e )
> +            range->s = e + 1;
> +        else
> +            free_rb_range(r, range);
> +    }
> +    else
> +    {
> +        if ( end > e )
> +        {
> +            next = alloc_and_init_rb_range(r, e + 1, end);
> +            if ( next == NULL )
> +                return -ENOMEM;
> +
> +            rb_rangeset_insert_range(&r->rbroot, next);
> +        }
> +        range->e = s - 1;
> +    }
> +    return 0;
> +}
> +
> +void rb_rangeset_destroy(struct rb_rangeset *r)
> +{
> +    struct rb_root *root;
> +    struct rb_node *node;
> +
> +    if ( r == NULL )
> +        return;
> +
> +    root = &r->rbroot;
> +    node = rb_first(root);
> +    while ( node )
> +    {
> +        struct rb_range *range = container_of(node, struct rb_range, node);
> +        free_rb_range(r, range);
> +        node = rb_first(root);
> +    }
> +
> +    xfree(r);
> +}
> +
> +struct rb_rangeset *rb_rangeset_new()
> +{
> +    struct rb_rangeset *r;
> +
> +    r = xmalloc(struct rb_rangeset);
> +    if ( r == NULL )
> +        return NULL;
> +
> +    r->rbroot = RB_ROOT;
> +    return r;
> +}
> diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-
> x86/hvm/domain.h
> index ad68fcf..a2f60a8 100644
> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -49,7 +49,7 @@ struct hvm_ioreq_vcpu {
>  };
> 
>  #define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1)
> -#define MAX_NR_IO_RANGES  256
> +#define MAX_NR_IO_RANGES  8192

I this limit enough? I think having something that's toolstack-tunable would be 
more future-proof.

  Paul

> 
>  struct hvm_ioreq_server {
>      struct list_head       list_entry;
> @@ -68,7 +68,7 @@ struct hvm_ioreq_server {
>      /* Lock to serialize access to buffered ioreq ring */
>      spinlock_t             bufioreq_lock;
>      evtchn_port_t          bufioreq_evtchn;
> -    struct rangeset        *range[NR_IO_RANGE_TYPES];
> +    struct rb_rangeset     *range[NR_IO_RANGE_TYPES];
>      bool_t                 enabled;
>      bool_t                 bufioreq_atomic;
>  };
> diff --git a/xen/include/xen/rb_rangeset.h
> b/xen/include/xen/rb_rangeset.h
> new file mode 100644
> index 0000000..768230c
> --- /dev/null
> +++ b/xen/include/xen/rb_rangeset.h
> @@ -0,0 +1,45 @@
> +/*
> +  Red-black tree based rangeset
> +
> +  This program is free software; you can redistribute it and/or modify
> +  it under the terms of the GNU General Public License as published by
> +  the Free Software Foundation; either version 2 of the License, or
> +  (at your option) any later version.
> +
> +  This program is distributed in the hope that it will be useful,
> +  but WITHOUT ANY WARRANTY; without even the implied warranty of
> +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +  GNU General Public License for more details.
> +
> +  You should have received a copy of the GNU General Public License
> +  along with this program; if not, write to the Free Software
> +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> +*/
> +
> +#ifndef __RB_RANGESET_H__
> +#define __RB_RANGESET_H__
> +
> +#include <xen/rbtree.h>
> +
> +struct rb_rangeset {
> +    long             nr_ranges;
> +    struct rb_root   rbroot;
> +};
> +
> +struct rb_range {
> +    struct rb_node node;
> +    unsigned long s, e;
> +};
> +
> +struct rb_rangeset *rb_rangeset_new(void);
> +void rb_rangeset_destroy(struct rb_rangeset *r);
> +bool_t rb_rangeset_overlaps_range(struct rb_rangeset *r,
> +    unsigned long s, unsigned long e);
> +bool_t rb_rangeset_contains_range(
> +    struct rb_rangeset *r, unsigned long s, unsigned long e);
> +int rb_rangeset_add_range(struct rb_rangeset *r,
> +    unsigned long s, unsigned long e);
> +int rb_rangeset_remove_range(struct rb_rangeset *r,
> +    unsigned long s, unsigned long e);
> +
> +#endif /* __RB_RANGESET_H__ */
> --
> 1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.