Re: [Xen-devel] [PATCHv7 3/3] gnttab: use per-VCPU maptrack free lists



>>> On 30.04.15 at 15:28, <david.vrabel@xxxxxxxxxx> wrote:
> From: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
> 
> Performance analysis of aggregate network throughput with many VMs
> shows that performance is significantly limited by contention on the
> maptrack lock when obtaining/releasing maptrack handles from the free
> list.
> 
> Instead of a single free list use a per-VCPU list. This avoids any
> contention when obtaining a handle.  Handles must be released back to
> their original list and since this may occur on a different VCPU there
> is some contention on the destination VCPU's free list tail pointer
> (but this is much better than a per-domain lock).
> 
> A domain's maptrack limit is multiplied by the number of VCPUs.  This
> ensures that a worst case domain that only performs grant table
> operations via one VCPU will not see a lower maptrack limit.
> 
> Signed-off-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
>  xen/common/grant_table.c      |  131 ++++++++++++++++++++++++-----------------
>  xen/include/xen/grant_table.h |    8 ++-
>  xen/include/xen/sched.h       |    5 ++
>  3 files changed, 87 insertions(+), 57 deletions(-)
> 
> diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
> index 569b3d3..d8e7373 100644
> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -118,9 +118,9 @@ struct gnttab_unmap_common {
>      ((t)->maptrack[(e)/MAPTRACK_PER_PAGE][(e)%MAPTRACK_PER_PAGE])
>  
>  static inline unsigned int
> -nr_maptrack_frames(struct grant_table *t)
> +nr_vcpu_maptrack_frames(struct vcpu *v)
>  {
> -    return t->maptrack_limit / MAPTRACK_PER_PAGE;
> +    return v->maptrack_limit / MAPTRACK_PER_PAGE;
>  }
>  
>  #define MAPTRACK_TAIL (~0u)
> @@ -255,66 +255,91 @@ static int __get_paged_frame(unsigned long gfn, unsigned long *frame, struct pag
>  
>  static inline int
>  __get_maptrack_handle(
> -    struct grant_table *t)
> +    struct grant_table *t,
> +    struct vcpu *v)
>  {
> -    unsigned int h;
> -    if ( unlikely((h = t->maptrack_head) == MAPTRACK_TAIL) )
> +    unsigned int head, next;
> +
> +    head = v->maptrack_head;
> +    if ( unlikely(head == MAPTRACK_TAIL) )
> +        return -1;
> +
> +    next = maptrack_entry(t, head).ref;
> +    if ( unlikely(next == MAPTRACK_TAIL) )
>          return -1;
> -    t->maptrack_head = maptrack_entry(t, h).ref;
> -    return h;
> +
> +    v->maptrack_head = next;
> +
> +    return head;
>  }
>  
>  static inline void
>  put_maptrack_handle(
>      struct grant_table *t, int handle)
>  {
> -    spin_lock(&t->maptrack_lock);
> -    maptrack_entry(t, handle).ref = t->maptrack_head;
> -    t->maptrack_head = handle;
> -    spin_unlock(&t->maptrack_lock);
> +    struct domain *d = current->domain;
> +    struct vcpu *v;
> +    u32 prev_tail, cur_tail;
> +
> +    /* Set current hande to have TAIL reference */
> +    maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
> +    v = d->vcpu[maptrack_entry(t,handle).vcpu];

There's a missing blank after the comma in maptrack_entry(t,handle);
the comment above has a typo ("hande") and is missing a full stop.
(There are further similar issues below.)

> +    cur_tail = v->maptrack_tail;

read_atomic()?
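
A minimal sketch of that suggestion, assuming Xen's read_atomic()
accessor is suitable for this field:

    cur_tail = read_atomic(&v->maptrack_tail);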

> +    do {
> +        prev_tail = cur_tail;
> +        cur_tail = cmpxchg((u32 *)&v->maptrack_tail, prev_tail, handle);

Please adjust the type of maptrack_tail (or the types of {prev,cur}_tail)
such that no cast is needed here.
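
I.e. something along these lines (a sketch, assuming maptrack_tail
becomes a plain unsigned int in struct vcpu, matching the local
variables):

    unsigned int prev_tail, cur_tail;
    ...
    cur_tail = cmpxchg(&v->maptrack_tail, prev_tail, handle);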

> +    } while ( cur_tail != prev_tail );
> +    maptrack_entry(t, prev_tail).ref = handle;
>  }
>  
>  static inline int
>  get_maptrack_handle(
>      struct grant_table *lgt)
>  {
> +    struct vcpu          *v = current;
>      int                   i;
>      grant_handle_t        handle;
>      struct grant_mapping *new_mt;
> -    unsigned int          new_mt_limit, nr_frames;
> -
> -    spin_lock(&lgt->maptrack_lock);
> +    unsigned int          nr_frames;
>  
> -    while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
> -    {
> -        nr_frames = nr_maptrack_frames(lgt);
> -        if ( nr_frames >= max_maptrack_frames )
> -            break;
> -
> -        new_mt = alloc_xenheap_page();
> -        if ( !new_mt )
> -            break;
> +    handle = __get_maptrack_handle(lgt, v);
> +    if ( likely(handle != -1) )
> +        return handle;
>  
> -        clear_page(new_mt);
> +    nr_frames = nr_vcpu_maptrack_frames(v);
> +    if ( nr_frames >= max_maptrack_frames )
> +        return -1;
>  
> -        new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
> +    new_mt = alloc_xenheap_page();
> +    if ( !new_mt )
> +        return -1;
> +    clear_page(new_mt);
>  
> -        for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
> -            new_mt[i - 1].ref = lgt->maptrack_limit + i;
> -        new_mt[i - 1].ref = lgt->maptrack_head;
> -        lgt->maptrack_head = lgt->maptrack_limit;
> +    spin_lock(&lgt->maptrack_lock);
>  
> -        lgt->maptrack[nr_frames] = new_mt;
> -        smp_wmb();
> -        lgt->maptrack_limit      = new_mt_limit;
> +    for ( i = 1; i < MAPTRACK_PER_PAGE; i++ ){
> +        new_mt[i - 1].ref = (lgt->maptrack_pages * MAPTRACK_PER_PAGE) + i;
> +        new_mt[i - 1].vcpu = v->vcpu_id;
> +    }
> +    /* Set last entry vcpu and ref */
> +    new_mt[i - 1].ref = v->maptrack_head;
> +    new_mt[i - 1].vcpu = v->vcpu_id;
>  
> -        gdprintk(XENLOG_INFO, "Increased maptrack size to %u frames\n",
> -                 nr_frames + 1);
> +    v->maptrack_head = lgt->maptrack_pages * MAPTRACK_PER_PAGE;
> +    if (v->maptrack_tail == MAPTRACK_TAIL)

Doesn't this imply v->maptrack_head == MAPTRACK_TAIL (before
the assignment right above) and hence ...

> +    {
> +        v->maptrack_tail = (lgt->maptrack_pages * MAPTRACK_PER_PAGE)
> +            + MAPTRACK_PER_PAGE - 1;
> +        new_mt[i - 1].ref = MAPTRACK_TAIL;

... this being redundant with the setting of the last entry above? In
which case this could better be an ASSERT() than an assignment.
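
I.e. the assignment could become (sketch), given that the preceding
loop already stored v->maptrack_head (== MAPTRACK_TAIL here) into
that entry:

    ASSERT(new_mt[i - 1].ref == MAPTRACK_TAIL);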

> @@ -566,7 +591,7 @@ static void mapcount(
>      ASSERT(rw_is_locked(&rd->grant_table->lock));
>      spin_lock(&lgt->maptrack_lock);
>  
> -    for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
> +    for ( handle = 0; handle < lgt->maptrack_pages * MAPTRACK_PER_PAGE; handle++ )

Long line.
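
E.g., wrapped to stay within coding-style limits:

    for ( handle = 0;
          handle < lgt->maptrack_pages * MAPTRACK_PER_PAGE;
          handle++ )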

> @@ -1430,6 +1456,17 @@ gnttab_setup_table(
>      gt = d->grant_table;
>      write_lock(&gt->lock);
>  
> +    /* Tracking of mapped foreign frames table */
> +    if ( (gt->maptrack = xzalloc_array(struct grant_mapping *,
> +                                       max_maptrack_frames * d->max_vcpus)) == NULL )
> +        goto out3;

This surely can easily become an allocation of far more than a page,
and hence needs to be broken up (perhaps using vmap() to map
individually allocated pages).
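
Roughly along these lines (sketch only; error unwinding is elided,
and the exact vmap() parameter type has varied between versions --
an array of MFNs is assumed here):

    nr_pages = DIV_ROUND_UP(max_maptrack_frames * d->max_vcpus *
                            sizeof(*gt->maptrack), PAGE_SIZE);
    mfns = xmalloc_array(unsigned long, nr_pages);
    for ( i = 0; i < nr_pages; i++ )
    {
        void *p = alloc_xenheap_page();  /* NULL check omitted */

        clear_page(p);
        mfns[i] = virt_to_mfn(p);
    }
    gt->maptrack = vmap(mfns, nr_pages);
    xfree(mfns);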

> +    for_each_vcpu( d, v )
> +    {
> +        v->maptrack_head = MAPTRACK_TAIL;
> +        v->maptrack_tail = MAPTRACK_TAIL;
> +    }
> +    gt->maptrack_pages = 0;

I don't think this is needed. Or if it is (i.e. the value can be reset here)
then doing the allocation above unconditionally would look wrong too.
(I suppose it gets moved here because of the dependency on
d->max_vcpus.) Indeed I don't see anything preventing a guest from
invoking GNTTABOP_setup_table more than once.
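
So presumably something like (sketch, making the allocation and list
initialization one-shot; still subject to the allocation-size concern
above):

    if ( gt->maptrack == NULL )
    {
        gt->maptrack = xzalloc_array(struct grant_mapping *,
                                     max_maptrack_frames * d->max_vcpus);
        if ( gt->maptrack == NULL )
            goto out3;

        for_each_vcpu ( d, v )
            v->maptrack_head = v->maptrack_tail = MAPTRACK_TAIL;
    }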

> @@ -3066,12 +3091,10 @@ grant_table_create(
>          free_xenheap_page(t->shared_raw[i]);
>      xfree(t->shared_raw);
>   no_mem_3:
> -    free_xenheap_page(t->maptrack[0]);
> -    xfree(t->maptrack);
> - no_mem_2:
>      for ( i = 0;
>            i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
>          free_xenheap_page(t->active[i]);
> + no_mem_2:
>      xfree(t->active);

This movement of the no_mem_2 label looks wrong.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel