Re: [Xen-devel] [PATCHv7 3/3] gnttab: use per-VCPU maptrack free lists



>>> On 30.04.15 at 15:28, <david.vrabel@xxxxxxxxxx> wrote:
> From: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
> 
> Performance analysis of aggregate network throughput with many VMs
> shows that performance is significantly limited by contention on the
> maptrack lock when obtaining/releasing maptrack handles from the free
> list.
> 
> Instead of a single free list use a per-VCPU list. This avoids any
> contention when obtaining a handle.  Handles must be released back to
> their original list and since this may occur on a different VCPU there
> is some contention on the destination VCPU's free list tail pointer
> (but this is much better than a per-domain lock).
> 
> A domain's maptrack limit is multiplied by the number of VCPUs.  This
> ensures that a worst case domain that only performs grant table
> operations via one VCPU will not see a lower maptrack limit.
> 
> Signed-off-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
>  xen/common/grant_table.c      |  131 ++++++++++++++++++++++++-----------------
>  xen/include/xen/grant_table.h |    8 ++-
>  xen/include/xen/sched.h       |    5 ++
>  3 files changed, 87 insertions(+), 57 deletions(-)
> 
> diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
> index 569b3d3..d8e7373 100644
> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -118,9 +118,9 @@ struct gnttab_unmap_common {
>      ((t)->maptrack[(e)/MAPTRACK_PER_PAGE][(e)%MAPTRACK_PER_PAGE])
>  
>  static inline unsigned int
> -nr_maptrack_frames(struct grant_table *t)
> +nr_vcpu_maptrack_frames(struct vcpu *v)
>  {
> -    return t->maptrack_limit / MAPTRACK_PER_PAGE;
> +    return v->maptrack_limit / MAPTRACK_PER_PAGE;
>  }
>  
>  #define MAPTRACK_TAIL (~0u)
> @@ -255,66 +255,91 @@ static int __get_paged_frame(unsigned long gfn, unsigned long *frame, struct pag
>  
>  static inline int
>  __get_maptrack_handle(
> -    struct grant_table *t)
> +    struct grant_table *t,
> +    struct vcpu *v)
>  {
> -    unsigned int h;
> -    if ( unlikely((h = t->maptrack_head) == MAPTRACK_TAIL) )
> +    unsigned int head, next;
> +
> +    head = v->maptrack_head;
> +    if ( unlikely(head == MAPTRACK_TAIL) )
> +        return -1;
> +
> +    next = maptrack_entry(t, head).ref;
> +    if ( unlikely(next == MAPTRACK_TAIL) )
>          return -1;
> -    t->maptrack_head = maptrack_entry(t, h).ref;
> -    return h;
> +
> +    v->maptrack_head = next;
> +
> +    return head;
>  }
>  
>  static inline void
>  put_maptrack_handle(
>      struct grant_table *t, int handle)
>  {
> -    spin_lock(&t->maptrack_lock);
> -    maptrack_entry(t, handle).ref = t->maptrack_head;
> -    t->maptrack_head = handle;
> -    spin_unlock(&t->maptrack_lock);
> +    struct domain *d = current->domain;
> +    struct vcpu *v;
> +    u32 prev_tail, cur_tail;
> +
> +    /* Set current hande to have TAIL reference */
> +    maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
> +    v = d->vcpu[maptrack_entry(t,handle).vcpu];

There's a missing blank after the comma in maptrack_entry(t,handle);
the comment above has a typo ("hande") and is missing a full stop.
(There are further similar issues below.)

> +    cur_tail = v->maptrack_tail;

read_atomic()?
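
A minimal sketch of that suggestion, assuming Xen's read_atomic()
accessor is suitable for this field:

    cur_tail = read_atomic(&v->maptrack_tail);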

> +    do {
> +        prev_tail = cur_tail;
> +        cur_tail = cmpxchg((u32 *)&v->maptrack_tail, prev_tail, handle);

Please adjust the type of maptrack_tail (or the types of {prev,cur}_tail)
such that no cast is needed here.
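
I.e. something along these lines (a sketch, assuming maptrack_tail
becomes a plain unsigned int in struct vcpu, matching the local
variables):

    unsigned int prev_tail, cur_tail;
    ...
    cur_tail = cmpxchg(&v->maptrack_tail, prev_tail, handle);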

> +    } while ( cur_tail != prev_tail );
> +    maptrack_entry(t, prev_tail).ref = handle;
>  }
>  
>  static inline int
>  get_maptrack_handle(
>      struct grant_table *lgt)
>  {
> +    struct vcpu          *v = current;
>      int                   i;
>      grant_handle_t        handle;
>      struct grant_mapping *new_mt;
> -    unsigned int          new_mt_limit, nr_frames;
> -
> -    spin_lock(&lgt->maptrack_lock);
> +    unsigned int          nr_frames;
>  
> -    while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
> -    {
> -        nr_frames = nr_maptrack_frames(lgt);
> -        if ( nr_frames >= max_maptrack_frames )
> -            break;
> -
> -        new_mt = alloc_xenheap_page();
> -        if ( !new_mt )
> -            break;
> +    handle = __get_maptrack_handle(lgt, v);
> +    if ( likely(handle != -1) )
> +        return handle;
>  
> -        clear_page(new_mt);
> +    nr_frames = nr_vcpu_maptrack_frames(v);
> +    if ( nr_frames >= max_maptrack_frames )
> +        return -1;
>  
> -        new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
> +    new_mt = alloc_xenheap_page();
> +    if ( !new_mt )
> +        return -1;
> +    clear_page(new_mt);
>  
> -        for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
> -            new_mt[i - 1].ref = lgt->maptrack_limit + i;
> -        new_mt[i - 1].ref = lgt->maptrack_head;
> -        lgt->maptrack_head = lgt->maptrack_limit;
> +    spin_lock(&lgt->maptrack_lock);
>  
> -        lgt->maptrack[nr_frames] = new_mt;
> -        smp_wmb();
> -        lgt->maptrack_limit      = new_mt_limit;
> +    for ( i = 1; i < MAPTRACK_PER_PAGE; i++ ){
> +        new_mt[i - 1].ref = (lgt->maptrack_pages * MAPTRACK_PER_PAGE) + i;
> +        new_mt[i - 1].vcpu = v->vcpu_id;
> +    }
> +    /* Set last entry vcpu and ref */
> +    new_mt[i - 1].ref = v->maptrack_head;
> +    new_mt[i - 1].vcpu = v->vcpu_id;
>  
> -        gdprintk(XENLOG_INFO, "Increased maptrack size to %u frames\n",
> -                 nr_frames + 1);
> +    v->maptrack_head = lgt->maptrack_pages * MAPTRACK_PER_PAGE;
> +    if (v->maptrack_tail == MAPTRACK_TAIL)

Doesn't this imply v->maptrack_head == MAPTRACK_TAIL (before
the assignment right above) and hence ...

> +    {
> +        v->maptrack_tail = (lgt->maptrack_pages * MAPTRACK_PER_PAGE)
> +            + MAPTRACK_PER_PAGE - 1;
> +        new_mt[i - 1].ref = MAPTRACK_TAIL;

... this being redundant with the setting of the last entry above? In
which case this could better be an ASSERT() than an assignment.
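
I.e. the assignment could become (sketch), given that the preceding
loop already stored v->maptrack_head (== MAPTRACK_TAIL here) into
that entry:

    ASSERT(new_mt[i - 1].ref == MAPTRACK_TAIL);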

> @@ -566,7 +591,7 @@ static void mapcount(
>      ASSERT(rw_is_locked(&rd->grant_table->lock));
>      spin_lock(&lgt->maptrack_lock);
>  
> -    for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
> +    for ( handle = 0; handle < lgt->maptrack_pages * MAPTRACK_PER_PAGE; handle++ )

Long line.
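
E.g., wrapped to stay within coding-style limits:

    for ( handle = 0;
          handle < lgt->maptrack_pages * MAPTRACK_PER_PAGE;
          handle++ )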

> @@ -1430,6 +1456,17 @@ gnttab_setup_table(
>      gt = d->grant_table;
>      write_lock(&gt->lock);
>  
> +    /* Tracking of mapped foreign frames table */
> +    if ( (gt->maptrack = xzalloc_array(struct grant_mapping *,
> +                                       max_maptrack_frames * d->max_vcpus)) == NULL )
> +        goto out3;

This surely can easily become an allocation of far more than a page,
and hence needs to be broken up (perhaps using vmap() to map
individually allocated pages).
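
Roughly along these lines (sketch only; error unwinding is elided,
and the exact vmap() parameter type has varied between versions --
an array of MFNs is assumed here):

    nr_pages = DIV_ROUND_UP(max_maptrack_frames * d->max_vcpus *
                            sizeof(*gt->maptrack), PAGE_SIZE);
    mfns = xmalloc_array(unsigned long, nr_pages);
    for ( i = 0; i < nr_pages; i++ )
    {
        void *p = alloc_xenheap_page();  /* NULL check omitted */

        clear_page(p);
        mfns[i] = virt_to_mfn(p);
    }
    gt->maptrack = vmap(mfns, nr_pages);
    xfree(mfns);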

> +    for_each_vcpu( d, v )
> +    {
> +        v->maptrack_head = MAPTRACK_TAIL;
> +        v->maptrack_tail = MAPTRACK_TAIL;
> +    }
> +    gt->maptrack_pages = 0;

I don't think this is needed. Or if it is (i.e. the value can be reset here)
then doing the allocation above unconditionally would look wrong too.
(I suppose it gets moved here because of the dependency on
d->max_vcpus.) Indeed I don't see anything preventing a guest from
invoking GNTTABOP_setup_table more than once.
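
So presumably something like (sketch, making the allocation and list
initialization one-shot; still subject to the allocation-size concern
above):

    if ( gt->maptrack == NULL )
    {
        gt->maptrack = xzalloc_array(struct grant_mapping *,
                                     max_maptrack_frames * d->max_vcpus);
        if ( gt->maptrack == NULL )
            goto out3;

        for_each_vcpu ( d, v )
            v->maptrack_head = v->maptrack_tail = MAPTRACK_TAIL;
    }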

> @@ -3066,12 +3091,10 @@ grant_table_create(
>          free_xenheap_page(t->shared_raw[i]);
>      xfree(t->shared_raw);
>   no_mem_3:
> -    free_xenheap_page(t->maptrack[0]);
> -    xfree(t->maptrack);
> - no_mem_2:
>      for ( i = 0;
>            i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
>          free_xenheap_page(t->active[i]);
> + no_mem_2:
>      xfree(t->active);

This movement of the no_mem_2 label looks wrong.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel