[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCHv6 2/5] gnttab: introduce per-active entry locks
From: Matt Wilson <msw@xxxxxxxxxx> Instead of protecting the state of a grant table active entry with the grant table lock, use per active entry locks. Active entries must be acquired with active_entry_acquire() and released with active_entry_release() which lock and unlock the entry's spinlock. This is the first step to improving grant table scalability and will allow the heaviliy contented grant table lock to be used less. Signed-off-by: Matt Wilson <msw@xxxxxxxxxx> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx> --- xen/common/grant_table.c | 213 ++++++++++++++++++++++++++++------------------ 1 file changed, 129 insertions(+), 84 deletions(-) diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index dfb45f8..3a555f9 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -157,10 +157,13 @@ struct active_grant_entry { in the page. */ unsigned length:16; /* For sub-page grants, the length of the grant. */ + spinlock_t lock; /* lock to protect access of this entry. + see docs/misc/grant-tables.txt for + locking protocol */ }; #define ACGNT_PER_PAGE (PAGE_SIZE / sizeof(struct active_grant_entry)) -#define active_entry(t, e) \ +#define _active_entry(t, e) \ ((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE]) static inline void gnttab_flush_tlb(const struct domain *d) @@ -188,6 +191,24 @@ nr_active_grant_frames(struct grant_table *gt) return num_act_frames_from_sha_frames(nr_grant_frames(gt)); } +static inline struct active_grant_entry * +active_entry_acquire(struct grant_table *t, grant_ref_t e) +{ + struct active_grant_entry *act; + + ASSERT(spin_is_locked(&t->lock)); + + act = &_active_entry(t, e); + spin_lock(&act->lock); + + return act; +} + +static inline void active_entry_release(struct active_grant_entry *act) +{ + spin_unlock(&act->lock); +} + /* Check if the page has been paged out, or needs unsharing. If rc == GNTST_okay, *page contains the page struct with a ref taken. Caller must do put_page(*page). @@ -228,30 +249,6 @@ static int __get_paged_frame(unsigned long gfn, unsigned long *frame, struct pag return rc; } -static inline void -double_gt_lock(struct grant_table *lgt, struct grant_table *rgt) -{ - if ( lgt < rgt ) - { - spin_lock(&lgt->lock); - spin_lock(&rgt->lock); - } - else - { - if ( lgt != rgt ) - spin_lock(&rgt->lock); - spin_lock(&lgt->lock); - } -} - -static inline void -double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt) -{ - spin_unlock(&lgt->lock); - if ( lgt != rgt ) - spin_unlock(&rgt->lock); -} - static inline int __get_maptrack_handle( struct grant_table *t) @@ -505,26 +502,23 @@ static int grant_map_exists(const struct domain *ld, unsigned long mfn, unsigned int *ref_count) { - const struct active_grant_entry *act; + struct active_grant_entry *act; unsigned int ref, max_iter; ASSERT(spin_is_locked(&rgt->lock)); max_iter = min(*ref_count + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT), nr_grant_entries(rgt)); - for ( ref = *ref_count; ref < max_iter; ref++ ) + for ( ref = *ref_count; ref < max_iter; active_entry_release(act), ref++ ) { - act = &active_entry(rgt, ref); + act = active_entry_acquire(rgt, ref); - if ( !act->pin ) - continue; - - if ( act->domid != ld->domain_id ) - continue; - - if ( act->frame != mfn ) + if ( !act->pin || + act->domid != ld->domain_id || + act->frame != mfn ) continue; + active_entry_release(act); return 0; } @@ -548,11 +542,14 @@ static void mapcount( for ( handle = 0; handle < lgt->maptrack_limit; handle++ ) { + struct active_grant_entry *act; + map = &maptrack_entry(lgt, handle); if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) || map->domid != rd->domain_id ) continue; - if ( active_entry(rd->grant_table, map->ref).frame == mfn ) + act = &_active_entry(rd->grant_table, map->ref); + if ( act->frame == mfn ) (map->flags & GNTMAP_readonly) ? (*rdc)++ : (*wrc)++; } } @@ -576,7 +573,6 @@ __gnttab_map_grant_ref( struct page_info *pg = NULL; int rc = GNTST_okay; u32 old_pin; - u32 act_pin; unsigned int cache_flags; struct active_grant_entry *act = NULL; struct grant_mapping *mt; @@ -639,7 +635,7 @@ __gnttab_map_grant_ref( if ( unlikely(op->ref >= nr_grant_entries(rgt))) PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref); - act = &active_entry(rgt, op->ref); + act = active_entry_acquire(rgt, op->ref); shah = shared_entry_header(rgt, op->ref); if (rgt->gt_version == 1) { sha1 = &shared_entry_v1(rgt, op->ref); @@ -656,7 +652,7 @@ __gnttab_map_grant_ref( ((act->domid != ld->domain_id) || (act->pin & 0x80808080U) != 0 || (act->is_sub_page)) ) - PIN_FAIL(unlock_out, GNTST_general_error, + PIN_FAIL(act_release_out, GNTST_general_error, "Bad domain (%d != %d), or risk of counter overflow %08x, or subpage %d\n", act->domid, ld->domain_id, act->pin, act->is_sub_page); @@ -667,7 +663,7 @@ __gnttab_map_grant_ref( if ( (rc = _set_status(rgt->gt_version, ld->domain_id, op->flags & GNTMAP_readonly, 1, shah, act, status) ) != GNTST_okay ) - goto unlock_out; + goto act_release_out; if ( !act->pin ) { @@ -698,12 +694,9 @@ __gnttab_map_grant_ref( GNTPIN_hstr_inc : GNTPIN_hstw_inc; frame = act->frame; - act_pin = act->pin; cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) ); - spin_unlock(&rgt->lock); - /* pg may be set, with a refcount included, from __get_paged_frame */ if ( !pg ) { @@ -778,8 +771,6 @@ __gnttab_map_grant_ref( goto undo_out; } - double_gt_lock(lgt, rgt); - if ( gnttab_need_iommu_mapping(ld) ) { unsigned int wrc, rdc; @@ -787,21 +778,20 @@ __gnttab_map_grant_ref( /* We're not translated, so we know that gmfns and mfns are the same things, so the IOMMU entry is always 1-to-1. */ mapcount(lgt, rd, frame, &wrc, &rdc); - if ( (act_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) && + if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) && !(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) ) { if ( wrc == 0 ) err = iommu_map_page(ld, frame, frame, IOMMUF_readable|IOMMUF_writable); } - else if ( act_pin && !old_pin ) + else if ( act->pin && !old_pin ) { if ( (wrc + rdc) == 0 ) err = iommu_map_page(ld, frame, frame, IOMMUF_readable); } if ( err ) { - double_gt_unlock(lgt, rgt); rc = GNTST_general_error; goto undo_out; } @@ -814,7 +804,8 @@ __gnttab_map_grant_ref( mt->ref = op->ref; mt->flags = op->flags; - double_gt_unlock(lgt, rgt); + active_entry_release(act); + spin_unlock(&rgt->lock); op->dev_bus_addr = (u64)frame << PAGE_SHIFT; op->handle = handle; @@ -837,10 +828,6 @@ __gnttab_map_grant_ref( put_page(pg); } - spin_lock(&rgt->lock); - - act = &active_entry(rgt, op->ref); - if ( op->flags & GNTMAP_device_map ) act->pin -= (op->flags & GNTMAP_readonly) ? GNTPIN_devr_inc : GNTPIN_devw_inc; @@ -856,6 +843,9 @@ __gnttab_map_grant_ref( if ( !act->pin ) gnttab_clear_flag(_GTF_reading, status); + act_release_out: + active_entry_release(act); + unlock_out: spin_unlock(&rgt->lock); op->status = rc; @@ -939,18 +929,19 @@ __gnttab_unmap_common( TRACE_1D(TRC_MEM_PAGE_GRANT_UNMAP, dom); rgt = rd->grant_table; - double_gt_lock(lgt, rgt); + + spin_lock(&rgt->lock); + act = active_entry_acquire(rgt, op->map->ref); op->flags = op->map->flags; if ( unlikely(!op->flags) || unlikely(op->map->domid != dom) ) { gdprintk(XENLOG_WARNING, "Unstable handle %u\n", op->handle); rc = GNTST_bad_handle; - goto unmap_out; + goto act_release_out; } op->rd = rd; - act = &active_entry(rgt, op->map->ref); if ( op->frame == 0 ) { @@ -959,7 +950,7 @@ __gnttab_unmap_common( else { if ( unlikely(op->frame != act->frame) ) - PIN_FAIL(unmap_out, GNTST_general_error, + PIN_FAIL(act_release_out, GNTST_general_error, "Bad frame number doesn't match gntref. (%lx != %lx)\n", op->frame, act->frame); if ( op->flags & GNTMAP_device_map ) @@ -978,7 +969,7 @@ __gnttab_unmap_common( if ( (rc = replace_grant_host_mapping(op->host_addr, op->frame, op->new_addr, op->flags)) < 0 ) - goto unmap_out; + goto act_release_out; ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)); op->map->flags &= ~GNTMAP_host_map; @@ -1000,7 +991,7 @@ __gnttab_unmap_common( if ( err ) { rc = GNTST_general_error; - goto unmap_out; + goto act_release_out; } } @@ -1008,8 +999,10 @@ __gnttab_unmap_common( if ( !(op->flags & GNTMAP_readonly) ) gnttab_mark_dirty(rd, op->frame); - unmap_out: - double_gt_unlock(lgt, rgt); + act_release_out: + active_entry_release(act); + spin_unlock(&rgt->lock); + op->status = rc; rcu_unlock_domain(rd); } @@ -1039,12 +1032,12 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) rcu_lock_domain(rd); rgt = rd->grant_table; - spin_lock(&rgt->lock); + spin_lock(&rgt->lock); if ( rgt->gt_version == 0 ) - goto unmap_out; + goto unlock_out; - act = &active_entry(rgt, op->map->ref); + act = active_entry_acquire(rgt, op->map->ref); sha = shared_entry_header(rgt, op->map->ref); if ( rgt->gt_version == 1 ) @@ -1058,7 +1051,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) * Suggests that __gntab_unmap_common failed early and so * nothing further to do */ - goto unmap_out; + goto act_release_out; } pg = mfn_to_page(op->frame); @@ -1082,7 +1075,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) * Suggests that __gntab_unmap_common failed in * replace_grant_host_mapping() so nothing further to do */ - goto unmap_out; + goto act_release_out; } if ( !is_iomem_page(op->frame) ) @@ -1103,8 +1096,12 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) if ( act->pin == 0 ) gnttab_clear_flag(_GTF_reading, status); - unmap_out: + act_release_out: + active_entry_release(act); + + unlock_out: spin_unlock(&rgt->lock); + if ( put_handle ) { op->map->flags = 0; @@ -1290,13 +1287,17 @@ gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt) gt->nr_status_frames = 0; } +/* + * Grow the grant table. The caller must hold the grant table's + * write lock before calling this function. + */ int gnttab_grow_table(struct domain *d, unsigned int req_nr_frames) { - /* d's grant table lock must be held by the caller */ + /* d's grant table write lock must be held by the caller */ struct grant_table *gt = d->grant_table; - unsigned int i; + unsigned int i, j; ASSERT(req_nr_frames <= max_grant_frames); @@ -1311,6 +1312,8 @@ gnttab_grow_table(struct domain *d, unsigned int req_nr_frames) if ( (gt->active[i] = alloc_xenheap_page()) == NULL ) goto active_alloc_failed; clear_page(gt->active[i]); + for ( j = 0; j < ACGNT_PER_PAGE; j++ ) + spin_lock_init(>->active[i][j].lock); } /* Shared */ @@ -1806,7 +1809,7 @@ __release_grant_for_copy( spin_lock(&rgt->lock); - act = &active_entry(rgt, gref); + act = active_entry_acquire(rgt, gref); sha = shared_entry_header(rgt, gref); r_frame = act->frame; @@ -1845,6 +1848,7 @@ __release_grant_for_copy( released_read = 1; } + active_entry_release(act); spin_unlock(&rgt->lock); if ( td != rd ) @@ -1906,14 +1910,14 @@ __acquire_grant_for_copy( spin_lock(&rgt->lock); if ( rgt->gt_version == 0 ) - PIN_FAIL(unlock_out, GNTST_general_error, + PIN_FAIL(gnt_unlock_out, GNTST_general_error, "remote grant table not ready\n"); if ( unlikely(gref >= nr_grant_entries(rgt)) ) - PIN_FAIL(unlock_out, GNTST_bad_gntref, + PIN_FAIL(gnt_unlock_out, GNTST_bad_gntref, "Bad grant reference %ld\n", gref); - act = &active_entry(rgt, gref); + act = active_entry_acquire(rgt, gref); shah = shared_entry_header(rgt, gref); if ( rgt->gt_version == 1 ) { @@ -1972,6 +1976,13 @@ __acquire_grant_for_copy( PIN_FAIL(unlock_out_clear, GNTST_general_error, "transitive grant referenced bad domain %d\n", trans_domid); + + /* + * __acquire_grant_for_copy() could take the read lock on + * the right table (if rd == td), so we have to drop the + * lock here and reacquire + */ + active_entry_release(act); spin_unlock(&rgt->lock); rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id, @@ -1979,10 +1990,13 @@ __acquire_grant_for_copy( &trans_page_off, &trans_length, 0); spin_lock(&rgt->lock); + act = active_entry_acquire(rgt, gref); + if ( rc != GNTST_okay ) { __fixup_status_for_copy_pin(act, status); - rcu_unlock_domain(td); + active_entry_release(act); spin_unlock(&rgt->lock); + rcu_unlock_domain(td); return rc; } @@ -1994,6 +2008,7 @@ __acquire_grant_for_copy( { __fixup_status_for_copy_pin(act, status); rcu_unlock_domain(td); + active_entry_release(act); spin_unlock(&rgt->lock); put_page(*page); return __acquire_grant_for_copy(rd, gref, ldom, readonly, @@ -2062,6 +2077,7 @@ __acquire_grant_for_copy( *length = act->length; *frame = act->frame; + active_entry_release(act); spin_unlock(&rgt->lock); return rc; @@ -2074,7 +2090,11 @@ __acquire_grant_for_copy( gnttab_clear_flag(_GTF_reading, status); unlock_out: + active_entry_release(act); + + gnt_unlock_out: spin_unlock(&rgt->lock); + return rc; } @@ -2399,16 +2419,18 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop) { for ( i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_grant_entries(gt); i++ ) { - act = &active_entry(gt, i); + act = active_entry_acquire(gt, i); if ( act->pin != 0 ) { gdprintk(XENLOG_WARNING, "tried to change grant table version from %d to %d, but some grant entries still in use\n", gt->gt_version, op.version); + active_entry_release(act); res = -EBUSY; goto out_unlock; } + active_entry_release(act); } } @@ -2587,9 +2609,17 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b) { struct domain *d = rcu_lock_current_domain(); struct grant_table *gt = d->grant_table; - struct active_grant_entry *act; + struct active_grant_entry *act_a = NULL; + struct active_grant_entry *act_b = NULL; s16 rc = GNTST_okay; + if ( ref_a == ref_b ) + /* + * noop, so avoid acquiring the same active entry + * twice which would case a deadlock. + */ + return rc; + spin_lock(>->lock); /* Bounds check on the grant refs */ @@ -2598,12 +2628,12 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b) if ( unlikely(ref_b >= nr_grant_entries(d->grant_table))) PIN_FAIL(out, GNTST_bad_gntref, "Bad ref-b (%d).\n", ref_b); - act = &active_entry(gt, ref_a); - if ( act->pin ) + act_a = active_entry_acquire(gt, ref_a); + if ( act_a->pin ) PIN_FAIL(out, GNTST_eagain, "ref a %ld busy\n", (long)ref_a); - act = &active_entry(gt, ref_b); - if ( act->pin ) + act_b = active_entry_acquire(gt, ref_b); + if ( act_b->pin ) PIN_FAIL(out, GNTST_eagain, "ref b %ld busy\n", (long)ref_b); if ( gt->gt_version == 1 ) @@ -2630,6 +2660,10 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b) } out: + if ( act_b != NULL ) + active_entry_release(act_b); + if ( act_a != NULL ) + active_entry_release(act_a); spin_unlock(>->lock); rcu_unlock_domain(d); @@ -2939,7 +2973,7 @@ grant_table_create( struct domain *d) { struct grant_table *t; - int i; + int i, j; if ( (t = xzalloc(struct grant_table)) == NULL ) goto no_mem_0; @@ -2958,6 +2992,8 @@ grant_table_create( if ( (t->active[i] = alloc_xenheap_page()) == NULL ) goto no_mem_2; clear_page(t->active[i]); + for ( j = 0; j < ACGNT_PER_PAGE; j++ ) + spin_lock_init(&t->active[i][j].lock); } /* Tracking of mapped foreign frames table */ @@ -3054,7 +3090,7 @@ gnttab_release_mappings( rgt = rd->grant_table; spin_lock(&rgt->lock); - act = &active_entry(rgt, ref); + act = active_entry_acquire(rgt, ref); sha = shared_entry_header(rgt, ref); if (rgt->gt_version == 1) status = &sha->flags; @@ -3112,6 +3148,7 @@ gnttab_release_mappings( if ( act->pin == 0 ) gnttab_clear_flag(_GTF_reading, status); + active_entry_release(act); spin_unlock(&rgt->lock); rcu_unlock_domain(rd); @@ -3130,7 +3167,9 @@ grant_table_destroy( if ( t == NULL ) return; - + + spin_lock(&t->lock); + for ( i = 0; i < nr_grant_frames(t); i++ ) free_xenheap_page(t->shared_raw[i]); xfree(t->shared_raw); @@ -3147,6 +3186,8 @@ grant_table_destroy( free_xenheap_page(t->status[i]); xfree(t->status); + spin_unlock(&t->lock); + xfree(t); d->grant_table = NULL; } @@ -3174,9 +3215,12 @@ static void gnttab_usage_print(struct domain *rd) uint16_t status; uint64_t frame; - act = &active_entry(gt, ref); + act = active_entry_acquire(gt, ref); if ( !act->pin ) + { + active_entry_release(act); continue; + } sha = shared_entry_header(gt, ref); @@ -3206,6 +3250,7 @@ static void gnttab_usage_print(struct domain *rd) printk("[%3d] %5d 0x%06lx 0x%08x %5d 0x%06"PRIx64" 0x%02x\n", ref, act->domid, act->frame, act->pin, sha->domid, frame, status); + active_entry_release(act); } out: -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |