[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v2 1/2] gnttab: Introduce rwlock to protect updates to grant table state
Split grant table lock into two separate locks. One to protect maptrack state and change the other into a rwlock. The rwlock is used to prevent readers from accessing inconsistent grant table state such as current version, partially initialized active table pages, etc. Signed-off-by: Matt Wilson <msw@xxxxxxxxxx> [chegger: ported to xen-staging, split into multiple commits] v2: * Add arm part per request from Julien Grall Signed-off-by: Christoph Egger <chegger@xxxxxxxxx> Cc: Jan Beulich <jbeulich@xxxxxxxx> Cc: Keir Fraser <keir@xxxxxxx> Cc: Julien Grall <julien.grall@xxxxxxxxxx> --- docs/misc/grant-tables.txt | 28 ++++++++- xen/arch/arm/mm.c | 4 +- xen/arch/x86/mm.c | 4 +- xen/common/grant_table.c | 135 ++++++++++++++++++++++++----------------- xen/include/xen/grant_table.h | 9 ++- 5 files changed, 117 insertions(+), 63 deletions(-) diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt index 19db4ec..c9ae8f2 100644 --- a/docs/misc/grant-tables.txt +++ b/docs/misc/grant-tables.txt @@ -74,7 +74,33 @@ is complete. matching map track entry is then removed, as if unmap had been invoked. These are not used by the transfer mechanism. map->domid : owner of the mapped frame - map->ref_and_flags : grant reference, ro/rw, mapped for host or device access + map->ref : grant reference + map->flags : ro/rw, mapped for host or device access + +******************************************************************************** + Locking + ~~~~~~~ + Xen uses several locks to serialize access to the internal grant table state. + + grant_table->lock : rwlock used to prevent readers from accessing + inconsistent grant table state such as current + version, partially initialized active table pages, + etc. + grant_table->maptrack_lock : spinlock used to protect the maptrack state + + The primary lock for the grant table is a read/write spinlock. All + functions that access members of struct grant_table must acquire a + read lock around critical sections. Any modification to the members + of struct grant_table (e.g., nr_status_frames, nr_grant_frames, + active frames, etc.) must only be made if the write lock is + held. These elements are read-mostly, and read critical sections can + be large, which makes a rwlock a good choice. + + The maptrack state is protected by its own spinlock. Any access (read + or write) of struct grant_table members that have a "maptrack_" + prefix must be made while holding the maptrack lock. The maptrack + state can be rapidly modified under some workloads, and the critical + sections are very small, thus we use a spinlock to protect them. ******************************************************************************** diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c index 7d4ba0c..2765683 100644 --- a/xen/arch/arm/mm.c +++ b/xen/arch/arm/mm.c @@ -1037,7 +1037,7 @@ int xenmem_add_to_physmap_one( switch ( space ) { case XENMAPSPACE_grant_table: - spin_lock(&d->grant_table->lock); + write_lock(&d->grant_table->lock); if ( d->grant_table->gt_version == 0 ) d->grant_table->gt_version = 1; @@ -1067,7 +1067,7 @@ int xenmem_add_to_physmap_one( t = p2m_ram_rw; - spin_unlock(&d->grant_table->lock); + write_unlock(&d->grant_table->lock); break; case XENMAPSPACE_shared_info: if ( idx != 0 ) diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 522c43d..37c13b1 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -4565,7 +4565,7 @@ int xenmem_add_to_physmap_one( mfn = virt_to_mfn(d->shared_info); break; case XENMAPSPACE_grant_table: - spin_lock(&d->grant_table->lock); + write_lock(&d->grant_table->lock); if ( d->grant_table->gt_version == 0 ) d->grant_table->gt_version = 1; @@ -4587,7 +4587,7 @@ int xenmem_add_to_physmap_one( mfn = virt_to_mfn(d->grant_table->shared_raw[idx]); } - spin_unlock(&d->grant_table->lock); + write_unlock(&d->grant_table->lock); break; case XENMAPSPACE_gmfn_range: case XENMAPSPACE_gmfn: diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index 8fba923..018b5b6 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -227,23 +227,23 @@ double_gt_lock(struct grant_table *lgt, struct grant_table *rgt) { if ( lgt < rgt ) { - spin_lock(&lgt->lock); - spin_lock(&rgt->lock); + spin_lock(&lgt->maptrack_lock); + spin_lock(&rgt->maptrack_lock); } else { if ( lgt != rgt ) - spin_lock(&rgt->lock); - spin_lock(&lgt->lock); + spin_lock(&rgt->maptrack_lock); + spin_lock(&lgt->maptrack_lock); } } static inline void double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt) { - spin_unlock(&lgt->lock); + spin_unlock(&lgt->maptrack_lock); if ( lgt != rgt ) - spin_unlock(&rgt->lock); + spin_unlock(&rgt->maptrack_lock); } static inline int @@ -261,10 +261,10 @@ static inline void put_maptrack_handle( struct grant_table *t, int handle) { - spin_lock(&t->lock); + spin_lock(&t->maptrack_lock); maptrack_entry(t, handle).ref = t->maptrack_head; t->maptrack_head = handle; - spin_unlock(&t->lock); + spin_unlock(&t->maptrack_lock); } static inline int @@ -276,7 +276,7 @@ get_maptrack_handle( struct grant_mapping *new_mt; unsigned int new_mt_limit, nr_frames; - spin_lock(&lgt->lock); + spin_lock(&lgt->maptrack_lock); while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) ) { @@ -305,7 +305,7 @@ get_maptrack_handle( nr_frames + 1); } - spin_unlock(&lgt->lock); + spin_unlock(&lgt->maptrack_lock); return handle; } @@ -502,7 +502,7 @@ static int grant_map_exists(const struct domain *ld, const struct active_grant_entry *act; unsigned int ref, max_iter; - ASSERT(spin_is_locked(&rgt->lock)); + ASSERT(rw_is_locked(&rgt->lock)); max_iter = min(*ref_count + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT), nr_grant_entries(rgt)); @@ -623,7 +623,7 @@ __gnttab_map_grant_ref( } rgt = rd->grant_table; - spin_lock(&rgt->lock); + read_lock(&rgt->lock); if ( rgt->gt_version == 0 ) PIN_FAIL(unlock_out, GNTST_general_error, @@ -696,7 +696,7 @@ __gnttab_map_grant_ref( cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) ); - spin_unlock(&rgt->lock); + read_unlock(&rgt->lock); /* pg may be set, with a refcount included, from __get_paged_frame */ if ( !pg ) @@ -831,7 +831,7 @@ __gnttab_map_grant_ref( put_page(pg); } - spin_lock(&rgt->lock); + read_lock(&rgt->lock); act = &active_entry(rgt, op->ref); @@ -851,7 +851,7 @@ __gnttab_map_grant_ref( gnttab_clear_flag(_GTF_reading, status); unlock_out: - spin_unlock(&rgt->lock); + read_unlock(&rgt->lock); op->status = rc; put_maptrack_handle(lgt, handle); rcu_unlock_domain(rd); @@ -891,28 +891,28 @@ __gnttab_unmap_common( ld = current->domain; lgt = ld->grant_table; + read_lock(&lgt->lock); op->frame = (unsigned long)(op->dev_bus_addr >> PAGE_SHIFT); if ( unlikely(op->handle >= lgt->maptrack_limit) ) { + read_unlock(&lgt->lock); gdprintk(XENLOG_INFO, "Bad handle (%d).\n", op->handle); op->status = GNTST_bad_handle; return; } + read_unlock(&lgt->lock); op->map = &maptrack_entry(lgt, op->handle); - spin_lock(&lgt->lock); if ( unlikely(!op->map->flags) ) { - spin_unlock(&lgt->lock); gdprintk(XENLOG_INFO, "Zero flags for handle (%d).\n", op->handle); op->status = GNTST_bad_handle; return; } dom = op->map->domid; - spin_unlock(&lgt->lock); if ( unlikely((rd = rcu_lock_domain_by_id(dom)) == NULL) ) { @@ -944,6 +944,7 @@ __gnttab_unmap_common( } op->rd = rd; + read_lock(&rgt->lock); act = &active_entry(rgt, op->map->ref); if ( op->frame == 0 ) @@ -1004,6 +1005,7 @@ __gnttab_unmap_common( unmap_out: double_gt_unlock(lgt, rgt); + read_unlock(&rgt->lock); op->status = rc; rcu_unlock_domain(rd); } @@ -1033,8 +1035,8 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) rcu_lock_domain(rd); rgt = rd->grant_table; - spin_lock(&rgt->lock); + read_lock(&rgt->lock); if ( rgt->gt_version == 0 ) goto unmap_out; @@ -1098,7 +1100,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) gnttab_clear_flag(_GTF_reading, status); unmap_out: - spin_unlock(&rgt->lock); + read_unlock(&rgt->lock); if ( put_handle ) { op->map->flags = 0; @@ -1284,10 +1286,13 @@ gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt) gt->nr_status_frames = 0; } +/* Grow the grant table. The caller must hold the grant table's + * write lock before calling this function. + */ int gnttab_grow_table(struct domain *d, unsigned int req_nr_frames) { - /* d's grant table lock must be held by the caller */ + /* d's grant table write lock must be held by the caller */ struct grant_table *gt = d->grant_table; unsigned int i; @@ -1392,7 +1397,7 @@ gnttab_setup_table( } gt = d->grant_table; - spin_lock(>->lock); + write_lock(>->lock); if ( gt->gt_version == 0 ) gt->gt_version = 1; @@ -1420,7 +1425,7 @@ gnttab_setup_table( } out3: - spin_unlock(>->lock); + write_unlock(>->lock); out2: rcu_unlock_domain(d); out1: @@ -1462,13 +1467,13 @@ gnttab_query_size( goto query_out_unlock; } - spin_lock(&d->grant_table->lock); + read_lock(&d->grant_table->lock); op.nr_frames = nr_grant_frames(d->grant_table); op.max_nr_frames = max_grant_frames; op.status = GNTST_okay; - spin_unlock(&d->grant_table->lock); + read_unlock(&d->grant_table->lock); query_out_unlock: @@ -1494,7 +1499,7 @@ gnttab_prepare_for_transfer( union grant_combo scombo, prev_scombo, new_scombo; int retries = 0; - spin_lock(&rgt->lock); + read_lock(&rgt->lock); if ( rgt->gt_version == 0 ) { @@ -1545,11 +1550,11 @@ gnttab_prepare_for_transfer( scombo = prev_scombo; } - spin_unlock(&rgt->lock); + read_unlock(&rgt->lock); return 1; fail: - spin_unlock(&rgt->lock); + read_unlock(&rgt->lock); return 0; } @@ -1741,7 +1746,7 @@ gnttab_transfer( TRACE_1D(TRC_MEM_PAGE_GRANT_TRANSFER, e->domain_id); /* Tell the guest about its new page frame. */ - spin_lock(&e->grant_table->lock); + read_lock(&e->grant_table->lock); if ( e->grant_table->gt_version == 1 ) { @@ -1759,7 +1764,7 @@ gnttab_transfer( shared_entry_header(e->grant_table, gop.ref)->flags |= GTF_transfer_completed; - spin_unlock(&e->grant_table->lock); + read_unlock(&e->grant_table->lock); rcu_unlock_domain(e); @@ -1797,7 +1802,7 @@ __release_grant_for_copy( released_read = 0; released_write = 0; - spin_lock(&rgt->lock); + read_lock(&rgt->lock); act = &active_entry(rgt, gref); sha = shared_entry_header(rgt, gref); @@ -1838,7 +1843,7 @@ __release_grant_for_copy( released_read = 1; } - spin_unlock(&rgt->lock); + read_unlock(&rgt->lock); if ( td != rd ) { @@ -1896,7 +1901,7 @@ __acquire_grant_for_copy( *page = NULL; - spin_lock(&rgt->lock); + read_lock(&rgt->lock); if ( rgt->gt_version == 0 ) PIN_FAIL(unlock_out, GNTST_general_error, @@ -1965,17 +1970,21 @@ __acquire_grant_for_copy( PIN_FAIL(unlock_out_clear, GNTST_general_error, "transitive grant referenced bad domain %d\n", trans_domid); - spin_unlock(&rgt->lock); + + /* __acquire_grant_for_copy() could take the read lock on + * the right table (if rd == td), so we have to drop the + * lock here and reacquire */ + read_unlock(&rgt->lock); rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id, readonly, &grant_frame, page, &trans_page_off, &trans_length, 0); - spin_lock(&rgt->lock); + read_lock(&rgt->lock); if ( rc != GNTST_okay ) { __fixup_status_for_copy_pin(act, status); + read_unlock(&rgt->lock); rcu_unlock_domain(td); - spin_unlock(&rgt->lock); return rc; } @@ -1987,7 +1996,7 @@ __acquire_grant_for_copy( { __fixup_status_for_copy_pin(act, status); rcu_unlock_domain(td); - spin_unlock(&rgt->lock); + read_unlock(&rgt->lock); put_page(*page); return __acquire_grant_for_copy(rd, gref, ldom, readonly, frame, page, page_off, length, @@ -2055,7 +2064,7 @@ __acquire_grant_for_copy( *length = act->length; *frame = act->frame; - spin_unlock(&rgt->lock); + read_unlock(&rgt->lock); return rc; unlock_out_clear: @@ -2067,7 +2076,7 @@ __acquire_grant_for_copy( gnttab_clear_flag(_GTF_reading, status); unlock_out: - spin_unlock(&rgt->lock); + read_unlock(&rgt->lock); return rc; } @@ -2241,7 +2250,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop) if ( gt->gt_version == op.version ) goto out; - spin_lock(>->lock); + write_lock(>->lock); /* Make sure that the grant table isn't currently in use when we change the version number, except for the first 8 entries which are allowed to be in use (xenstore/xenconsole keeps them mapped). @@ -2327,7 +2336,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop) gt->gt_version = op.version; out_unlock: - spin_unlock(>->lock); + write_unlock(>->lock); out: op.version = gt->gt_version; @@ -2383,7 +2392,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDLE_PARAM(gnttab_get_status_frames_t) uop, op.status = GNTST_okay; - spin_lock(>->lock); + read_lock(>->lock); for ( i = 0; i < op.nr_frames; i++ ) { @@ -2392,7 +2401,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDLE_PARAM(gnttab_get_status_frames_t) uop, op.status = GNTST_bad_virt_addr; } - spin_unlock(>->lock); + read_unlock(>->lock); out2: rcu_unlock_domain(d); out1: @@ -2441,7 +2450,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b) struct active_grant_entry *act; s16 rc = GNTST_okay; - spin_lock(>->lock); + read_lock(>->lock); /* Bounds check on the grant refs */ if ( unlikely(ref_a >= nr_grant_entries(d->grant_table))) @@ -2481,7 +2490,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b) } out: - spin_unlock(>->lock); + read_unlock(>->lock); rcu_unlock_domain(d); @@ -2501,8 +2510,19 @@ gnttab_swap_grant_ref(XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) uop, return i; if ( unlikely(__copy_from_guest(&op, uop, 1)) ) return -EFAULT; - op.status = __gnttab_swap_grant_ref(op.ref_a, op.ref_b); - if ( unlikely(__copy_field_to_guest(uop, &op, status)) ) + if ( unlikely(op.ref_a == op.ref_b) ) + { + /* noop, so avoid acquiring the same active entry + * twice in __gnttab_swap_grant_ref(), which would + * case a deadlock. + */ + op.status = GNTST_okay; + } + else + { + op.status = __gnttab_swap_grant_ref(op.ref_a, op.ref_b); + } + if ( unlikely(__copy_to_guest_offset(uop, i, &op, 1)) ) return -EFAULT; guest_handle_add_offset(uop, 1); } @@ -2552,12 +2572,12 @@ static int __gnttab_cache_flush(gnttab_cache_flush_t *cflush, if ( d != owner ) { - spin_lock(&owner->grant_table->lock); + read_lock(&owner->grant_table->lock); ret = grant_map_exists(d, owner->grant_table, mfn, ref_count); if ( ret != 0 ) { - spin_unlock(&owner->grant_table->lock); + read_unlock(&owner->grant_table->lock); rcu_unlock_domain(d); put_page(page); return ret; @@ -2577,7 +2597,7 @@ static int __gnttab_cache_flush(gnttab_cache_flush_t *cflush, ret = 0; if ( d != owner ) - spin_unlock(&owner->grant_table->lock); + read_unlock(&owner->grant_table->lock); unmap_domain_page(v); put_page(page); @@ -2793,7 +2813,8 @@ grant_table_create( goto no_mem_0; /* Simple stuff. */ - spin_lock_init(&t->lock); + rwlock_init(&t->lock); + spin_lock_init(&t->maptrack_lock); t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES; /* Active grant table. */ @@ -2900,7 +2921,7 @@ gnttab_release_mappings( } rgt = rd->grant_table; - spin_lock(&rgt->lock); + read_lock(&rgt->lock); act = &active_entry(rgt, ref); sha = shared_entry_header(rgt, ref); @@ -2960,7 +2981,7 @@ gnttab_release_mappings( if ( act->pin == 0 ) gnttab_clear_flag(_GTF_reading, status); - spin_unlock(&rgt->lock); + read_unlock(&rgt->lock); rcu_unlock_domain(rd); @@ -2978,6 +2999,8 @@ grant_table_destroy( if ( t == NULL ) return; + + write_lock(&t->lock); for ( i = 0; i < nr_grant_frames(t); i++ ) free_xenheap_page(t->shared_raw[i]); @@ -2995,6 +3018,8 @@ grant_table_destroy( free_xenheap_page(t->status[i]); xfree(t->status); + write_unlock(&t->lock); + xfree(t); d->grant_table = NULL; } @@ -3008,7 +3033,7 @@ static void gnttab_usage_print(struct domain *rd) printk(" -------- active -------- -------- shared --------\n"); printk("[ref] localdom mfn pin localdom gmfn flags\n"); - spin_lock(>->lock); + read_lock(>->lock); if ( gt->gt_version == 0 ) goto out; @@ -3057,7 +3082,7 @@ static void gnttab_usage_print(struct domain *rd) } out: - spin_unlock(>->lock); + read_unlock(>->lock); if ( first ) printk("grant-table for remote domain:%5d ... " diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h index 32f5786..ca49b41 100644 --- a/xen/include/xen/grant_table.h +++ b/xen/include/xen/grant_table.h @@ -82,8 +82,11 @@ struct grant_table { struct grant_mapping **maptrack; unsigned int maptrack_head; unsigned int maptrack_limit; - /* Lock protecting updates to active and shared grant tables. */ - spinlock_t lock; + /* Lock protecting the maptrack page list, head, and limit */ + spinlock_t maptrack_lock; + /* Lock protecting updates to grant table state (version, active + entry list, etc.) */ + rwlock_t lock; /* The defined versions are 1 and 2. Set to 0 if we don't know what version to use yet. */ unsigned gt_version; @@ -101,7 +104,7 @@ gnttab_release_mappings( struct domain *d); /* Increase the size of a domain's grant table. - * Caller must hold d's grant table lock. + * Caller must hold d's grant table write lock. */ int gnttab_grow_table(struct domain *d, unsigned int req_nr_frames); -- 1.7.9.5 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |