[Xen-devel] [PATCH v5 1/2] gnttab: Introduce rwlock to protect updates to grant table state
Split the grant table lock into two separate locks: one to protect the
maptrack state, and turn the other into an rwlock. The rwlock is used
to prevent readers from accessing inconsistent grant table state, such
as the current version or partially initialized active table pages.
Signed-off-by: Matt Wilson <msw@xxxxxxxxxx>
[chegger: ported to xen-staging, split into multiple commits]
v5:
* Addressed locking issue pointed out by Jan Beulich
* Fixed git rebase merge issue introduced in v4
(acquiring the lock twice)
* Coding style fixes
v4:
* Coding style nits from Jan Beulich
* Fixed up read locks pointed out by Jan Beulich
* Renamed double_gt_(un)lock to double_maptrack_(un)lock
per request from Jan Beulich
v3:
* Addressed gnttab_swap_grant_ref() comment from Andrew Cooper
v2:
* Add arm part per request from Julien Grall
Signed-off-by: Christoph Egger <chegger@xxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
Cc: Julien Grall <julien.grall@xxxxxxxxxx>
---
docs/misc/grant-tables.txt | 28 +++++++-
xen/arch/arm/mm.c | 4 +-
xen/arch/x86/mm.c | 4 +-
xen/common/grant_table.c | 146 ++++++++++++++++++++++++-----------------
xen/include/xen/grant_table.h | 17 +++--
5 files changed, 127 insertions(+), 72 deletions(-)
diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt
index 19db4ec..c9ae8f2 100644
--- a/docs/misc/grant-tables.txt
+++ b/docs/misc/grant-tables.txt
@@ -74,7 +74,33 @@ is complete.
matching map track entry is then removed, as if unmap had been invoked.
These are not used by the transfer mechanism.
map->domid : owner of the mapped frame
- map->ref_and_flags : grant reference, ro/rw, mapped for host or device access
+ map->ref : grant reference
+ map->flags : ro/rw, mapped for host or device access
+
+********************************************************************************
+ Locking
+ ~~~~~~~
+ Xen uses several locks to serialize access to the internal grant table state.
+
+ grant_table->lock : rwlock used to prevent readers from accessing
+ inconsistent grant table state such as current
+ version, partially initialized active table pages,
+ etc.
+ grant_table->maptrack_lock : spinlock used to protect the maptrack state
+
+ The primary lock for the grant table is a read/write spinlock. All
+ functions that access members of struct grant_table must acquire a
+ read lock around critical sections. Any modification to the members
+ of struct grant_table (e.g., nr_status_frames, nr_grant_frames,
+ active frames, etc.) must only be made if the write lock is
+ held. These elements are read-mostly, and read critical sections can
+ be large, which makes a rwlock a good choice.
+
+ The maptrack state is protected by its own spinlock. Any access (read
+ or write) of struct grant_table members that have a "maptrack_"
+ prefix must be made while holding the maptrack lock. The maptrack
+ state can be rapidly modified under some workloads, and the critical
+ sections are very small, thus we use a spinlock to protect them.
********************************************************************************
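(Illustration only, not part of the patch: a minimal sketch of the
discipline documented above, with placeholder bodies and "gt" standing
for some domain's grant table.)

    /* Readers: take the read lock for a consistent view of gt_version,
     * nr_grant_frames, active entries, etc. */
    read_lock(&gt->lock);
    /* ... read struct grant_table members ... */
    read_unlock(&gt->lock);

    /* Writers: any change to grant table state (growing the table,
     * switching versions, swapping refs) needs the write lock. */
    write_lock(&gt->lock);
    /* ... modify nr_grant_frames, gt_version, active frames, ... */
    write_unlock(&gt->lock);

    /* Maptrack state ("maptrack_" members) is covered only by the
     * maptrack spinlock; critical sections stay short. */
    spin_lock(&gt->maptrack_lock);
    /* ... allocate or free a maptrack handle ... */
    spin_unlock(&gt->maptrack_lock);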
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 7d4ba0c..2765683 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -1037,7 +1037,7 @@ int xenmem_add_to_physmap_one(
switch ( space )
{
case XENMAPSPACE_grant_table:
- spin_lock(&d->grant_table->lock);
+ write_lock(&d->grant_table->lock);
if ( d->grant_table->gt_version == 0 )
d->grant_table->gt_version = 1;
@@ -1067,7 +1067,7 @@ int xenmem_add_to_physmap_one(
t = p2m_ram_rw;
- spin_unlock(&d->grant_table->lock);
+ write_unlock(&d->grant_table->lock);
break;
case XENMAPSPACE_shared_info:
if ( idx != 0 )
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index d4965da..af4762d 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4569,7 +4569,7 @@ int xenmem_add_to_physmap_one(
mfn = virt_to_mfn(d->shared_info);
break;
case XENMAPSPACE_grant_table:
- spin_lock(&d->grant_table->lock);
+ write_lock(&d->grant_table->lock);
if ( d->grant_table->gt_version == 0 )
d->grant_table->gt_version = 1;
@@ -4591,7 +4591,7 @@ int xenmem_add_to_physmap_one(
mfn = virt_to_mfn(d->grant_table->shared_raw[idx]);
}
- spin_unlock(&d->grant_table->lock);
+ write_unlock(&d->grant_table->lock);
break;
case XENMAPSPACE_gmfn_range:
case XENMAPSPACE_gmfn:
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 1a11766..f52a51d 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -229,27 +229,27 @@ static int __get_paged_frame(unsigned long gfn, unsigned long *frame, struct pag
}
static inline void
-double_gt_lock(struct grant_table *lgt, struct grant_table *rgt)
+double_maptrack_lock(struct grant_table *lgt, struct grant_table *rgt)
{
if ( lgt < rgt )
{
- spin_lock(&lgt->lock);
- spin_lock(&rgt->lock);
+ spin_lock(&lgt->maptrack_lock);
+ spin_lock(&rgt->maptrack_lock);
}
else
{
if ( lgt != rgt )
- spin_lock(&rgt->lock);
- spin_lock(&lgt->lock);
+ spin_lock(&rgt->maptrack_lock);
+ spin_lock(&lgt->maptrack_lock);
}
}
static inline void
-double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
+double_maptrack_unlock(struct grant_table *lgt, struct grant_table *rgt)
{
- spin_unlock(&lgt->lock);
+ spin_unlock(&lgt->maptrack_lock);
if ( lgt != rgt )
- spin_unlock(&rgt->lock);
+ spin_unlock(&rgt->maptrack_lock);
}
static inline int
@@ -267,10 +267,10 @@ static inline void
put_maptrack_handle(
struct grant_table *t, int handle)
{
- spin_lock(&t->lock);
+ spin_lock(&t->maptrack_lock);
maptrack_entry(t, handle).ref = t->maptrack_head;
t->maptrack_head = handle;
- spin_unlock(&t->lock);
+ spin_unlock(&t->maptrack_lock);
}
static inline int
@@ -282,7 +282,7 @@ get_maptrack_handle(
struct grant_mapping *new_mt;
unsigned int new_mt_limit, nr_frames;
- spin_lock(&lgt->lock);
+ spin_lock(&lgt->maptrack_lock);
while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
{
@@ -311,7 +311,7 @@ get_maptrack_handle(
nr_frames + 1);
}
- spin_unlock(&lgt->lock);
+ spin_unlock(&lgt->maptrack_lock);
return handle;
}
@@ -508,7 +508,7 @@ static int grant_map_exists(const struct domain *ld,
const struct active_grant_entry *act;
unsigned int ref, max_iter;
- ASSERT(spin_is_locked(&rgt->lock));
+ ASSERT(rw_is_locked(&rgt->lock));
max_iter = min(*ref_count + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT),
nr_grant_entries(rgt));
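(Illustration only, not part of the patch: functions that rely on the
caller already holding the grant table lock can assert that, as
grant_map_exists() does above; the helper below is hypothetical.)

    static void hypothetical_reader(struct grant_table *rgt)
    {
        ASSERT(rw_is_locked(&rgt->lock));  /* caller holds rgt->lock */
        /* ... safe to read rgt->gt_version, active entries, ... */
    }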
@@ -629,7 +629,7 @@ __gnttab_map_grant_ref(
}
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
PIN_FAIL(unlock_out, GNTST_general_error,
@@ -702,8 +702,6 @@ __gnttab_map_grant_ref(
cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
- spin_unlock(&rgt->lock);
-
/* pg may be set, with a refcount included, from __get_paged_frame */
if ( !pg )
{
@@ -778,7 +776,7 @@ __gnttab_map_grant_ref(
goto undo_out;
}
- double_gt_lock(lgt, rgt);
+ double_maptrack_lock(lgt, rgt);
if ( gnttab_need_iommu_mapping(ld) )
{
@@ -801,7 +799,7 @@ __gnttab_map_grant_ref(
}
if ( err )
{
- double_gt_unlock(lgt, rgt);
+ double_maptrack_unlock(lgt, rgt);
rc = GNTST_general_error;
goto undo_out;
}
@@ -814,7 +812,8 @@ __gnttab_map_grant_ref(
mt->ref = op->ref;
mt->flags = op->flags;
- double_gt_unlock(lgt, rgt);
+ double_maptrack_unlock(lgt, rgt);
+ read_unlock(&rgt->lock);
op->dev_bus_addr = (u64)frame << PAGE_SHIFT;
op->handle = handle;
@@ -837,7 +836,7 @@ __gnttab_map_grant_ref(
put_page(pg);
}
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = &active_entry(rgt, op->ref);
@@ -857,7 +856,7 @@ __gnttab_map_grant_ref(
gnttab_clear_flag(_GTF_reading, status);
unlock_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
op->status = rc;
put_maptrack_handle(lgt, handle);
rcu_unlock_domain(rd);
@@ -907,18 +906,19 @@ __gnttab_unmap_common(
}
op->map = &maptrack_entry(lgt, op->handle);
- spin_lock(&lgt->lock);
+
+ read_lock(&lgt->lock);
if ( unlikely(!op->map->flags) )
{
- spin_unlock(&lgt->lock);
+ read_unlock(&lgt->lock);
gdprintk(XENLOG_INFO, "Zero flags for handle (%d).\n", op->handle);
op->status = GNTST_bad_handle;
return;
}
dom = op->map->domid;
- spin_unlock(&lgt->lock);
+ read_unlock(&lgt->lock);
if ( unlikely((rd = rcu_lock_domain_by_id(dom)) == NULL) )
{
@@ -939,7 +939,9 @@ __gnttab_unmap_common(
TRACE_1D(TRC_MEM_PAGE_GRANT_UNMAP, dom);
rgt = rd->grant_table;
- double_gt_lock(lgt, rgt);
+
+ read_lock(&rgt->lock);
+ double_maptrack_lock(lgt, rgt);
op->flags = op->map->flags;
if ( unlikely(!op->flags) || unlikely(op->map->domid != dom) )
@@ -1009,7 +1011,9 @@ __gnttab_unmap_common(
gnttab_mark_dirty(rd, op->frame);
unmap_out:
- double_gt_unlock(lgt, rgt);
+ double_maptrack_unlock(lgt, rgt);
+ read_unlock(&rgt->lock);
+
op->status = rc;
rcu_unlock_domain(rd);
}
@@ -1039,8 +1043,8 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op)
rcu_lock_domain(rd);
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
goto unmap_out;
@@ -1104,7 +1108,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op)
gnttab_clear_flag(_GTF_reading, status);
unmap_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
if ( put_handle )
{
op->map->flags = 0;
@@ -1290,10 +1294,14 @@ gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
gt->nr_status_frames = 0;
}
+/*
+ * Grow the grant table. The caller must hold the grant table's
+ * write lock before calling this function.
+ */
int
gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
{
- /* d's grant table lock must be held by the caller */
+ /* d's grant table write lock must be held by the caller */
struct grant_table *gt = d->grant_table;
unsigned int i;
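(Illustration only, not part of the patch: the expected caller pattern
for gnttab_grow_table() under the new rule, error handling omitted.)

    write_lock(&gt->lock);
    gnttab_grow_table(d, req_nr_frames);   /* write lock held, as required */
    write_unlock(&gt->lock);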
@@ -1398,7 +1406,7 @@ gnttab_setup_table(
}
gt = d->grant_table;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
if ( gt->gt_version == 0 )
gt->gt_version = 1;
@@ -1426,7 +1434,7 @@ gnttab_setup_table(
}
out3:
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
out2:
rcu_unlock_domain(d);
out1:
@@ -1468,13 +1476,13 @@ gnttab_query_size(
goto query_out_unlock;
}
- spin_lock(&d->grant_table->lock);
+ read_lock(&d->grant_table->lock);
op.nr_frames = nr_grant_frames(d->grant_table);
op.max_nr_frames = max_grant_frames;
op.status = GNTST_okay;
- spin_unlock(&d->grant_table->lock);
+ read_unlock(&d->grant_table->lock);
query_out_unlock:
@@ -1500,7 +1508,7 @@ gnttab_prepare_for_transfer(
union grant_combo scombo, prev_scombo, new_scombo;
int retries = 0;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
{
@@ -1551,11 +1559,11 @@ gnttab_prepare_for_transfer(
scombo = prev_scombo;
}
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return 1;
fail:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return 0;
}
@@ -1747,7 +1755,7 @@ gnttab_transfer(
TRACE_1D(TRC_MEM_PAGE_GRANT_TRANSFER, e->domain_id);
/* Tell the guest about its new page frame. */
- spin_lock(&e->grant_table->lock);
+ read_lock(&e->grant_table->lock);
if ( e->grant_table->gt_version == 1 )
{
@@ -1765,7 +1773,7 @@ gnttab_transfer(
shared_entry_header(e->grant_table, gop.ref)->flags |=
GTF_transfer_completed;
- spin_unlock(&e->grant_table->lock);
+ read_unlock(&e->grant_table->lock);
rcu_unlock_domain(e);
@@ -1803,7 +1811,7 @@ __release_grant_for_copy(
released_read = 0;
released_write = 0;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = &active_entry(rgt, gref);
sha = shared_entry_header(rgt, gref);
@@ -1844,7 +1852,7 @@ __release_grant_for_copy(
released_read = 1;
}
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
if ( td != rd )
{
@@ -1902,7 +1910,7 @@ __acquire_grant_for_copy(
*page = NULL;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
PIN_FAIL(unlock_out, GNTST_general_error,
@@ -1971,17 +1979,23 @@ __acquire_grant_for_copy(
PIN_FAIL(unlock_out_clear, GNTST_general_error,
"transitive grant referenced bad domain %d\n",
trans_domid);
- spin_unlock(&rgt->lock);
+
+ /*
+ * __acquire_grant_for_copy() could take the read lock on
+ * the remote table (if rd == td), so we have to drop the
+ * lock here and reacquire it.
+ */
+ read_unlock(&rgt->lock);
rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
readonly, &grant_frame, page,
&trans_page_off, &trans_length, 0);
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rc != GNTST_okay ) {
__fixup_status_for_copy_pin(act, status);
+ read_unlock(&rgt->lock);
rcu_unlock_domain(td);
- spin_unlock(&rgt->lock);
return rc;
}
@@ -1993,7 +2007,7 @@ __acquire_grant_for_copy(
{
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
put_page(*page);
return __acquire_grant_for_copy(rd, gref, ldom, readonly,
frame, page, page_off, length,
@@ -2061,7 +2075,7 @@ __acquire_grant_for_copy(
*length = act->length;
*frame = act->frame;
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return rc;
unlock_out_clear:
@@ -2073,7 +2087,8 @@ __acquire_grant_for_copy(
gnttab_clear_flag(_GTF_reading, status);
unlock_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
+
return rc;
}
@@ -2389,7 +2404,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop)
if ( gt->gt_version == op.version )
goto out;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
/* Make sure that the grant table isn't currently in use when we
change the version number, except for the first 8 entries which
are allowed to be in use (xenstore/xenconsole keeps them mapped).
@@ -2475,7 +2490,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop)
gt->gt_version = op.version;
out_unlock:
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
out:
op.version = gt->gt_version;
@@ -2531,7 +2546,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDLE_PARAM(gnttab_get_status_frames_t) uop,
op.status = GNTST_okay;
- spin_lock(&gt->lock);
+ read_lock(&gt->lock);
for ( i = 0; i < op.nr_frames; i++ )
{
@@ -2540,7 +2555,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDLE_PARAM(gnttab_get_status_frames_t) uop,
op.status = GNTST_bad_virt_addr;
}
- spin_unlock(&gt->lock);
+ read_unlock(&gt->lock);
out2:
rcu_unlock_domain(d);
out1:
@@ -2589,7 +2604,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b)
struct active_grant_entry *act;
s16 rc = GNTST_okay;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
/* Bounds check on the grant refs */
if ( unlikely(ref_a >= nr_grant_entries(d->grant_table)))
@@ -2629,7 +2644,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b)
}
out:
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
rcu_unlock_domain(d);
@@ -2700,12 +2715,12 @@ static int __gnttab_cache_flush(gnttab_cache_flush_t *cflush,
if ( d != owner )
{
- spin_lock(&owner->grant_table->lock);
+ read_lock(&owner->grant_table->lock);
ret = grant_map_exists(d, owner->grant_table, mfn, ref_count);
if ( ret != 0 )
{
- spin_unlock(&owner->grant_table->lock);
+ read_unlock(&owner->grant_table->lock);
rcu_unlock_domain(d);
put_page(page);
return ret;
@@ -2725,7 +2740,7 @@ static int __gnttab_cache_flush(gnttab_cache_flush_t *cflush,
ret = 0;
if ( d != owner )
- spin_unlock(&owner->grant_table->lock);
+ read_unlock(&owner->grant_table->lock);
unmap_domain_page(v);
put_page(page);
@@ -2944,7 +2959,8 @@ grant_table_create(
goto no_mem_0;
/* Simple stuff. */
- spin_lock_init(&t->lock);
+ rwlock_init(&t->lock);
+ spin_lock_init(&t->maptrack_lock);
t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
/* Active grant table. */
@@ -3051,7 +3067,8 @@ gnttab_release_mappings(
}
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
+ double_maptrack_lock(gt, rgt);
act = &active_entry(rgt, ref);
sha = shared_entry_header(rgt, ref);
@@ -3111,7 +3128,8 @@ gnttab_release_mappings(
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
- spin_unlock(&rgt->lock);
+ double_maptrack_unlock(gt, rgt);
+ read_unlock(&rgt->lock);
rcu_unlock_domain(rd);
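(Illustration only, not part of the patch: when both locks are needed,
as in __gnttab_unmap_common() and gnttab_release_mappings() above, the
remote table's rwlock is taken first and released last, with the
maptrack spinlocks nested inside.)

    read_lock(&rgt->lock);
    double_maptrack_lock(lgt, rgt);
    /* ... update maptrack entries and the remote active entries ... */
    double_maptrack_unlock(lgt, rgt);
    read_unlock(&rgt->lock);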
@@ -3129,7 +3147,9 @@ grant_table_destroy(
if ( t == NULL )
return;
-
+
+ write_lock(&t->lock);
+
for ( i = 0; i < nr_grant_frames(t); i++ )
free_xenheap_page(t->shared_raw[i]);
xfree(t->shared_raw);
@@ -3146,6 +3166,8 @@ grant_table_destroy(
free_xenheap_page(t->status[i]);
xfree(t->status);
+ write_unlock(&t->lock);
+
xfree(t);
d->grant_table = NULL;
}
@@ -3159,7 +3181,7 @@ static void gnttab_usage_print(struct domain *rd)
printk(" -------- active -------- -------- shared --------\n");
printk("[ref] localdom mfn pin localdom gmfn flags\n");
- spin_lock(&gt->lock);
+ read_lock(&gt->lock);
if ( gt->gt_version == 0 )
goto out;
@@ -3208,7 +3230,7 @@ static void gnttab_usage_print(struct domain *rd)
}
out:
- spin_unlock(&gt->lock);
+ read_unlock(&gt->lock);
if ( first )
printk("grant-table for remote domain:%5d ... "
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index 32f5786..298031e 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -82,10 +82,17 @@ struct grant_table {
struct grant_mapping **maptrack;
unsigned int maptrack_head;
unsigned int maptrack_limit;
- /* Lock protecting updates to active and shared grant tables. */
- spinlock_t lock;
- /* The defined versions are 1 and 2. Set to 0 if we don't know
- what version to use yet. */
+ /* Lock protecting the maptrack page list, head, and limit */
+ spinlock_t maptrack_lock;
+ /*
+ * Lock protecting updates to grant table state (version, active
+ * entry list, etc.)
+ */
+ rwlock_t lock;
+ /*
+ * The defined versions are 1 and 2. Set to 0 if we don't know
+ * what version to use yet.
+ */
unsigned gt_version;
};
@@ -101,7 +108,7 @@ gnttab_release_mappings(
struct domain *d);
/* Increase the size of a domain's grant table.
- * Caller must hold d's grant table lock.
+ * Caller must hold d's grant table write lock.
*/
int
gnttab_grow_table(struct domain *d, unsigned int req_nr_frames);
--
1.7.9.5
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel