[Xen-devel] [PATCHv2 2/3] grant_table: convert grant table rwlock to percpu rwlock
The per domain grant table read lock suffers from significant contention when
performing multi-queue block or network IO due to the parallel
grant map/unmap/copy operations occurring on the DomU's grant table.
On multi-socket systems, the contention causes the locked compare-and-swap
operation to fail frequently, leading to a tight loop of retries of the
compare-and-swap operation. As the coherency fabric can only sustain a limited
rate of compare-and-swap operations on a particular data location, taking
the read lock itself becomes a bottleneck for grant operations.
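To illustrate the scaling argument, here is a minimal C11 sketch of the two
read-lock fast paths. This is illustrative only, not the series' code (the
real percpu rwlock is introduced by the separate percpu rwlock patch this
series depends on); NR_CPUS_SKETCH and all names below are invented for the
example.

#include <stdatomic.h>
#include <stdbool.h>

/* Conventional rwlock: every reader cmpxchgs the same shared word. */
typedef struct { atomic_int cnts; } shared_rwlock_sketch;

static void read_lock_shared(shared_rwlock_sketch *l)
{
    int old = atomic_load(&l->cnts);
    for ( ;; )
    {
        if ( old < 0 )                       /* writer holds the lock */
        {
            old = atomic_load(&l->cnts);
            continue;
        }
        /* All readers contend on this one cache line; under load the
         * cmpxchg fails repeatedly and the loop spins. */
        if ( atomic_compare_exchange_weak(&l->cnts, &old, old + 1) )
            return;                          /* 'old' refreshed on failure */
    }
}

/* Percpu variant: per-CPU reader state, so the read fast path touches
 * only a CPU-local cache line plus one read-mostly writer flag. */
#define NR_CPUS_SKETCH 64

struct padded_count {
    atomic_int v;
    char pad[64 - sizeof(atomic_int)];       /* one cache line per CPU */
};

typedef struct {
    atomic_bool writer_activating;           /* read-mostly for readers */
    struct padded_count readers[NR_CPUS_SKETCH];
} percpu_rwlock_sketch;

static void read_lock_percpu(percpu_rwlock_sketch *l, unsigned int cpu)
{
    for ( ;; )
    {
        atomic_fetch_add(&l->readers[cpu].v, 1);    /* CPU-local line */
        if ( !atomic_load(&l->writer_activating) )
            return;                                 /* common fast path */
        atomic_fetch_sub(&l->readers[cpu].v, 1);    /* writer pending */
        while ( atomic_load(&l->writer_activating) )
            ;                                       /* wait, then retry */
    }
}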
Standard rwlock performance of a single VIF VM-VM transfer with 16 queues
configured was limited to approximately 15 Gbit/s on a 2-socket Haswell-EP
host.
Percpu rwlock performance with the same configuration is approximately
48 Gbit/s.
Oprofile was used to determine the initial overhead of the read-write locks
and to confirm that the percpu rwlocks dramatically reduced this overhead.
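The trade-off is on the write side: a writer must first stop new readers and
then wait for every CPU's reader state to drain, making write-locking
O(nr_cpus). That suits the grant table, whose write-locked paths (table
setup, version changes, ref swaps) are rare next to map/unmap/copy.
Continuing the illustrative sketch above (again invented, not the series'
actual implementation):

static void write_lock_percpu(percpu_rwlock_sketch *l)
{
    bool expected = false;

    /* Serialise writers and divert readers onto their slow path. */
    while ( !atomic_compare_exchange_weak(&l->writer_activating,
                                          &expected, true) )
        expected = false;

    /* Wait for every CPU's in-flight readers to drain.  Sequentially
     * consistent atomics make the flag-set vs. counter-scan ordering
     * sound against the reader's increment-then-check. */
    for ( unsigned int cpu = 0; cpu < NR_CPUS_SKETCH; cpu++ )
        while ( atomic_load(&l->readers[cpu].v) )
            ;
}

static void write_unlock_percpu(percpu_rwlock_sketch *l)
{
    atomic_store(&l->writer_activating, false);
}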
Signed-off-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
---
Changes since v1:
- Used new macros provided in updated percpu rwlock v2 patch
- Converted grant table rwlock_t to percpu_rwlock_t
- Patched a missed grant table rwlock_t usage site
---
xen/arch/arm/mm.c | 4 +-
xen/arch/x86/mm.c | 4 +-
xen/common/grant_table.c | 112 +++++++++++++++++++++---------------------
xen/include/xen/grant_table.h | 4 +-
4 files changed, 62 insertions(+), 62 deletions(-)
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 8b6d915..238bb5f 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -1055,7 +1055,7 @@ int xenmem_add_to_physmap_one(
switch ( space )
{
case XENMAPSPACE_grant_table:
- write_lock(&d->grant_table->lock);
+ percpu_write_lock(grant_rwlock, &d->grant_table->lock);
if ( d->grant_table->gt_version == 0 )
d->grant_table->gt_version = 1;
@@ -1085,7 +1085,7 @@ int xenmem_add_to_physmap_one(
t = p2m_ram_rw;
- write_unlock(&d->grant_table->lock);
+ percpu_write_unlock(grant_rwlock, &d->grant_table->lock);
break;
case XENMAPSPACE_shared_info:
if ( idx != 0 )
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 92df36f..0ac8e66 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4654,7 +4654,7 @@ int xenmem_add_to_physmap_one(
mfn = virt_to_mfn(d->shared_info);
break;
case XENMAPSPACE_grant_table:
- write_lock(&d->grant_table->lock);
+ percpu_write_lock(grant_rwlock, &d->grant_table->lock);
if ( d->grant_table->gt_version == 0 )
d->grant_table->gt_version = 1;
@@ -4676,7 +4676,7 @@ int xenmem_add_to_physmap_one(
mfn = virt_to_mfn(d->grant_table->shared_raw[idx]);
}
- write_unlock(&d->grant_table->lock);
+ percpu_write_unlock(grant_rwlock, &d->grant_table->lock);
break;
case XENMAPSPACE_gmfn_range:
case XENMAPSPACE_gmfn:
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 5d52d1e..e6f9c25 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -178,6 +178,8 @@ struct active_grant_entry {
#define _active_entry(t, e) \
((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
+DEFINE_PERCPU_RWLOCK_GLOBAL(grant_rwlock);
+
static inline void gnttab_flush_tlb(const struct domain *d)
{
if ( !paging_mode_external(d) )
@@ -208,8 +210,6 @@ active_entry_acquire(struct grant_table *t, grant_ref_t e)
{
struct active_grant_entry *act;
- ASSERT(rw_is_locked(&t->lock));
-
act = &_active_entry(t, e);
spin_lock(&act->lock);
@@ -270,23 +270,23 @@ double_gt_lock(struct grant_table *lgt, struct grant_table *rgt)
*/
if ( lgt < rgt )
{
- write_lock(&lgt->lock);
- write_lock(&rgt->lock);
+ percpu_write_lock(grant_rwlock, &lgt->lock);
+ percpu_write_lock(grant_rwlock, &rgt->lock);
}
else
{
if ( lgt != rgt )
- write_lock(&rgt->lock);
- write_lock(&lgt->lock);
+ percpu_write_lock(grant_rwlock, &rgt->lock);
+ percpu_write_lock(grant_rwlock, &lgt->lock);
}
}
static inline void
double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
{
- write_unlock(&lgt->lock);
+ percpu_write_unlock(grant_rwlock, &lgt->lock);
if ( lgt != rgt )
- write_unlock(&rgt->lock);
+ percpu_write_unlock(grant_rwlock, &rgt->lock);
}
static inline int
@@ -659,8 +659,6 @@ static int grant_map_exists(const struct domain *ld,
unsigned int *ref_count)
{
unsigned int ref, max_iter;
-
- ASSERT(rw_is_locked(&rgt->lock));
max_iter = min(*ref_count + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT),
nr_grant_entries(rgt));
@@ -703,12 +701,12 @@ static unsigned int mapkind(
* Must have the local domain's grant table write lock when
* iterating over its maptrack entries.
*/
- ASSERT(rw_is_write_locked(&lgt->lock));
+ ASSERT(percpu_rw_is_write_locked(&lgt->lock));
/*
* Must have the remote domain's grant table write lock while
* counting its active entries.
*/
- ASSERT(rw_is_write_locked(&rd->grant_table->lock));
+ ASSERT(percpu_rw_is_write_locked(&rd->grant_table->lock));
for ( handle = 0; !(kind & MAPKIND_WRITE) &&
handle < lgt->maptrack_limit; handle++ )
@@ -796,7 +794,7 @@ __gnttab_map_grant_ref(
}
rgt = rd->grant_table;
- read_lock(&rgt->lock);
+ percpu_read_lock(grant_rwlock, &rgt->lock);
/* Bounds check on the grant ref */
if ( unlikely(op->ref >= nr_grant_entries(rgt)))
@@ -859,7 +857,7 @@ __gnttab_map_grant_ref(
cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
active_entry_release(act);
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
/* pg may be set, with a refcount included, from __get_paged_frame */
if ( !pg )
@@ -1006,7 +1004,7 @@ __gnttab_map_grant_ref(
put_page(pg);
}
- read_lock(&rgt->lock);
+ percpu_read_lock(grant_rwlock, &rgt->lock);
act = active_entry_acquire(rgt, op->ref);
@@ -1029,7 +1027,7 @@ __gnttab_map_grant_ref(
active_entry_release(act);
unlock_out:
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
op->status = rc;
put_maptrack_handle(lgt, handle);
rcu_unlock_domain(rd);
@@ -1080,18 +1078,18 @@ __gnttab_unmap_common(
op->map = &maptrack_entry(lgt, op->handle);
- read_lock(&lgt->lock);
+ percpu_read_lock(grant_rwlock, &lgt->lock);
if ( unlikely(!read_atomic(&op->map->flags)) )
{
- read_unlock(&lgt->lock);
+ percpu_read_unlock(grant_rwlock, &lgt->lock);
gdprintk(XENLOG_INFO, "Zero flags for handle (%d).\n", op->handle);
op->status = GNTST_bad_handle;
return;
}
dom = op->map->domid;
- read_unlock(&lgt->lock);
+ percpu_read_unlock(grant_rwlock, &lgt->lock);
if ( unlikely((rd = rcu_lock_domain_by_id(dom)) == NULL) )
{
@@ -1113,7 +1111,7 @@ __gnttab_unmap_common(
rgt = rd->grant_table;
- read_lock(&rgt->lock);
+ percpu_read_lock(grant_rwlock, &rgt->lock);
op->flags = read_atomic(&op->map->flags);
if ( unlikely(!op->flags) || unlikely(op->map->domid != dom) )
@@ -1165,7 +1163,7 @@ __gnttab_unmap_common(
act_release_out:
active_entry_release(act);
unmap_out:
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
if ( rc == GNTST_okay && gnttab_need_iommu_mapping(ld) )
{
@@ -1220,7 +1218,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op)
rcu_lock_domain(rd);
rgt = rd->grant_table;
- read_lock(&rgt->lock);
+ percpu_read_lock(grant_rwlock, &rgt->lock);
if ( rgt->gt_version == 0 )
goto unlock_out;
@@ -1286,7 +1284,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op)
act_release_out:
active_entry_release(act);
unlock_out:
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
if ( put_handle )
{
@@ -1585,7 +1583,7 @@ gnttab_setup_table(
}
gt = d->grant_table;
- write_lock(>->lock);
+ percpu_write_lock(grant_rwlock, >->lock);
if ( gt->gt_version == 0 )
gt->gt_version = 1;
@@ -1613,7 +1611,7 @@ gnttab_setup_table(
}
out3:
- write_unlock(>->lock);
+ percpu_write_unlock(grant_rwlock, >->lock);
out2:
rcu_unlock_domain(d);
out1:
@@ -1655,13 +1653,13 @@ gnttab_query_size(
goto query_out_unlock;
}
- read_lock(&d->grant_table->lock);
+ percpu_read_lock(grant_rwlock, &d->grant_table->lock);
op.nr_frames = nr_grant_frames(d->grant_table);
op.max_nr_frames = max_grant_frames;
op.status = GNTST_okay;
- read_unlock(&d->grant_table->lock);
+ percpu_read_unlock(grant_rwlock, &d->grant_table->lock);
query_out_unlock:
@@ -1687,7 +1685,7 @@ gnttab_prepare_for_transfer(
union grant_combo scombo, prev_scombo, new_scombo;
int retries = 0;
- read_lock(&rgt->lock);
+ percpu_read_lock(grant_rwlock, &rgt->lock);
if ( unlikely(ref >= nr_grant_entries(rgt)) )
{
@@ -1730,11 +1728,11 @@ gnttab_prepare_for_transfer(
scombo = prev_scombo;
}
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
return 1;
fail:
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
return 0;
}
@@ -1925,7 +1923,7 @@ gnttab_transfer(
TRACE_1D(TRC_MEM_PAGE_GRANT_TRANSFER, e->domain_id);
/* Tell the guest about its new page frame. */
- read_lock(&e->grant_table->lock);
+ percpu_read_lock(grant_rwlock, &e->grant_table->lock);
act = active_entry_acquire(e->grant_table, gop.ref);
if ( e->grant_table->gt_version == 1 )
@@ -1949,7 +1947,7 @@ gnttab_transfer(
GTF_transfer_completed;
active_entry_release(act);
- read_unlock(&e->grant_table->lock);
+ percpu_read_unlock(grant_rwlock, &e->grant_table->lock);
rcu_unlock_domain(e);
@@ -1987,7 +1985,7 @@ __release_grant_for_copy(
released_read = 0;
released_write = 0;
- read_lock(&rgt->lock);
+ percpu_read_lock(grant_rwlock, &rgt->lock);
act = active_entry_acquire(rgt, gref);
sha = shared_entry_header(rgt, gref);
@@ -2029,7 +2027,7 @@ __release_grant_for_copy(
}
active_entry_release(act);
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
if ( td != rd )
{
@@ -2086,7 +2084,7 @@ __acquire_grant_for_copy(
*page = NULL;
- read_lock(&rgt->lock);
+ percpu_read_lock(grant_rwlock, &rgt->lock);
if ( unlikely(gref >= nr_grant_entries(rgt)) )
PIN_FAIL(gt_unlock_out, GNTST_bad_gntref,
@@ -2168,20 +2166,20 @@ __acquire_grant_for_copy(
* here and reacquire
*/
active_entry_release(act);
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
readonly, &grant_frame, page,
&trans_page_off, &trans_length, 0);
- read_lock(&rgt->lock);
+ percpu_read_lock(grant_rwlock, &rgt->lock);
act = active_entry_acquire(rgt, gref);
if ( rc != GNTST_okay ) {
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
active_entry_release(act);
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
return rc;
}
@@ -2194,7 +2192,7 @@ __acquire_grant_for_copy(
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
active_entry_release(act);
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
put_page(*page);
return __acquire_grant_for_copy(rd, gref, ldom, readonly,
frame, page, page_off, length,
@@ -2258,7 +2256,7 @@ __acquire_grant_for_copy(
*frame = act->frame;
active_entry_release(act);
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
return rc;
unlock_out_clear:
@@ -2273,7 +2271,7 @@ __acquire_grant_for_copy(
active_entry_release(act);
gt_unlock_out:
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
return rc;
}
@@ -2589,7 +2587,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop)
if ( gt->gt_version == op.version )
goto out;
- write_lock(>->lock);
+ percpu_write_lock(grant_rwlock, >->lock);
/*
* Make sure that the grant table isn't currently in use when we
* change the version number, except for the first 8 entries which
@@ -2702,7 +2700,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop)
gt->gt_version = op.version;
out_unlock:
- write_unlock(>->lock);
+ percpu_write_unlock(grant_rwlock, >->lock);
out:
op.version = gt->gt_version;
@@ -2758,7 +2756,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDLE_PARAM(gnttab_get_status_frames_t) uop,
op.status = GNTST_okay;
- read_lock(>->lock);
+ percpu_read_lock(grant_rwlock, >->lock);
for ( i = 0; i < op.nr_frames; i++ )
{
@@ -2767,7 +2765,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDLE_PARAM(gnttab_get_status_frames_t) uop,
op.status = GNTST_bad_virt_addr;
}
- read_unlock(>->lock);
+ percpu_read_unlock(grant_rwlock, >->lock);
out2:
rcu_unlock_domain(d);
out1:
@@ -2817,7 +2815,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b)
struct active_grant_entry *act_b = NULL;
s16 rc = GNTST_okay;
- write_lock(>->lock);
+ percpu_write_lock(grant_rwlock, >->lock);
/* Bounds check on the grant refs */
if ( unlikely(ref_a >= nr_grant_entries(d->grant_table)))
@@ -2865,7 +2863,7 @@ out:
active_entry_release(act_b);
if ( act_a != NULL )
active_entry_release(act_a);
- write_unlock(>->lock);
+ percpu_write_unlock(grant_rwlock, >->lock);
rcu_unlock_domain(d);
@@ -2936,12 +2934,12 @@ static int __gnttab_cache_flush(gnttab_cache_flush_t *cflush,
if ( d != owner )
{
- read_lock(&owner->grant_table->lock);
+ percpu_read_lock(grant_rwlock, &owner->grant_table->lock);
ret = grant_map_exists(d, owner->grant_table, mfn, ref_count);
if ( ret != 0 )
{
- read_unlock(&owner->grant_table->lock);
+ percpu_read_unlock(grant_rwlock, &owner->grant_table->lock);
rcu_unlock_domain(d);
put_page(page);
return ret;
@@ -2961,7 +2959,7 @@ static int __gnttab_cache_flush(gnttab_cache_flush_t *cflush,
ret = 0;
if ( d != owner )
- read_unlock(&owner->grant_table->lock);
+ percpu_read_unlock(grant_rwlock, &owner->grant_table->lock);
unmap_domain_page(v);
put_page(page);
@@ -3180,7 +3178,7 @@ grant_table_create(
goto no_mem_0;
/* Simple stuff. */
- rwlock_init(&t->lock);
+ percpu_rwlock_resource_init(&t->lock);
spin_lock_init(&t->maptrack_lock);
t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
@@ -3282,7 +3280,7 @@ gnttab_release_mappings(
}
rgt = rd->grant_table;
- read_lock(&rgt->lock);
+ percpu_read_lock(grant_rwlock, &rgt->lock);
act = active_entry_acquire(rgt, ref);
sha = shared_entry_header(rgt, ref);
@@ -3343,7 +3341,7 @@ gnttab_release_mappings(
gnttab_clear_flag(_GTF_reading, status);
active_entry_release(act);
- read_unlock(&rgt->lock);
+ percpu_read_unlock(grant_rwlock, &rgt->lock);
rcu_unlock_domain(rd);
@@ -3360,7 +3358,7 @@ void grant_table_warn_active_grants(struct domain *d)
#define WARN_GRANT_MAX 10
- read_lock(>->lock);
+ percpu_read_lock(grant_rwlock, >->lock);
for ( ref = 0; ref != nr_grant_entries(gt); ref++ )
{
@@ -3382,7 +3380,7 @@ void grant_table_warn_active_grants(struct domain *d)
printk(XENLOG_G_DEBUG "Dom%d has too many (%d) active grants to report\n",
d->domain_id, nr_active);
- read_unlock(>->lock);
+ percpu_read_unlock(grant_rwlock, >->lock);
#undef WARN_GRANT_MAX
}
@@ -3432,7 +3430,7 @@ static void gnttab_usage_print(struct domain *rd)
printk(" -------- active -------- -------- shared --------\n");
printk("[ref] localdom mfn pin localdom gmfn flags\n");
- read_lock(>->lock);
+ percpu_read_lock(grant_rwlock, >->lock);
for ( ref = 0; ref != nr_grant_entries(gt); ref++ )
{
@@ -3475,7 +3473,7 @@ static void gnttab_usage_print(struct domain *rd)
active_entry_release(act);
}
- read_unlock(>->lock);
+ percpu_read_unlock(grant_rwlock, >->lock);
if ( first )
printk("grant-table for remote domain:%5d ... "
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index 1c29cee..59d2819 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -51,13 +51,15 @@
/* The maximum size of a grant table. */
extern unsigned int max_grant_frames;
+DECLARE_PERCPU_RWLOCK_GLOBAL(grant_rwlock);
+
/* Per-domain grant information. */
struct grant_table {
/*
* Lock protecting updates to grant table state (version, active
* entry list, etc.)
*/
- rwlock_t lock;
+ percpu_rwlock_t lock;
/* Table size. Number of frames shared with guest */
unsigned int nr_grant_frames;
/* Shared grant table (see include/public/grant_table.h). */
--
1.7.12.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel