[Xen-devel] scaling problem with writable pagetables
We started to test a Linux dom0 up to 16-way (4 sockets, dual core, with HT) and we began to see some serious scaling issues compared to scaling baremetal Linux to 16-way. We took some profiles and saw that functions in xen/arch/x86/mm.c were using disproportionately more CPU time as we scaled up the number of CPUs. Taking a quick look at those functions (for example, update_va_mapping and do_mmu_update), it became kind of obvious that the locking probably does not scale. It appears we lock domain-wide in many of these functions. This, IMO, causes a serious problem when running an SMP domain which happens to page fault a lot.

So I got to thinking: just how much protection do we really need in these functions? The guest OS should already provide quite a bit of protection for page table writes. Is Xen imposing additional, and possibly unnecessary, protection here? So I made some changes to when we lock/unlock in most of these functions in mm.c (patch attached).

Warning: I am pretty much making a shot in the dark here. I do not know this code nearly well enough to say this is the right thing to do. However, I can say without a doubt that the changes make a significant difference in performance:

benchmark        throughput increase with lock reduction
SDET             19%
reaim_shared     65%
reaim_fserver    16%

Below are per-function ratios of CPU time, rev8830 / rev8830-lock-reduction (derived from oprofile diffs).

SDET:
9.84/1 restore_all_guest
1.45/1 mod_l1_entry
2.59/1 do_softirq
1.63/1 test_guest_events
1.09/1 syscall_enter
1.35/1 propagate_page_fault
1.18/1 process_guest_except
1.13/1 timer_softirq_action
1.04/1 alloc_page_type
1.05/1 revalidate_l1
1.08/1 do_set_segment_base
1.10/1 get_s_time
1.19/1 __context_switch
1.09/1 switch_to_kernel
1.11/1 FLT4
1.62/1 xen_l3_entry_update
1.27/1 xen_invlpg_mask

reaim_shared:
1.43/1 do_update_va_mapping
1.44/1 do_page_fault
1.47/1 do_mmu_update
6.75/1 restore_all_guest
1.43/1 do_mmuext_op
1.37/1 sedf_do_schedule
1.20/1 mod_l1_entry
2.46/1 do_softirq
1.27/1 t_timer_fn
1.34/1 do_set_segment_base
1.20/1 timer_softirq_action
1.24/1 process_guest_except
1.12/1 timer_interrupt
1.14/1 evtchn_send

reaim_fserver:
1.16/1 do_update_va_mapping
1.13/1 do_page_fault
8.41/1 restore_all_guest
1.17/1 do_mmu_update
1.56/1 mod_l1_entry
2.48/1 do_softirq
1.02/1 do_mmuext_op
1.14/1 sedf_do_schedule
1.12/1 t_timer_fn
1.23/1 do_set_segment_base
1.11/1 device_not_available
1.11/1 timer_softirq_action
1.13/1 process_guest_except
1.20/1 timer_interrupt
1.15/1 copy_from_user
1.11/1 propagate_page_fault

Any comments greatly appreciated.

-Andrew

Signed-off-by: habanero@xxxxxxxxxx
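For readers not familiar with mm.c, here is a rough, self-contained C sketch of the shape of the change (illustrative only, not the real Xen code: struct domain, cleanup_writable_pagetable() and process_one_update() are simplified stand-ins, and a pthread mutex stands in for LOCK_BIGLOCK/UNLOCK_BIGLOCK). The idea is simply to drop the per-domain big lock as soon as the writable pagetable cleanup is done, instead of holding it across the whole hypercall, on the assumption that the guest OS's own page table locking plus Xen's per-page reference/type counts cover the rest:

#include <pthread.h>
#include <stdio.h>

struct domain {
    pthread_mutex_t big_lock;   /* stand-in for the per-domain big lock */
};

/* Stubs standing in for the real work done in mm.c. */
static void cleanup_writable_pagetable(struct domain *d) { (void)d; }
static void process_one_update(struct domain *d, int i)  { (void)d; (void)i; }

/* Current behaviour: the big lock is held for the entire hypercall,
 * so every vCPU of an SMP domain that is faulting a lot serialises here. */
static int do_mmu_update_current(struct domain *d, int count)
{
    pthread_mutex_lock(&d->big_lock);      /* LOCK_BIGLOCK(d)   */
    cleanup_writable_pagetable(d);
    for (int i = 0; i < count; i++)
        process_one_update(d, i);
    pthread_mutex_unlock(&d->big_lock);    /* UNLOCK_BIGLOCK(d) */
    return 0;
}

/* With the patch: the lock is released right after the cleanup, so the
 * per-entry work can proceed on multiple CPUs concurrently. */
static int do_mmu_update_patched(struct domain *d, int count)
{
    pthread_mutex_lock(&d->big_lock);
    cleanup_writable_pagetable(d);
    pthread_mutex_unlock(&d->big_lock);    /* unlock moved up */
    for (int i = 0; i < count; i++)
        process_one_update(d, i);
    return 0;
}

int main(void)
{
    struct domain d = { .big_lock = PTHREAD_MUTEX_INITIALIZER };
    do_mmu_update_current(&d, 4);
    do_mmu_update_patched(&d, 4);
    puts("sketch only: compares the two critical-section shapes");
    return 0;
}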
diff -Naurp xen-unstable.hg-8830/xen/arch/x86/mm.c xen-unstable.hg-8830-lockfix/xen/arch/x86/mm.c
--- xen-unstable.hg-8830/xen/arch/x86/mm.c	2006-02-15 16:30:31.000000000 -0600
+++ xen-unstable.hg-8830-lockfix/xen/arch/x86/mm.c	2006-02-15 16:32:48.000000000 -0600
@@ -1729,6 +1729,8 @@ int do_mmuext_op(
cleanup_writable_pagetable(d);
+ UNLOCK_BIGLOCK(d);
+
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
{
count &= ~MMU_UPDATE_PREEMPTED;
@@ -1957,7 +1959,6 @@ int do_mmuext_op(
if ( unlikely(pdone != NULL) )
__put_user(done + i, pdone);
- UNLOCK_BIGLOCK(d);
return rc;
}
@@ -1982,6 +1983,8 @@ int do_mmu_update(
cleanup_writable_pagetable(d);
+ UNLOCK_BIGLOCK(d);
+
if ( unlikely(shadow_mode_enabled(d)) )
check_pagetable(v, "pre-mmu"); /* debug */
@@ -2206,7 +2209,6 @@ int do_mmu_update(
if ( unlikely(shadow_mode_enabled(d)) )
check_pagetable(v, "post-mmu"); /* debug */
- UNLOCK_BIGLOCK(d);
return rc;
}
@@ -2503,6 +2505,8 @@ int do_update_va_mapping(unsigned long v
cleanup_writable_pagetable(d);
+ UNLOCK_BIGLOCK(d);
+
if ( unlikely(shadow_mode_enabled(d)) )
check_pagetable(v, "pre-va"); /* debug */
@@ -2574,8 +2578,6 @@ int do_update_va_mapping(unsigned long v
process_deferred_ops(cpu);
- UNLOCK_BIGLOCK(d);
-
return rc;
}
@@ -2675,13 +2677,9 @@ long do_set_gdt(unsigned long *frame_lis
if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
return -EFAULT;
- LOCK_BIGLOCK(current->domain);
-
if ( (ret = set_gdt(current, frames, entries)) == 0 )
local_flush_tlb();
- UNLOCK_BIGLOCK(current->domain);
-
return ret;
}
@@ -2700,21 +2698,18 @@ long do_update_descriptor(u64 pa, u64 de
*(u64 *)&d = desc;
- LOCK_BIGLOCK(dom);
if ( !VALID_MFN(mfn = gmfn_to_mfn(dom, gmfn)) ||
(((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
!mfn_valid(mfn) ||
!check_descriptor(&d) )
{
- UNLOCK_BIGLOCK(dom);
return -EINVAL;
}
page = mfn_to_page(mfn);
if ( unlikely(!get_page(page, dom)) )
{
- UNLOCK_BIGLOCK(dom);
return -EINVAL;
}
@@ -2760,8 +2755,6 @@ long do_update_descriptor(u64 pa, u64 de
out:
put_page(page);
- UNLOCK_BIGLOCK(dom);
-
return ret;
}
@@ -2793,7 +2786,6 @@ long arch_memory_op(int op, void *arg)
return -ESRCH;
}
- LOCK_BIGLOCK(d);
if ( d->arch.first_reserved_pfn == 0 )
{
d->arch.first_reserved_pfn = pfn = d->max_pages;
@@ -2803,7 +2795,6 @@ long arch_memory_op(int op, void *arg)
guest_physmap_add_page(
d, pfn + 1 + i, gnttab_shared_mfn(d, d->grant_table, i));
}
- UNLOCK_BIGLOCK(d);
xrpa.first_gpfn = d->arch.first_reserved_pfn;
xrpa.nr_gpfns = 32;