[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] Fix performance issue brought by TSC-sync logic
Recently we found a performance bug when doing network tests with VT-d assigned devices - in some extreme cases, the network performance in an HVM guest using a new Linux kernel could be 1/20 of native. The root cause is that one of our sync-tsc-under-deep-C-state patches brings an extra kilo-TSC drift between pCPUs and makes the check-tsc-sync logic in the HVM guest fail. The result is that the kernel falls back to using a platform timer (HPET, PMtimer) for gettimeofday instead of the TSC, which brings very frequent, costly I/O port access VMExits - three per call. We provide the 2 patches below to address the issue: tsc1.patch: Minimize the TSC drift between pCPUs by letting the BSP and APs set the TSC at the same time in time_calibration_rendezvous(). Looping a few times before writing the TSC sounds better, but it may be too costly. Signed-off-by: Xiaowei Yang <xiaowei.yang@xxxxxxxxx> tsc2.patch: only do TSC-sync if really necessary, which narrows its effect a lot. Signed-off-by: Wei Gang <wei.gang@xxxxxxxxx> Thanks, Xiaowei diff -r 0b0e7c2b4eef xen/arch/x86/time.c --- a/xen/arch/x86/time.c Tue Jan 20 21:21:16 2009 +0800 +++ b/xen/arch/x86/time.c Mon Feb 09 02:21:50 2009 +0800 @@ -1095,22 +1095,21 @@ static void time_calibration_rendezvous( while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) ) cpu_relax(); r->master_stime = read_platform_stime(); - rdtscll(r->master_tsc_stamp); + if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) + rdtscll(r->master_tsc_stamp); mb(); /* write r->master_* /then/ signal */ atomic_inc(&r->nr_cpus); - c->local_tsc_stamp = r->master_tsc_stamp; } else { atomic_inc(&r->nr_cpus); while ( atomic_read(&r->nr_cpus) != total_cpus ) - cpu_relax(); - mb(); /* receive signal /then/ read r->master_* */ - if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) - wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp); - rdtscll(c->local_tsc_stamp); - } - + mb(); /* receive signal /then/ read r->master_* */ + } + + if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) + wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp); + rdtscll(c->local_tsc_stamp); 
c->stime_local_stamp = get_s_time(); c->stime_master_stamp = r->master_stime; diff -r 246ecf354c85 xen/arch/x86/acpi/cpu_idle.c --- a/xen/arch/x86/acpi/cpu_idle.c Mon Feb 16 12:21:52 2009 +0800 +++ b/xen/arch/x86/acpi/cpu_idle.c Mon Feb 16 12:57:08 2009 +0800 @@ -737,6 +737,15 @@ long set_cx_pminfo(uint32_t cpu, struct if ( cpu_id == 0 && pm_idle_save == NULL ) { + int deepest_cx = acpi_power->states[acpi_power->count - 1].type; + if ( max_cstate >= 3 && deepest_cx >= ACPI_STATE_C3 ) + tsc_may_stop = 1; + else if ( max_cstate >= 2 && deepest_cx >= ACPI_STATE_C2 + && !local_apic_timer_c2_ok ) + tsc_may_stop = 1; + else + tsc_may_stop = 0; + pm_idle_save = pm_idle; pm_idle = acpi_processor_idle; } diff -r 246ecf354c85 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Mon Feb 16 12:21:52 2009 +0800 +++ b/xen/arch/x86/time.c Mon Feb 16 13:10:24 2009 +0800 @@ -1091,6 +1091,8 @@ struct calibration_rendezvous { u64 master_tsc_stamp; }; +int tsc_may_stop __read_mostly = 0; + static void time_calibration_rendezvous(void *_r) { struct cpu_calibration *c = &this_cpu(cpu_calibration); @@ -1102,7 +1104,9 @@ static void time_calibration_rendezvous( while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) ) cpu_relax(); r->master_stime = read_platform_stime(); - if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) + if ( !boot_cpu_has(X86_FEATURE_NOSTOP_TSC) + && boot_cpu_has(X86_FEATURE_CONSTANT_TSC) + && tsc_may_stop ) rdtscll(r->master_tsc_stamp); mb(); /* write r->master_* /then/ signal */ atomic_inc(&r->nr_cpus); @@ -1114,7 +1118,7 @@ static void time_calibration_rendezvous( mb(); /* receive signal /then/ read r->master_* */ } - if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) + if ( r->master_tsc_stamp ) wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp); rdtscll(c->local_tsc_stamp); c->stime_local_stamp = get_s_time(); @@ -1127,7 +1131,8 @@ static void time_calibration(void *unuse { struct calibration_rendezvous r = { .cpu_calibration_map = cpu_online_map, - .nr_cpus = ATOMIC_INIT(0) + .nr_cpus = 
ATOMIC_INIT(0), + .master_tsc_stamp = 0 }; /* @wait=1 because we must wait for all cpus before freeing @r. */ diff -r 246ecf354c85 xen/include/asm-x86/time.h --- a/xen/include/asm-x86/time.h Mon Feb 16 12:21:52 2009 +0800 +++ b/xen/include/asm-x86/time.h Mon Feb 16 12:57:08 2009 +0800 @@ -41,4 +41,6 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic uint64_t acpi_pm_tick_to_ns(uint64_t ticks); uint64_t ns_to_acpi_pm_tick(uint64_t ns); +extern int tsc_may_stop; + #endif /* __X86_TIME_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |