[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] Fix performance issue brought by TSC-sync logic



Recently we found a performance bug when running network tests with VT-d
assigned devices: in some extreme cases, network performance in an HVM
guest running a recent Linux kernel could be 1/20 of native. The root
cause is that one of our sync-TSC-under-deep-C-state patches introduces
extra drift of thousands of TSC ticks between pCPUs, which makes the
check-tsc-sync logic in the HVM guest fail. As a result, the kernel falls
back to a platform timer (HPET, PM timer) for gettimeofday instead of the
TSC, which causes very frequent and costly I/O port access VM exits -
three per call.

We provide the 2 patches below to address the issue:

tsc1.patch: Minimize the TSC drift between pCPUs by having the BSP and
the APs write the TSC at the same time in time_calibration_rendezvous().
Looping a few times before writing the TSC might give a better result,
but it may be too costly.
Signed-off-by: Xiaowei Yang <xiaowei.yang@xxxxxxxxx>

tsc2.patch: Only do the TSC sync when it is really necessary, which greatly narrows its impact.
Signed-off-by: Wei Gang <wei.gang@xxxxxxxxx>


Thanks,
Xiaowei

diff -r 0b0e7c2b4eef xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Tue Jan 20 21:21:16 2009 +0800
+++ b/xen/arch/x86/time.c       Mon Feb 09 02:21:50 2009 +0800
@@ -1095,22 +1095,21 @@ static void time_calibration_rendezvous(
         while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
-        rdtscll(r->master_tsc_stamp);
+        if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+            rdtscll(r->master_tsc_stamp);
         mb(); /* write r->master_* /then/ signal */
         atomic_inc(&r->nr_cpus);
-        c->local_tsc_stamp = r->master_tsc_stamp;
     }
     else
     {
         atomic_inc(&r->nr_cpus);
         while ( atomic_read(&r->nr_cpus) != total_cpus )
-            cpu_relax();
-        mb(); /* receive signal /then/ read r->master_* */
-        if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
-            wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
-        rdtscll(c->local_tsc_stamp);
-    }
-
+            mb(); /* receive signal /then/ read r->master_* */
+    }
+
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+        wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
+    rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
 
diff -r 246ecf354c85 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/arch/x86/acpi/cpu_idle.c      Mon Feb 16 12:57:08 2009 +0800
@@ -737,6 +737,15 @@ long set_cx_pminfo(uint32_t cpu, struct 
 
     if ( cpu_id == 0 && pm_idle_save == NULL )
     {
+        int deepest_cx = acpi_power->states[acpi_power->count - 1].type;
+        if ( max_cstate >= 3 && deepest_cx >= ACPI_STATE_C3 )
+            tsc_may_stop = 1;
+        else if ( max_cstate >= 2 && deepest_cx >= ACPI_STATE_C2
+                  && !local_apic_timer_c2_ok )
+            tsc_may_stop = 1;
+        else
+            tsc_may_stop = 0;
+
         pm_idle_save = pm_idle;
         pm_idle = acpi_processor_idle;
     }
diff -r 246ecf354c85 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/arch/x86/time.c       Mon Feb 16 13:10:24 2009 +0800
@@ -1091,6 +1091,8 @@ struct calibration_rendezvous {
     u64 master_tsc_stamp;
 };
 
+int tsc_may_stop __read_mostly = 0;
+
 static void time_calibration_rendezvous(void *_r)
 {
     struct cpu_calibration *c = &this_cpu(cpu_calibration);
@@ -1102,7 +1104,9 @@ static void time_calibration_rendezvous(
         while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
-        if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+        if ( !boot_cpu_has(X86_FEATURE_NOSTOP_TSC)
+             && boot_cpu_has(X86_FEATURE_CONSTANT_TSC)
+             && tsc_may_stop )
             rdtscll(r->master_tsc_stamp);
         mb(); /* write r->master_* /then/ signal */
         atomic_inc(&r->nr_cpus);
@@ -1114,7 +1118,7 @@ static void time_calibration_rendezvous(
             mb(); /* receive signal /then/ read r->master_* */
     }
 
-    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+    if ( r->master_tsc_stamp )
         wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
     rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
@@ -1127,7 +1131,8 @@ static void time_calibration(void *unuse
 {
     struct calibration_rendezvous r = {
         .cpu_calibration_map = cpu_online_map,
-        .nr_cpus = ATOMIC_INIT(0)
+        .nr_cpus = ATOMIC_INIT(0),
+        .master_tsc_stamp = 0
     };
 
     /* @wait=1 because we must wait for all cpus before freeing @r. */
diff -r 246ecf354c85 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h        Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/include/asm-x86/time.h        Mon Feb 16 12:57:08 2009 +0800
@@ -41,4 +41,6 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic
 uint64_t acpi_pm_tick_to_ns(uint64_t ticks);
 uint64_t ns_to_acpi_pm_tick(uint64_t ns);
 
+extern int tsc_may_stop;
+
 #endif /* __X86_TIME_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.