
[Xen-devel] [PATCH] verify TSC sync



Verify TSC sync even on systems with a constant and non-stop TSC.
We now reserve X86_FEATURE_TSC_RELIABLE for those systems on which
synchronization has actually been verified.

For the record... Jeremy was right!  (there, I said it ;-)

See the Linux patch described here:
http://patchwork.kernel.org/patch/68397/

(Note: the bulk of this patch is moving 100 lines within the
same file, to avoid forward references to the warp-check code.)
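
To illustrate the idea the warp check relies on, here is a minimal
userspace sketch of the same technique, reduced to pthreads and GCC's
__rdtsc() intrinsic. The names, thread count, and fixed iteration
count are invented; this is an illustration only, not part of the
patch:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>

#define ITERS    1000000
#define NTHREADS 4

static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t last_tsc;
static uint64_t max_warp;

static void *warp_check(void *unused)
{
    (void)unused;
    for (long i = 0; i < ITERS; i++) {
        /* Under the lock: fetch the previous reading (possibly taken
         * on another thread/CPU) and publish our own. */
        pthread_mutex_lock(&sync_lock);
        uint64_t prev = last_tsc;
        uint64_t now = __rdtsc();
        last_tsc = now;
        pthread_mutex_unlock(&sync_lock);

        /* If our reading is older than its predecessor, the TSCs are
         * not synchronized; record the worst warp seen. */
        if (prev > now) {
            pthread_mutex_lock(&sync_lock);
            if (max_warp < prev - now)
                max_warp = prev - now;
            pthread_mutex_unlock(&sync_lock);
        }
    }
    return NULL;
}

int main(void)
{
    pthread_t t[NTHREADS];

    /* For a meaningful result each thread should be pinned to a
     * different CPU (e.g. with pthread_setaffinity_np); omitted
     * here for brevity. */
    for (long i = 0; i < NTHREADS; i++)
        pthread_create(&t[i], NULL, warp_check, NULL);
    for (long i = 0; i < NTHREADS; i++)
        pthread_join(t[i], NULL);

    printf("max warp: %llu cycles\n", (unsigned long long)max_warp);
    return 0;
}

(The in-hypervisor version below additionally brackets each TSC read
with barriers and bounds the run by elapsed time rather than a fixed
iteration count.)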

Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>

diff -r 1a911fd65e52 xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Fri Dec 18 07:53:27 2009 +0000
+++ b/xen/arch/x86/cpu/amd.c    Fri Dec 18 11:40:25 2009 -0700
@@ -465,8 +465,6 @@ static void __devinit init_amd(struct cp
                if (c->x86_power & (1<<8)) {
                        set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
                        set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
-                       if (c->x86 != 0x11)
-                               set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
                }
        }
 
diff -r 1a911fd65e52 xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c  Fri Dec 18 07:53:27 2009 +0000
+++ b/xen/arch/x86/cpu/intel.c  Fri Dec 18 11:40:25 2009 -0700
@@ -212,7 +212,6 @@ static void __devinit init_intel(struct 
        if (cpuid_edx(0x80000007) & (1u<<8)) {
                set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
                set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
-               set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
        }
        if ((c->cpuid_level >= 0x00000006) &&
            (cpuid_eax(0x00000006) & (1u<<2)))
diff -r 1a911fd65e52 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Fri Dec 18 07:53:27 2009 +0000
+++ b/xen/arch/x86/time.c       Fri Dec 18 11:40:25 2009 -0700
@@ -1135,6 +1135,107 @@ static void local_time_calibration(void)
 }
 
 /*
+ * TSC Reliability check
+ */
+
+/*
+ * The Linux original version of this function is
+ * Copyright (c) 2006, Red Hat, Inc., Ingo Molnar
+ */
+void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
+{
+#define rdtsc_barrier() mb()
+    static DEFINE_SPINLOCK(sync_lock);
+    static cycles_t last_tsc;
+
+    cycles_t start, now, prev, end;
+    int i;
+
+    rdtsc_barrier();
+    start = get_cycles();
+    rdtsc_barrier();
+
+    /* The measurement runs for 20 msecs: */
+    end = start + tsc_khz * 20ULL;
+    now = start;
+
+    for ( i = 0; ; i++ )
+    {
+        /*
+         * We take the global lock, measure TSC, save the
+         * previous TSC that was measured (possibly on
+         * another CPU) and update the previous TSC timestamp.
+         */
+        spin_lock(&sync_lock);
+        prev = last_tsc;
+        rdtsc_barrier();
+        now = get_cycles();
+        rdtsc_barrier();
+        last_tsc = now;
+        spin_unlock(&sync_lock);
+
+        /*
+         * Be nice every now and then (and also check whether measurement is 
+         * done [we also insert a 10 million loops safety exit, so we don't
+         * lock up in case the TSC readout is totally broken]):
+         */
+        if ( unlikely(!(i & 7)) )
+        {
+            if ( (now > end) || (i > 10000000) )
+                break;
+            cpu_relax();
+            /*touch_nmi_watchdog();*/
+        }
+
+        /*
+         * Outside the critical section we can now see whether we saw a 
+         * time-warp of the TSC going backwards:
+         */
+        if ( unlikely(prev > now) )
+        {
+            spin_lock(&sync_lock);
+            if ( *max_warp < prev - now )
+                *max_warp = prev - now;
+            spin_unlock(&sync_lock);
+        }
+    }
+}
+
+static unsigned long tsc_max_warp, tsc_check_count;
+static cpumask_t tsc_check_cpumask = CPU_MASK_NONE;
+
+static void tsc_check_slave(void *unused)
+{
+    unsigned int cpu = smp_processor_id();
+    local_irq_disable();
+    while ( !cpu_isset(cpu, tsc_check_cpumask) )
+        mb();
+    check_tsc_warp(cpu_khz, &tsc_max_warp);
+    cpu_clear(cpu, tsc_check_cpumask);
+    local_irq_enable();
+}
+
+void tsc_check_reliability(void)
+{
+    unsigned int cpu = smp_processor_id();
+    static DEFINE_SPINLOCK(lock);
+
+    spin_lock(&lock);
+
+    tsc_check_count++;
+    smp_call_function(tsc_check_slave, NULL, 0);
+    tsc_check_cpumask = cpu_online_map;
+    local_irq_disable();
+    check_tsc_warp(cpu_khz, &tsc_max_warp);
+    cpu_clear(cpu, tsc_check_cpumask);
+    local_irq_enable();
+    while ( !cpus_empty(tsc_check_cpumask) )
+        cpu_relax();
+
+    spin_unlock(&lock);
+}
+
+/*
  * Rendezvous for all CPUs in IRQ context.
  * Master CPU snapshots the platform timer.
  * All CPUS snapshot their local TSC and extrapolation of system time.
@@ -1271,16 +1372,30 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
+    extern unsigned int max_cstate;
+
     /* If we have constant-rate TSCs then scale factor can be shared. */
     if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
     {
         int cpu;
         for_each_possible_cpu ( cpu )
             per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
-        /* If TSCs are not marked as 'reliable', re-sync during rendezvous. */
-        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
+    }
+    if ( (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && max_cstate <= 2) ||
+         boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
+    {
+        /*
+         * Sadly, despite processor vendors' best design guidance efforts,
+         * on some systems, CPUs may come out of reset improperly
+         * synchronized.  So we must verify there is no warp, and we
+         * can't do that until all CPUs are booted.
+         */
+        tsc_check_reliability();
+        if ( tsc_max_warp == 0 )
+            set_boot_cpu_bit(X86_FEATURE_TSC_RELIABLE);
+    }
+    if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
             time_calibration_rendezvous_fn = time_calibration_tsc_rendezvous;
-    }
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 
@@ -1481,107 +1596,6 @@ struct tm wallclock_time(void)
 }
 
 /*
- * TSC Reliability check
- */
-
-/*
- * The Linux original version of this function is
- * Copyright (c) 2006, Red Hat, Inc., Ingo Molnar
- */
-void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
-{
-#define rdtsc_barrier() mb()
-    static DEFINE_SPINLOCK(sync_lock);
-    static cycles_t last_tsc;
-
-    cycles_t start, now, prev, end;
-    int i;
-
-    rdtsc_barrier();
-    start = get_cycles();
-    rdtsc_barrier();
-
-    /* The measurement runs for 20 msecs: */
-    end = start + tsc_khz * 20ULL;
-    now = start;
-
-    for ( i = 0; ; i++ )
-    {
-        /*
-         * We take the global lock, measure TSC, save the
-         * previous TSC that was measured (possibly on
-         * another CPU) and update the previous TSC timestamp.
-         */
-        spin_lock(&sync_lock);
-        prev = last_tsc;
-        rdtsc_barrier();
-        now = get_cycles();
-        rdtsc_barrier();
-        last_tsc = now;
-        spin_unlock(&sync_lock);
-
-        /*
-         * Be nice every now and then (and also check whether measurement is 
-         * done [we also insert a 10 million loops safety exit, so we dont 
-         * lock up in case the TSC readout is totally broken]):
-         */
-        if ( unlikely(!(i & 7)) )
-        {
-            if ( (now > end) || (i > 10000000) )
-                break;
-            cpu_relax();
-            /*touch_nmi_watchdog();*/
-        }
-
-        /*
-         * Outside the critical section we can now see whether we saw a 
-         * time-warp of the TSC going backwards:
-         */
-        if ( unlikely(prev > now) )
-        {
-            spin_lock(&sync_lock);
-            if ( *max_warp < prev - now )
-                *max_warp = prev - now;
-            spin_unlock(&sync_lock);
-        }
-    }
-}
-
-static unsigned long tsc_max_warp, tsc_check_count;
-static cpumask_t tsc_check_cpumask = CPU_MASK_NONE;
-
-static void tsc_check_slave(void *unused)
-{
-    unsigned int cpu = smp_processor_id();
-    local_irq_disable();
-    while ( !cpu_isset(cpu, tsc_check_cpumask) )
-        mb();
-    check_tsc_warp(cpu_khz, &tsc_max_warp);
-    cpu_clear(cpu, tsc_check_cpumask);
-    local_irq_enable();
-}
-
-void tsc_check_reliability(void)
-{
-    unsigned int cpu = smp_processor_id();
-    static DEFINE_SPINLOCK(lock);
-
-    spin_lock(&lock);
-
-    tsc_check_count++;
-    smp_call_function(tsc_check_slave, NULL, 0);
-    tsc_check_cpumask = cpu_online_map;
-    local_irq_disable();
-    check_tsc_warp(cpu_khz, &tsc_max_warp);
-    cpu_clear(cpu, tsc_check_cpumask);
-    local_irq_enable();
-    while ( !cpus_empty(tsc_check_cpumask) )
-        cpu_relax();
-
-    spin_unlock(&lock);
-}
-
-/*
  * PV SoftTSC Emulation.
  */
 
@@ -1616,19 +1630,10 @@ void pv_soft_rdtsc(struct vcpu *v, struc
 
 static int host_tsc_is_safe(void)
 {
-    extern unsigned int max_cstate;
-
     if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
         return 1;
     if ( num_online_cpus() == 1 )
         return 1;
-    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && max_cstate <= 2 )
-    {
-        if ( !tsc_check_count )
-            tsc_check_reliability();
-        if ( tsc_max_warp == 0 )
-            return 1;
-    }
     return 0;
 }
 
diff -r 1a911fd65e52 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h  Fri Dec 18 07:53:27 2009 +0000
+++ b/xen/include/asm-x86/cpufeature.h  Fri Dec 18 11:40:25 2009 -0700
@@ -132,6 +132,7 @@
 
 #define cpu_has(c, bit)                test_bit(bit, (c)->x86_capability)
 #define boot_cpu_has(bit)      test_bit(bit, boot_cpu_data.x86_capability)
+#define set_boot_cpu_bit(bit)  set_bit(bit, boot_cpu_data.x86_capability)
 
 #ifdef __i386__
 #define cpu_has_vme            boot_cpu_has(X86_FEATURE_VME)

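For what it's worth, the master/slave handshake in
tsc_check_reliability() above can be pictured with the following
reduced pthreads sketch, with C11 atomics standing in for the cpumask
and the IPIs. The names are invented and this is only an illustration,
not part of the patch:

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NSLAVES 3

static atomic_uint check_mask;  /* one bit per slave; plays the role
                                 * of tsc_check_cpumask */

static void run_warp_check(unsigned int id)
{
    /* Stand-in for check_tsc_warp(cpu_khz, &tsc_max_warp). */
    printf("participant %u: running warp check\n", id);
}

static void *slave(void *arg)
{
    unsigned int id = (unsigned int)(uintptr_t)arg;
    unsigned int bit = 1u << id;

    /* Like tsc_check_slave(): spin until the master flags us in, so
     * that all participants run the warp check concurrently. */
    while (!(atomic_load(&check_mask) & bit))
        ;
    run_warp_check(id);
    atomic_fetch_and(&check_mask, ~bit);  /* signal completion */
    return NULL;
}

int main(void)
{
    pthread_t t[NSLAVES];

    for (uintptr_t i = 0; i < NSLAVES; i++)
        pthread_create(&t[i], NULL, slave, (void *)i);

    /* Like tsc_check_reliability(): release every slave at once,
     * join in with our own check, then wait for all bits to clear. */
    atomic_store(&check_mask, (1u << NSLAVES) - 1);
    run_warp_check(NSLAVES);
    while (atomic_load(&check_mask))
        ;

    for (uintptr_t i = 0; i < NSLAVES; i++)
        pthread_join(t[i], NULL);
    return 0;
}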
