[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RE: [Xen-devel] Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
Here is the updated patch for the constant-tsc case. -Jimmy CPUIDLE: revise tsc-restore to avoid increasing tsc skew between cpus Originally, the sequence for each cpu was [tsc-save, entry deepC, break-evt, exit deepC, tsc-restore], so the system error accumulated quite easily. Once the workloads between cpus are not balanced, the tsc skew between cpus will eventually become bigger and bigger - more than 10 seconds can be observed. Then we removed the tsc-save step, and did the tsc-restore after exit from deepC based only on the percpu t->stime_master_stamp, t->tsc_scale, & t->local_tsc_stamp. That makes the error accumulate more slowly, but cannot eliminate it. Now, for the constant-tsc case, we just keep an initial stamp via cstate_init_stamp during boot/S3 resume, which is based on the platform stime. All cpus need only to do tsc-restore relative to the initial stamp after exiting deepC. The base and tsc->ns scale are fixed and the same for all cpus, so it can avoid accumulated tsc-skew. BTW, this patch also bypasses the percpu tsc scale calibration for the constant-tsc case. Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx> diff -r 045f70d1acdb xen/arch/x86/time.c --- a/xen/arch/x86/time.c Sat Dec 13 17:44:20 2008 +0000 +++ b/xen/arch/x86/time.c Mon Dec 15 10:35:11 2008 +0800 @@ -69,8 +69,11 @@ static DEFINE_PER_CPU(struct cpu_time, c #define EPOCH MILLISECS(1000) static struct timer calibration_timer; -/* TSC is invariant on C state entry? */ -static bool_t tsc_invariant; +/* TSC will not stop during deep C state? */ +static bool_t tsc_nostop; +/* TSC will be constant rate, independent with P/T state? */ +static int constant_tsc = 0; +boolean_param("const_tsc", constant_tsc); /* * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter. 
@@ -551,6 +554,10 @@ static u64 plt_stamp; /* hard static u64 plt_stamp; /* hardware-width platform counter stamp */ static struct timer plt_overflow_timer; +/* following 2 variables are for deep C state TSC restore usage */ +static u64 initial_tsc_stamp; /* initial tsc stamp while plt starting */ +static s_time_t initial_stime_platform_stamp; /* initial stime stamp */ + static void plt_overflow(void *unused) { u64 count; @@ -664,25 +671,41 @@ static void init_platform_timer(void) freq_string(pts->frequency), pts->name); } -void cstate_restore_tsc(void) +static void cstate_init_stamp(void) +{ + if ( tsc_nostop || !constant_tsc ) + return; + + initial_stime_platform_stamp = read_platform_stime(); + rdtscll(initial_tsc_stamp); +} + +static inline void __restore_tsc(s_time_t plt_stime) { struct cpu_time *t = &this_cpu(cpu_time); struct time_scale sys_to_tsc = scale_reciprocal(t->tsc_scale); s_time_t stime_delta; u64 tsc_delta; - if ( tsc_invariant ) + if ( tsc_nostop ) return; - stime_delta = read_platform_stime() - t->stime_master_stamp; + stime_delta = plt_stime - + (constant_tsc ? initial_stime_platform_stamp : t->stime_master_stamp); + if ( stime_delta < 0 ) stime_delta = 0; tsc_delta = scale_delta(stime_delta, &sys_to_tsc); - wrmsrl(MSR_IA32_TSC, t->local_tsc_stamp + tsc_delta); + wrmsrl(MSR_IA32_TSC, + (constant_tsc ? 
initial_tsc_stamp : t->local_tsc_stamp) + tsc_delta); } +void cstate_restore_tsc(void) +{ + __restore_tsc(read_platform_stime()); +} /*************************************************************************** * CMOS Timer functions ***************************************************************************/ @@ -960,6 +983,18 @@ static void local_time_calibration(void) curr_master_stime - curr_local_stime); #endif + if ( constant_tsc ) + { + local_irq_disable(); + t->local_tsc_stamp = curr_tsc; + t->stime_local_stamp = curr_master_stime; + t->stime_master_stamp = curr_master_stime; + local_irq_enable(); + + update_vcpu_system_time(current); + goto out; + } + /* Local time warps forward if it lags behind master time. */ if ( curr_local_stime < curr_master_stime ) curr_local_stime = curr_master_stime; @@ -1082,6 +1117,8 @@ static void time_calibration_rendezvous( mb(); /* receive signal /then/ read r->master_stime */ } + __restore_tsc(r->master_stime); + rdtscll(c->local_tsc_stamp); c->stime_local_stamp = get_s_time(); c->stime_master_stamp = r->master_stime; @@ -1125,9 +1162,23 @@ void init_percpu_time(void) /* Late init function (after all CPUs are booted). */ int __init init_xen_time(void) { - /* Is TSC invariant during deep C state? */ + /* for recent intel x86 model, the tsc increments at a constant rate */ + if ( (current_cpu_data.x86 == 0xf && current_cpu_data.x86_model >= 0x03) || + (current_cpu_data.x86 == 0x6 && current_cpu_data.x86_model >= 0x0e) ) + { + int cpu; + + constant_tsc = 1; + + for_each_cpu(cpu) + { + per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale; + } + } + + /* Is TSC not stop during deep C state ? 
*/ if ( cpuid_edx(0x80000007) & (1u<<8) ) - tsc_invariant = 1; + tsc_nostop = 1; open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration); @@ -1139,6 +1190,8 @@ int __init init_xen_time(void) stime_platform_stamp = NOW(); init_platform_timer(); + + cstate_init_stamp(); init_percpu_time(); @@ -1260,6 +1313,8 @@ int time_resume(void) disable_pit_irq(); init_percpu_time(); + + cstate_init_stamp(); do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW()); Attachment:
tsc-skew-20081213-1.patch _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |