|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v4 17/47] x86/kvm: Mark TSC as reliable when it's constant and nonstop
Mark the TSC as reliable if the hypervisor (KVM) has enumerated the TSC
as constant and nonstop, and the admin hasn't explicitly marked the TSC
as unstable. As in most (all?) virtualization setups, any secondary
clocksource that's used as a watchdog is guaranteed to be less reliable
than a constant, nonstop TSC, as all clocksources the kernel uses as a
watchdog are all but guaranteed to be emulated when running as a KVM
guest. I.e. any observed discrepancies between the TSC and watchdog will
be due to jitter in the watchdog.
This is especially true for KVM, as the watchdog clocksource is usually
emulated in host userspace, i.e. reading the clock incurs a roundtrip
cost of thousands of cycles.
Marking the TSC reliable addresses a flaw where the TSC will occasionally
be marked unstable if the host is under moderate/heavy load.
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_para.h | 2 +-
arch/x86/kernel/kvm.c | 16 +++++++++++++++-
arch/x86/kernel/kvmclock.c | 15 +++++----------
3 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 4a47c16e2df8..4a49fc286b4c 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -118,7 +118,7 @@ static inline long kvm_sev_hypercall3(unsigned int nr,
unsigned long p1,
}
#ifdef CONFIG_KVM_GUEST
-void kvmclock_init(void);
+void kvmclock_init(bool prefer_tsc);
void kvmclock_disable(void);
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 909d3e5e5bcd..4fe9c69bf40b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -978,6 +978,7 @@ static void __init kvm_init_platform(void)
.mask_hi = (BIT_ULL(boot_cpu_data.x86_phys_bits) - 1) >> 32,
};
u32 timing_info_leaf;
+ bool tsc_is_reliable;
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
@@ -1040,7 +1041,20 @@ static void __init kvm_init_platform(void)
}
}
- kvmclock_init();
+ /*
+ * If the TSC counts at a constant frequency across P/T states, counts
+ * in deep C-states, and the TSC hasn't been marked unstable, treat the
+ * TSC as reliable, as guaranteed by KVM. Note, the TSC unstable check
+ * exists purely to honor the TSC being marked unstable via command
+ * line; any runtime detection of an unstable TSC will happen after this.
+ */
+ tsc_is_reliable = boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+ boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
+ !check_tsc_unstable();
+ if (tsc_is_reliable)
+ setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+
+ kvmclock_init(tsc_is_reliable);
x86_platform.apic_post_init = kvm_apic_init;
/*
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 404f60741aa8..69a15fbfb779 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -285,7 +285,7 @@ static int kvmclock_setup_percpu(unsigned int cpu)
return p ? 0 : -ENOMEM;
}
-void __init kvmclock_init(void)
+void __init kvmclock_init(bool prefer_tsc)
{
u8 flags;
@@ -334,16 +334,11 @@ void __init kvmclock_init(void)
kvm_get_preset_lpj();
/*
- * X86_FEATURE_NONSTOP_TSC is TSC runs at constant rate
- * with P/T states and does not stop in deep C-states.
- *
- * Invariant TSC exposed by host means kvmclock is not necessary:
- * can use TSC as clocksource.
- *
+ * If TSC is preferred over kvmclock, drop kvmclock's rating so that TSC
+ * is chosen as the clocksource (but still register kvmclock in case
+ * the kernel doesn't want to use TSC for whatever reason).
*/
- if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
- boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
- !check_tsc_unstable())
+ if (prefer_tsc)
kvm_clock.rating = 299;
clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
--
2.54.0.823.g6e5bcc1fc9-goog
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |