[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v4 16/47] x86/kvm: Obtain TSC frequency from PV CPUID if present



From: David Woodhouse <dwmw@xxxxxxxxxxxx>

In https://lkml.org/lkml/2008/10/1/246 a proposal was made for generic
CPUID conventions across hypervisors. It was mostly shot down in flames,
but the leaf at 0x40000010 containing timing information didn't die.

It's used by XNU and FreeBSD guests under all hypervisors¹² to determine
the TSC frequency, and also exposed by the EC2 Nitro hypervisor (as
well as, presumably, VMware). FreeBSD's Bhyve is probably just about
to start exposing it too.

Use it under KVM to obtain the TSC frequency more accurately, instead of
reverse-calculating the frequency from the mul/shift values in the KVM
clock.  Use the information to get the CPU frequency as well (kvmclock
feeds in kvmclock_get_tsc_khz() for both TSC and CPU calibration), as the info
from CPUID is superior in every way; whether or not kvmclock should be
overriding CPU calibration in the first place is an entirely different
question.

Use the info from CPUID even if the user explicitly disables kvmclock, or
if it's unsupported.  The PV CPUID leaf has no dependency on kvmclock, and
is in fact more useful if kvmclock is disabled since the kernel won't be
able to use kvmclock to derive the TSC frequency.

Before:
[    0.000020] tsc: Detected 2900.014 MHz processor

After:
[    0.000020] tsc: Detected 2900.015 MHz processor

$ cpuid -1 -l 0x40000010
CPU:
   hypervisor generic timing information (0x40000010):
      TSC frequency (Hz) = 2900015
      bus frequency (Hz) = 1000000

Note!  *Independently* query for non-null get_{cpu,tsc}_khz() overrides so
that kvmclock doesn't clobber x86_init.hyper.get_cpu_khz() if/when KVM adds
support for getting the CPU frequency separately from the TSC frequency.

¹ https://github.com/apple/darwin-xnu/blob/main/osfmk/i386/cpuid.c
² https://github.com/freebsd/freebsd-src/commit/4a432614f68

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Co-developed-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
 arch/x86/kernel/kvm.c      | 33 +++++++++++++++++++++++++++++++++
 arch/x86/kernel/kvmclock.c |  6 ++++--
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index dcef84da304b..909d3e5e5bcd 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -49,6 +49,8 @@
 #include <asm/svm.h>
 #include <asm/e820/api.h>
 
+static unsigned int kvm_tsc_khz_cpuid __initdata;
+
 DEFINE_STATIC_KEY_FALSE_RO(kvm_async_pf_enabled);
 
 static int kvmapf = 1;
@@ -911,6 +913,21 @@ bool kvm_para_available(void)
 }
 EXPORT_SYMBOL_GPL(kvm_para_available);
 
+static u32 __init kvm_cpuid_timing_info_leaf(void)
+{
+       u32 base = kvm_cpuid_base();
+
+       if (!base || cpuid_eax(base) < (base | KVM_CPUID_TIMING_INFO))
+               return 0;
+
+       return base | KVM_CPUID_TIMING_INFO;
+}
+
+static unsigned int __init kvm_get_tsc_khz(void)
+{
+       return kvm_tsc_khz_cpuid;
+}
+
 unsigned int kvm_arch_para_features(void)
 {
        return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES);
@@ -960,6 +977,7 @@ static void __init kvm_init_platform(void)
                .mask_lo = (u32)(~(SZ_4G - tolud - 1)) | MTRR_PHYSMASK_V,
                .mask_hi = (BIT_ULL(boot_cpu_data.x86_phys_bits) - 1) >> 32,
        };
+       u32 timing_info_leaf;
 
        if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
            kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
@@ -1007,6 +1025,21 @@ static void __init kvm_init_platform(void)
                        wrmsrq(MSR_KVM_MIGRATION_CONTROL,
                               KVM_MIGRATION_READY);
        }
+
+       /*
+        * If KVM advertises the frequency directly in CPUID, use that instead
+        * of reverse-calculating it from the KVM clock data, or worse, trying
+        * to calibrate the TSC using an emulated device.
+        */
+       timing_info_leaf = kvm_cpuid_timing_info_leaf();
+       if (timing_info_leaf) {
+               kvm_tsc_khz_cpuid = cpuid_eax(timing_info_leaf);
+               if (kvm_tsc_khz_cpuid) {
+                       x86_init.hyper.get_tsc_khz = kvm_get_tsc_khz;
+                       x86_init.hyper.get_cpu_khz = kvm_get_tsc_khz;
+               }
+       }
+
        kvmclock_init();
        x86_platform.apic_post_init = kvm_apic_init;
 
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index c4a782a0c903..404f60741aa8 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -320,8 +320,10 @@ void __init kvmclock_init(void)
        flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
        kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT);
 
-       x86_init.hyper.get_tsc_khz = kvmclock_get_tsc_khz;
-       x86_init.hyper.get_cpu_khz = kvmclock_get_tsc_khz;
+       if (!x86_init.hyper.get_tsc_khz)
+               x86_init.hyper.get_tsc_khz = kvmclock_get_tsc_khz;
+       if (!x86_init.hyper.get_cpu_khz)
+               x86_init.hyper.get_cpu_khz = kvmclock_get_tsc_khz;
        x86_platform.get_wallclock = kvm_get_wallclock;
        x86_platform.set_wallclock = kvm_set_wallclock;
 #ifdef CONFIG_X86_LOCAL_APIC
-- 
2.54.0.823.g6e5bcc1fc9-goog




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.