[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v4 13/30] KVM: x86: Fix KVM clock precision in get_kvmclock() with TSC scaling



From: David Woodhouse <dwmw@xxxxxxxxxxxx>

When in master clock mode, the KVM clock is defined in terms of the
guest TSC. But get_kvmclock() was computing it from the host TSC
without applying TSC scaling, leading to a systemic drift from the
values the guest computes from its own TSC.

Store the VM's TSC scaling ratio in kvm_arch and precompute the
guest-TSC-based mul/shift in pvclock_update_vm_gtod_copy(). Use these
in get_kvmclock() to scale the host TSC delta to guest TSC before
converting to nanoseconds.

This avoids "definition C" of the KVM clock described in the
earlier commit "KVM: x86/xen: Do not corrupt KVM clock in
kvm_xen_shared_info_init()".

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  4 +++
 arch/x86/kvm/x86.c              | 50 +++++++++++++++++++++++++++++----
 2 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 37264212c7df..5348fd5ea3f3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1490,6 +1490,7 @@ struct kvm_arch {
        u64 last_tsc_write;
        u32 last_tsc_khz;
        u64 last_tsc_offset;
+       u64 last_tsc_scaling_ratio;
        u64 cur_tsc_nsec;
        u64 cur_tsc_write;
        u64 cur_tsc_offset;
@@ -1504,6 +1505,9 @@ struct kvm_arch {
        bool use_master_clock;
        u64 master_kernel_ns;
        u64 master_cycle_now;
+       u64 master_tsc_scaling_ratio;
+       s8  master_tsc_shift;
+       u32 master_tsc_mul;
 
 #ifdef CONFIG_KVM_HYPERV
        struct kvm_hv hyperv;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f2653eaccdf8..09b00906b1de 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2781,6 +2781,7 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
        kvm->arch.last_tsc_write = tsc;
        kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
        kvm->arch.last_tsc_offset = offset;
+       kvm->arch.last_tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
 
        vcpu->arch.last_guest_tsc = tsc;
 
@@ -3109,6 +3110,8 @@ static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
  *
  */
 
+static unsigned long get_cpu_tsc_khz(void);
+
 static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
 {
 #ifdef CONFIG_X86_64
@@ -3132,9 +3135,28 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
                                && !ka->backwards_tsc_observed
                                && !ka->boot_vcpu_runs_old_kvmclock;
 
-       if (ka->use_master_clock)
+       if (ka->use_master_clock) {
+               u64 tsc_hz;
+
                atomic_set(&kvm_guest_has_master_clock, 1);
 
+               /*
+                * Copy the scaling ratio and precompute the mul/shift for
+                * converting guest TSC to nanoseconds. These are used by
+                * get_kvmclock() to compute kvmclock from the host TSC
+                * without needing a vCPU reference.
+                */
+               ka->master_tsc_scaling_ratio = ka->last_tsc_scaling_ratio;
+               tsc_hz = (u64)get_cpu_tsc_khz() * 1000;
+               if (tsc_hz && kvm_caps.has_tsc_control)
+                       tsc_hz = kvm_scale_tsc(tsc_hz,
+                                              ka->master_tsc_scaling_ratio);
+               if (tsc_hz)
+                       kvm_get_time_scale(NSEC_PER_SEC, tsc_hz,
+                                          &ka->master_tsc_shift,
+                                          &ka->master_tsc_mul);
+       }
+
        vclock_mode = pvclock_gtod_data.clock.vclock_mode;
        trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
                                        vcpus_matched);
@@ -3235,10 +3257,28 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
                        data->flags |= KVM_CLOCK_TSC_STABLE;
                        hv_clock.tsc_timestamp = ka->master_cycle_now;
                        hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-                       kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL,
-                                          &hv_clock.tsc_shift,
-                                          &hv_clock.tsc_to_system_mul);
-                       data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
+
+                       /*
+                        * Use the precomputed guest-TSC-based mul/shift
+                        * so that the kvmclock value matches what the
+                        * guest computes from its own TSC.
+                        */
+                       hv_clock.tsc_shift = ka->master_tsc_shift;
+                       hv_clock.tsc_to_system_mul = ka->master_tsc_mul;
+
+                       if (kvm_caps.has_tsc_control) {
+                               u64 tsc_delta = data->host_tsc - ka->master_cycle_now;
+
+                               tsc_delta = kvm_scale_tsc(tsc_delta,
+                                                         ka->master_tsc_scaling_ratio);
+                               data->clock = hv_clock.system_time +
+                                       pvclock_scale_delta(tsc_delta,
+                                                           hv_clock.tsc_to_system_mul,
+                                                           hv_clock.tsc_shift);
+                       } else {
+                               data->clock = __pvclock_read_cycles(&hv_clock,
+                                                                   data->host_tsc);
+                       }
 
                        put_cpu();
                } else {
-- 
2.51.0




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.