|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v4 05/30] KVM: selftests: Add KVM/PV clock selftest to prove timer correction
From: Jack Allister <jalliste@xxxxxxxxxx>
A VM's KVM/PV clock has an inherent relationship to its TSC. When either
the host system is live-updated or the VM is live-migrated, this pairing
of the two clock sources should stay the same. In reality this is not the
case unless some correction takes place.
The KVM_GET_CLOCK_GUEST/KVM_SET_CLOCK_GUEST ioctls can be used to
perform a correction on the PVTI (PV time information) structure held by
KVM to effectively fix up the kvmclock_offset prior to the guest VM
resuming in either a live-update/migration scenario.
This test proves that without the necessary fixup there is a perceived
change in the guest TSC and KVM/PV clock relationship before and after a
simulated LU/LM takes place, and that the correction eliminates it.
The test:
1. Snapshots the PVTI at boot (PVTI0).
2. Induces a change in PVTI data (KVM_REQ_MASTERCLOCK_UPDATE).
3. Snapshots the PVTI after the change (PVTI1).
4. Requests correction via KVM_SET_CLOCK_GUEST using PVTI0.
5. Snapshots the PVTI after correction (PVTI2).
It then samples the TSC at a single point in time and calculates the KVM
clock using each PVTI snapshot. The corrected clock should match the
boot clock to within ±2ns.
The test enumerates multiple TSC frequencies from 1GHz to 5GHz in 500MHz
steps, crossing the 32-bit boundary, to exercise the scaling path at
various ratios. The sleep duration between snapshots is configurable via
the -s/--sleep command line option.
Co-developed-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Signed-off-by: Jack Allister <jalliste@xxxxxxxxxx>
Reviewed-by: Paul Durrant <paul@xxxxxxx>
Cc: Dongli Zhang <dongli.zhang@xxxxxxxxxx>
---
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../testing/selftests/kvm/x86/pvclock_test.c | 415 ++++++++++++++++++
2 files changed, 416 insertions(+)
create mode 100644 tools/testing/selftests/kvm/x86/pvclock_test.c
diff --git a/tools/testing/selftests/kvm/Makefile.kvm
b/tools/testing/selftests/kvm/Makefile.kvm
index 9118a5a51b89..fb935ae3bf38 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -105,6 +105,7 @@ TEST_GEN_PROGS_x86 += x86/pmu_counters_test
TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test
TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test
+TEST_GEN_PROGS_x86 += x86/pvclock_test
TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id
TEST_GEN_PROGS_x86 += x86/set_sregs_test
TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test
diff --git a/tools/testing/selftests/kvm/x86/pvclock_test.c
b/tools/testing/selftests/kvm/x86/pvclock_test.c
new file mode 100644
index 000000000000..1a3d52923c71
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/pvclock_test.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © Amazon.com, Inc. or its affiliates.
+ *
+ * Tests for pvclock API
+ * KVM_SET_CLOCK_GUEST/KVM_GET_CLOCK_GUEST
+ */
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <asm/pvclock-abi.h>
+
+/*
+ * Reproduce the pvclock calculation the guest uses to convert TSC to
+ * nanoseconds. This must match the kernel's __pvclock_read_cycles().
+ *
+ * @delta is first shifted by @shift bits (right when @shift is negative,
+ * left otherwise), then multiplied by @mul; the result is the 128-bit
+ * product shifted right by 32.
+ */
+static inline uint64_t pvclock_scale_delta(uint64_t delta, uint32_t mul,
+					   int8_t shift)
+{
+	uint64_t shifted = shift < 0 ? delta >> -shift : delta << shift;
+	__uint128_t product = (__uint128_t)shifted * mul;
+
+	return product >> 32;
+}
+
+/*
+ * Compute the clock value for @tsc from the PVTI snapshot @src: the
+ * snapshot's system_time plus the scaled number of TSC ticks between the
+ * snapshot's tsc_timestamp and @tsc.
+ */
+static inline uint64_t pvclock_read_cycles(struct pvclock_vcpu_time_info *src,
+					   uint64_t tsc)
+{
+	uint64_t scaled = pvclock_scale_delta(tsc - src->tsc_timestamp,
+					      src->tsc_to_system_mul,
+					      src->tsc_shift);
+
+	return src->system_time + scaled;
+}
+
+/* Stages the guest reports to the host through GUEST_SYNC(). */
+enum {
+	STAGE_FIRST_BOOT = 0,
+	STAGE_UNCORRECTED = 1,
+	STAGE_CORRECTED = 2,
+};
+
+/* Guest physical address and size of the PV time info structure. */
+#define KVMCLOCK_GPA	0xc0000000ull
+#define KVMCLOCK_SIZE	sizeof(struct pvclock_vcpu_time_info)
+
+/*
+ * Coerce KVM into updating the fields of the PV time info struct by
+ * enabling kvmclock through the old system time MSR and then through the
+ * new one, both pointing at KVMCLOCK_GPA.
+ */
+static void trigger_pvti_update(void)
+{
+	const uint64_t enable = KVMCLOCK_GPA | KVM_MSR_ENABLED;
+
+	wrmsr(MSR_KVM_SYSTEM_TIME, enable);
+	wrmsr(MSR_KVM_SYSTEM_TIME_NEW, enable);
+}
+
+/*
+ * Guest entry point for the correction test.
+ *
+ * Takes three snapshots of the PV time info (after enabling kvmclock, after
+ * a forced update, and after the host has handled STAGE_UNCORRECTED),
+ * syncing with the host after the first two. It then evaluates all three
+ * snapshots at one TSC sample and asserts that the clock derived from the
+ * corrected snapshot is within 2 of the clock derived from the boot one.
+ */
+static void guest_code(void)
+{
+	struct pvclock_vcpu_time_info *pvti =
+		(void *)(unsigned long)KVMCLOCK_GPA;
+	struct pvclock_vcpu_time_info pvti_boot;
+	struct pvclock_vcpu_time_info pvti_uncorrected;
+	struct pvclock_vcpu_time_info pvti_corrected;
+	uint64_t tsc_guest;
+	uint64_t clk_boot, clk_uncorrected, clk_corrected;
+	int64_t delta_corrected;
+
+	/* Set up kvmclock and snapshot the initial pvclock parameters. */
+	wrmsr(MSR_KVM_SYSTEM_TIME_NEW, KVMCLOCK_GPA | KVM_MSR_ENABLED);
+	pvti_boot = *pvti;
+	GUEST_SYNC(STAGE_FIRST_BOOT);
+
+	/*
+	 * Trigger an update of the PVTI. Calculating the KVM clock using this
+	 * updated structure will show a delta from the original.
+	 */
+	trigger_pvti_update();
+	pvti_uncorrected = *pvti;
+	GUEST_SYNC(STAGE_UNCORRECTED);
+
+	/*
+	 * Snapshot the corrected time (the host does KVM_SET_CLOCK_GUEST when
+	 * handling STAGE_UNCORRECTED).
+	 */
+	pvti_corrected = *pvti;
+
+	/*
+	 * Sample the TSC at a single point in time, then calculate the
+	 * effective KVM clock using the PVTI from each stage. Verify that the
+	 * corrected clock matches the boot clock to within ±2ns.
+	 */
+	tsc_guest = rdtsc();
+
+	clk_boot = pvclock_read_cycles(&pvti_boot, tsc_guest);
+	clk_uncorrected = pvclock_read_cycles(&pvti_uncorrected, tsc_guest);
+	clk_corrected = pvclock_read_cycles(&pvti_corrected, tsc_guest);
+
+	/* The unsigned difference is reinterpreted as signed on assignment. */
+	delta_corrected = clk_boot - clk_corrected;
+
+	/* clk_uncorrected is only reported, never asserted on. */
+	__GUEST_ASSERT(delta_corrected >= -2 && delta_corrected <= 2,
+		       "corrected delta %ld out of range (boot=%lu uncorrected=%lu corrected=%lu)",
+		       delta_corrected, clk_boot, clk_uncorrected,
+		       clk_corrected);
+
+	GUEST_SYNC(STAGE_CORRECTED);
+}
+
+/*
+ * Host side of the correction test. Runs @vcpu until the guest reports
+ * STAGE_CORRECTED, servicing each stage it syncs on the way:
+ *  - STAGE_FIRST_BOOT: save the clock with KVM_GET_CLOCK_GUEST, then sleep
+ *    for @sleep_sec seconds;
+ *  - STAGE_UNCORRECTED: write the saved clock back with
+ *    KVM_SET_CLOCK_GUEST.
+ * @vm is not used.
+ */
+static void run_test(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+		     unsigned int sleep_sec)
+{
+	struct pvclock_vcpu_time_info pvti_before;
+	struct ucall uc;
+	uint64_t cmd;
+
+	for (;;) {
+		vcpu_run(vcpu);
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		/* Anything other than a sync from the guest is a failure. */
+		cmd = get_ucall(vcpu, &uc);
+		if (cmd == UCALL_ABORT) {
+			REPORT_GUEST_ASSERT(uc);
+		} else if (cmd != UCALL_SYNC) {
+			TEST_FAIL("Unexpected ucall");
+		}
+
+		if (uc.args[1] == STAGE_CORRECTED) {
+			/* Guest verified the delta in-guest. */
+			return;
+		}
+
+		if (uc.args[1] == STAGE_FIRST_BOOT) {
+			/* Save the pvclock parameters before the update. */
+			vcpu_ioctl(vcpu, KVM_GET_CLOCK_GUEST, &pvti_before);
+
+			/* Sleep to let the clocks diverge. */
+			sleep(sleep_sec);
+		} else if (uc.args[1] == STAGE_UNCORRECTED) {
+			/* Restore the original pvclock parameters. */
+			vcpu_ioctl(vcpu, KVM_SET_CLOCK_GUEST, &pvti_before);
+		} else {
+			TEST_FAIL("Unknown stage %lu", uc.args[1]);
+		}
+	}
+}
+
+/*
+ * Add an anonymous memory region large enough for the PV time info
+ * structure at KVMCLOCK_GPA, and map it at the same guest virtual address.
+ */
+static void configure_pvclock(struct kvm_vm *vm)
+{
+	const unsigned int nr_pages =
+		vm_calc_num_guest_pages(VM_MODE_DEFAULT, KVMCLOCK_SIZE);
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, KVMCLOCK_GPA,
+				    1, nr_pages, 0);
+	virt_map(vm, KVMCLOCK_GPA, KVMCLOCK_GPA, nr_pages);
+}
+
+/*
+ * Run the correction test in a fresh single-vCPU VM whose TSC frequency is
+ * set to @tsc_khz, sleeping @sleep_sec seconds between snapshots.
+ */
+static void run_at_frequency(uint64_t tsc_khz, unsigned int sleep_sec)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	pr_info("Testing at TSC frequency %lu kHz\n", tsc_khz);
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	configure_pvclock(vm);
+	vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *)tsc_khz);
+	run_test(vm, vcpu, sleep_sec);
+
+	/*
+	 * This runs once per tested frequency, so tear the VM down fully.
+	 * kvm_vm_free() is used rather than kvm_vm_release() so that the
+	 * VM's memory regions and bookkeeping are freed as well, not only
+	 * its file descriptors closed.
+	 */
+	kvm_vm_free(vm);
+}
+
+/* Forward declarations: both tests are defined after main(), which calls them. */
+static void test_tsc_stable_bit(void);
+static void test_clock_guest_with_offsets(void);
+
+/* Print the command line help, using @name as the program name. */
+static void usage(const char *name)
+{
+	printf("Usage: %s [options]\n"
+	       " -s, --sleep SEC sleep duration between snapshots (default: 2)\n"
+	       " -h, --help show this help\n", name);
+}
+
+/*
+ * Parse @arg as a non-negative decimal number of seconds and store it in
+ * @out. Returns 0 on success, or -1 if @arg does not start with a digit,
+ * has trailing characters, or does not fit in an unsigned int.
+ */
+static int parse_sleep_seconds(const char *arg, unsigned int *out)
+{
+	unsigned long val;
+	char *end;
+
+	/*
+	 * strtoul() skips leading whitespace and accepts a sign (negating the
+	 * result for '-'), so insist on a leading digit.
+	 */
+	if (*arg < '0' || *arg > '9')
+		return -1;
+
+	errno = 0;
+	val = strtoul(arg, &end, 10);
+	if (errno || *end != '\0' || val > UINT_MAX)
+		return -1;
+
+	*out = (unsigned int)val;
+	return 0;
+}
+
+/*
+ * Entry point. Options: -s/--sleep SEC sets the sleep between snapshots
+ * (default 2), -h/--help prints usage. Requires a TSC-based system
+ * clocksource. Runs the correction test at the native TSC frequency, then
+ * at 1-5 GHz in 500 MHz steps (skipping the host frequency), then the
+ * stable-bit and offset tests.
+ */
+int main(int argc, char *argv[])
+{
+	static const struct option long_opts[] = {
+		{ "sleep", required_argument, NULL, 's' },
+		{ "help", no_argument, NULL, 'h' },
+		{ NULL, 0, NULL, 0 },
+	};
+	unsigned int sleep_sec = 2;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	uint64_t host_khz;
+	uint64_t freq;
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "s:h", long_opts, NULL)) != -1) {
+		switch (opt) {
+		case 's':
+			/* Reject garbage and negatives instead of sleeping for ages. */
+			if (parse_sleep_seconds(optarg, &sleep_sec)) {
+				fprintf(stderr, "Invalid sleep duration '%s'\n",
+					optarg);
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+		case 'h':
+		default:
+			usage(argv[0]);
+			return opt == 'h' ? 0 : 1;
+		}
+	}
+
+	TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	configure_pvclock(vm);
+
+	/* First run at native frequency (no scaling). */
+	run_test(vm, vcpu, sleep_sec);
+
+	/*
+	 * Then enumerate a range of TSC frequencies crossing the 32-bit
+	 * boundary, to exercise the scaling path at various ratios.
+	 */
+	host_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
+
+	/* Full teardown: free the VM's memory as well as closing its fds. */
+	kvm_vm_free(vm);
+
+	for (freq = 1000000; freq <= 5000000; freq += 500000) {
+		/* The native frequency was already covered above. */
+		if (freq == host_khz)
+			continue;
+		run_at_frequency(freq, sleep_sec);
+	}
+
+	test_tsc_stable_bit();
+	test_clock_guest_with_offsets();
+
+	return 0;
+}
+
+/*
+ * Guest entry point for the two-vCPU tests: enable kvmclock at
+ * KVMCLOCK_GPA, then sync with the host three times.
+ */
+static void guest_code_stable_bit(void)
+{
+	int i;
+
+	wrmsr(MSR_KVM_SYSTEM_TIME_NEW, KVMCLOCK_GPA | KVM_MSR_ENABLED);
+
+	for (i = 0; i < 3; i++) {
+		GUEST_SYNC(0);
+	}
+}
+
+/*
+ * Set @vcpu's TSC offset to @offset via KVM_SET_DEVICE_ATTR with the
+ * KVM_VCPU_TSC_CTRL / KVM_VCPU_TSC_OFFSET attribute.
+ */
+static void set_tsc_offset(struct kvm_vcpu *vcpu, uint64_t offset)
+{
+	struct kvm_device_attr attr = { 0 };
+
+	attr.group = KVM_VCPU_TSC_CTRL;
+	attr.attr = KVM_VCPU_TSC_OFFSET;
+	/* The attribute carries the address of the value, not the value. */
+	attr.addr = (__u64)(uintptr_t)&offset;
+
+	vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &attr);
+}
+
+/*
+ * Run @vcpu until its next exit, which must be a KVM_EXIT_IO carrying a
+ * UCALL_SYNC. A guest assertion is reported; any other ucall fails the test.
+ */
+static void run_vcpu_once(struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+	uint64_t cmd;
+
+	vcpu_run(vcpu);
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+	cmd = get_ucall(vcpu, &uc);
+	if (cmd == UCALL_SYNC)
+		return;
+
+	if (cmd == UCALL_ABORT) {
+		REPORT_GUEST_ASSERT(uc);
+	} else {
+		TEST_FAIL("Unexpected ucall");
+	}
+}
+
+/*
+ * Check what KVM_GET_CLOCK_GUEST reports on vCPU 0 of a two-vCPU VM as the
+ * vCPUs' TSCs are made to differ:
+ *  1. matched TSCs: the ioctl succeeds and PVCLOCK_TSC_STABLE_BIT is set;
+ *  2. vCPU 1 given a different TSC offset: if the ioctl still succeeds the
+ *     bit must be clear, otherwise the rest of the test is skipped;
+ *  3. vCPU 1 set to half its TSC frequency: the ioctl must fail with EINVAL.
+ */
+static void test_tsc_stable_bit(void)
+{
+	struct pvclock_vcpu_time_info pvti;
+	struct kvm_vcpu *vcpus[2];
+	struct kvm_vm *vm;
+	int tsc_khz;
+	int ret;
+
+	pr_info("Testing PVCLOCK_TSC_STABLE_BIT with matched/unmatched TSCs\n");
+
+	vm = vm_create_with_vcpus(2, guest_code_stable_bit, vcpus);
+	configure_pvclock(vm);
+
+	/*
+	 * Case 1: All TSCs matched (same frequency and offset).
+	 * Master clock should be active, PVCLOCK_TSC_STABLE_BIT set.
+	 */
+	run_vcpu_once(vcpus[0]);
+
+	ret = __vcpu_ioctl(vcpus[0], KVM_GET_CLOCK_GUEST, &pvti);
+	TEST_ASSERT(!ret, "GET_CLOCK_GUEST should succeed with matched TSCs");
+	TEST_ASSERT(pvti.flags & PVCLOCK_TSC_STABLE_BIT,
+		    "PVCLOCK_TSC_STABLE_BIT should be set with matched TSCs");
+
+	/*
+	 * Case 2: Different TSC offset, same frequency.
+	 * Master clock should still be active (frequency matches), but
+	 * PVCLOCK_TSC_STABLE_BIT should be cleared (offsets differ).
+	 */
+	set_tsc_offset(vcpus[1], 12345678);
+	run_vcpu_once(vcpus[1]);
+	run_vcpu_once(vcpus[0]);
+
+	ret = __vcpu_ioctl(vcpus[0], KVM_GET_CLOCK_GUEST, &pvti);
+	if (ret) {
+		/* Master clock disabled by offset mismatch — old kernel */
+		pr_info(" Skipping offset tests (master clock requires matched offsets)\n");
+		goto out_stable;
+	}
+	TEST_ASSERT(!(pvti.flags & PVCLOCK_TSC_STABLE_BIT),
+		    "PVCLOCK_TSC_STABLE_BIT should be clear with offset-mismatched TSCs");
+
+	/*
+	 * Case 3: Different TSC frequency.
+	 * Master clock should be disabled entirely.
+	 */
+	tsc_khz = __vcpu_ioctl(vcpus[1], KVM_GET_TSC_KHZ, NULL);
+	/* Don't feed a failed query's return value into KVM_SET_TSC_KHZ. */
+	TEST_ASSERT(tsc_khz > 0,
+		    "KVM_GET_TSC_KHZ should return a frequency, got %d (errno %d)",
+		    tsc_khz, errno);
+	vcpu_ioctl(vcpus[1], KVM_SET_TSC_KHZ,
+		   (void *)(unsigned long)(tsc_khz / 2));
+	/* Write TSC to trigger kvm_synchronize_tsc / kvm_track_tsc_matching */
+	vcpu_set_msr(vcpus[1], MSR_IA32_TSC, 0);
+	run_vcpu_once(vcpus[1]);
+
+	ret = __vcpu_ioctl(vcpus[0], KVM_GET_CLOCK_GUEST, &pvti);
+	TEST_ASSERT(ret && errno == EINVAL,
+		    "GET_CLOCK_GUEST should fail with frequency-mismatched TSCs, got %d (errno %d)",
+		    ret, errno);
+
+out_stable:
+	/* Full teardown: free the VM's memory as well as closing its fds. */
+	kvm_vm_free(vm);
+}
+
+/*
+ * Exercise KVM_GET_CLOCK_GUEST / KVM_SET_CLOCK_GUEST on a two-vCPU VM whose
+ * vCPUs have different TSC offsets.
+ *
+ * Reads the clock on both vCPUs, sleeps one second, writes vCPU 0's saved
+ * clock back, and checks that vCPU 0's clock read afterwards agrees with
+ * the saved one to within ±2ns when both are evaluated at the same TSC
+ * value. The test is skipped if the first KVM_GET_CLOCK_GUEST fails.
+ */
+static void test_clock_guest_with_offsets(void)
+{
+	struct pvclock_vcpu_time_info pvti0, pvti0_after, pvti1, pvti1_after;
+	uint64_t tsc_now, clk_from_old, clk_from_new;
+	struct kvm_vcpu *vcpus[2];
+	struct kvm_vm *vm;
+	int64_t delta;
+	int ret;
+
+	pr_info("Testing KVM_[GS]ET_CLOCK_GUEST with different TSC offsets\n");
+
+	vm = vm_create_with_vcpus(2, guest_code_stable_bit, vcpus);
+	configure_pvclock(vm);
+
+	/* Set different TSC offsets on the two vCPUs */
+	set_tsc_offset(vcpus[0], 0);
+	set_tsc_offset(vcpus[1], 1000000000ull);
+
+	/* Run both to establish kvmclock */
+	run_vcpu_once(vcpus[0]);
+	run_vcpu_once(vcpus[1]);
+
+	/* GET_CLOCK_GUEST on both — should succeed (master clock active) */
+	ret = __vcpu_ioctl(vcpus[0], KVM_GET_CLOCK_GUEST, &pvti0);
+	if (ret) {
+		pr_info(" Skipping (master clock requires matched offsets on this kernel)\n");
+		goto out;
+	}
+	ret = __vcpu_ioctl(vcpus[1], KVM_GET_CLOCK_GUEST, &pvti1);
+	TEST_ASSERT(!ret, "GET_CLOCK_GUEST on vcpu1 failed");
+
+	/* The tsc_timestamps should differ (different offsets) */
+	TEST_ASSERT(pvti0.tsc_timestamp != pvti1.tsc_timestamp,
+		    "tsc_timestamps should differ with different offsets");
+
+	/* Sleep to let time elapse, then restore vcpu0's clock */
+	sleep(1);
+	vcpu_ioctl(vcpus[0], KVM_SET_CLOCK_GUEST, &pvti0);
+
+	/* Run vcpu0 to process the clock update */
+	run_vcpu_once(vcpus[0]);
+
+	/* GET_CLOCK_GUEST on vcpu1 must still succeed after the SET on vcpu0 */
+	ret = __vcpu_ioctl(vcpus[1], KVM_GET_CLOCK_GUEST, &pvti1_after);
+	TEST_ASSERT(!ret, "GET_CLOCK_GUEST on vcpu1 after SET failed");
+
+	/*
+	 * After SET on vcpu0, verify the correction worked by getting
+	 * the clock on vcpu0 again. Computing kvmclock from the old and
+	 * the new parameters at the same TSC value (the new snapshot's
+	 * tsc_timestamp) should give the same result (within ±2ns).
+	 */
+	ret = __vcpu_ioctl(vcpus[0], KVM_GET_CLOCK_GUEST, &pvti0_after);
+	TEST_ASSERT(!ret, "GET_CLOCK_GUEST on vcpu0 after SET failed");
+
+	tsc_now = pvti0_after.tsc_timestamp;
+	clk_from_old = pvclock_read_cycles(&pvti0, tsc_now);
+	clk_from_new = pvclock_read_cycles(&pvti0_after, tsc_now);
+
+	delta = (int64_t)clk_from_new - (int64_t)clk_from_old;
+	TEST_ASSERT(delta >= -2 && delta <= 2,
+		    "clock correction delta should be <=2ns, got %ld ns",
+		    delta);
+
+	/*
+	 * Also verify that vcpu1's clock is still accessible (master
+	 * clock still active with different offsets).
+	 */
+	ret = __vcpu_ioctl(vcpus[1], KVM_GET_CLOCK_GUEST, &pvti1_after);
+	TEST_ASSERT(!ret, "GET_CLOCK_GUEST on vcpu1 after SET failed");
+
+out:
+	/* Full teardown: free the VM's memory as well as closing its fds. */
+	kvm_vm_free(vm);
+}
--
2.51.0
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |