|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v1.1 5/5] tests: Introduce a TSX test
On Mon, 2021-06-14 at 11:47 +0100, Andrew Cooper wrote:
> See the comment at the top of test-tsx.c for details.
>
> This covers various complexities encountered while trying to address
> the
> recent TSX deprecation on client parts.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> ---
> CC: Jan Beulich <JBeulich@xxxxxxxx>
> CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> CC: Wei Liu <wl@xxxxxxx>
>
> v1.1:
> * Set alternative guest policy, and check.
> * Cope with !HAP configurations.
> * Complete the comment at the top of test-tsx.c
> ---
> tools/tests/Makefile | 1 +
> tools/tests/tsx/.gitignore | 1 +
> tools/tests/tsx/Makefile | 43 ++++
> tools/tests/tsx/test-tsx.c | 515
> +++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 560 insertions(+)
> create mode 100644 tools/tests/tsx/.gitignore
> create mode 100644 tools/tests/tsx/Makefile
> create mode 100644 tools/tests/tsx/test-tsx.c
>
> diff --git a/tools/tests/Makefile b/tools/tests/Makefile
> index 8746aabe6b..25531a984a 100644
> --- a/tools/tests/Makefile
> +++ b/tools/tests/Makefile
> @@ -5,6 +5,7 @@ SUBDIRS-y :=
> SUBDIRS-y += resource
> SUBDIRS-$(CONFIG_X86) += cpu-policy
> SUBDIRS-$(CONFIG_X86) += mce-test
> +SUBDIRS-$(CONFIG_X86) += tsx
> ifneq ($(clang),y)
> SUBDIRS-$(CONFIG_X86) += x86_emulator
> endif
> diff --git a/tools/tests/tsx/.gitignore b/tools/tests/tsx/.gitignore
> new file mode 100644
> index 0000000000..97ec4db7ff
> --- /dev/null
> +++ b/tools/tests/tsx/.gitignore
> @@ -0,0 +1 @@
> +test-tsx
> diff --git a/tools/tests/tsx/Makefile b/tools/tests/tsx/Makefile
> new file mode 100644
> index 0000000000..7381a4f5a4
> --- /dev/null
> +++ b/tools/tests/tsx/Makefile
> @@ -0,0 +1,43 @@
> +XEN_ROOT = $(CURDIR)/../../..
> +include $(XEN_ROOT)/tools/Rules.mk
> +
> +TARGET := test-tsx
> +
> +.PHONY: all
> +all: $(TARGET)
> +
> +.PHONY: run
> +run: $(TARGET)
> + ./$(TARGET)
> +
> +.PHONY: clean
> +clean:
> + $(RM) -f -- *.o $(TARGET) $(DEPS_RM)
> +
> +.PHONY: distclean
> +distclean: clean
> + $(RM) -f -- *~
> +
> +.PHONY: install
> +install: all
> +
> +.PHONY: uninstall
> +uninstall:
> +
> +CFLAGS += -Werror -std=gnu11
> +CFLAGS += $(CFLAGS_xeninclude)
> +CFLAGS += $(CFLAGS_libxenctrl)
> +CFLAGS += $(CFLAGS_libxenguest)
> +CFLAGS += -I$(XEN_ROOT)/tools/libs/ctrl
> -I$(XEN_ROOT)/tools/libs/guest
> +CFLAGS += $(APPEND_CFLAGS)
> +
> +LDFLAGS += $(LDLIBS_libxenctrl)
> +LDFLAGS += $(LDLIBS_libxenguest)
> +LDFLAGS += $(APPEND_LDFLAGS)
> +
> +test-tsx.o: Makefile
> +
> +test-tsx: test-tsx.o
> + $(CC) -o $@ $< $(LDFLAGS)
> +
> +-include $(DEPS_INCLUDE)
> diff --git a/tools/tests/tsx/test-tsx.c b/tools/tests/tsx/test-tsx.c
> new file mode 100644
> index 0000000000..036b36e797
> --- /dev/null
> +++ b/tools/tests/tsx/test-tsx.c
> @@ -0,0 +1,515 @@
> +/*
> + * TSX settings and consistency tests
> + *
> + * This tests various behaviours and invariants with regards to
> TSX. It
> + * ideally wants running for several microcode versions, and all
> applicable
> + * tsx= commandline settings, on a single CPU, including after an S3
> + * suspend/resume event.
> + *
> + * It tests specifically:
> + * - The consistency of MSR_TSX_CTRL/MSR_TSX_FORCE_ABORT values
> across the
> + * system, and their accessibility WRT data in the host CPU
> policy.
> + * - The actual behaviour of RTM on the system.
> + * - Cross-check the default/max policies based on the actual RTM
> behaviour.
> + * - Create some guests, check their defaults, and check that the
> defaults
> + * can be changed.
> + */
> +
> +#define _GNU_SOURCE
> +
> +#include <err.h>
> +#include <errno.h>
> +#include <inttypes.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <sys/mman.h>
> +#include <sys/ucontext.h>
> +
> +#include <xenctrl.h>
> +#include <xenguest.h>
> +#include <xen-tools/libs.h>
> +
> +#include "xg_private.h"
> +
> +enum {
> +#define XEN_CPUFEATURE(name, value) X86_FEATURE_##name = value,
> +#include <xen/arch-x86/cpufeatureset.h>
> +};
> +#define bitmaskof(idx) (1u << ((idx) & 31))
> +
> +#define MSR_ARCH_CAPABILITIES 0x0000010a
> +#define ARCH_CAPS_TSX_CTRL (1 << 7)
> +#define MSR_TSX_FORCE_ABORT 0x0000010f
> +#define MSR_TSX_CTRL 0x00000122
> +
> +static unsigned int nr_failures;
> +#define fail(fmt, ...) \
> +({ \
> + nr_failures++; \
> + (void)printf(fmt, ##__VA_ARGS__); \
> +})
> +
> +static xc_interface *xch;
> +
> +/*
> + * Policies, arranged as an array for easy collection of all of
> them. We
> + * don't care about the raw policy (index 0) so reuse that for the
> guest
> + * policy.
> + */
> +static struct xc_cpu_policy policies[6];
> +#define guest_policy policies[0]
> +#define host policies[XEN_SYSCTL_cpu_policy_host]
> +#define pv_max policies[XEN_SYSCTL_cpu_policy_pv_max]
> +#define hvm_max policies[XEN_SYSCTL_cpu_policy_hvm_max]
> +#define pv_default policies[XEN_SYSCTL_cpu_policy_pv_default]
> +#define hvm_default policies[XEN_SYSCTL_cpu_policy_hvm_default]
> +
> +static bool xen_has_pv = true, xen_has_hvm = true;
> +
> +static xc_physinfo_t physinfo;
> +
> +static enum rtm_behaviour {
> + RTM_UD,
> + RTM_OK,
> + RTM_ABORT,
> +} rtm_behaviour;
> +
> +/*
> + * Test a specific TSX MSR for consistency across the system, taking
> into
> + * account whether it ought to be accessable or not.
> + *
> + * We can't query offline CPUs, so skip those if encountered. We
> don't care
> + * particularly for the exact MSR value, but we do care that it is
> the same
> + * everywhere.
> + */
> +static void test_tsx_msr_consistency(unsigned int msr, bool
> accessable)
> +{
> + uint64_t cpu0_val = ~0;
> +
> + for ( unsigned int cpu = 0; cpu <= physinfo.max_cpu_id; ++cpu )
> + {
> + xc_resource_entry_t ent = {
> + .u.cmd = XEN_RESOURCE_OP_MSR_READ,
> + .idx = msr,
> + };
> + xc_resource_op_t op = {
> + .cpu = cpu,
> + .entries = &ent,
> + .nr_entries = 1,
> + };
> + int rc = xc_resource_op(xch, 1, &op);
> +
> + if ( rc < 0 )
> + {
> + /* Don't emit a message for offline CPUs */
> + if ( errno != ENODEV )
> + fail(" xc_resource_op() for CPU%u failed: rc %d,
> errno %d - %s\n",
> + cpu, rc, errno, strerror(errno));
> + continue;
> + }
> +
> + if ( accessable )
> + {
> + if ( rc != 1 )
> + {
> + fail(" Expected 1 result, got %u\n", rc);
> + continue;
> + }
> + if ( ent.u.ret != 0 )
> + {
> + fail(" Expected ok, got %d\n", ent.u.ret);
> + continue;
> + }
> + }
> + else
> + {
> + if ( rc != 0 )
> + fail(" Expected 0 results, got %u\n", rc);
> + else if ( ent.u.ret != -EPERM )
> + fail(" Expected -EPERM, got %d\n", ent.u.ret);
> + continue;
> + }
> +
> + if ( cpu == 0 )
> + {
> + cpu0_val = ent.val;
> + printf(" CPU0 val %#"PRIx64"\n", cpu0_val);
> + }
> + else if ( ent.val != cpu0_val )
> + fail(" CPU%u val %#"PRIx64" differes from CPU0
> %#"PRIx64"\n",
Typo: differs?
> + cpu, ent.val, cpu0_val);
> + }
> +}
> +
> +/*
> + * Check all TSX MSRs, and in particular that their accessibility
> matches what
> + * is expressed in the host CPU policy.
> + */
> +static void test_tsx_msrs(void)
> +{
> + printf("Testing MSR_TSX_FORCE_ABORT consistency\n");
> + test_tsx_msr_consistency(
> + MSR_TSX_FORCE_ABORT, host.cpuid.feat.tsx_force_abort);
> +
> + printf("Testing MSR_TSX_CTRL consistency\n");
> + test_tsx_msr_consistency(
> + MSR_TSX_CTRL, host.msr.arch_caps.tsx_ctrl);
> +}
This is great, could we extend the test to all MSRs that Xen knows
about and are expected to be identical? Particularly
MSR_SPEC_CTRL, MSR_MCU_OPT_CTRL, and I see some MSRs used for errata
workarounds like MSR_MCU_OPT_CTRL, possiblye more.
> +
> +/*
> + * Probe for how RTM behaves, deliberately not inspecting CPUID.
> + * Distinguishes between "no support at all" (i.e. XBEGIN suffers
> #UD),
> + * working ok, and appearing to always abort.
> + */
> +static enum rtm_behaviour probe_rtm_behaviour(void)
> +{
> + for ( int i = 0; i < 1000; ++i )
> + {
> + /*
> + * Opencoding the RTM infrastructure from immintrin.h,
> because we
> + * still support older versions of GCC. ALso so we can
> include #UD
> + * detection logic.
> + */
> +#define XBEGIN_STARTED -1
> +#define XBEGIN_UD -2
> + unsigned int status = XBEGIN_STARTED;
> +
> + asm volatile (".Lxbegin: .byte 0xc7,0xf8,0,0,0,0" /* XBEGIN
> 1f; 1: */
> + : "+a" (status) :: "memory");
> + if ( status == XBEGIN_STARTED )
> + {
> + asm volatile (".byte 0x0f,0x01,0xd5" ::: "memory"); /*
> XEND */
> + return RTM_OK;
> + }
> + else if ( status == XBEGIN_UD )
> + return RTM_UD;
> + }
> +
> + return RTM_ABORT;
> +}
> +
> +static struct sigaction old_sigill;
> +
> +static void sigill_handler(int signo, siginfo_t *info, void *extra)
> +{
> + extern char xbegin_label[] asm(".Lxbegin");
> +
> + if ( info->si_addr == xbegin_label ||
> + memcmp(info->si_addr, "\xc7\xf8\x00\x00\x00\x00", 6) == 0 )
> + {
> + ucontext_t *context = extra;
> +
> + /*
> + * Found the XBEGIN instruction. Step over it, and update
> `status` to
> + * signal #UD.
> + */
> +#ifdef __x86_64__
> + context->uc_mcontext.gregs[REG_RIP] += 6;
> + context->uc_mcontext.gregs[REG_RAX] = XBEGIN_UD;
> +#else
> + context->uc_mcontext.gregs[REG_EIP] += 6;
> + context->uc_mcontext.gregs[REG_EAX] = XBEGIN_UD;
> +#endif
> + }
> + else
> + {
> + /*
> + * Not the SIGILL we're looking for... Restore the old
> handler and
> + * try again. Will likely coredump as a result.
> + */
> + sigaction(SIGILL, &old_sigill, NULL);
> + }
> +}
> +
> +static void test_rtm_behaviour(void)
> +{
> + struct sigaction new_sigill = {
> + .sa_flags = SA_SIGINFO,
> + .sa_sigaction = sigill_handler,
> + };
> + const char *str;
> +
> + printf("Testing RTM behaviour\n");
> +
> + /*
> + * Install a custom SIGILL handler while probing for RTM
> behaviour, as the
> + * XBEGIN instruction might suffer #UD.
> + */
> + sigaction(SIGILL, &new_sigill, &old_sigill);
> + rtm_behaviour = probe_rtm_behaviour();
> + sigaction(SIGILL, &old_sigill, NULL);
> +
> + switch ( rtm_behaviour )
> + {
> + case RTM_UD: str = "#UD"; break;
> + case RTM_OK: str = "OK"; break;
> + case RTM_ABORT: str = "Abort"; break;
> + default: str = NULL; break;
> + }
> +
> + if ( str )
> + printf(" Got %s\n", str);
> + else
> + return fail(" Got unexpected behaviour %d\n",
> rtm_behaviour);
> +
> + if ( host.cpuid.feat.rtm )
> + {
> + if ( rtm_behaviour == RTM_UD )
> + fail(" Host reports RTM, but appears unavailable\n");
> + }
> + else
> + {
> + if ( rtm_behaviour != RTM_UD )
> + fail(" Host reports no RTM, but appears available\n");
> + }
> +}
> +
> +static void dump_tsx_details(const struct xc_cpu_policy *p, const
> char *pref)
> +{
> + printf(" %s RTM %u, HLE %u, TSX_FORCE_ABORT %u,
> RTM_ALWAYS_ABORT %u, TSX_CTRL %u\n",
> + pref,
> + p->cpuid.feat.rtm,
> + p->cpuid.feat.hle,
> + p->cpuid.feat.tsx_force_abort,
> + p->cpuid.feat.rtm_always_abort,
> + p->msr.arch_caps.tsx_ctrl);
> +}
> +
> +/* Sanity test various invariants we expect in the default/max
> policies. */
> +static void test_guest_policies(const struct xc_cpu_policy *max,
> + const struct xc_cpu_policy *def)
> +{
> + const struct cpuid_policy *cm = &max->cpuid;
> + const struct cpuid_policy *cd = &def->cpuid;
> + const struct msr_policy *mm = &max->msr;
> + const struct msr_policy *md = &def->msr;
> +
> + dump_tsx_details(max, "Max:");
> + dump_tsx_details(def, "Def:");
> +
> + if ( ((cm->feat.raw[0].d | cd->feat.raw[0].d) &
> + (bitmaskof(X86_FEATURE_TSX_FORCE_ABORT) |
> + bitmaskof(X86_FEATURE_RTM_ALWAYS_ABORT))) ||
> + ((mm->arch_caps.raw | md->arch_caps.raw) &
> ARCH_CAPS_TSX_CTRL) )
> + fail(" Xen-only TSX controls offered to guest\n");
> +
> + switch ( rtm_behaviour )
> + {
> + case RTM_UD:
> + if ( (cm->feat.raw[0].b | cd->feat.raw[0].b) &
> + (bitmaskof(X86_FEATURE_HLE) |
> bitmaskof(X86_FEATURE_RTM)) )
> + fail(" HLE/RTM offered to guests despite not being
> available\n");
> + break;
> +
> + case RTM_ABORT:
> + if ( cd->feat.raw[0].b &
> + (bitmaskof(X86_FEATURE_HLE) |
> bitmaskof(X86_FEATURE_RTM)) )
> + fail(" HLE/RTM offered to guests by default despite
> not being usable\n");
> + break;
> +
> + case RTM_OK:
> + if ( !cm->feat.rtm || !cd->feat.rtm )
> + fail(" RTM not offered to guests despite being
> available\n");
> + break;
> + }
> +
> + if ( cd->feat.hle )
> + fail(" Fail: HLE offered in default policy\n");
> +}
> +
> +static void test_def_max_policies(void)
> +{
> + if ( xen_has_pv )
> + {
> + printf("Testing PV default/max policies\n");
> + test_guest_policies(&pv_max, &pv_default);
> + }
> +
> + if ( xen_has_hvm )
> + {
> + printf("Testing HVM default/max policies\n");
> + test_guest_policies(&hvm_max, &hvm_default);
> + }
> +}
> +
> +static void test_guest(struct xen_domctl_createdomain *c)
> +{
> + uint32_t domid = 0;
> + int rc;
> +
> + rc = xc_domain_create(xch, &domid, c);
> + if ( rc )
> + return fail(" Domain create failure: %d - %s\n",
> + errno, strerror(errno));
> +
> + printf(" Created d%u\n", domid);
> +
> + rc = xc_cpu_policy_get_domain(xch, domid, &guest_policy);
> + if ( rc )
> + {
> + fail(" Failed to obtain domain policy: %d - %s\n",
> + errno, strerror(errno));
> + goto out;
> + }
> +
> + dump_tsx_details(&guest_policy, "Cur:");
> +
> + /*
> + * Check defaults given to the guest.
> + */
> + if ( guest_policy.cpuid.feat.rtm != (rtm_behaviour == RTM_OK) )
> + fail(" RTM %u in guest, despite rtm behaviour\n",
> + guest_policy.cpuid.feat.rtm);
> +
> + if ( guest_policy.cpuid.feat.hle ||
> + guest_policy.cpuid.feat.tsx_force_abort ||
> + guest_policy.cpuid.feat.rtm_always_abort ||
> + guest_policy.msr.arch_caps.tsx_ctrl )
> + fail(" Unexpected features advertised\n");
> +
> + if ( host.cpuid.feat.rtm )
> + {
> + unsigned int _7b0;
> +
> + /*
> + * If host RTM is available, all combinations of guest flags
> should be
> + * possible. Flip both HLE/RTM to check non-default
> settings.
> + */
> + _7b0 = (guest_policy.cpuid.feat.raw[0].b ^=
> + (bitmaskof(X86_FEATURE_HLE) |
> bitmaskof(X86_FEATURE_RTM)));
> +
> + /* Set the new policy. */
> + rc = xc_cpu_policy_set_domain(xch, domid, &guest_policy);
> + if ( rc )
> + {
> + fail(" Failed to set domain policy: %d - %s\n",
> + errno, strerror(errno));
> + goto out;
> + }
> +
> + /* Re-get the new policy. */
> + rc = xc_cpu_policy_get_domain(xch, domid, &guest_policy);
> + if ( rc )
> + {
> + fail(" Failed to obtain domain policy: %d - %s\n",
> + errno, strerror(errno));
> + goto out;
> + }
> +
> + dump_tsx_details(&guest_policy, "Cur:");
> +
> + if ( guest_policy.cpuid.feat.raw[0].b != _7b0 )
> + {
> + fail(" Expected CPUID.7[1].b 0x%08x differes from
> actual 0x%08x\n",
> + _7b0, guest_policy.cpuid.feat.raw[0].b);
> + goto out;
> + }
> + }
> +
> + out:
> + rc = xc_domain_destroy(xch, domid);
> + if ( rc )
> + fail(" Failed to destroy domain: %d - %s\n",
> + errno, strerror(errno));
> +}
> +
> +static void test_guests(void)
> +{
> + if ( xen_has_pv )
> + {
> + struct xen_domctl_createdomain c = {
> + .max_vcpus = 1,
> + .max_grant_frames = 1,
> + };
> +
> + printf("Testing PV guest\n");
> + test_guest(&c);
> + }
> +
> + if ( xen_has_hvm )
> + {
> + struct xen_domctl_createdomain c = {
> + .flags = XEN_DOMCTL_CDF_hvm,
> + .max_vcpus = 1,
> + .max_grant_frames = 1,
> + .arch = {
> + .emulation_flags = XEN_X86_EMU_LAPIC,
> + },
> + };
> +
> + if ( physinfo.capabilities & XEN_SYSCTL_PHYSCAP_hap )
> + c.flags |= XEN_DOMCTL_CDF_hap;
> + else if ( !(physinfo.capabilities &
> XEN_SYSCTL_PHYSCAP_shadow) )
> + return fail(" HVM available, but neither HAP nor
> Shadow\n");
> +
> + printf("Testing HVM guest\n");
> + test_guest(&c);
> + }
> +}
> +
> +/* Obtain some general data, then run the tests. */
> +static void test_tsx(void)
> +{
> + int rc;
> +
> + /* Read all policies except raw. */
> + for ( int i = XEN_SYSCTL_cpu_policy_host;
> + i <= XEN_SYSCTL_cpu_policy_hvm_default; ++i )
> + {
> + rc = xc_cpu_policy_get_system(xch, i, &policies[i]);
> +
> + if ( rc == -1 && errno == EOPNOTSUPP )
> + {
> + /*
> + * Use EOPNOTSUPP to spot Xen missing CONFIG_{PV,HVM},
> and adjust
> + * later testing accordingly.
> + */
> + switch ( i )
> + {
> + case XEN_SYSCTL_cpu_policy_pv_max:
> + case XEN_SYSCTL_cpu_policy_pv_default:
> + if ( xen_has_pv )
> + printf(" Xen doesn't support PV\n");
> + xen_has_pv = false;
> + continue;
> +
> + case XEN_SYSCTL_cpu_policy_hvm_max:
> + case XEN_SYSCTL_cpu_policy_hvm_default:
> + if ( xen_has_hvm )
> + printf(" Xen doesn't support HVM\n");
> + xen_has_hvm = false;
> + continue;
> + }
> + }
> + if ( rc )
> + return fail("Failed to obtain policy[%u]: %d - %s\n",
> + i, errno, strerror(errno));
> + }
> +
> + rc = xc_physinfo(xch, &physinfo);
> + if ( rc )
> + return fail("Failed to obtain physinfo: %d - %s\n",
> + errno, strerror(errno));
> +
> + printf(" Got %u CPUs\n", physinfo.max_cpu_id + 1);
> +
> + test_tsx_msrs();
> + test_rtm_behaviour();
> + test_def_max_policies();
> + test_guests();
> +}
> +
> +int main(int argc, char **argv)
> +{
> + printf("TSX tests\n");
> +
> + xch = xc_interface_open(NULL, NULL, 0);
> +
> + if ( !xch )
> + err(1, "xc_interface_open");
> +
> + test_tsx();
> +
> + return !!nr_failures;
> +}
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |