[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v3 12/22] x86/CPUID: enable AMX leaves
This requires bumping the number of basic leaves we support. Apart from this, the logic is modeled as closely as possible on the handling of leaf 7. The checks in x86_cpu_policies_are_compatible() may be stricter than they ultimately need to be, but I'd rather start out on the safe side. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- v2: New. --- It's not clear to me to what extent libxl_cpuid.c would want extending: It doesn't appear to offer a way to override the maximum subleaf of leaf 7. In fact, I can't spot a max extended leaf override mechanism either. --- a/tools/tests/cpu-policy/test-cpu-policy.c +++ b/tools/tests/cpu-policy/test-cpu-policy.c @@ -190,6 +190,40 @@ static void test_cpuid_serialise_success }, .nr_leaves = 4 + 0xd + 1 + 1, }, + + /* Leaf 0x1d serialisation stops at max_palette. */ + { + .name = "empty leaf 0x1d", + .p = { + .basic.max_leaf = 0x1d, + }, + .nr_leaves = 4 + 0x1d + 1, + }, + { + .name = "partial leaf 0x1d", + .p = { + .basic.max_leaf = 0x1d, + .tile.max_palette = 1, + }, + .nr_leaves = 4 + 0x1d + 1 + 1, + }, + + /* Leaf 0x1e serialisation stops at 0. 
*/ + { + .name = "empty leaf 0x1e", + .p = { + .basic.max_leaf = 0x1e, + }, + .nr_leaves = 4 + 0x1e + 1, + }, + { + .name = "partial leaf 0x1e", + .p = { + .basic.max_leaf = 0x1e, + .tmul.maxk = 16, + }, + .nr_leaves = 4 + 0x1e + 1, + }, }; printf("Testing CPUID serialise success:\n"); @@ -321,6 +355,14 @@ static void test_cpuid_deserialise_failu .leaf = { .leaf = 0xd, .subleaf = CPUID_GUEST_NR_XSTATE }, }, { + .name = "OoB tile leaf", + .leaf = { .leaf = 0x1d, .subleaf = CPUID_GUEST_NR_PALETTE }, + }, + { + .name = "OoB tmul leaf", + .leaf = { .leaf = 0x1e, .subleaf = CPUID_GUEST_NR_TMUL }, + }, + { .name = "OoB extd leaf", .leaf = { .leaf = 0x80000000 | CPUID_GUEST_NR_EXTD }, }, @@ -432,6 +474,8 @@ static void test_cpuid_out_of_range_clea .topo.raw[0].a = 0xc2, .xstate.raw[0].a = 0xc2, .xstate.raw[1].a = 0xc2, + .tile.raw[0].a = 0xc2, + .tmul.raw[0].a = 0xc2, }, }, { @@ -447,6 +491,8 @@ static void test_cpuid_out_of_range_clea .topo.raw[0].a = 0xc2, .xstate.raw[0].a = 0xc2, .xstate.raw[1].a = 0xc2, + .tile.raw[0].a = 0xc2, + .tmul.raw[0].a = 0xc2, }, }, { @@ -461,6 +507,8 @@ static void test_cpuid_out_of_range_clea .topo.raw[0].a = 0xc2, .xstate.raw[0].a = 0xc2, .xstate.raw[1].a = 0xc2, + .tile.raw[0].a = 0xc2, + .tmul.raw[0].a = 0xc2, }, }, { @@ -474,6 +522,8 @@ static void test_cpuid_out_of_range_clea .topo.raw[1].b = 0xc2, .xstate.raw[0].a = 0xc2, .xstate.raw[1].a = 0xc2, + .tile.raw[0].a = 0xc2, + .tmul.raw[0].a = 0xc2, }, }, { @@ -488,6 +538,8 @@ static void test_cpuid_out_of_range_clea .xstate.raw[2].b = 0xc2, .xstate.raw[3].b = 0xc2, + .tile.raw[0].a = 0xc2, + .tmul.raw[0].a = 0xc2, }, }, { @@ -530,6 +582,34 @@ static void test_cpuid_out_of_range_clea }, }, { + .name = "tile no palette", + .nr_markers = 0, + .p = { + /* First two subleaves invalid as a pair. Others cleared. 
*/ + .basic.max_leaf = 0x1d, + .xstate.xcr0_low = XSTATE_FP_SSE, + + .tile.raw[0].a = 0xc2, + .tile.raw[1].b = 0xc2, + .tmul.raw[0].a = 0xc2, + }, + }, + { + .name = "tile palette 1", + .nr_markers = 1, + .p = { + /* First two subleaves valid as a pair. Others cleared. */ + .basic.max_leaf = 0x1d, + .feat.amx_tile = 1, + .xstate.xcr0_low = XSTATE_FP_SSE | X86_XCR0_TILECFG | + X86_XCR0_TILEDATA, + .tile.raw[0].a = 1, + .tile.raw[1].b = 0xc2, + + .tmul.raw[0].a = 0xc2, + }, + }, + { .name = "extd", .nr_markers = 1, .p = { @@ -624,6 +704,24 @@ static void test_cpuid_maximum_leaf_shri }, }, { + .name = "tile", + .p = { + /* Subleaf 1 only with some valid value. */ + .basic.max_leaf = 0x1d, + .tile.raw[0].a = 1, + .tile.raw[1].a = 1024, + }, + }, + { + .name = "tmul", + .p = { + /* Subleaf 0 only with some valid values. */ + .basic.max_leaf = 0x1e, + .tmul.maxk = 16, + .tmul.maxn = 16, + }, + }, + { .name = "extd", .p = { /* Commonly available information only. */ @@ -643,6 +741,7 @@ static void test_cpuid_maximum_leaf_shri p->basic.max_leaf = ARRAY_SIZE(p->basic.raw) - 1; p->feat.max_subleaf = ARRAY_SIZE(p->feat.raw) - 1; + p->tile.max_palette = ARRAY_SIZE(p->tile.raw) - 1; p->extd.max_leaf = 0x80000000 | (ARRAY_SIZE(p->extd.raw) - 1); x86_cpuid_policy_shrink_max_leaves(p); @@ -660,6 +759,10 @@ static void test_cpuid_maximum_leaf_shri fail(" Test %s feat fail - expected %#x, got %#x\n", t->name, t->p.feat.max_subleaf, p->feat.max_subleaf); + if ( p->tile.max_palette != t->p.tile.max_palette ) + fail(" Test %s tile fail - expected %#x, got %#x\n", + t->name, t->p.tile.max_palette, p->tile.max_palette); + free(p); } } --- a/xen/arch/x86/cpuid.c +++ b/xen/arch/x86/cpuid.c @@ -233,6 +233,29 @@ static void recalculate_xstate(struct cp } } +static void recalculate_tile(struct cpuid_policy *p) +{ + unsigned int i; + + if ( !p->feat.amx_tile ) + { + memset(&p->tile, 0, sizeof(p->tile)); + memset(&p->tmul, 0, sizeof(p->tmul)); + return; + } + + p->tile.raw[0].b = 
p->tile.raw[0].c = p->tile.raw[0].d = 0; + + for ( i = 1; i <= p->tile.max_palette; ++i ) + { + p->tile.raw[i].c &= 0x0000ffff; + p->tile.raw[i].d = 0; + } + + p->tmul.raw[0].a = p->tmul.raw[0].c = p->tmul.raw[0].d = 0; + p->tmul.raw[0].b &= 0x00ffffff; +} + /* * Misc adjustments to the policy. Mostly clobbering reserved fields and * duplicating shared fields. Intentionally hidden fields are annotated. @@ -252,6 +275,8 @@ static void recalculate_misc(struct cpui p->basic.raw[0xc] = EMPTY_LEAF; + zero_leaves(p->basic.raw, 0xe, 0x1c); + p->extd.e1d &= ~CPUID_COMMON_1D_FEATURES; /* Most of Power/RAS hidden from guests. */ @@ -326,6 +351,7 @@ static void __init calculate_host_policy cpuid_featureset_to_policy(boot_cpu_data.x86_capability, p); recalculate_xstate(p); + recalculate_tile(p); recalculate_misc(p); /* When vPMU is disabled, drop it from the host policy. */ @@ -413,6 +439,7 @@ static void __init calculate_pv_max_poli sanitise_featureset(pv_featureset); cpuid_featureset_to_policy(pv_featureset, p); recalculate_xstate(p); + recalculate_tile(p); p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. 
*/ @@ -437,6 +464,7 @@ static void __init calculate_pv_def_poli sanitise_featureset(pv_featureset); cpuid_featureset_to_policy(pv_featureset, p); recalculate_xstate(p); + recalculate_tile(p); x86_cpuid_policy_shrink_max_leaves(p); } @@ -504,6 +532,7 @@ static void __init calculate_hvm_max_pol sanitise_featureset(hvm_featureset); cpuid_featureset_to_policy(hvm_featureset, p); recalculate_xstate(p); + recalculate_tile(p); x86_cpuid_policy_shrink_max_leaves(p); } @@ -530,6 +559,7 @@ static void __init calculate_hvm_def_pol sanitise_featureset(hvm_featureset); cpuid_featureset_to_policy(hvm_featureset, p); recalculate_xstate(p); + recalculate_tile(p); x86_cpuid_policy_shrink_max_leaves(p); } @@ -600,6 +630,7 @@ void recalculate_cpuid_policy(struct dom p->basic.max_leaf = min(p->basic.max_leaf, max->basic.max_leaf); p->feat.max_subleaf = min(p->feat.max_subleaf, max->feat.max_subleaf); + p->tile.max_palette = min(p->tile.max_palette, max->tile.max_palette); p->extd.max_leaf = 0x80000000 | min(p->extd.max_leaf & 0xffff, ((p->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) @@ -690,6 +721,7 @@ void recalculate_cpuid_policy(struct dom p->extd.maxlinaddr = p->extd.lm ? 
48 : 32; recalculate_xstate(p); + recalculate_tile(p); recalculate_misc(p); for ( i = 0; i < ARRAY_SIZE(p->cache.raw); ++i ) @@ -812,6 +844,22 @@ void guest_cpuid(const struct vcpu *v, u *res = array_access_nospec(p->xstate.raw, subleaf); break; + case 0x1d: + ASSERT(p->tile.max_palette < ARRAY_SIZE(p->tile.raw)); + if ( subleaf > min_t(uint32_t, p->tile.max_palette, + ARRAY_SIZE(p->tile.raw) - 1) ) + return; + + *res = array_access_nospec(p->tile.raw, subleaf); + break; + + case 0x1e: + if ( subleaf >= ARRAY_SIZE(p->tmul.raw) ) + return; + + *res = array_access_nospec(p->tmul.raw, subleaf); + break; + default: *res = array_access_nospec(p->basic.raw, leaf); break; @@ -1145,6 +1193,8 @@ static void __init __maybe_unused build_ sizeof(raw_cpuid_policy.feat.raw)); BUILD_BUG_ON(sizeof(raw_cpuid_policy.xstate) != sizeof(raw_cpuid_policy.xstate.raw)); + BUILD_BUG_ON(sizeof(raw_cpuid_policy.tile) != + sizeof(raw_cpuid_policy.tile.raw)); BUILD_BUG_ON(sizeof(raw_cpuid_policy.extd) != sizeof(raw_cpuid_policy.extd.raw)); } --- a/xen/include/xen/lib/x86/cpuid.h +++ b/xen/include/xen/lib/x86/cpuid.h @@ -78,11 +78,13 @@ unsigned int x86_cpuid_lookup_vendor(uin */ const char *x86_cpuid_vendor_to_str(unsigned int vendor); -#define CPUID_GUEST_NR_BASIC (0xdu + 1) +#define CPUID_GUEST_NR_BASIC (0x1eu + 1) #define CPUID_GUEST_NR_CACHE (5u + 1) #define CPUID_GUEST_NR_FEAT (1u + 1) #define CPUID_GUEST_NR_TOPO (1u + 1) #define CPUID_GUEST_NR_XSTATE (62u + 1) +#define CPUID_GUEST_NR_PALETTE (1u + 1) +#define CPUID_GUEST_NR_TMUL (0u + 1) #define CPUID_GUEST_NR_EXTD_INTEL (0x8u + 1) #define CPUID_GUEST_NR_EXTD_AMD (0x1cu + 1) #define CPUID_GUEST_NR_EXTD MAX(CPUID_GUEST_NR_EXTD_INTEL, \ @@ -225,6 +227,35 @@ struct cpuid_policy } comp[CPUID_GUEST_NR_XSTATE]; } xstate; + /* Structured tile information leaf: 0x0000001d[xx] */ + union { + struct cpuid_leaf raw[CPUID_GUEST_NR_PALETTE]; + struct { + /* Subleaf 0. 
*/ + uint32_t max_palette; + uint32_t /* b */:32, /* c */:32, /* d */:32; + }; + + /* Per-palette common state. Valid for i >= 1. */ + struct { + uint16_t tot_bytes, bytes_per_tile; + uint16_t bytes_per_row, num_regs; + uint16_t max_rows, :16; + uint32_t /* d */:32; + } palette[CPUID_GUEST_NR_PALETTE]; + } tile; + + /* Structured tmul information leaf: 0x0000001e[xx] */ + union { + struct cpuid_leaf raw[CPUID_GUEST_NR_TMUL]; + struct { + /* Subleaf 0. */ + uint32_t /* a */:32; + uint32_t maxk:8, maxn:16, :8; + uint32_t /* c */:32, /* d */:32; + }; + } tmul; + /* Extended leaves: 0x800000xx */ union { struct cpuid_leaf raw[CPUID_GUEST_NR_EXTD]; --- a/xen/lib/x86/cpuid.c +++ b/xen/lib/x86/cpuid.c @@ -170,6 +170,18 @@ void x86_cpuid_policy_fill_native(struct } } + if ( p->basic.max_leaf >= 0x1d ) + { + cpuid_count_leaf(0x1d, 0, &p->tile.raw[0]); + + for ( i = 1; i <= MIN(p->tile.max_palette, + ARRAY_SIZE(p->tile.raw) - 1); ++i ) + cpuid_count_leaf(0x1d, i, &p->tile.raw[i]); + } + + if ( p->basic.max_leaf >= 0x1e ) + cpuid_count_leaf(0x1e, 0, &p->tmul.raw[0]); + /* Extended leaves. */ cpuid_leaf(0x80000000, &p->extd.raw[0]); for ( i = 1; i <= MIN(p->extd.max_leaf & 0xffffU, @@ -232,6 +244,19 @@ void x86_cpuid_policy_clear_out_of_range ARRAY_SIZE(p->xstate.raw) - 1); } + if ( p->basic.max_leaf < 0x1d || + (cpuid_policy_xstates(p) & + (X86_XCR0_TILECFG | X86_XCR0_TILEDATA)) != + (X86_XCR0_TILECFG | X86_XCR0_TILEDATA) ) + memset(p->tile.raw, 0, sizeof(p->tile.raw)); + else + zero_leaves(p->tile.raw, p->tile.max_palette + 1, + ARRAY_SIZE(p->tile.raw) - 1); + + if ( p->basic.max_leaf < 0x1e || !p->tile.max_palette || + (!p->feat.amx_int8 && !p->feat.amx_bf16) ) + memset(p->tmul.raw, 0, sizeof(p->tmul.raw)); + zero_leaves(p->extd.raw, ((p->extd.max_leaf >> 16) == 0x8000 ? 
(p->extd.max_leaf & 0xffff) + 1 : 0), @@ -244,6 +269,8 @@ void x86_cpuid_policy_bound_max_leaves(s min_t(uint32_t, p->basic.max_leaf, ARRAY_SIZE(p->basic.raw) - 1); p->feat.max_subleaf = min_t(uint32_t, p->feat.max_subleaf, ARRAY_SIZE(p->feat.raw) - 1); + p->tile.max_palette = + min_t(uint32_t, p->tile.max_palette, ARRAY_SIZE(p->tile.raw) - 1); p->extd.max_leaf = 0x80000000 | min_t(uint32_t, p->extd.max_leaf & 0xffff, ARRAY_SIZE(p->extd.raw) - 1); } @@ -271,6 +298,21 @@ void x86_cpuid_policy_shrink_max_leaves( */ p->basic.raw[0xd] = p->xstate.raw[0]; + for ( i = p->tile.max_palette; i; --i ) + if ( p->tile.raw[i].a | p->tile.raw[i].b | + p->tile.raw[i].c | p->tile.raw[i].d ) + break; + if ( i ) + p->tile.max_palette = i; + else + { + ASSERT(!p->feat.amx_tile); + zero_leaves(p->tile.raw, 0, 0); + } + p->basic.raw[0x1d] = p->tile.raw[0]; + + p->basic.raw[0x1e] = p->tmul.raw[0]; + for ( i = p->basic.max_leaf; i; --i ) if ( p->basic.raw[i].a | p->basic.raw[i].b | p->basic.raw[i].c | p->basic.raw[i].d ) @@ -404,6 +446,19 @@ int x86_cpuid_copy_to_buffer(const struc break; } + case 0x1d: + for ( subleaf = 0; + subleaf <= MIN(p->tile.max_palette, + ARRAY_SIZE(p->tile.raw) - 1); ++subleaf ) + COPY_LEAF(leaf, subleaf, &p->tile.raw[subleaf]); + break; + + case 0x1e: + for ( subleaf = 0; + subleaf <= ARRAY_SIZE(p->tmul.raw) - 1; ++subleaf ) + COPY_LEAF(leaf, subleaf, &p->tmul.raw[subleaf]); + break; + default: COPY_LEAF(leaf, XEN_CPUID_NO_SUBLEAF, &p->basic.raw[leaf]); break; @@ -496,6 +551,20 @@ int x86_cpuid_copy_from_buffer(struct cp array_access_nospec(p->xstate.raw, data.subleaf) = l; break; + case 0x1d: + if ( data.subleaf >= ARRAY_SIZE(p->tile.raw) ) + goto out_of_range; + + array_access_nospec(p->tile.raw, data.subleaf) = l; + break; + + case 0x1e: + if ( data.subleaf >= ARRAY_SIZE(p->tmul.raw) ) + goto out_of_range; + + array_access_nospec(p->tmul.raw, data.subleaf) = l; + break; + default: if ( data.subleaf != XEN_CPUID_NO_SUBLEAF ) goto out_of_range; --- 
a/xen/lib/x86/policy.c +++ b/xen/lib/x86/policy.c @@ -7,6 +7,7 @@ int x86_cpu_policies_are_compatible(cons struct cpu_policy_errors *err) { struct cpu_policy_errors e = INIT_CPU_POLICY_ERRORS; + unsigned int i; int ret = -EINVAL; #define NA XEN_CPUID_NO_SUBLEAF @@ -21,6 +22,31 @@ int x86_cpu_policies_are_compatible(cons if ( guest->cpuid->feat.max_subleaf > host->cpuid->feat.max_subleaf ) FAIL_CPUID(7, 0); + if ( (guest->cpuid->feat.amx_tile && !guest->cpuid->tile.max_palette) || + guest->cpuid->tile.max_palette > host->cpuid->tile.max_palette ) + FAIL_CPUID(0x1d, 0); + + for ( i = 1; i <= guest->cpuid->tile.max_palette; ++i ) + { + const typeof(guest->cpuid->tile.palette[0]) *gt, *ht; + + gt = &guest->cpuid->tile.palette[i]; + ht = &host->cpuid->tile.palette[i]; + + if ( gt->tot_bytes != ht->tot_bytes || + gt->bytes_per_tile != ht->bytes_per_tile || + gt->bytes_per_row != ht->bytes_per_row || + !gt->num_regs || gt->num_regs > ht->num_regs || + !gt->max_rows || gt->max_rows > ht->max_rows ) + FAIL_CPUID(0x1d, i); + } + + if ( ((guest->cpuid->feat.amx_int8 || guest->cpuid->feat.amx_bf16) && + (!guest->cpuid->tmul.maxk || !guest->cpuid->tmul.maxn)) || + guest->cpuid->tmul.maxk > host->cpuid->tmul.maxk || + guest->cpuid->tmul.maxn > host->cpuid->tmul.maxn ) + FAIL_CPUID(0x1e, 0); + if ( guest->cpuid->extd.max_leaf > host->cpuid->extd.max_leaf ) FAIL_CPUID(0x80000000, NA); --- a/xen/lib/x86/private.h +++ b/xen/lib/x86/private.h @@ -17,13 +17,17 @@ #else +#include <assert.h> #include <errno.h> #include <inttypes.h> #include <stdbool.h> #include <stddef.h> #include <string.h> +#define ASSERT assert + #include <xen/asm/msr-index.h> +#include <xen/asm/x86-defns.h> #include <xen/asm/x86-vendors.h> #include <xen-tools/libs.h>
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |