[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 10/17] x86/xstate: enable AMX components



These being controlled by XCR0, enabling support is relatively
straightforward. Note however that there won't be any use of them until
their dependent ISA extension CPUID flags get exposed, not the least due
to the way recalculate_xstate() handles the dependencies in kind of a
reverse manner.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v2: New.

--- a/tools/libs/light/libxl_cpuid.c
+++ b/tools/libs/light/libxl_cpuid.c
@@ -219,6 +219,9 @@ int libxl_cpuid_parse_config(libxl_cpuid
         {"md-clear",     0x00000007,  0, CPUID_REG_EDX, 10,  1},
         {"serialize",    0x00000007,  0, CPUID_REG_EDX, 14,  1},
         {"cet-ibt",      0x00000007,  0, CPUID_REG_EDX, 20,  1},
+        {"amx-bf16",     0x00000007,  0, CPUID_REG_EDX, 22,  1},
+        {"amx-tile",     0x00000007,  0, CPUID_REG_EDX, 24,  1},
+        {"amx-int8",     0x00000007,  0, CPUID_REG_EDX, 25,  1},
         {"ibrsb",        0x00000007,  0, CPUID_REG_EDX, 26,  1},
         {"stibp",        0x00000007,  0, CPUID_REG_EDX, 27,  1},
         {"l1d-flush",    0x00000007,  0, CPUID_REG_EDX, 28,  1},
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -167,7 +167,8 @@ static const char *const str_7d0[32] =
 
     [18] = "pconfig",
     [20] = "cet-ibt",
-
+    [22] = "amx-bf16",
+    [24] = "amx-tile",      [25] = "amx-int8",
     [26] = "ibrsb",         [27] = "stibp",
     [28] = "l1d-flush",     [29] = "arch-caps",
     [30] = "core-caps",     [31] = "ssbd",
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -195,6 +195,14 @@ static void recalculate_xstate(struct cp
                           xstate_size(X86_XCR0_PKRU_POS));
     }
 
+    if ( p->feat.amx_tile )
+    {
+        xstates |= X86_XCR0_TILECFG | X86_XCR0_TILEDATA;
+        xstate_size = max(xstate_size,
+                          xstate_offset(X86_XCR0_TILEDATA_POS) +
+                          xstate_size(X86_XCR0_TILEDATA_POS));
+    }
+
     p->xstate.max_size  =  xstate_size;
     p->xstate.xcr0_low  =  xstates & ~XSTATE_XSAVES_ONLY;
     p->xstate.xcr0_high = (xstates & ~XSTATE_XSAVES_ONLY) >> 32;
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -642,6 +642,10 @@ static bool valid_xcr0(uint64_t xcr0)
     if ( !(xcr0 & X86_XCR0_BNDREGS) != !(xcr0 & X86_XCR0_BNDCSR) )
         return false;
 
+    /* TILECFG and TILEDATA must be the same. */
+    if ( !(xcr0 & X86_XCR0_TILECFG) != !(xcr0 & X86_XCR0_TILEDATA) )
+        return false;
+
     return true;
 }
 
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -96,6 +96,10 @@
 #define X86_XCR0_HI_ZMM           (1ULL << X86_XCR0_HI_ZMM_POS)
 #define X86_XCR0_PKRU_POS         9
 #define X86_XCR0_PKRU             (1ULL << X86_XCR0_PKRU_POS)
+#define X86_XCR0_TILECFG_POS      17
+#define X86_XCR0_TILECFG          (1ULL << X86_XCR0_TILECFG_POS)
+#define X86_XCR0_TILEDATA_POS     18
+#define X86_XCR0_TILEDATA         (1ULL << X86_XCR0_TILEDATA_POS)
 #define X86_XCR0_LWP_POS          62
 #define X86_XCR0_LWP              (1ULL << X86_XCR0_LWP_POS)
 
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -265,6 +265,9 @@ XEN_CPUFEATURE(MD_CLEAR,      9*32+10) /
 XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
 XEN_CPUFEATURE(SERIALIZE,     9*32+14) /*a  SERIALIZE insn */
 XEN_CPUFEATURE(CET_IBT,       9*32+20) /*   CET - Indirect Branch Tracking */
+XEN_CPUFEATURE(AMX_BF16,      9*32+22) /*   AMX BFloat16 instructions */
+XEN_CPUFEATURE(AMX_TILE,      9*32+24) /*   AMX tile architecture */
+XEN_CPUFEATURE(AMX_INT8,      9*32+25) /*   AMX 8-bit integer instructions */
 XEN_CPUFEATURE(IBRSB,         9*32+26) /*A  IBRS and IBPB support (used by 
Intel) */
 XEN_CPUFEATURE(STIBP,         9*32+27) /*A  STIBP */
 XEN_CPUFEATURE(L1D_FLUSH,     9*32+28) /*S  MSR_FLUSH_CMD and L1D flush. */
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -222,7 +222,7 @@ def crunch_numbers(state):
         # instruction groups which are specified to require XSAVE for state
         # management.
         XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES,
-                AVX, MPX, PKU, LWP],
+                AVX, MPX, PKU, AMX_TILE, LWP],
 
         # AVX is taken to mean hardware support for 256bit registers (which in
         # practice depends on the VEX prefix to encode), and the instructions
@@ -288,6 +288,11 @@ def crunch_numbers(state):
 
         # In principle the TSXLDTRK insns could also be considered independent.
         RTM: [TSXLDTRK],
+
+        # AMX-TILE means hardware support for tile registers and general non-
+        # computational instructions.  All further AMX features are built on 
top
+        # of AMX-TILE.
+        AMX_TILE: [AMX_BF16, AMX_INT8],
     }
 
     deep_features = tuple(sorted(deps.keys()))




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.