[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v3 05/16] x86emul: AVX10.1 testing
Re-use respective AVX512 tests, by suitably adjusting the predicate functions. This leaves test names ("Testing ... NN-bit code sequence") somewhat misleading, but I think we can live with that. Note that the AVX512{BW,DQ} opmask tests cannot be run as-is for the AVX10/256 case, as they include 512-bit vector <-> opmask insn tests. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- SDE: -gnr / -gnr256 --- TBD: For AVX10.1/256 need to somehow guarantee that the generated blobs really don't use 512-bit insns (it's uncertain whether passing -mprefer-vector-width= is enough). Right now according to my testing on SDE this is all fine. May need to probe for support of the new -mno-evex512 compiler option. The AVX512{BW,DQ} opmask tests could of course be cloned (i.e. rebuilt another time with -mavx512vl passed) accordingly, but the coverage gain would be pretty marginal. --- v2: Drop SDE 9.27.0 workaround. Re-base over dropping of Xeon Phi support. --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -999,7 +999,11 @@ static void test_group(const struct test for ( j = 0; j < nr_vl; ++j ) { if ( vl[0] == VL_512 && vl[j] != VL_512 && - !cpu_policy.feat.avx512vl ) + !cpu_policy.feat.avx512vl && !cpu_policy.feat.avx10 ) + continue; + + if ( vl[j] == VL_512 && !cpu_policy.feat.avx512f && + !cpu_policy.avx10.vsz512 ) continue; switch ( tests[i].esz ) @@ -1050,6 +1054,27 @@ static void test_group(const struct test } } +/* AVX512 (sub)features implied by AVX10. */ +#define avx10_has_avx512f true +#define avx10_has_avx512bw true +#define avx10_has_avx512cd true +#define avx10_has_avx512dq true +#define avx10_has_avx512_bf16 true +#define avx10_has_avx512_bitalg true +#define avx10_has_avx512_fp16 true +#define avx10_has_avx512_ifma true +#define avx10_has_avx512_vbmi true +#define avx10_has_avx512_vbmi2 true +#define avx10_has_avx512_vnni true +#define avx10_has_avx512_vpopcntdq true + +/* AVX512 sub-features /not/ implied by AVX10. 
*/ +#define avx10_has_avx512er false +#define avx10_has_avx512pf false +#define avx10_has_avx512_4fmaps false +#define avx10_has_avx512_4vnniw false +#define avx10_has_avx512_vp2intersect false + void evex_disp8_test(void *instr, struct x86_emulate_ctxt *ctxt, const struct x86_emulate_ops *ops) { @@ -1057,8 +1082,8 @@ void evex_disp8_test(void *instr, struct emulops.read = read; emulops.write = write; -#define RUN(feat, vl) do { \ - if ( cpu_has_##feat ) \ +#define run(cond, feat, vl) do { \ + if ( cond ) \ { \ printf("%-40s", "Testing " #feat "/" #vl " disp8 handling..."); \ test_group(feat ## _ ## vl, ARRAY_SIZE(feat ## _ ## vl), \ @@ -1067,6 +1092,12 @@ void evex_disp8_test(void *instr, struct } \ } while ( false ) +#define RUN(feat, vl) \ + run(cpu_has_ ## feat || \ + (cpu_has_avx10_1 && cpu_policy.avx10.vsz256 && avx10_has_ ## feat && \ + (ARRAY_SIZE(vl_ ## vl) > 1 || &vl_ ## vl[0] != &vl_512[0])), \ + feat, vl) + RUN(avx512f, all); RUN(avx512f, 128); RUN(avx512f, no128); @@ -1089,10 +1120,15 @@ void evex_disp8_test(void *instr, struct RUN(avx512_fp16, all); RUN(avx512_fp16, 128); - if ( cpu_has_avx512f ) +#undef RUN + + if ( cpu_has_avx512f || cpu_has_avx10_1 ) { +#define RUN(feat, vl) run(cpu_has_ ## feat, feat, vl) RUN(gfni, all); RUN(vaes, all); RUN(vpclmulqdq, all); +#undef RUN } +#undef run } --- a/tools/tests/x86_emulator/testcase.mk +++ b/tools/tests/x86_emulator/testcase.mk @@ -4,7 +4,27 @@ include $(XEN_ROOT)/tools/Rules.mk $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS)) -CFLAGS += -fno-builtin -g0 $($(TESTCASE)-cflags) +ifneq ($(filter -mavx512%,$($(TESTCASE)-cflags)),) + +cflags-vsz64 := +cflags-vsz32 := -mprefer-vector-width=256 +cflags-vsz16 := -mprefer-vector-width=128 +# Scalar tests don't set VEC_SIZE (and VEC_MAX is used by S/G ones only) +cflags-vsz := -mprefer-vector-width=128 + +ifneq ($(filter -DVEC_SIZE=%,$($(TESTCASE)-cflags)),) +CFLAGS-VSZ := $(cflags-vsz$(patsubst -DVEC_SIZE=%,%,$(filter -DVEC_SIZE=%,$($(TESTCASE)-cflags)))) 
+else +CFLAGS-VSZ := $(cflags-vsz$(patsubst -DVEC_MAX=%,%,$(filter -DVEC_MAX=%,$($(TESTCASE)-cflags)))) +endif + +else + +CFLAGS-VSZ := + +endif + +CFLAGS += -fno-builtin -g0 $($(TESTCASE)-cflags) $(CFLAGS-VSZ) LDFLAGS_DIRECT += $(shell { $(LD) -v --warn-rwx-segments; } >/dev/null 2>&1 && echo --no-warn-rwx-segments) --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -124,52 +124,61 @@ static bool simd_check_avx_pclmul(void) static bool simd_check_avx512f(void) { - return cpu_has_avx512f; + return cpu_has_avx512f || cpu_has_avx10_1_512; } -#define simd_check_avx512f_opmask simd_check_avx512f #define simd_check_avx512f_sg simd_check_avx512f +static bool simd_check_avx512f_sc(void) +{ + return cpu_has_avx512f || cpu_has_avx10_1; +} +#define simd_check_avx512f_opmask simd_check_avx512f_sc + static bool simd_check_avx512f_vl(void) { - return cpu_has_avx512f && cpu_policy.feat.avx512vl; + return (cpu_has_avx512f && cpu_policy.feat.avx512vl) || + cpu_has_avx10_1_256; } #define simd_check_avx512vl_sg simd_check_avx512f_vl static bool simd_check_avx512dq(void) { - return cpu_has_avx512dq; + return cpu_has_avx512dq || cpu_has_avx10_1_512; } #define simd_check_avx512dq_opmask simd_check_avx512dq static bool simd_check_avx512dq_vl(void) { - return cpu_has_avx512dq && cpu_policy.feat.avx512vl; + return (cpu_has_avx512dq && cpu_policy.feat.avx512vl) || + cpu_has_avx10_1_256; } static bool simd_check_avx512bw(void) { - return cpu_has_avx512bw; + return cpu_has_avx512bw || cpu_has_avx10_1_512; } #define simd_check_avx512bw_opmask simd_check_avx512bw static bool simd_check_avx512bw_vl(void) { - return cpu_has_avx512bw && cpu_policy.feat.avx512vl; + return (cpu_has_avx512bw && cpu_policy.feat.avx512vl) || + cpu_has_avx10_1_256; } static bool simd_check_avx512vbmi(void) { - return cpu_has_avx512_vbmi; + return cpu_has_avx512_vbmi || cpu_has_avx10_1_512; } static bool simd_check_avx512vbmi_vl(void) { - return cpu_has_avx512_vbmi 
&& cpu_policy.feat.avx512vl; + return (cpu_has_avx512_vbmi && cpu_policy.feat.avx512vl) || + cpu_has_avx10_1_256; } static bool simd_check_avx512vbmi2(void) { - return cpu_has_avx512_vbmi2; + return cpu_has_avx512_vbmi2 || cpu_has_avx10_1_512; } static bool simd_check_sse4_sha(void) @@ -250,17 +259,23 @@ static bool simd_check_avx512bw_gf_vl(vo static bool simd_check_avx512vnni(void) { - return cpu_has_avx512_vnni; + return cpu_has_avx512_vnni || cpu_has_avx10_1_512; } static bool simd_check_avx512fp16(void) { - return cpu_has_avx512_fp16; + return cpu_has_avx512_fp16 || cpu_has_avx10_1_512; +} + +static bool simd_check_avx512fp16_sc(void) +{ + return cpu_has_avx512_fp16 || cpu_has_avx10_1; } static bool simd_check_avx512fp16_vl(void) { - return cpu_has_avx512_fp16 && cpu_policy.feat.avx512vl; + return (cpu_has_avx512_fp16 && cpu_policy.feat.avx512vl) || + cpu_has_avx10_1_256; } static void simd_set_regs(struct cpu_user_regs *regs) @@ -433,9 +448,13 @@ static const struct { SIMD(OPMASK+DQ/w, avx512dq_opmask, 2), SIMD(OPMASK+BW/d, avx512bw_opmask, 4), SIMD(OPMASK+BW/q, avx512bw_opmask, 8), - SIMD(AVX512F f32 scalar, avx512f, f4), +#define avx512f_sc_x86_32_D_f4 avx512f_x86_32_D_f4 +#define avx512f_sc_x86_64_D_f4 avx512f_x86_64_D_f4 + SIMD(AVX512F f32 scalar, avx512f_sc, f4), SIMD(AVX512F f32x16, avx512f, 64f4), - SIMD(AVX512F f64 scalar, avx512f, f8), +#define avx512f_sc_x86_32_D_f8 avx512f_x86_32_D_f8 +#define avx512f_sc_x86_64_D_f8 avx512f_x86_64_D_f8 + SIMD(AVX512F f64 scalar, avx512f_sc, f8), SIMD(AVX512F f64x8, avx512f, 64f8), SIMD(AVX512F s32x16, avx512f, 64i4), SIMD(AVX512F u32x16, avx512f, 64u4), @@ -523,7 +542,9 @@ static const struct { AVX512VL(_VBMI+VL u16x8, avx512vbmi, 16u2), AVX512VL(_VBMI+VL s16x16, avx512vbmi, 32i2), AVX512VL(_VBMI+VL u16x16, avx512vbmi, 32u2), - SIMD(AVX512_FP16 f16 scal,avx512fp16, f2), +#define avx512fp16_sc_x86_32_D_f2 avx512fp16_x86_32_D_f2 +#define avx512fp16_sc_x86_64_D_f2 avx512fp16_x86_64_D_f2 + SIMD(AVX512_FP16 f16 
scal,avx512fp16_sc, f2), SIMD(AVX512_FP16 f16x32, avx512fp16, 64f2), AVX512VL(_FP16+VL f16x8, avx512fp16, 16f2), AVX512VL(_FP16+VL f16x16,avx512fp16, 32f2), @@ -3205,7 +3226,7 @@ int main(int argc, char **argv) printf("skipped\n"); printf("%-40s", "Testing {evex} vmovq %xmm1,32(%edx)..."); - if ( stack_exec && simd_check_avx512f() ) + if ( stack_exec && simd_check_avx512f_sc() ) { decl_insn(evex_vmovq_to_mem); @@ -3229,7 +3250,7 @@ int main(int argc, char **argv) printf("skipped\n"); printf("%-40s", "Testing {evex} vmovq 32(%edx),%xmm0..."); - if ( stack_exec && simd_check_avx512f() ) + if ( stack_exec && simd_check_avx512f_sc() ) { decl_insn(evex_vmovq_from_mem); @@ -3241,11 +3262,22 @@ int main(int argc, char **argv) rc = x86_emulate(&ctxt, &emulops); if ( rc != X86EMUL_OKAY || !check_eip(evex_vmovq_from_mem) ) goto fail; - asm ( "vmovq %1, %%xmm1\n\t" - "vpcmpeqq %%zmm0, %%zmm1, %%k0\n" - "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) ); - if ( rc != 0xff ) - goto fail; + if ( simd_check_avx512f() ) + { + asm ( "vmovq %1, %%xmm1\n\t" + "vpcmpeqq %%zmm0, %%zmm1, %%k0\n" + "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) ); + if ( rc != 0x00ff ) + goto fail; + } + else + { + asm ( "vmovq %1, %%xmm1\n\t" + "vpcmpeqq %%xmm0, %%xmm1, %%k0\n" + "kmovb %%k0, %0" : "=r" (rc) : "m" (res[8]) ); + if ( rc != 0x03 ) + goto fail; + } printf("okay\n"); } else @@ -3567,7 +3599,7 @@ int main(int argc, char **argv) printf("%-40s", "Testing vmovsd %xmm5,16(%ecx){%k3}..."); memset(res, 0x88, 128); memset(res + 20, 0x77, 8); - if ( stack_exec && simd_check_avx512f() ) + if ( stack_exec && simd_check_avx512f_sc() ) { decl_insn(vmovsd_masked_to_mem); @@ -3785,7 +3817,7 @@ int main(int argc, char **argv) printf("skipped\n"); printf("%-40s", "Testing {evex} vmovd %xmm3,32(%ecx)..."); - if ( stack_exec && simd_check_avx512f() ) + if ( stack_exec && simd_check_avx512f_sc() ) { decl_insn(evex_vmovd_to_mem); @@ -3810,7 +3842,7 @@ int main(int argc, char **argv) printf("skipped\n"); printf("%-40s", 
"Testing {evex} vmovd 32(%ecx),%xmm4..."); - if ( stack_exec && simd_check_avx512f() ) + if ( stack_exec && simd_check_avx512f_sc() ) { decl_insn(evex_vmovd_from_mem); @@ -3823,11 +3855,22 @@ int main(int argc, char **argv) rc = x86_emulate(&ctxt, &emulops); if ( rc != X86EMUL_OKAY || !check_eip(evex_vmovd_from_mem) ) goto fail; - asm ( "vmovd %1, %%xmm0\n\t" - "vpcmpeqd %%zmm4, %%zmm0, %%k0\n\t" - "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) ); - if ( rc != 0xffff ) - goto fail; + if ( simd_check_avx512f() ) + { + asm ( "vmovd %1, %%xmm0\n\t" + "vpcmpeqd %%zmm4, %%zmm0, %%k0\n\t" + "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) ); + if ( rc != 0xffff ) + goto fail; + } + else + { + asm ( "vmovd %1, %%xmm0\n\t" + "vpcmpeqd %%xmm4, %%xmm0, %%k0\n\t" + "kmovb %%k0, %0" : "=r" (rc) : "m" (res[8]) ); + if ( rc != 0x0f ) + goto fail; + } printf("okay\n"); } else @@ -4000,7 +4043,7 @@ int main(int argc, char **argv) printf("skipped\n"); printf("%-40s", "Testing {evex} vmovd %xmm2,%ebx..."); - if ( stack_exec && simd_check_avx512f() ) + if ( stack_exec && simd_check_avx512f_sc() ) { decl_insn(evex_vmovd_to_reg); @@ -4026,7 +4069,7 @@ int main(int argc, char **argv) printf("skipped\n"); printf("%-40s", "Testing {evex} vmovd %ebx,%xmm1..."); - if ( stack_exec && simd_check_avx512f() ) + if ( stack_exec && simd_check_avx512f_sc() ) { decl_insn(evex_vmovd_from_reg); @@ -4040,11 +4083,22 @@ int main(int argc, char **argv) rc = x86_emulate(&ctxt, &emulops); if ( (rc != X86EMUL_OKAY) || !check_eip(evex_vmovd_from_reg) ) goto fail; - asm ( "vmovd %1, %%xmm0\n\t" - "vpcmpeqd %%zmm1, %%zmm0, %%k0\n\t" - "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) ); - if ( rc != 0xffff ) - goto fail; + if ( simd_check_avx512f() ) + { + asm ( "vmovd %1, %%xmm0\n\t" + "vpcmpeqd %%zmm1, %%zmm0, %%k0\n\t" + "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) ); + if ( rc != 0xffff ) + goto fail; + } + else + { + asm ( "vmovd %1, %%xmm0\n\t" + "vpcmpeqd %%xmm1, %%xmm0, %%k0\n\t" + "kmovb %%k0, %0" : "=r" (rc) : "m" 
(res[8]) ); + if ( rc != 0x0f ) + goto fail; + } printf("okay\n"); } else @@ -4128,7 +4182,7 @@ int main(int argc, char **argv) printf("skipped\n"); printf("%-40s", "Testing {evex} vmovq %xmm11,32(%ecx)..."); - if ( stack_exec && simd_check_avx512f() ) + if ( stack_exec && simd_check_avx512f_sc() ) { decl_insn(evex_vmovq_to_mem2); @@ -4218,7 +4272,7 @@ int main(int argc, char **argv) printf("skipped\n"); printf("%-40s", "Testing vmovq %xmm22,%rbx..."); - if ( stack_exec && simd_check_avx512f() ) + if ( stack_exec && simd_check_avx512f_sc() ) { decl_insn(evex_vmovq_to_reg); @@ -5509,7 +5563,7 @@ int main(int argc, char **argv) printf("skipped\n"); printf("%-40s", "Testing vmovsh 8(%ecx),%xmm5..."); - if ( stack_exec && simd_check_avx512fp16() ) + if ( stack_exec && simd_check_avx512fp16_sc() ) { decl_insn(vmovsh_from_mem); decl_insn(vmovw_to_gpr); @@ -5527,14 +5581,28 @@ int main(int argc, char **argv) rc = x86_emulate(&ctxt, &emulops); if ( (rc != X86EMUL_OKAY) || !check_eip(vmovsh_from_mem) ) goto fail; - asm volatile ( "kmovw %2, %%k1\n\t" - "vmovdqu16 %1, %%zmm4%{%%k1%}%{z%}\n\t" - "vpcmpeqw %%zmm4, %%zmm5, %%k0\n\t" - "kmovw %%k0, %0" - : "=g" (rc) - : "m" (res[2]), "r" (1) ); - if ( rc != 0xffff ) - goto fail; + if ( simd_check_avx512fp16() ) + { + asm volatile ( "kmovw %2, %%k1\n\t" + "vmovdqu16 %1, %%zmm4%{%%k1%}%{z%}\n\t" + "vpcmpeqw %%zmm4, %%zmm5, %%k0\n\t" + "kmovw %%k0, %0" + : "=g" (rc) + : "m" (res[2]), "r" (1) ); + if ( rc != 0xffff ) + goto fail; + } + else + { + asm volatile ( "kmovb %2, %%k1\n\t" + "vmovdqu16 %1, %%xmm4%{%%k1%}%{z%}\n\t" + "vpcmpeqw %%xmm4, %%xmm5, %%k0\n\t" + "kmovb %%k0, %0" + : "=g" (rc) + : "m" (res[2]), "r" (1) ); + if ( rc != 0xff ) + goto fail; + } printf("okay\n"); printf("%-40s", "Testing vmovsh %xmm4,2(%eax){%k3}..."); --- a/tools/tests/x86_emulator/x86-emulate.c +++ b/tools/tests/x86_emulator/x86-emulate.c @@ -244,7 +244,7 @@ int emul_test_get_fpu( break; case X86EMUL_FPU_opmask: case X86EMUL_FPU_zmm: - if ( 
cpu_has_avx512f ) + if ( cpu_has_avx512f || cpu_has_avx10_1 ) break; default: return X86EMUL_UNHANDLEABLE; --- a/tools/tests/x86_emulator/x86-emulate.h +++ b/tools/tests/x86_emulator/x86-emulate.h @@ -207,6 +207,12 @@ void wrpkru(unsigned int val); xcr0_mask(6)) #define cpu_has_avx_vnni_int16 (cpu_policy.feat.avx_vnni_int16 && \ xcr0_mask(6)) + /* TBD: Is bit 6 (ZMM_Hi256) really needed here? */ +#define cpu_has_avx10_1 (cpu_policy.feat.avx10 && xcr0_mask(0xe6)) +#define cpu_has_avx10_1_256 (cpu_has_avx10_1 && \ + (cpu_policy.avx10.vsz256 || \ + cpu_policy.avx10.vsz512)) +#define cpu_has_avx10_1_512 (cpu_has_avx10_1 && cpu_policy.avx10.vsz512) #define cpu_has_xgetbv1 (cpu_has_xsave && cpu_policy.xstate.xgetbv1)
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |