[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH for-4.21 v2] x86/AMD: avoid REP MOVSB for Zen3/4
Along with Zen2 (which doesn't expose ERMS), both families reportedly suffer from sub-optimal aliasing detection when deciding whether REP MOVSB can actually be carried out in the accelerated way. Therefore we want to avoid its use in the common case of memcpy(); copy_page_hot() is fine, as its two pointers will always have the same low 5 bits. Reported-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Reviewed-by: Jason Andryuk <jason.andryuk@xxxxxxx> Acked-by: Roger Pau Monné <roger.pau@xxxxxxxxxx> Release-Acked-by: Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx> --- Trying to amend the comment in cpufeatures.h (e.g. "..., i.e. ERMS minus the Zen3/4 pointer aliasing issue") makes it get longish, so I kept it at the shortened form. --- v2: Leave page copying alone. --- a/xen/arch/x86/cpu/amd.c +++ b/xen/arch/x86/cpu/amd.c @@ -1386,6 +1386,10 @@ static void cf_check init_amd(struct cpu check_syscfg_dram_mod_en(); + if (c == &boot_cpu_data && cpu_has(c, X86_FEATURE_ERMS) + && c->family != 0x19 /* Zen3/4 */) + setup_force_cpu_cap(X86_FEATURE_XEN_REP_MOVSB); + amd_log_freq(c); } --- a/xen/arch/x86/cpu/intel.c +++ b/xen/arch/x86/cpu/intel.c @@ -684,6 +684,9 @@ static void cf_check init_intel(struct c */ if (c == &boot_cpu_data && c->vfm == INTEL_SKYLAKE_X) setup_clear_cpu_cap(X86_FEATURE_CLWB); + + if (c == &boot_cpu_data && cpu_has(c, X86_FEATURE_ERMS)) + setup_force_cpu_cap(X86_FEATURE_XEN_REP_MOVSB); } const struct cpu_dev __initconst_cf_clobber intel_cpu_dev = { --- a/xen/arch/x86/include/asm/cpufeatures.h +++ b/xen/arch/x86/include/asm/cpufeatures.h @@ -7,7 +7,7 @@ #define FSCAPINTS FEATURESET_NR_ENTRIES /* Synthetic words follow the featureset words. 
*/ -#define X86_NR_SYNTH 1 +#define X86_NR_SYNTH 2 #define X86_SYNTH(x) (FSCAPINTS * 32 + (x)) /* Synthetic features */ @@ -43,6 +43,7 @@ XEN_CPUFEATURE(IBPB_ENTRY_PV, X86_SY XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for HVM */ XEN_CPUFEATURE(USE_VMCALL, X86_SYNTH(30)) /* Use VMCALL instead of VMMCALL */ XEN_CPUFEATURE(PDX_COMPRESSION, X86_SYNTH(31)) /* PDX compression */ +XEN_CPUFEATURE(XEN_REP_MOVSB, X86_SYNTH(32)) /* REP MOVSB used for memcpy() */ /* Bug words follow the synthetic words. */ #define X86_NR_BUG 1 --- a/xen/arch/x86/memcpy.S +++ b/xen/arch/x86/memcpy.S @@ -10,7 +10,7 @@ FUNC(memcpy) * precautions were taken). */ ALTERNATIVE "and $7, %edx; shr $3, %rcx", \ - STR(rep movsb; RET), X86_FEATURE_ERMS + STR(rep movsb; RET), X86_FEATURE_XEN_REP_MOVSB rep movsq or %edx, %ecx jz 1f
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |