[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging-4.21] x86/AMD: avoid REP MOVSB for Zen3/4



commit f805b61268311017902aff6e956c12c32b099d3e
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Tue Jan 13 15:41:10 2026 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Jan 13 15:41:10 2026 +0100

    x86/AMD: avoid REP MOVSB for Zen3/4
    
    Zen2 (which doesn't expose ERMS) through Zen4 have sub-optimal aliasing
    detection for REP MOVS, and fall back to a unit-at-a-time loop when the
    two pointers have differing bottom 5 bits.  While both forms are
    affected, this makes REP MOVSB 8 times slower than REP MOVSQ.
    
    memcpy() has a high likelihood of encountering this slowpath, so avoid
    using REP MOVSB.  This undoes the ERMS optimisation added in commit
    d6397bd0e11c which turns out to be an anti-optimisation on these
    microarchitectures.
    
    However, retain the use of ERMS-based REP MOVSB in other cases such as
    copy_page_hot() where the parameter alignment is known to avoid the
    slowpath.
    
    Reported-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Jason Andryuk <jason.andryuk@xxxxxxx>
    Acked-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    master commit: cb3ae9445ab07261188f1d4a54ca5741bc962cf2
    master date: 2026-01-07 14:45:27 +0100
---
 xen/arch/x86/cpu/amd.c                 | 4 ++++
 xen/arch/x86/cpu/intel.c               | 3 +++
 xen/arch/x86/include/asm/cpufeatures.h | 3 ++-
 xen/arch/x86/memcpy.S                  | 2 +-
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
index 35b9dbf149..fb5c6acded 100644
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -1378,6 +1378,10 @@ static void cf_check init_amd(struct cpuinfo_x86 *c)
 
        check_syscfg_dram_mod_en();
 
+       if (c == &boot_cpu_data && cpu_has(c, X86_FEATURE_ERMS)
+           && c->family != 0x19 /* Zen3/4 */)
+               setup_force_cpu_cap(X86_FEATURE_XEN_REP_MOVSB);
+
        amd_log_freq(c);
 }
 
diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
index ecca11f04d..b5b0b31eb9 100644
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -684,6 +684,9 @@ static void cf_check init_intel(struct cpuinfo_x86 *c)
         */
        if (c == &boot_cpu_data && c->vfm == INTEL_SKYLAKE_X)
                setup_clear_cpu_cap(X86_FEATURE_CLWB);
+
+       if (c == &boot_cpu_data && cpu_has(c, X86_FEATURE_ERMS))
+               setup_force_cpu_cap(X86_FEATURE_XEN_REP_MOVSB);
 }
 
 const struct cpu_dev __initconst_cf_clobber intel_cpu_dev = {
diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h
index 0a98676c16..65534863c7 100644
--- a/xen/arch/x86/include/asm/cpufeatures.h
+++ b/xen/arch/x86/include/asm/cpufeatures.h
@@ -7,7 +7,7 @@
 #define FSCAPINTS FEATURESET_NR_ENTRIES
 
 /* Synthetic words follow the featureset words. */
-#define X86_NR_SYNTH 1
+#define X86_NR_SYNTH 2
 #define X86_SYNTH(x) (FSCAPINTS * 32 + (x))
 
 /* Synthetic features */
@@ -43,6 +43,7 @@ XEN_CPUFEATURE(IBPB_ENTRY_PV,     X86_SYNTH(28)) /* 
MSR_PRED_CMD used by Xen for
 XEN_CPUFEATURE(IBPB_ENTRY_HVM,    X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen 
for HVM */
 XEN_CPUFEATURE(USE_VMCALL,        X86_SYNTH(30)) /* Use VMCALL instead of 
VMMCALL */
 XEN_CPUFEATURE(PDX_COMPRESSION,   X86_SYNTH(31)) /* PDX compression */
+XEN_CPUFEATURE(XEN_REP_MOVSB,     X86_SYNTH(32)) /* REP MOVSB used for 
memcpy() */
 
 /* Bug words follow the synthetic words. */
 #define X86_NR_BUG 1
diff --git a/xen/arch/x86/memcpy.S b/xen/arch/x86/memcpy.S
index aaee012126..eefbb8c9ee 100644
--- a/xen/arch/x86/memcpy.S
+++ b/xen/arch/x86/memcpy.S
@@ -10,7 +10,7 @@ FUNC(memcpy)
          * precautions were taken).
          */
         ALTERNATIVE "and $7, %edx; shr $3, %rcx", \
-                    STR(rep movsb; RET), X86_FEATURE_ERMS
+                    STR(rep movsb; RET), X86_FEATURE_XEN_REP_MOVSB
         rep movsq
         or      %edx, %ecx
         jz      1f
--
generated by git-patchbot for /home/xen/git/xen.git#staging-4.21



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.