[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen master] pdx: allow per-arch optimization of PDX conversion helpers



commit bd0b3a876d805b95c5cfcf292d7651334c8dba80
Author:     Roger Pau Monne <roger.pau@xxxxxxxxxx>
AuthorDate: Fri Jun 20 09:50:51 2025 +0200
Commit:     Roger Pau Monne <roger.pau@xxxxxxxxxx>
CommitDate: Mon Aug 11 18:45:52 2025 +0200

    pdx: allow per-arch optimization of PDX conversion helpers
    
    There are four performance-critical PDX conversion helpers, which perform
    the PFN to/from PDX and the physical address to/from directmap offset
    translations.
    
    In the absence of an active PDX compression, those functions would still do
    the calculations needed, just to return the same input value as no
    translation is in place and hence PFN and PDX spaces are identity mapped.
    
    To avoid the overhead of these pointless calculations, allow architectures
    to implement the translation helpers in a per-arch header.  Rename the
    existing conversion functions to add a trailing _xlate suffix, so that the
    per-arch headers can define the non-suffixed versions.
    
    Currently only x86 implements meaningful custom handlers to short circuit
    the translation when not active, using asm goto.  Other architectures use
    generic macros that map the non-xlate to the xlate variants to keep the
    previous behavior.
    
    Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
---
 xen/arch/x86/include/asm/cpufeatures.h |  1 +
 xen/arch/x86/include/asm/pdx.h         | 71 ++++++++++++++++++++++++++++++++++
 xen/arch/x86/srat.c                    |  6 ++-
 xen/common/pdx.c                       | 10 +++--
 xen/include/xen/pdx.h                  | 29 +++++++++++---
 5 files changed, 106 insertions(+), 11 deletions(-)

diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h
index bc108c3819..71308d9daf 100644
--- a/xen/arch/x86/include/asm/cpufeatures.h
+++ b/xen/arch/x86/include/asm/cpufeatures.h
@@ -42,6 +42,7 @@ XEN_CPUFEATURE(XEN_IBT,           X86_SYNTH(27)) /* Xen uses CET Indirect Branch
 XEN_CPUFEATURE(IBPB_ENTRY_PV,     X86_SYNTH(28)) /* MSR_PRED_CMD used by Xen for PV */
 XEN_CPUFEATURE(IBPB_ENTRY_HVM,    X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for HVM */
 XEN_CPUFEATURE(USE_VMCALL,        X86_SYNTH(30)) /* Use VMCALL instead of VMMCALL */
+XEN_CPUFEATURE(PDX_COMPRESSION,   X86_SYNTH(31)) /* PDX compression */
 
 /* Bug words follow the synthetic words. */
 #define X86_NR_BUG 1
diff --git a/xen/arch/x86/include/asm/pdx.h b/xen/arch/x86/include/asm/pdx.h
new file mode 100644
index 0000000000..6be7e1185e
--- /dev/null
+++ b/xen/arch/x86/include/asm/pdx.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef X86_PDX_H
+#define X86_PDX_H
+
+#include <asm/alternative.h>
+
+/*
+ * Shared by the helpers below, and strictly tied to them: patched to jump to
+ * @label when X86_FEATURE_PDX_COMPRESSION is not set, else falls through.
+ */
+#define PDX_ASM_GOTO(label)                         \
+    asm_inline goto (                               \
+        ALTERNATIVE(                                \
+            "",                                     \
+            "jmp %l0",                              \
+            ALT_NOT(X86_FEATURE_PDX_COMPRESSION))   \
+        : : : : label )
+
+static inline unsigned long pfn_to_pdx(unsigned long pfn)
+{
+    PDX_ASM_GOTO(skip); /* Jumps to skip when compression is not active. */
+
+    return pfn_to_pdx_xlate(pfn);
+
+ skip:
+    return pfn; /* No translation in place: PFN and PDX are identical. */
+}
+
+static inline unsigned long pdx_to_pfn(unsigned long pdx)
+{
+    PDX_ASM_GOTO(skip); /* Jumps to skip when compression is not active. */
+
+    return pdx_to_pfn_xlate(pdx);
+
+ skip:
+    return pdx; /* No translation in place: PDX and PFN are identical. */
+}
+
+static inline unsigned long maddr_to_directmapoff(paddr_t ma)
+{
+    PDX_ASM_GOTO(skip); /* Jumps to skip when compression is not active. */
+
+    return maddr_to_directmapoff_xlate(ma);
+
+ skip:
+    return ma; /* No translation in place: offset equals the address. */
+}
+
+static inline paddr_t directmapoff_to_maddr(unsigned long offset)
+{
+    PDX_ASM_GOTO(skip); /* Jumps to skip when compression is not active. */
+
+    return directmapoff_to_maddr_xlate(offset);
+
+ skip:
+    return offset; /* No translation in place: address equals the offset. */
+}
+
+#undef PDX_ASM_GOTO
+
+#endif /* X86_PDX_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c
index 747607439f..42ccb0c0f3 100644
--- a/xen/arch/x86/srat.c
+++ b/xen/arch/x86/srat.c
@@ -298,7 +298,8 @@ void __init srat_parse_regions(paddr_t addr)
        acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
                              srat_parse_region, 0);
 
-       pfn_pdx_compression_setup(addr);
+       if (!pfn_pdx_compression_setup(addr))
+               return;
 
        /* Ensure all RAM ranges in the e820 are covered. */
        for (i = 0; i < e820.nr_map; i++) {
@@ -318,6 +319,9 @@ void __init srat_parse_regions(paddr_t addr)
                        return;
                }
        }
+
+       /* If we got this far compression is working as expected. */
+       setup_force_cpu_cap(X86_FEATURE_PDX_COMPRESSION);
 }
 
 unsigned int numa_node_to_arch_nid(nodeid_t n)
diff --git a/xen/common/pdx.c b/xen/common/pdx.c
index f4a3dcf6cb..c9ec867291 100644
--- a/xen/common/pdx.c
+++ b/xen/common/pdx.c
@@ -215,7 +215,7 @@ static uint64_t __init pdx_init_mask(uint64_t base_addr)
                          (uint64_t)1 << (MAX_ORDER + PAGE_SHIFT)) - 1);
 }
 
-void __init pfn_pdx_compression_setup(paddr_t base)
+bool __init pfn_pdx_compression_setup(paddr_t base)
 {
     unsigned int i, j, bottom_shift = 0, hole_shift = 0;
     unsigned long mask = pdx_init_mask(base) >> PAGE_SHIFT;
@@ -224,7 +224,7 @@ void __init pfn_pdx_compression_setup(paddr_t base)
     {
         printk(XENLOG_DEBUG "PFN compression disabled%s\n",
                pdx_compress ? ": no ranges provided" : "");
-        return;
+        return false;
     }
 
     if ( nr_ranges > ARRAY_SIZE(ranges) )
@@ -232,7 +232,7 @@ void __init pfn_pdx_compression_setup(paddr_t base)
         printk(XENLOG_WARNING
                "Too many PFN ranges (%u > %zu), not attempting PFN compression\n",
                nr_ranges, ARRAY_SIZE(ranges));
-        return;
+        return false;
     }
 
     for ( i = 0; i < nr_ranges; i++ )
@@ -263,7 +263,7 @@ void __init pfn_pdx_compression_setup(paddr_t base)
         }
     }
     if ( !hole_shift )
-        return;
+        return false;
 
     printk(KERN_INFO "PFN compression on bits %u...%u\n",
            bottom_shift, bottom_shift + hole_shift - 1);
@@ -274,6 +274,8 @@ void __init pfn_pdx_compression_setup(paddr_t base)
     pfn_hole_mask       = ((1UL << hole_shift) - 1) << bottom_shift;
     pfn_top_mask        = ~(pfn_pdx_bottom_mask | pfn_hole_mask);
     ma_top_mask         = pfn_top_mask << PAGE_SHIFT;
+
+    return true;
 }
 
 void __init pfn_pdx_compression_reset(void)
diff --git a/xen/include/xen/pdx.h b/xen/include/xen/pdx.h
index 10153da98b..425d45e9f0 100644
--- a/xen/include/xen/pdx.h
+++ b/xen/include/xen/pdx.h
@@ -114,7 +114,7 @@ extern unsigned long pfn_top_mask, ma_top_mask;
  * @param pfn Frame number
  * @return Obtained pdx after compressing the pfn
  */
-static inline unsigned long pfn_to_pdx(unsigned long pfn)
+static inline unsigned long pfn_to_pdx_xlate(unsigned long pfn)
 {
     return (pfn & pfn_pdx_bottom_mask) |
            ((pfn & pfn_top_mask) >> pfn_pdx_hole_shift);
@@ -126,7 +126,7 @@ static inline unsigned long pfn_to_pdx(unsigned long pfn)
  * @param pdx Page index
  * @return Obtained pfn after decompressing the pdx
  */
-static inline unsigned long pdx_to_pfn(unsigned long pdx)
+static inline unsigned long pdx_to_pfn_xlate(unsigned long pdx)
 {
     return (pdx & pfn_pdx_bottom_mask) |
            ((pdx << pfn_pdx_hole_shift) & pfn_top_mask);
@@ -139,7 +139,7 @@ static inline unsigned long pdx_to_pfn(unsigned long pdx)
  * @return Offset on the direct map where that
  *         machine address can be accessed
  */
-static inline unsigned long maddr_to_directmapoff(paddr_t ma)
+static inline unsigned long maddr_to_directmapoff_xlate(paddr_t ma)
 {
     return (((ma & ma_top_mask) >> pfn_pdx_hole_shift) |
             (ma & ma_va_bottom_mask));
@@ -151,7 +151,7 @@ static inline unsigned long maddr_to_directmapoff(paddr_t ma)
  * @param offset Offset into the direct map
  * @return Corresponding machine address of that virtual location
  */
-static inline paddr_t directmapoff_to_maddr(unsigned long offset)
+static inline paddr_t directmapoff_to_maddr_xlate(unsigned long offset)
 {
     return ((((paddr_t)offset << pfn_pdx_hole_shift) & ma_top_mask) |
             (offset & ma_va_bottom_mask));
@@ -181,8 +181,9 @@ static inline void pfn_pdx_add_region(paddr_t base, paddr_t size)
 {
 }
 
-static inline void pfn_pdx_compression_setup(paddr_t base)
+static inline bool pfn_pdx_compression_setup(paddr_t base)
 {
+    return false;
 }
 
 static inline void pfn_pdx_compression_reset(void)
@@ -191,6 +192,21 @@ static inline void pfn_pdx_compression_reset(void)
 
 #else /* !CONFIG_PDX_NONE */
 
+/*
+ * Allow each architecture to define its (possibly optimized) versions of the
+ * translation functions.
+ *
+ * Do not use _xlate suffixed functions, always use the non _xlate variants.
+ */
+#if __has_include(<asm/pdx.h>)
+# include <asm/pdx.h>
+#else
+# define pdx_to_pfn pdx_to_pfn_xlate
+# define pfn_to_pdx pfn_to_pdx_xlate
+# define maddr_to_directmapoff maddr_to_directmapoff_xlate
+# define directmapoff_to_maddr directmapoff_to_maddr_xlate
+#endif
+
 /* Shared functions implemented by all PDX compressions. */
 
 /**
@@ -215,8 +231,9 @@ void pfn_pdx_add_region(paddr_t base, paddr_t size);
  * range of the current memory regions.
  *
  * @param base address to start compression from.
+ * @return True if PDX compression has been enabled.
  */
-void pfn_pdx_compression_setup(paddr_t base);
+bool pfn_pdx_compression_setup(paddr_t base);
 
 /**
  * Reset the global variables to it's default values, thus disabling PFN
--
generated by git-patchbot for /home/xen/git/xen.git#master



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.