[XEN PATCH v2 2/3] xen/arm: optimize stage-1,2 combined TLBI in presence of FEAT_nTLBPA
From: Haseeb Ashraf <haseeb.ashraf@xxxxxxxxxxx>
FEAT_nTLBPA (quoting the Arm ARM definition) introduces a mechanism to
identify if the intermediate caching of translation table walks does
not include non-coherent caches of previous valid translation table
entries since the last completed TLBI applicable to the PE.
Since no such non-coherent cached entries can exist after the last
completed TLBI, a stage-1 TLBI is not required when performing a
stage-2 TLBI.
This feature is optionally available on both arm32 and arm64.
Suggested-by: Mohamed Mediouni <mohamed@xxxxxxxxxxxxxxxx>
Signed-off-by: Haseeb Ashraf <haseeb.ashraf@xxxxxxxxxxx>
---
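Note: for reference, the arm64 detection added below can be sketched as
a small standalone snippet (illustrative only, not the Xen code itself;
per the Arm ARM, ID_AA64MMFR1_EL1.nTLBPA occupies bits [51:48]):

#include <stdbool.h>
#include <stdint.h>

#define MM64_NTLBPA_SUPPORT_IMP 0x1

static inline uint64_t read_id_aa64mmfr1(void)
{
    uint64_t val;

    asm volatile ( "mrs %0, ID_AA64MMFR1_EL1" : "=r" (val) );

    return val;
}

static bool has_ntlbpa_sketch(void)
{
    /* nTLBPA is the 4-bit field at bits [51:48]. */
    return ((read_id_aa64mmfr1() >> 48) & 0xf) == MM64_NTLBPA_SUPPORT_IMP;
}

(The arm32 equivalent reads ID_MMFR5.nTLBPA, bits [7:4], as reflected
in the mm32 bitfield layout below.)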
xen/arch/arm/cpufeature.c | 19 ++++++++++
xen/arch/arm/include/asm/arm32/flushtlb.h | 18 +++++-----
xen/arch/arm/include/asm/arm64/flushtlb.h | 42 ++++++++++++++++++++---
xen/arch/arm/include/asm/cpufeature.h | 24 +++++++++++--
xen/arch/arm/include/asm/processor.h | 7 ++++
5 files changed, 95 insertions(+), 15 deletions(-)
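Note: the behavioural change to the combined-stage flush path can be
sketched as follows (illustrative only, not the Xen helpers; assumes
the guest's VMID is current in VTTBR_EL2 and ignores the REPEAT_TLBI
erratum workaround):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

static void flush_guest_tlb_range_ipa_sketch(uint64_t ipa, size_t size,
                                             bool ntlbpa)
{
    uint64_t end = ipa + size;

    /* Order prior page-table updates before the invalidations. */
    asm volatile ( "dsb ishst" ::: "memory" );

    for ( ; ipa < end; ipa += PAGE_SIZE )
    {
        /* TLBI IPAS2E1IS takes IPA[47:12] in Xt[35:0]. */
        uint64_t arg = ipa >> PAGE_SHIFT;

        asm volatile ( "tlbi ipas2e1is, %0" :: "r" (arg) : "memory" );
    }

    if ( !ntlbpa )
    {
        /*
         * Without FEAT_nTLBPA, non-coherent copies of intermediate
         * walk entries may survive the IPA-based invalidation, so the
         * stage-1 TLBs for the current VMID must be flushed as well.
         */
        asm volatile ( "dsb ish; tlbi vmalle1is" ::: "memory" );
    }

    asm volatile ( "dsb ish; isb" ::: "memory" );
}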
diff --git a/xen/arch/arm/cpufeature.c b/xen/arch/arm/cpufeature.c
index 1a80738571..9fa1c45869 100644
--- a/xen/arch/arm/cpufeature.c
+++ b/xen/arch/arm/cpufeature.c
@@ -17,7 +17,19 @@ DECLARE_BITMAP(cpu_hwcaps, ARM_NCAPS);
struct cpuinfo_arm __read_mostly domain_cpuinfo;
+#ifdef CONFIG_ARM_32
+static bool has_ntlbpa(const struct arm_cpu_capabilities *entry)
+{
+ return system_cpuinfo.mm32.ntlbpa == MM32_NTLBPA_SUPPORT_IMP;
+}
+#endif
+
#ifdef CONFIG_ARM_64
+static bool has_ntlbpa(const struct arm_cpu_capabilities *entry)
+{
+ return system_cpuinfo.mm64.ntlbpa == MM64_NTLBPA_SUPPORT_IMP;
+}
+
static bool has_sb_instruction(const struct arm_cpu_capabilities *entry)
{
return system_cpuinfo.isa64.sb;
@@ -25,6 +37,13 @@ static bool has_sb_instruction(const struct arm_cpu_capabilities *entry)
#endif
static const struct arm_cpu_capabilities arm_features[] = {
+#if defined(CONFIG_ARM_32) || defined(CONFIG_ARM_64)
+ {
+ .desc = "Intermediate caching of translation table walks (nTLBPA)",
+ .capability = ARM_HAS_NTLBPA,
+ .matches = has_ntlbpa,
+ },
+#endif
#ifdef CONFIG_ARM_64
{
.desc = "Speculation barrier instruction (SB)",
diff --git a/xen/arch/arm/include/asm/arm32/flushtlb.h b/xen/arch/arm/include/asm/arm32/flushtlb.h
index 70a8b1dad6..db15f29216 100644
--- a/xen/arch/arm/include/asm/arm32/flushtlb.h
+++ b/xen/arch/arm/include/asm/arm32/flushtlb.h
@@ -58,11 +58,12 @@ static inline void flush_guest_tlb_s1_local(void)
* See ARMv8 (DDI 0487A.e): G4-4126 Table G4-24.
*
* The following macros should be used where intention is to
- * clear only stage-1 TLBs. This would be helpful in future in
- * identifying which stage-1 TLB flushes can be skipped such as
- * in present of FEAT_nTLBPA.
+ * clear only stage-1 TLBs. This would be helpful in identifying
+ * which stage-1 TLB flushes can be skipped in the presence of
+ * FEAT_nTLBPA.
*/
- return flush_guest_tlb_local();
+ if ( !cpus_have_const_cap(ARM_HAS_NTLBPA) )
+ flush_guest_tlb_local();
}
/*
@@ -78,11 +79,12 @@ static inline void flush_guest_tlb_s1(void)
* See ARMv8 (DDI 0487A.e): G4-4126 Table G4-24.
*
* The following macros should be used where intention is to
- * clear only stage-1 TLBs. This would be helpful in future in
- * identifying which stage-1 TLB flushes can be skipped such as
- * in present of FEAT_nTLBPA.
+ * clear only stage-1 TLBs. This would be helpful in identifying
+ * which stage-1 TLB flushes can be skipped in the presence of
+ * FEAT_nTLBPA.
*/
- return flush_guest_tlb();
+ if ( !cpus_have_const_cap(ARM_HAS_NTLBPA) )
+ flush_guest_tlb();
}
/* Flush TLB of local processor for address va. */
diff --git a/xen/arch/arm/include/asm/arm64/flushtlb.h b/xen/arch/arm/include/asm/arm64/flushtlb.h
index fff76375a3..a2f1af833e 100644
--- a/xen/arch/arm/include/asm/arm64/flushtlb.h
+++ b/xen/arch/arm/include/asm/arm64/flushtlb.h
@@ -47,6 +47,24 @@ static inline void name(void) \
: : : "memory"); \
}
+#define TLB_HELPER_NTLBPA(name, tlbop, sh) \
+static inline void name(void) \
+{ \
+ if ( !cpus_have_const_cap(ARM_HAS_NTLBPA) ) \
+ asm_inline volatile ( \
+ "dsb " # sh "st;" \
+ "tlbi " # tlbop ";" \
+ ALTERNATIVE( \
+ "nop; nop;", \
+ "dsb ish;" \
+ "tlbi " # tlbop ";", \
+ ARM64_WORKAROUND_REPEAT_TLBI, \
+ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
+ "dsb " # sh ";" \
+ "isb;" \
+ : : : "memory"); \
+}
+
/*
* FLush TLB by IPA. This will likely be used in a loop, so the caller
* is responsible to use the appropriate memory barriers before/after
@@ -96,10 +114,10 @@ TLB_HELPER(flush_guest_tlb_local, vmalls12e1, nsh)
TLB_HELPER(flush_guest_tlb, vmalls12e1is, ish)
/* Flush local TLBs, current VMID, stage-1 only */
-TLB_HELPER(flush_guest_tlb_s1_local, vmalle1, nsh)
+TLB_HELPER_NTLBPA(flush_guest_tlb_s1_local, vmalle1, nsh)
/* Flush innershareable TLBs, current VMID, stage-1 only */
-TLB_HELPER(flush_guest_tlb_s1, vmalle1is, ish)
+TLB_HELPER_NTLBPA(flush_guest_tlb_s1, vmalle1is, ish)
/* Flush local TLBs, current VMID, stage-2 for ipa address */
TLB_HELPER_IPA(__flush_guest_tlb_one_s2_local, ipas2e1)
@@ -153,7 +171,15 @@ static inline void flush_guest_tlb_range_ipa_local(paddr_t ipa,
__flush_guest_tlb_one_s2_local(ipa);
ipa += PAGE_SIZE;
}
- /* Final dsb() and isb() are done in following invocation */
+
+ if ( cpus_have_const_cap(ARM_HAS_NTLBPA) )
+ {
+ dsb(nsh);
+ isb();
+ return;
+ }
+
+ /* Otherwise the final dsb() and isb() are done in the following invocation */
flush_guest_tlb_s1_local();
}
@@ -185,7 +211,15 @@ static inline void flush_guest_tlb_range_ipa(paddr_t ipa,
__flush_guest_tlb_one_s2(ipa);
ipa += PAGE_SIZE;
}
- /* Final dsb() and isb() are done in following invocation */
+
+ if ( cpus_have_const_cap(ARM_HAS_NTLBPA) )
+ {
+ dsb(ish);
+ isb();
+ return;
+ }
+
+ /* Otherwise the final dsb() and isb() are done in the following invocation */
flush_guest_tlb_s1();
}
diff --git a/xen/arch/arm/include/asm/cpufeature.h b/xen/arch/arm/include/asm/cpufeature.h
index b6df188011..af4e9bdf06 100644
--- a/xen/arch/arm/include/asm/cpufeature.h
+++ b/xen/arch/arm/include/asm/cpufeature.h
@@ -76,8 +76,9 @@
#define ARM_WORKAROUND_BHB_SMCC_3 15
#define ARM_HAS_SB 16
#define ARM64_WORKAROUND_1508412 17
+#define ARM_HAS_NTLBPA 18
-#define ARM_NCAPS 18
+#define ARM_NCAPS 19
#ifndef __ASSEMBLY__
@@ -269,7 +270,8 @@ struct cpuinfo_arm {
unsigned long ets:4;
unsigned long __res1:4;
unsigned long afp:4;
- unsigned long __res2:12;
+ unsigned long ntlbpa:4;
+ unsigned long __res2:8;
unsigned long ecbhb:4;
/* MMFR2 */
@@ -430,8 +432,24 @@ struct cpuinfo_arm {
register_t bits[1];
} aux32;
- struct {
+ union {
register_t bits[6];
+ struct {
+ /* MMFR0 */
+ unsigned long __res0:32;
+ /* MMFR1 */
+ unsigned long __res1:32;
+ /* MMFR2 */
+ unsigned long __res2:32;
+ /* MMFR3 */
+ unsigned long __res3:32;
+ /* MMFR4 */
+ unsigned long __res4:32;
+ /* MMFR5 */
+ unsigned long __res5_0:4;
+ unsigned long ntlbpa:4;
+ unsigned long __res5_1:24;
+ };
} mm32;
struct {
diff --git a/xen/arch/arm/include/asm/processor.h b/xen/arch/arm/include/asm/processor.h
index 92c8bc1a31..01f3cb16a0 100644
--- a/xen/arch/arm/include/asm/processor.h
+++ b/xen/arch/arm/include/asm/processor.h
@@ -458,9 +458,16 @@
/* FSR long format */
#define FSRL_STATUS_DEBUG (_AC(0x22,UL)<<0)
+#ifdef CONFIG_ARM_32
+#define MM32_NTLBPA_SUPPORT_NI 0x0
+#define MM32_NTLBPA_SUPPORT_IMP 0x1
+#endif
+
#ifdef CONFIG_ARM_64
#define MM64_VMID_8_BITS_SUPPORT 0x0
#define MM64_VMID_16_BITS_SUPPORT 0x2
+#define MM64_NTLBPA_SUPPORT_NI 0x0
+#define MM64_NTLBPA_SUPPORT_IMP 0x1
#endif
#ifndef __ASSEMBLY__
--
2.43.0