[Xen-devel] [PATCH v2 4/6] xen/x86: disable global pages for domains with XPTI active
Instead of flushing the TLB from global pages when switching address
spaces with XPTI being active, just disable global pages completely
via %cr4 when a domain subject to XPTI is active. This avoids the
need for extra TLB flushes, as loading %cr3 will then remove all TLB
entries.
Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
 xen/arch/x86/cpu/mtrr/generic.c | 32 +++++++++++++++++++++-----------
 xen/arch/x86/flushtlb.c         | 39 +++++++++++++++++++++++++--------------
 xen/arch/x86/x86_64/entry.S     | 10 ----------
 xen/include/asm-x86/domain.h    |  3 ++-
 4 files changed, 48 insertions(+), 36 deletions(-)
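For reference, here is a condensed restatement (not part of the patch
itself) of the flush logic consolidated into the new do_flush_tlb()
helper in flushtlb.c below; it only relies on the read_cr3()/read_cr4()/
write_cr4()/barrier() helpers already used in the diff. Note that with
this patch a domain subject to XPTI never runs with CR4.PGE set, so only
the plain CR3-reload path is taken while such a domain is active:

static void do_flush_tlb_sketch(unsigned long cr3)
{
    unsigned long cr4 = read_cr4();

    if ( cr4 & X86_CR4_PGE )
    {
        /* Global pages enabled: toggling CR4.PGE flushes the complete
           TLB, including global entries. */
        write_cr4(cr4 & ~X86_CR4_PGE);
        if ( cr3 )
            asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
        else
            barrier();
        write_cr4(cr4);
    }
    else
    {
        /* No global pages (e.g. an XPTI domain is active): reloading
           CR3 already removes all TLB entries, no CR4 writes needed. */
        if ( !cr3 )
            cr3 = read_cr3();
        asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
    }
}
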
diff --git a/xen/arch/x86/cpu/mtrr/generic.c b/xen/arch/x86/cpu/mtrr/generic.c
index e9c0e5e059..d705138100 100644
--- a/xen/arch/x86/cpu/mtrr/generic.c
+++ b/xen/arch/x86/cpu/mtrr/generic.c
@@ -400,8 +400,10 @@ static DEFINE_SPINLOCK(set_atomicity_lock);
* has been called.
*/
-static void prepare_set(void)
+static bool prepare_set(void)
{
+ unsigned long cr4;
+
/* Note that this is not ideal, since the cache is only flushed/disabled
for this CPU while the MTRRs are changed, but changing this requires
more invasive changes to the way the kernel boots */
@@ -412,18 +414,22 @@ static void prepare_set(void)
write_cr0(read_cr0() | X86_CR0_CD);
wbinvd();
- /* TLB flushing here relies on Xen always using CR4.PGE. */
- BUILD_BUG_ON(!(XEN_MINIMAL_CR4 & X86_CR4_PGE));
- write_cr4(read_cr4() & ~X86_CR4_PGE);
+ cr4 = read_cr4();
+ if (cr4 & X86_CR4_PGE)
+ write_cr4(cr4 & ~X86_CR4_PGE);
+ else
+ asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
/* Save MTRR state */
rdmsrl(MSR_MTRRdefType, deftype);
/* Disable MTRRs, and set the default type to uncached */
mtrr_wrmsr(MSR_MTRRdefType, deftype & ~0xcff);
+
+ return !!(cr4 & X86_CR4_PGE);
}
-static void post_set(void)
+static void post_set(bool pge)
{
/* Intel (P6) standard MTRRs */
mtrr_wrmsr(MSR_MTRRdefType, deftype);
@@ -432,7 +438,10 @@ static void post_set(void)
write_cr0(read_cr0() & ~X86_CR0_CD);
/* Reenable CR4.PGE (also flushes the TLB) */
- write_cr4(read_cr4() | X86_CR4_PGE);
+ if (pge)
+ write_cr4(read_cr4() | X86_CR4_PGE);
+ else
+ asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
spin_unlock(&set_atomicity_lock);
}
@@ -441,14 +450,15 @@ static void generic_set_all(void)
{
unsigned long mask, count;
unsigned long flags;
+ bool pge;
local_irq_save(flags);
- prepare_set();
+ pge = prepare_set();
/* Actually set the state */
mask = set_mtrr_state();
- post_set();
+ post_set(pge);
local_irq_restore(flags);
/* Use the atomic bitops to update the global mask */
@@ -457,7 +467,6 @@ static void generic_set_all(void)
set_bit(count, &smp_changes_mask);
mask >>= 1;
}
-
}
static void generic_set_mtrr(unsigned int reg, unsigned long base,
@@ -474,11 +483,12 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
{
unsigned long flags;
struct mtrr_var_range *vr;
+ bool pge;
vr = &mtrr_state.var_ranges[reg];
local_irq_save(flags);
- prepare_set();
+ pge = prepare_set();
if (size == 0) {
/* The invalid bit is kept in the mask, so we simply clear the
@@ -499,7 +509,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
mtrr_wrmsr(MSR_IA32_MTRR_PHYSMASK(reg), vr->mask);
}
- post_set();
+ post_set(pge);
local_irq_restore(flags);
}
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index e4ea4f3297..186d9099f6 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -72,20 +72,39 @@ static void post_flush(u32 t)
this_cpu(tlbflush_time) = t;
}
+static void do_flush_tlb(unsigned long cr3)
+{
+ unsigned long cr4;
+
+ cr4 = read_cr4();
+ if ( cr4 & X86_CR4_PGE )
+ {
+ write_cr4(cr4 & ~X86_CR4_PGE);
+ if ( cr3 )
+ asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+ else
+ barrier();
+ write_cr4(cr4);
+ }
+ else
+ {
+ if ( !cr3 )
+ cr3 = read_cr3();
+ asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+ }
+}
+
void write_cr3(unsigned long cr3)
{
- unsigned long flags, cr4;
+ unsigned long flags;
u32 t;
/* This non-reentrant function is sometimes called in interrupt context. */
local_irq_save(flags);
t = pre_flush();
- cr4 = read_cr4();
- write_cr4(cr4 & ~X86_CR4_PGE);
- asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
- write_cr4(cr4);
+ do_flush_tlb(cr3);
post_flush(t);
@@ -123,22 +142,14 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
u32 t = pre_flush();
if ( !cpu_has_invpcid )
- {
- unsigned long cr4 = read_cr4();
-
- write_cr4(cr4 & ~X86_CR4_PGE);
- barrier();
- write_cr4(cr4);
- }
+ do_flush_tlb(0);
else
- {
/*
* Using invpcid to flush all mappings works
* regardless of whether PCID is enabled or not.
* It is faster than read-modify-write CR4.
*/
invpcid_flush_all();
- }
post_flush(t);
}
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index cdcdc2c40a..a8d38e7eb2 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -73,13 +73,8 @@ restore_all_guest:
ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
rep movsq
.Lrag_copy_done:
- mov STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
- mov %rdi, %rsi
- and $~X86_CR4_PGE, %rdi
- mov %rdi, %cr4
mov %rax, %cr3
- mov %rsi, %cr4
.Lrag_cr3_end:
ALTERNATIVE_NOP .Lrag_cr3_start, .Lrag_cr3_end, X86_FEATURE_NO_XPTI
@@ -136,12 +131,7 @@ restore_all_xen:
* so "g" will have to do.
*/
UNLIKELY_START(g, exit_cr3)
- mov %cr4, %rdi
- mov %rdi, %rsi
- and $~X86_CR4_PGE, %rdi
- mov %rdi, %cr4
mov %rax, %cr3
- mov %rsi, %cr4
UNLIKELY_END(exit_cr3)
/* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 0cc37dea05..316418a6fe 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -622,7 +622,8 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
X86_CR4_SMAP | X86_CR4_OSXSAVE | \
X86_CR4_FSGSBASE)) \
| ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)) \
- & ~X86_CR4_DE)
+ & ~(X86_CR4_DE | \
+ ((v)->domain->arch.pv_domain.xpti ? X86_CR4_PGE : 0)))
#define real_cr4_to_pv_guest_cr4(c) \
((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \
X86_CR4_OSXSAVE | X86_CR4_SMEP | \
--
2.13.6
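
As a side note on the domain.h change: the effective CR4 for a PV vcpu
now always has PGE masked out when the domain runs with XPTI. Below is a
small, self-contained sketch of that masking behaviour; the function
guest_cr4_to_real_cr4() here is a hypothetical stand-in for illustration,
not the actual pv_guest_cr4_to_real_cr4() macro:

#include <stdbool.h>
#include <stdio.h>

#define X86_CR4_DE   0x00000008UL   /* bit 3 */
#define X86_CR4_PAE  0x00000020UL   /* bit 5 */
#define X86_CR4_PGE  0x00000080UL   /* bit 7 */

/* Simplified stand-in for the DE/PGE part of the conversion: DE is
   always dropped, PGE is additionally dropped for XPTI domains. */
static unsigned long guest_cr4_to_real_cr4(unsigned long cr4, bool xpti)
{
    return cr4 & ~(X86_CR4_DE | (xpti ? X86_CR4_PGE : 0));
}

int main(void)
{
    unsigned long cr4 = X86_CR4_PAE | X86_CR4_PGE | X86_CR4_DE;

    printf("non-XPTI: %#lx\n", guest_cr4_to_real_cr4(cr4, false)); /* PGE kept */
    printf("XPTI:     %#lx\n", guest_cr4_to_real_cr4(cr4, true));  /* PGE clear */
    return 0;
}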
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel