|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 8/8] xen/x86: Additional SMAP modes to work around buggy 32bit PV guests
Experimentally, older Linux guests perform construction of `init` with user
pagetable mappings. This is fine for native systems as such a guest would not
set CR4.SMAP itself.
However if Xen uses SMAP itself, 32bit PV guests (whose kernels run in ring1)
are also affected. Older Linux guests end up spinning in a loop assuming that
the SMAP violation pagefaults are spurious, and make no further progress.
One option is to disable SMAP completely, but this is unreasonable. A better
alternative is to disable SMAP only in the context of 32bit PV guests, but
reduces the effectiveness SMAP security. A 3rd option is for Xen to fix up
behind a 32bit guest if it were SMAP-aware. It is a heuristic, and does
result in a guest-visible state change, but allows Xen to keep CR4.SMAP
unconditionally enabled.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Jan Beulich <JBeulich@xxxxxxxx>
---
docs/misc/xen-command-line.markdown | 25 ++++++++++++++++++--
xen/arch/x86/domain.c | 16 +++++++++++--
xen/arch/x86/setup.c | 31 +++++++++++++++++++++----
xen/arch/x86/traps.c | 43 ++++++++++++++++++++++++-----------
xen/include/asm-x86/processor.h | 10 ++++++++
5 files changed, 103 insertions(+), 22 deletions(-)
diff --git a/docs/misc/xen-command-line.markdown
b/docs/misc/xen-command-line.markdown
index aa684c0..a63d2b2 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1261,11 +1261,32 @@ Set the serial transmit buffer size.
Flag to enable Supervisor Mode Execution Protection
### smap
-> `= <boolean>`
+> `= <boolean> | compat | fixup`
> Default: `true`
-Flag to enable Supervisor Mode Access Prevention
+Handling of Supervisor Mode Access Prevention.
+
+32bit PV guest kernels qualify as supervisor code, as they execute in ring 1.
+If Xen uses SMAP protection itself, a PV guest which is not SMAP aware may
+suffer unexpected pagefaults which it cannot handle. (Experimentally, there
+are 32bit PV guests which fall foul of SMAP enforcement and spin in an
+infinite loop taking pagefaults early on boot.)
+
+Two further SMAP modes are introduced to work around buggy 32bit PV guests to
+prevent functional regressions of VMs on newer hardware. At any point if the
+guest sets `CR4.SMAP` itself, it is deemed aware, and **compat/fixup** cease
+to apply.
+
+A SMAP mode of **compat** causes Xen to disable `CR4.SMAP` in the context of
+an unaware 32bit PV guest. This prevents the guest from being subject to SMAP
+enforcement, but also prevents Xen from benefiting from the added security
+checks.
+
+A SMAP mode of **fixup** causes Xen to set `EFLAGS.AC` when discovering a SMAP
+pagefault in the context of an unaware 32bit PV guest. This allows Xen to
+retain the added security from SMAP checks, but results in a guest-visible
+state change which it might object to.
### snb\_igd\_quirk
> `= <boolean> | cap | <integer>`
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 18c3c62..045a2b9 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -687,9 +687,10 @@ void arch_domain_unpause(struct domain *d)
* - TSD for vtsc
* - DE for %%dr4/5 emulation
* - OSXSAVE for xsetbv emulation
+ * - SMAP for smap_mode_{compat,fixup} handling
*/
#define PV_CR4_SHADOW \
- (X86_CR4_TSD | X86_CR4_DE | X86_CR4_OSXSAVE)
+ (X86_CR4_TSD | X86_CR4_DE | X86_CR4_OSXSAVE | X86_CR4_SMAP)
/* CR4 bits a guest controls. */
#define PV_CR4_GUEST (X86_CR4_TSD)
@@ -700,7 +701,7 @@ void arch_domain_unpause(struct domain *d)
*/
#define PV_CR4_READ \
(X86_CR4_PAE | X86_CR4_PGE |X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT | \
- X86_CR4_FSGSBASE | X86_CR4_SMEP | X86_CR4_SMAP)
+ X86_CR4_FSGSBASE | X86_CR4_SMEP)
/*
* These are the masks of CR4 bits (subject to hardware availability) which a
@@ -730,6 +731,12 @@ static int __init init_pv_cr4_masks(void)
if ( cpu_has_fsgsbase )
pv_cr4_mask &= ~X86_CR4_FSGSBASE;
+ /*
+ * 32bit PV guests may attempt to modify SMAP.
+ */
+ if ( cpu_has_smap )
+ compat_pv_cr4_mask &= ~X86_CR4_SMAP;
+
BUILD_BUG_ON(PV_CR4_SHADOW & PV_CR4_READ);
return 0;
@@ -784,6 +791,11 @@ unsigned long pv_guest_cr4_to_real_cr4(const struct vcpu
*v)
if ( v->domain->arch.vtsc )
cr4 |= X86_CR4_TSD;
+ /* Disable SMAP behind unaware 32bit PV guests. */
+ if ( (smap_mode == smap_mode_compat) && is_pv_32bit_vcpu(v) &&
+ ((v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_SMAP) == 0) )
+ cr4 &= ~X86_CR4_SMAP;
+
return cr4;
}
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index ff34670..36fce42 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -63,10 +63,6 @@
static bool_t __initdata disable_smep;
invbool_param("smep", disable_smep);
-/* smap: Enable/disable Supervisor Mode Access Prevention (default on). */
-static bool_t __initdata disable_smap;
-invbool_param("smap", disable_smap);
-
/* Boot dom0 in pvh mode */
static bool_t __initdata opt_dom0pvh;
boolean_param("dom0pvh", opt_dom0pvh);
@@ -138,6 +134,29 @@ static void __init parse_acpi_param(char *s)
}
}
+enum xen_smap_mode smap_mode __read_mostly = smap_mode_enable;
+static void __init parse_smap(char *s)
+{
+ switch ( parse_bool(s) )
+ {
+ case 1:
+ smap_mode = smap_mode_enable;
+ break;
+
+ case 0:
+ smap_mode = smap_mode_disable;
+ break;
+
+ default:
+ if ( !strcmp(s, "compat") )
+ smap_mode = smap_mode_compat;
+ else if ( !strcmp(s, "fixup") )
+ smap_mode = smap_mode_fixup;
+ break;
+ }
+}
+custom_param("smap", parse_smap);
+
static const module_t *__initdata initial_images;
static unsigned int __initdata nr_initial_images;
@@ -1296,10 +1315,12 @@ void __init noreturn __start_xen(unsigned long mbi_p)
if ( cpu_has_smep )
set_in_cr4(X86_CR4_SMEP);
- if ( disable_smap )
+ if ( smap_mode == smap_mode_disable )
setup_clear_cpu_cap(X86_FEATURE_SMAP);
if ( cpu_has_smap )
set_in_cr4(X86_CR4_SMAP);
+ else
+ smap_mode = smap_mode_disable;
if ( cpu_has_fsgsbase )
set_in_cr4(X86_CR4_FSGSBASE);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 1ab870d..149a255 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1437,21 +1437,38 @@ static int fixup_page_fault(unsigned long addr, struct
cpu_user_regs *regs)
return 0;
}
- if ( guest_kernel_mode(v, regs) &&
- !(regs->error_code & (PFEC_reserved_bit | PFEC_insn_fetch)) &&
- (regs->error_code & PFEC_write_access) )
- {
- if ( VM_ASSIST(d, writable_pagetables) &&
- /* Do not check if access-protection fault since the page may
- legitimately be not present in shadow page tables */
- (paging_mode_enabled(d) ||
- (regs->error_code & PFEC_page_present)) &&
- ptwr_do_page_fault(v, addr, regs) )
- return EXCRET_fault_fixed;
+ if ( guest_kernel_mode(v, regs) )
+ {
+ if (!(regs->error_code & (PFEC_reserved_bit | PFEC_insn_fetch)) &&
+ (regs->error_code & PFEC_write_access) )
+ {
+ if ( VM_ASSIST(d, writable_pagetables) &&
+ /* Do not check if access-protection fault since the page may
+ legitimately be not present in shadow page tables */
+ (paging_mode_enabled(d) ||
+ (regs->error_code & PFEC_page_present)) &&
+ ptwr_do_page_fault(v, addr, regs) )
+ return EXCRET_fault_fixed;
- if ( is_hardware_domain(d) && (regs->error_code & PFEC_page_present) &&
- mmio_ro_do_page_fault(v, addr, regs) )
+ if ( is_hardware_domain(d) && (regs->error_code &
PFEC_page_present) &&
+ mmio_ro_do_page_fault(v, addr, regs) )
+ return EXCRET_fault_fixed;
+ }
+
+ /*
+ * SMAP violation behind an unaware 32bit PV guest kernel? Set
+ * EFLAGS.AC behind its back and try again.
+ */
+ if ( (smap_mode == smap_mode_fixup) && is_pv_32bit_domain(d) &&
+ ((regs->error_code &
+ (PFEC_insn_fetch | PFEC_reserved_bit |
+ PFEC_user_mode | PFEC_page_present)) == PFEC_page_present) &&
+ ((v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_SMAP) == 0) &&
+ ((regs->eflags & X86_EFLAGS_AC) == 0) )
+ {
+ regs->eflags |= X86_EFLAGS_AC;
return EXCRET_fault_fixed;
+ }
}
/* For non-external shadowed guests, we fix up both their own
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index 5fb340f..22d5cfa 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -581,6 +581,16 @@ enum get_cpu_vendor {
int get_cpu_vendor(const char vendor_id[], enum get_cpu_vendor);
void pv_cpuid(struct cpu_user_regs *regs);
+enum xen_smap_mode {
+ smap_mode_enable, /* Use SMAP. */
+ smap_mode_disable, /* Don't use SMAP. */
+ smap_mode_compat, /* Context switch CR4.SMAP behind an unaware 32bit */
+ /* PV guest. */
+ smap_mode_fixup, /* Set EFLAGAS.AC on a SMAP fault behind an */
+ /* unaware 32bit PV guest. */
+};
+extern enum xen_smap_mode smap_mode;
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_X86_PROCESSOR_H */
--
1.7.10.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |