
[Xen-devel] [PATCH 8/8] xen/x86: Additional SMAP modes to work around buggy 32bit PV guests



Experimentally, older Linux guests construct `init` via user pagetable
mappings.  This is fine on native systems, as such a guest would not set
CR4.SMAP itself.

However, if Xen uses SMAP itself, 32bit PV guests (whose kernels run in
ring 1) are also affected.  Older Linux guests end up spinning in a loop,
assuming that the SMAP violation pagefaults are spurious, and make no further
progress.

One option is to disable SMAP completely, but this is unreasonable.  A better
alternative is to disable SMAP only in the context of 32bit PV guests, but
this reduces the effectiveness of SMAP's security.  A third option is for Xen
to fix up behind an unaware 32bit guest, as if the guest were SMAP-aware.
This is a heuristic, and does result in a guest-visible state change, but it
allows Xen to keep CR4.SMAP unconditionally enabled.
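
With this patch applied, the fixup behaviour would be selected on Xen's boot
line, e.g. via an entry along these lines (an illustrative GRUB fragment; the
image path is a placeholder):

    multiboot /boot/xen.gz smap=fixup

The compat behaviour is selected analogously with `smap=compat`.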

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Jan Beulich <JBeulich@xxxxxxxx>
---
 docs/misc/xen-command-line.markdown |   25 ++++++++++++++++++--
 xen/arch/x86/domain.c               |   16 +++++++++++--
 xen/arch/x86/setup.c                |   31 +++++++++++++++++++++----
 xen/arch/x86/traps.c                |   43 ++++++++++++++++++++++++-----------
 xen/include/asm-x86/processor.h     |   10 ++++++++
 5 files changed, 103 insertions(+), 22 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index aa684c0..a63d2b2 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1261,11 +1261,32 @@ Set the serial transmit buffer size.
 Flag to enable Supervisor Mode Execution Protection
 
 ### smap
-> `= <boolean>`
+> `= <boolean> | compat | fixup`
 
 > Default: `true`
 
-Flag to enable Supervisor Mode Access Prevention
+Handling of Supervisor Mode Access Prevention.
+
+32bit PV guest kernels qualify as supervisor code, as they execute in ring 1.
+If Xen uses SMAP protection itself, a PV guest which is not SMAP-aware may
+suffer unexpected pagefaults which it cannot handle. (Experimentally, there
+are 32bit PV guests which fall foul of SMAP enforcement and spin in an
+infinite loop taking pagefaults early on boot.)
+
+Two further SMAP modes are introduced to work around buggy 32bit PV guests,
+preventing functional regressions of such VMs on newer hardware.  If at any
+point the guest sets `CR4.SMAP` itself, it is deemed SMAP-aware, and
+**compat**/**fixup** cease to apply.
+
+A SMAP mode of **compat** causes Xen to disable `CR4.SMAP` in the context of
+an unaware 32bit PV guest.  This prevents the guest from being subject to SMAP
+enforcement, but also prevents Xen from benefiting from the added security
+checks.
+
+A SMAP mode of **fixup** causes Xen to set `EFLAGS.AC` when discovering a SMAP
+pagefault in the context of an unaware 32bit PV guest.  This allows Xen to
+retain the added security from SMAP checks, but results in a guest-visible
+state change to which the guest might object.
 
 ### snb\_igd\_quirk
 > `= <boolean> | cap | <integer>`
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 18c3c62..045a2b9 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -687,9 +687,10 @@ void arch_domain_unpause(struct domain *d)
  *  - TSD for vtsc
  *  - DE for %%dr4/5 emulation
  *  - OSXSAVE for xsetbv emulation
+ *  - SMAP for smap_mode_{compat,fixup} handling
  */
 #define PV_CR4_SHADOW                           \
-    (X86_CR4_TSD | X86_CR4_DE | X86_CR4_OSXSAVE)
+    (X86_CR4_TSD | X86_CR4_DE | X86_CR4_OSXSAVE | X86_CR4_SMAP)
 
 /* CR4 bits a guest controls. */
 #define PV_CR4_GUEST (X86_CR4_TSD)
@@ -700,7 +701,7 @@ void arch_domain_unpause(struct domain *d)
  */
 #define PV_CR4_READ                                                     \
     (X86_CR4_PAE | X86_CR4_PGE |X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT |   \
-     X86_CR4_FSGSBASE | X86_CR4_SMEP | X86_CR4_SMAP)
+     X86_CR4_FSGSBASE | X86_CR4_SMEP)
 
 /*
  * These are the masks of CR4 bits (subject to hardware availability) which a
@@ -730,6 +731,12 @@ static int __init init_pv_cr4_masks(void)
     if ( cpu_has_fsgsbase )
         pv_cr4_mask &= ~X86_CR4_FSGSBASE;
 
+    /*
+     * 32bit PV guests may attempt to modify SMAP.
+     */
+    if ( cpu_has_smap )
+        compat_pv_cr4_mask &= ~X86_CR4_SMAP;
+
     BUILD_BUG_ON(PV_CR4_SHADOW & PV_CR4_READ);
 
     return 0;
@@ -784,6 +791,11 @@ unsigned long pv_guest_cr4_to_real_cr4(const struct vcpu *v)
     if ( v->domain->arch.vtsc )
         cr4 |= X86_CR4_TSD;
 
+    /* Disable SMAP behind unaware 32bit PV guests. */
+    if ( (smap_mode == smap_mode_compat) && is_pv_32bit_vcpu(v) &&
+         ((v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_SMAP) == 0) )
+        cr4 &= ~X86_CR4_SMAP;
+
     return cr4;
 }
 
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index ff34670..36fce42 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -63,10 +63,6 @@
 static bool_t __initdata disable_smep;
 invbool_param("smep", disable_smep);
 
-/* smap: Enable/disable Supervisor Mode Access Prevention (default on). */
-static bool_t __initdata disable_smap;
-invbool_param("smap", disable_smap);
-
 /* Boot dom0 in pvh mode */
 static bool_t __initdata opt_dom0pvh;
 boolean_param("dom0pvh", opt_dom0pvh);
@@ -138,6 +134,29 @@ static void __init parse_acpi_param(char *s)
     }
 }
 
+enum xen_smap_mode smap_mode __read_mostly = smap_mode_enable;
+static void __init parse_smap(char *s)
+{
+    switch ( parse_bool(s) )
+    {
+    case 1:
+        smap_mode = smap_mode_enable;
+        break;
+
+    case 0:
+        smap_mode = smap_mode_disable;
+        break;
+
+    default:
+        if ( !strcmp(s, "compat") )
+            smap_mode = smap_mode_compat;
+        else if ( !strcmp(s, "fixup") )
+            smap_mode = smap_mode_fixup;
+        break;
+    }
+}
+custom_param("smap", parse_smap);
+
 static const module_t *__initdata initial_images;
 static unsigned int __initdata nr_initial_images;
 
@@ -1296,10 +1315,12 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     if ( cpu_has_smep )
         set_in_cr4(X86_CR4_SMEP);
 
-    if ( disable_smap )
+    if ( smap_mode == smap_mode_disable )
         setup_clear_cpu_cap(X86_FEATURE_SMAP);
     if ( cpu_has_smap )
         set_in_cr4(X86_CR4_SMAP);
+    else
+        smap_mode = smap_mode_disable;
 
     if ( cpu_has_fsgsbase )
         set_in_cr4(X86_CR4_FSGSBASE);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 1ab870d..149a255 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1437,21 +1437,38 @@ static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
         return 0;
     }
 
-    if ( guest_kernel_mode(v, regs) &&
-         !(regs->error_code & (PFEC_reserved_bit | PFEC_insn_fetch)) &&
-         (regs->error_code & PFEC_write_access) )
-    {
-        if ( VM_ASSIST(d, writable_pagetables) &&
-             /* Do not check if access-protection fault since the page may
-                legitimately be not present in shadow page tables */
-             (paging_mode_enabled(d) ||
-              (regs->error_code & PFEC_page_present)) &&
-             ptwr_do_page_fault(v, addr, regs) )
-            return EXCRET_fault_fixed;
+    if ( guest_kernel_mode(v, regs) )
+    {
+        if ( !(regs->error_code & (PFEC_reserved_bit | PFEC_insn_fetch)) &&
+             (regs->error_code & PFEC_write_access) )
+        {
+            if ( VM_ASSIST(d, writable_pagetables) &&
+                 /* Do not check if access-protection fault since the page may
+                    legitimately be not present in shadow page tables */
+                 (paging_mode_enabled(d) ||
+                  (regs->error_code & PFEC_page_present)) &&
+                 ptwr_do_page_fault(v, addr, regs) )
+                return EXCRET_fault_fixed;
 
-        if ( is_hardware_domain(d) && (regs->error_code & PFEC_page_present) &&
-             mmio_ro_do_page_fault(v, addr, regs) )
+            if ( is_hardware_domain(d) && (regs->error_code & PFEC_page_present) &&
+                 mmio_ro_do_page_fault(v, addr, regs) )
+                return EXCRET_fault_fixed;
+        }
+
+        /*
+         * SMAP violation behind an unaware 32bit PV guest kernel? Set
+         * EFLAGS.AC behind its back and try again.
+         */
+        if ( (smap_mode == smap_mode_fixup) && is_pv_32bit_domain(d) &&
+             ((regs->error_code &
+               (PFEC_insn_fetch | PFEC_reserved_bit |
+                PFEC_user_mode | PFEC_page_present)) == PFEC_page_present) &&
+             ((v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_SMAP) == 0) &&
+             ((regs->eflags & X86_EFLAGS_AC) == 0) )
+        {
+            regs->eflags |= X86_EFLAGS_AC;
             return EXCRET_fault_fixed;
+        }
     }
 
     /* For non-external shadowed guests, we fix up both their own 
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index 5fb340f..22d5cfa 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -581,6 +581,16 @@ enum get_cpu_vendor {
 int get_cpu_vendor(const char vendor_id[], enum get_cpu_vendor);
 void pv_cpuid(struct cpu_user_regs *regs);
 
+enum xen_smap_mode {
+    smap_mode_enable,  /* Use SMAP.                                       */
+    smap_mode_disable, /* Don't use SMAP.                                 */
+    smap_mode_compat,  /* Context switch CR4.SMAP behind an unaware 32bit */
+                       /* PV guest.                                       */
+    smap_mode_fixup,   /* Set EFLAGS.AC on a SMAP fault behind an         */
+                       /* unaware 32bit PV guest.                         */
+};
+extern enum xen_smap_mode smap_mode;
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_X86_PROCESSOR_H */
-- 
1.7.10.4

