[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 3/3] x86/nmi: Fix shootdown of pcpus running in VMX non-root mode



c/s 7dd3b06ff "vmx: fix handling of NMI VMEXIT" fixed one issue but
inadvertently introduced a regression when it came to the NMI shootdown.  The
shootdown code worked by patching vector 2 in each IDT, but the introduced
direct call to do_nmi() bypassed this.

Instead of patching each IDT, take a different approach by updating the
existing dispatch table.  This allows for the removal of the remote IDT
patching and the removal of the nmi_crash() entry point.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Tim Deegan <tim@xxxxxxx>
---
 xen/arch/x86/crash.c            |   59 ++++++++++++++++-----------------------
 xen/arch/x86/x86_64/entry.S     |    9 ------
 xen/include/asm-x86/processor.h |    1 -
 3 files changed, 24 insertions(+), 45 deletions(-)

diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c
index c0b83df..e175871 100644
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -36,9 +36,11 @@ static unsigned int crashing_cpu;
 static DEFINE_PER_CPU_READ_MOSTLY(bool_t, crash_save_done);
 
 /* This becomes the NMI handler for non-crashing CPUs, when Xen is crashing. */
-void do_nmi_crash(struct cpu_user_regs *regs)
+static void noreturn do_nmi_crash(const struct cpu_user_regs *regs)
 {
-    int cpu = smp_processor_id();
+    unsigned int cpu = smp_processor_id();
+
+    stac();
 
     /* nmi_shootdown_cpus() should ensure that this assertion is correct. */
     ASSERT(cpu != crashing_cpu);
@@ -113,11 +115,10 @@ void do_nmi_crash(struct cpu_user_regs *regs)
         halt();
 }
 
-void nmi_crash(void);
 static void nmi_shootdown_cpus(void)
 {
     unsigned long msecs;
-    int i, cpu = smp_processor_id();
+    unsigned int cpu = smp_processor_id();
 
     disable_lapic_nmi_watchdog();
     local_irq_disable();
@@ -127,38 +128,26 @@ static void nmi_shootdown_cpus(void)
 
     cpumask_andnot(&waiting_to_crash, &cpu_online_map, cpumask_of(cpu));
 
-    /* Change NMI trap handlers.  Non-crashing pcpus get nmi_crash which
-     * invokes do_nmi_crash (above), which cause them to write state and
-     * fall into a loop.  The crashing pcpu gets the nop handler to
-     * cause it to return to this function ASAP.
+    /*
+     * Disable IST for MCEs to avoid stack corruption race conditions, and
+     * change the NMI hanlder to a nop to avoid deviation from this codepath.
      */
-    for ( i = 0; i < nr_cpu_ids; i++ )
-    {
-        if ( idt_tables[i] == NULL )
-            continue;
-
-        if ( i == cpu )
-        {
-            /*
-             * Disable the interrupt stack tables for this cpu's MCE and NMI 
-             * handlers, and alter the NMI handler to have no operation.  
-             * Disabling the stack tables prevents stack corruption race 
-             * conditions, while changing the handler helps prevent cascading 
-             * faults; we are certainly going to crash by this point.
-             *
-             * This update is safe from a security point of view, as this pcpu 
-             * is never going to try to sysret back to a PV vcpu.
-             */
-            _set_gate_lower(&idt_tables[i][TRAP_nmi],
-                            SYS_DESC_irq_gate, 0, &trap_nop);
-            set_ist(&idt_tables[i][TRAP_machine_check], IST_NONE);
-        }
-        else
-        {
-            /* Do not update stack table for other pcpus. */
-            _update_gate_addr_lower(&idt_tables[i][TRAP_nmi], &nmi_crash);
-        }
-    }
+    _set_gate_lower(&idt_tables[cpu][TRAP_nmi],
+                    SYS_DESC_irq_gate, 0, &trap_nop);
+    set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
+
+    /*
+     * Ideally would be:
+     *   exception_table[TRAP_nmi] = &do_nmi_crash;
+     *
+     * but the exception_table is read only.  Borrow and unused fixmap entry
+     * to construct a writable mapping.
+     */
+    set_fixmap(FIX_TBOOT_MAP_ADDRESS, __pa(&exception_table[TRAP_nmi]));
+    write_atomic((unsigned long *)
+                 (fix_to_virt(FIX_TBOOT_MAP_ADDRESS) +
+                  ((unsigned long)&exception_table[TRAP_nmi] & ~PAGE_MASK)),
+                 (unsigned long)&do_nmi_crash);
 
     /* Ensure the new callback function is set before sending out the NMI. */
     wmb();
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 062c690..2d25d57 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -669,15 +669,6 @@ handle_ist_exception:
         je    restore_all_guest
         jmp   compat_restore_all_guest
 
-ENTRY(nmi_crash)
-        pushq $0
-        movl $TRAP_nmi,4(%rsp)
-        /* Set AC to reduce chance of further SMAP faults */
-        SAVE_ALL STAC
-        movq %rsp,%rdi
-        callq do_nmi_crash /* Does not return */
-        ud2
-
 ENTRY(machine_check)
         pushq $0
         movl  $TRAP_machine_check,4(%rsp)
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index c703581..75c077c 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -530,7 +530,6 @@ DECLARE_TRAP_HANDLER(alignment_check);
 
 void trap_nop(void);
 void enable_nmis(void);
-void noreturn do_nmi_crash(struct cpu_user_regs *regs);
 void do_reserved_trap(struct cpu_user_regs *regs);
 
 void syscall_enter(void);
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.