|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen master] x86/NMI: allow processing unknown NMIs when watchdog is enabled
commit 3ea2ba980afe7356c613c8e1ba00d223d1c25412
Author: Ross Lagerwall <ross.lagerwall@xxxxxxxxxx>
AuthorDate: Thu Aug 28 16:11:37 2014 +0200
Commit: Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Aug 28 16:11:37 2014 +0200
x86/NMI: allow processing unknown NMIs when watchdog is enabled
Change NMI processing so that if watchdog=force is passed on the
command-line and the NMI is not caused by a perf counter overflow (i.e.
likely not a watchdog "tick"), the NMI is handled by the unknown NMI
handler.
This allows injection of NMIs from IPMI controllers that don't set the
IOCK/SERR bits to trigger the unknown NMI handler rather than be
ignored.
Signed-off-by: Ross Lagerwall <ross.lagerwall@xxxxxxxxxx>
Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Fix command line parsing (don't enable the watchdog on e.g.
"watchdog=xyz").
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
docs/misc/xen-command-line.markdown | 6 +++-
xen/arch/x86/nmi.c | 47 +++++++++++++++++++++++++++++++++-
xen/arch/x86/traps.c | 7 +++--
xen/include/asm-x86/apic.h | 2 +-
xen/include/asm-x86/nmi.h | 3 ++
5 files changed, 57 insertions(+), 8 deletions(-)
diff --git a/docs/misc/xen-command-line.markdown
b/docs/misc/xen-command-line.markdown
index 5f4680f..af93e17 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1248,12 +1248,14 @@ As the BTS virtualisation is not 100% safe and because
of the nehalem quirk
don't use the vpmu flag on production systems with Intel cpus!
### watchdog
-> `= <boolean>`
+> `= force | <boolean>`
> Default: `false`
Run an NMI watchdog on each processor. If a processor is stuck for
-longer than the **watchdog\_timeout**, a panic occurs.
+longer than the **watchdog\_timeout**, a panic occurs. When `force` is
+specified, in addition to running an NMI watchdog on each processor,
+unknown NMIs will still be processed.
### watchdog\_timeout
> `= <integer>`
diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
index c4427a6..7d15d5b 100644
--- a/xen/arch/x86/nmi.c
+++ b/xen/arch/x86/nmi.c
@@ -43,7 +43,26 @@ static DEFINE_PER_CPU(unsigned int, nmi_timer_ticks);
/* opt_watchdog: If true, run a watchdog NMI on each processor. */
bool_t __initdata opt_watchdog = 0;
-boolean_param("watchdog", opt_watchdog);
+
+/* watchdog_force: If true, process unknown NMIs when running the watchdog. */
+bool_t watchdog_force = 0;
+
+static void __init parse_watchdog(char *s)
+{
+ switch ( parse_bool(s) )
+ {
+ case 0:
+ opt_watchdog = 0;
+ return;
+ case 1:
+ opt_watchdog = 1;
+ return;
+ }
+
+ if ( !strcmp(s, "force") )
+ watchdog_force = opt_watchdog = 1;
+}
+custom_param("watchdog", parse_watchdog);
/* opt_watchdog_timeout: Number of seconds to wait before panic. */
static unsigned int opt_watchdog_timeout = 5;
@@ -82,6 +101,7 @@ int nmi_active;
#define K7_EVNTSEL_USR (1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+#define K7_EVENT_WIDTH 32
#define P6_EVNTSEL0_ENABLE (1 << 22)
#define P6_EVNTSEL_INT (1 << 20)
@@ -89,10 +109,12 @@ int nmi_active;
#define P6_EVNTSEL_USR (1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
#define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c
+#define P6_EVENT_WIDTH 32
#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
#define P4_CCCR_OVF_PMI0 (1<<26)
#define P4_CCCR_OVF_PMI1 (1<<27)
+#define P4_CCCR_OVF (1<<31)
#define P4_CCCR_THRESHOLD(N) ((N)<<20)
#define P4_CCCR_COMPLEMENT (1<<19)
#define P4_CCCR_COMPARE (1<<18)
@@ -432,8 +454,10 @@ int __init watchdog_setup(void)
return 0;
}
-void nmi_watchdog_tick(const struct cpu_user_regs *regs)
+/* Returns false if this was not a watchdog NMI, true otherwise */
+bool_t nmi_watchdog_tick(const struct cpu_user_regs *regs)
{
+ bool_t watchdog_tick = 1;
unsigned int sum = this_cpu(nmi_timer_ticks);
if ( (this_cpu(last_irq_sums) == sum) && watchdog_enabled() )
@@ -459,8 +483,15 @@ void nmi_watchdog_tick(const struct cpu_user_regs *regs)
if ( nmi_perfctr_msr )
{
+ uint64_t msr_content;
+
+ /* Work out if this is a watchdog tick by checking for overflow. */
if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 )
{
+ rdmsrl(MSR_P4_IQ_CCCR0, msr_content);
+ if ( !(msr_content & P4_CCCR_OVF) )
+ watchdog_tick = 0;
+
/*
* P4 quirks:
* - An overflown perfctr will assert its interrupt
@@ -473,14 +504,26 @@ void nmi_watchdog_tick(const struct cpu_user_regs *regs)
}
else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 )
{
+ rdmsrl(MSR_P6_PERFCTR0, msr_content);
+ if ( msr_content & (1ULL << P6_EVENT_WIDTH) )
+ watchdog_tick = 0;
+
/*
* Only P6 based Pentium M need to re-unmask the apic vector but
* it doesn't hurt other P6 variants.
*/
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
+ else if ( nmi_perfctr_msr == MSR_K7_PERFCTR0 )
+ {
+ rdmsrl(MSR_K7_PERFCTR0, msr_content);
+ if ( msr_content & (1ULL << K7_EVENT_WIDTH) )
+ watchdog_tick = 0;
+ }
write_watchdog_counter(NULL);
}
+
+ return watchdog_tick;
}
/*
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 71be2ae..7f5306f 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -3306,14 +3306,15 @@ void do_nmi(const struct cpu_user_regs *regs)
{
unsigned int cpu = smp_processor_id();
unsigned char reason;
+ bool_t handle_unknown = 0;
++nmi_count(cpu);
if ( nmi_callback(regs, cpu) )
return;
- if ( nmi_watchdog )
- nmi_watchdog_tick(regs);
+ if ( !nmi_watchdog || (!nmi_watchdog_tick(regs) && watchdog_force) )
+ handle_unknown = 1;
/* Only the BSP gets external NMIs from the system. */
if ( cpu == 0 )
@@ -3323,7 +3324,7 @@ void do_nmi(const struct cpu_user_regs *regs)
pci_serr_error(regs);
if ( reason & 0x40 )
io_check_error(regs);
- if ( !(reason & 0xc0) && !nmi_watchdog )
+ if ( !(reason & 0xc0) && handle_unknown )
unknown_nmi_error(regs, reason);
}
}
diff --git a/xen/include/asm-x86/apic.h b/xen/include/asm-x86/apic.h
index 5d7623f..6697245 100644
--- a/xen/include/asm-x86/apic.h
+++ b/xen/include/asm-x86/apic.h
@@ -206,7 +206,7 @@ extern void release_lapic_nmi(void);
extern void self_nmi(void);
extern void disable_timer_nmi_watchdog(void);
extern void enable_timer_nmi_watchdog(void);
-extern void nmi_watchdog_tick (const struct cpu_user_regs *regs);
+extern bool_t nmi_watchdog_tick (const struct cpu_user_regs *regs);
extern int APIC_init_uniprocessor (void);
extern void disable_APIC_timer(void);
extern void enable_APIC_timer(void);
diff --git a/xen/include/asm-x86/nmi.h b/xen/include/asm-x86/nmi.h
index 4504fe1..2c92db9 100644
--- a/xen/include/asm-x86/nmi.h
+++ b/xen/include/asm-x86/nmi.h
@@ -8,6 +8,9 @@ struct cpu_user_regs;
/* Watchdog boolean from the command line */
extern bool_t opt_watchdog;
+
+/* Watchdog force parameter from the command line */
+extern bool_t watchdog_force;
typedef int (*nmi_callback_t)(const struct cpu_user_regs *regs, int cpu);
--
generated by git-patchbot for /home/xen/git/xen.git#master
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |