[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [Patch] avoid deadlock during console output
Hi, during my tests for cpupools I've found an issue in console output. Sometimes the hypervisor hangs due to a deadlock if something is printed to the console via printk while a per-cpu scheduler lock is held by the printing processor. Inside printk an event is sent to dom0, which in some cases leads to a call of vcpu_wake, resulting in the deadlock. This problem also occurs when calling BUG while holding the lock. This issue is easily reproducible on a system with multiple cpus under low load by calling xm debug-keys r to dump the scheduler's run-queues. On my 4-core machine I need only about 5 calls to stop the machine. The attached patch solves the problem by avoiding sending the event in critical paths. Juergen -- Juergen Gross Principal Developer IP SW OS6 Telephone: +49 (0) 89 636 47950 Fujitsu Siemens Computers e-mail: juergen.gross@xxxxxxxxxxxxxxxxxxx Otto-Hahn-Ring 6 Internet: www.fujitsu-siemens.com D-81739 Muenchen Company details: www.fujitsu-siemens.com/imprint.html Signed-off-by: juergen.gross@xxxxxxxxxxxxxxxxxxx # HG changeset patch # User juergen.gross@xxxxxxxxxxxxxxxxxxx # Date 1236328387 -3600 # Node ID 0a7f637315e43205425da88aff3899c8e1ff6d11 # Parent 6315b66fbd5b25597ad2aa766aeda68d6852205d avoid deadlocks in console output diff -r 6315b66fbd5b -r 0a7f637315e4 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Fri Mar 06 08:46:08 2009 +0100 +++ b/xen/arch/x86/traps.c Fri Mar 06 09:33:07 2009 +0100 @@ -389,6 +389,7 @@ { watchdog_disable(); console_start_sync(); + console_enter_critical(); show_execution_state(regs); @@ -398,6 +399,7 @@ printk("Faulting linear address: %p\n", _p(cr2)); show_page_walk(cr2); } + console_exit_critical(); } panic("FATAL TRAP: vector = %d (%s)\n" @@ -545,7 +547,9 @@ DEBUGGER_trap_fatal(trapnr, regs); + console_enter_critical(); show_execution_state(regs); + console_exit_critical(); panic("FATAL TRAP: vector = %d (%s)\n" "[error_code=%04x]\n", trapnr, trapstr(trapnr), regs->error_code); @@ -866,7 +870,9 @@ if ( id == 
BUGFRAME_dump ) { + console_enter_critical(); show_execution_state(regs); + console_exit_critical(); regs->eip = (unsigned long)eip; return; } @@ -883,17 +889,21 @@ if ( id == BUGFRAME_warn ) { + console_enter_critical(); printk("Xen WARN at %.50s:%d\n", filename, lineno); show_execution_state(regs); + console_exit_critical(); regs->eip = (unsigned long)eip; return; } if ( id == BUGFRAME_bug ) { + console_enter_critical(); printk("Xen BUG at %.50s:%d\n", filename, lineno); DEBUGGER_trap_fatal(TRAP_invalid_op, regs); show_execution_state(regs); + console_exit_critical(); panic("Xen BUG at %.50s:%d\n", filename, lineno); } @@ -906,10 +916,12 @@ eip += sizeof(bug_str); predicate = is_kernel(bug_str.str) ? (char *)bug_str.str : "<unknown>"; + console_enter_critical(); printk("Assertion '%s' failed at %.50s:%d\n", predicate, filename, lineno); DEBUGGER_trap_fatal(TRAP_invalid_op, regs); show_execution_state(regs); + console_exit_critical(); panic("Assertion '%s' failed at %.50s:%d\n", predicate, filename, lineno); @@ -920,7 +932,9 @@ return; } DEBUGGER_trap_fatal(TRAP_invalid_op, regs); + console_enter_critical(); show_execution_state(regs); + console_exit_critical(); panic("FATAL TRAP: vector = %d (invalid opcode)\n", TRAP_invalid_op); } @@ -945,10 +959,12 @@ static void reserved_bit_page_fault( unsigned long addr, struct cpu_user_regs *regs) { + console_enter_critical(); printk("d%d:v%d: reserved bit in page table (ec=%04X)\n", current->domain->domain_id, current->vcpu_id, regs->error_code); show_page_walk(addr); show_execution_state(regs); + console_exit_critical(); } void propagate_page_fault(unsigned long addr, u16 error_code) @@ -1247,8 +1263,10 @@ DEBUGGER_trap_fatal(TRAP_page_fault, regs); + console_enter_critical(); show_execution_state(regs); show_page_walk(addr); + console_exit_critical(); panic("FATAL PAGE FAULT\n" "[error_code=%04x]\n" "Faulting linear address: %p\n", @@ -2757,7 +2775,9 @@ DEBUGGER_trap_fatal(TRAP_gp_fault, regs); hardware_gp: + 
console_enter_critical(); show_execution_state(regs); + console_exit_critical(); panic("GENERAL PROTECTION FAULT\n[error_code=%04x]\n", regs->error_code); } diff -r 6315b66fbd5b -r 0a7f637315e4 xen/common/schedule.c --- a/xen/common/schedule.c Fri Mar 06 08:46:08 2009 +0100 +++ b/xen/common/schedule.c Fri Mar 06 09:33:07 2009 +0100 @@ -930,10 +930,12 @@ for_each_online_cpu ( i ) { + console_enter_critical(); spin_lock(&per_cpu(schedule_data, i).schedule_lock); printk("CPU[%02d] ", i); SCHED_OP(dump_cpu_state, i); spin_unlock(&per_cpu(schedule_data, i).schedule_lock); + console_exit_critical(); } local_irq_restore(flags); diff -r 6315b66fbd5b -r 0a7f637315e4 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Fri Mar 06 08:46:08 2009 +0100 +++ b/xen/drivers/char/console.c Fri Mar 06 09:33:07 2009 +0100 @@ -414,6 +414,22 @@ * ***************************************************** */ +/* don't try to wake up dom0 if schedule lock might be held, as this could + result in a deadlock! */ + +static atomic_t console_crit_cnt = ATOMIC_INIT(0); + +void console_enter_critical(void) +{ + atomic_inc(&console_crit_cnt); +} + +void console_exit_critical(void) +{ + BUG_ON(atomic_read(&console_crit_cnt) == 0); + atomic_dec(&console_crit_cnt); +} + static void __putstr(const char *str) { int c; @@ -426,7 +442,8 @@ while ( (c = *str++) != '\0' ) putchar_console_ring(c); - send_guest_global_virq(dom0, VIRQ_CON_RING); + if (atomic_read(&console_crit_cnt) == 0) + send_guest_global_virq(dom0, VIRQ_CON_RING); } static int printk_prefix_check(char *p, char **pp) @@ -915,6 +932,7 @@ static DEFINE_SPINLOCK(lock); static char buf[128]; + console_enter_critical(); debugtrace_dump(); /* Protects buf[] and ensure multi-line message prints atomically. 
*/ @@ -935,6 +953,7 @@ printk("Reboot in five seconds...\n"); spin_unlock_irqrestore(&lock, flags); + console_exit_critical(); debugger_trap_immediate(); @@ -953,17 +972,21 @@ void __bug(char *file, int line) { + console_enter_critical(); console_start_sync(); printk("Xen BUG at %s:%d\n", file, line); dump_execution_state(); + console_exit_critical(); panic("Xen BUG at %s:%d\n", file, line); for ( ; ; ) ; } void __warn(char *file, int line) { + console_enter_critical(); printk("Xen WARN at %s:%d\n", file, line); dump_execution_state(); + console_exit_critical(); } diff -r 6315b66fbd5b -r 0a7f637315e4 xen/include/xen/lib.h --- a/xen/include/xen/lib.h Fri Mar 06 08:46:08 2009 +0100 +++ b/xen/include/xen/lib.h Fri Mar 06 09:33:07 2009 +0100 @@ -100,4 +100,8 @@ extern char *print_tainted(char *str); extern void add_taint(unsigned); +/* avoid scheduling during console output in critical paths */ +void console_enter_critical(void); +void console_exit_critical(void); + #endif /* __LIB_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |