[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH, v2] x86: slightly improve stack trace on debug builds



As was rather obvious from crashes recently happening in stage testing,
the debug hypervisor, in that special case, has a drawback compared to
the non-debug one: When a call through a bad pointer happens, there's
no frame, and the top level (and frequently most important for
analysis) stack entry would get skipped:

(XEN) ----[ Xen-4.3-unstable  x86_64  debug=y  Not tainted ]----
(XEN) CPU:    1
(XEN) RIP:    e008:[<0000000000000000>] ???
(XEN) RFLAGS: 0000000000010046   CONTEXT: hypervisor
(XEN) rax: 0000000000000008   rbx: 0000000000000001   rcx: 0000000000000003
(XEN) rdx: 0000003db54eb700   rsi: 7fffffffffffffff   rdi: 0000000000000001
(XEN) rbp: ffff8302357e7ee0   rsp: ffff8302357e7e58   r8:  0000000000000000
(XEN) r9:  000000000000003e   r10: ffff8302357e7f18   r11: ffff8302357e7f18
(XEN) r12: ffff8302357ee340   r13: ffff82c480263980   r14: ffff8302357ee3d0
(XEN) r15: 0000000000000001   cr0: 000000008005003b   cr4: 00000000000026f0
(XEN) cr3: 00000000bf473000   cr2: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
(XEN) Xen stack trace from rsp=ffff8302357e7e58:
(XEN)    ffff82c4801a3d05 ffff8302357eca70 0000000800000020 ffff82c4802ead60
(XEN)    0000000000000001 ffff8302357e7ea0 ffff82c48016bf07 0000000000000000
(XEN)    0000000000000000 ffff8302357e7ee0 fffff830fffff830 0000000000000046
(XEN)    ffff8302357e7f18 ffff82c480263980 ffff8302357e7f18 0000000000000000
(XEN)    0000000000000000 ffff8302357e7f10 ffff82c48015c2be 8302357dc0000fff
...
(XEN) Xen call trace:
(XEN)    [<0000000000000000>] ???
(XEN)    [<ffff82c48015c2be>] idle_loop+0x6c/0x7a
(XEN)    
(XEN) Pagetable walk from 0000000000000000:

Since the bad pointer is being printed anyway (as part of the register
state), replace it with the top of stack value in such a case.

With the introduction of is_active_kernel_text(), use it also at the
(few) other suitable places (I intentionally didn't replace the use in
xen/arch/arm/mm.c - while it would be functionally correct, the
dependency on system_state wouldn't be from an abstract perspective).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v2: Introduce and use is_active_kernel_text(). Make logic to determine
    what address to print at the top of show_trace() more readable.

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -199,7 +199,7 @@ static void show_trace(struct cpu_user_r
     while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
     {
         addr = *stack++;
-        if ( is_kernel_text(addr) || is_kernel_inittext(addr) )
+        if ( is_active_kernel_text(addr) )
         {
             printk("[<%p>]", _p(addr));
             print_symbol(" %s\n   ", addr);
@@ -217,8 +217,16 @@ static void show_trace(struct cpu_user_r
 
     printk("Xen call trace:\n   ");
 
-    printk("[<%p>]", _p(regs->eip));
-    print_symbol(" %s\n   ", regs->eip);
+    /*
+     * If RIP is not pointing into hypervisor code then someone may have
+     * called into oblivion. Peek to see if they left a return address at
+     * top of stack.
+     */
+    addr = is_active_kernel_text(regs->eip) ||
+           !is_active_kernel_text(*ESP_BEFORE_EXCEPTION(regs)) ?
+           regs->eip : *ESP_BEFORE_EXCEPTION(regs);
+    printk("[<%p>]", _p(addr));
+    print_symbol(" %s\n   ", addr);
 
     /* Bounds for range of valid frame pointer. */
     low  = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
@@ -322,7 +330,7 @@ void show_stack_overflow(unsigned int cp
     while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
     {
         addr = *stack++;
-        if ( is_kernel_text(addr) || is_kernel_inittext(addr) )
+        if ( is_active_kernel_text(addr) )
         {
             printk("%p: [<%p>]", stack, _p(addr));
             print_symbol(" %s\n   ", addr);
--- a/xen/common/symbols.c
+++ b/xen/common/symbols.c
@@ -93,6 +93,12 @@ static unsigned int get_symbol_offset(un
     return name - symbols_names;
 }
 
+bool_t is_active_kernel_text(unsigned long addr)
+{
+    return (is_kernel_text(addr) ||
+            (system_state == SYS_STATE_boot && is_kernel_inittext(addr)));
+}
+
 const char *symbols_lookup(unsigned long addr,
                            unsigned long *symbolsize,
                            unsigned long *offset,
@@ -104,7 +110,7 @@ const char *symbols_lookup(unsigned long
     namebuf[KSYM_NAME_LEN] = 0;
     namebuf[0] = 0;
 
-    if (!is_kernel_text(addr) && !is_kernel_inittext(addr))
+    if (!is_active_kernel_text(addr))
         return NULL;
 
         /* do a binary search on the sorted symbols_addresses array */
--- a/xen/include/xen/kernel.h
+++ b/xen/include/xen/kernel.h
@@ -5,6 +5,8 @@
  * 'kernel.h' contains some often-used function prototypes etc
  */
 
+#include <xen/types.h>
+
 /*
  * min()/max() macros that also do
  * strict type-checking.. See the
@@ -95,5 +97,7 @@ extern enum system_state {
     SYS_STATE_resume
 } system_state;
 
+bool_t is_active_kernel_text(unsigned long addr);
+
 #endif /* _LINUX_KERNEL_H */
 


Attachment: x86-debug-dump-TOS.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.