[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] Regression in kernel 3.5 as Dom0 regarding PCI Passthrough?!
me again :) it seems the Crash is not always a "fatal one": [ 247.080617] vif vif-2-0: 2 reading script [ 247.083519] br0: port 4(vif2.0) entered disabled state [ 247.084144] br0: port 4(vif2.0) entered disabled state [ 250.700029] ------------[ cut here ]------------ [ 250.700046] kernel BUG at drivers/xen/balloon.c:359! [ 250.700059] invalid opcode: 0000 [#1] PREEMPT SMP [ 250.700071] CPU 4[ 250.700075] Modules linked in: joydev hid_generic uvcvideo snd_usb_audio snd_seq_midi snd_usbmidi_lib snd_hwdep snd_r awmidi videobuf2_vmalloc videobuf2_memops videobuf2_core videodev gpio_ich [last unloaded: scsi_wait_scan] [ 250.700122][ 250.700125] Pid: 23, comm: kworker/4:0 Not tainted 3.5.0 #3 /DX58SO [ 250.700139] RIP: e030:[<ffffffff81447f95>] [<ffffffff81447f95>] balloon_process+0x385/0x3a0 [ 250.700158] RSP: e02b:ffff8801317b9dc0 EFLAGS: 00010213[ 250.700162] RAX: 000000021f895000 RBX: 0000000000000000 RCX: 0000000000000002 [ 250.700167] RDX: ffffffff82027000 RSI: 0000000000000137 RDI: 00000000000a2337 [ 250.700172] RBP: ffff8801317b9e20 R08: ffff88014068e140 R09: 00000000fffffffc [ 250.700180] R10: 0000000000000001 R11: 0000000000000000 R12: 0000160000000000 [ 250.700185] R13: 0000000000000001 R14: 00000000000a2337 R15: ffffea000288cdc0 [ 250.700192] FS: 00007fb82ee14700(0000) GS:ffff880140680000(0000) knlGS:0000000000000000 [ 250.700198] CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b[ 250.700202] CR2: 00007fb82e7b39a6 CR3: 0000000001e0c000 CR4: 0000000000002660 [ 250.700207] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 250.700213] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 250.700218] Process kworker/4:0 (pid: 23, threadinfo ffff8801317b8000, task ffff88013178db00) [ 250.700223] Stack:[ 250.700225] 000000000006aa7b 0000000000000001 ffffffff8200ea80 0000000000000001 [ 250.700293] 0000000000000000 0000000000007ff0 ffff8801317b9e00 ffff880131796400 [ 250.700301] ffff880140697000 ffff88014068e140 0000000000000000 
ffffffff81e587c0 [ 250.700311] Call Trace: [ 250.700317] [<ffffffff8106753b>] process_one_work+0x12b/0x450 [ 250.700322] [<ffffffff81447c10>] ? decrease_reservation+0x320/0x320 [ 250.700328] [<ffffffff810688be>] worker_thread+0x12e/0x2d0[ 250.700334] [<ffffffff81068790>] ? manage_workers.isra.26+0x1f0/0x1f0 [ 250.700340] [<ffffffff8106db7e>] kthread+0x8e/0xa0 [ 250.700346] [<ffffffff8184e3e4>] kernel_thread_helper+0x4/0x10 [ 250.700353] [<ffffffff8184c87c>] ? retint_restore_args+0x5/0x6 [ 250.700358] [<ffffffff8184e3e0>] ? gs_change+0x13/0x13[ 250.700362] Code: 01 15 f0 6a bc 00 48 29 d0 48 89 05 ee 6a bc 00 e9 31 fd ff ff 0f 0b 0f 0b 4c 89 f7 e8 85 34 bc ff 48 83 f8 ff 0f 84 2b fe ff ff <0f> 0b 66 0f 1f 84 00 00 00 00 00 48 83 c1 01 e9 c2 fd ff ff 0f [ 250.700471] RIP [<ffffffff81447f95>] balloon_process+0x385/0x3a0 [ 250.700482] RSP <ffff8801317b9dc0> [ 250.733955] ---[ end trace a5e5187e8ed6c1ff ]---[ 250.733982] BUG: unable to handle kernel paging request at fffffffffffffff8 [ 250.733992] IP: [<ffffffff8106e08c>] kthread_data+0xc/0x20 [ 250.733999] PGD 1e0e067 PUD 1e0f067 PMD 0 [ 250.734006] Oops: 0000 [#2] PREEMPT SMP [ 250.734013] CPU 4[ 250.734016] Modules linked in: joydev hid_generic uvcvideo snd_usb_audio snd_seq_midi snd_usbmidi_lib snd_hwdep snd_r awmidi videobuf2_vmalloc videobuf2_memops videobuf2_core videodev gpio_ich [last unloaded: scsi_wait_scan] [ 250.734071][ 250.734073] Pid: 23, comm: kworker/4:0 Tainted: G D 3.5.0 #3 /DX58SO [ 250.734095] RIP: e030:[<ffffffff8106e08c>] [<ffffffff8106e08c>] kthread_data+0xc/0x20 [ 250.734111] RSP: e02b:ffff8801317b9a90 EFLAGS: 00010092[ 250.734122] RAX: 0000000000000000 RBX: 0000000000000004 RCX: 0000000000000004 [ 250.734137] RDX: ffffffff81fcba40 RSI: 0000000000000004 RDI: ffff88013178db00 [ 250.734151] RBP: ffff8801317b9aa8 R08: 0000000000989680 R09: ffffffff81fcba40 [ 250.734166] R10: ffffffff8104960a R11: 0000000000000000 R12: ffff8801406936c0 [ 250.734178] R13: 0000000000000004 R14: ffff88013178daf0 R15: 
ffff88013178db00 [ 250.734196] FS: 00007fb82ee14700(0000) GS:ffff880140680000(0000) knlGS:0000000000000000 [ 250.734202] CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b[ 250.734209] CR2: fffffffffffffff8 CR3: 0000000001e0c000 CR4: 0000000000002660 [ 250.734222] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 250.734235] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 250.734249] Process kworker/4:0 (pid: 23, threadinfo ffff8801317b8000, task ffff88013178db00) [ 250.734266] Stack:[ 250.734271] ffffffff810698e0 ffff8801317b9aa8 ffff88013178ded8 ffff8801317b9b18 [ 250.734292] ffffffff8184ae02 ffff8801317b9fd8 ffff88013178db00 ffff8801317b9fd8 [ 250.734313] ffff8801317b9fd8 ffff8801334796c0 ffff88013178db00 ffff8801317b9ae8 [ 250.734979] Call Trace: [ 250.735572] [<ffffffff810698e0>] ? wq_worker_sleeping+0x10/0xa0 [ 250.736179] [<ffffffff8184ae02>] __schedule+0x592/0x7d0 [ 250.736783] [<ffffffff8184b164>] schedule+0x24/0x70 [ 250.737373] [<ffffffff81051592>] do_exit+0x5b2/0x910 [ 250.737937] [<ffffffff8183ea1e>] ? printk+0x48/0x4a [ 250.738498] [<ffffffff8100ace2>] ? check_events+0x12/0x20 [ 250.739053] [<ffffffff81017581>] oops_end+0x71/0xa0 [ 250.739596] [<ffffffff810176f3>] die+0x53/0x80 [ 250.740134] [<ffffffff810143f8>] do_trap+0xb8/0x160 [ 250.740668] [<ffffffff810146f3>] do_invalid_op+0xa3/0xb0 [ 250.741203] [<ffffffff81447f95>] ? balloon_process+0x385/0x3a0 [ 250.741737] [<ffffffff81085f52>] ? load_balance+0xd2/0x800 [ 250.742267] [<ffffffff81006276>] ? xen_flush_tlb+0xd6/0x2a0 [ 250.742803] [<ffffffff8108117d>] ? cpuacct_charge+0x6d/0xb0 [ 250.743332] [<ffffffff8184e25b>] invalid_op+0x1b/0x20 [ 250.743855] [<ffffffff81447f95>] ? balloon_process+0x385/0x3a0 [ 250.744374] [<ffffffff8106753b>] process_one_work+0x12b/0x450 [ 250.744897] [<ffffffff81447c10>] ? decrease_reservation+0x320/0x320 [ 250.745426] [<ffffffff810688be>] worker_thread+0x12e/0x2d0[ 250.745942] [<ffffffff81068790>] ? 
manage_workers.isra.26+0x1f0/0x1f0 [ 250.746457] [<ffffffff8106db7e>] kthread+0x8e/0xa0 [ 250.746969] [<ffffffff8184e3e4>] kernel_thread_helper+0x4/0x10 [ 250.747480] [<ffffffff8184c87c>] ? retint_restore_args+0x5/0x6 [ 250.747990] [<ffffffff8184e3e0>] ? gs_change+0x13/0x13[ 250.748487] Code: e0 ff ff 01 48 8b 80 38 e0 ff ff a8 08 0f 84 3d ff ff ff e8 57 d0 7d 00 e9 33 ff ff ff 66 90 48 8b 87 80 03 00 00 55 48 89 e5 5d <48> 8b 40 f8 c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 55 [ 250.749575] RIP [<ffffffff8106e08c>] kthread_data+0xc/0x20 [ 250.750103] RSP <ffff8801317b9a90> [ 250.750627] CR2: fffffffffffffff8 [ 250.751151] ---[ end trace a5e5187e8ed6c200 ]--- [ 250.751152] Fixing recursive fault but reboot is needed![ 311.042233] INFO: rcu_preempt detected stalls on CPUs/tasks: { 4} (detected by 7, t=60011 jiffies) [ 311.042237] INFO: Stall ended before state dump start[ 491.279642] INFO: rcu_preempt detected stalls on CPUs/tasks: { 4} (detected by 7, t=240249 jiffies) [ 491.279646] INFO: Stall ended before state dump start[ 671.670546] INFO: rcu_preempt detected stalls on CPUs/tasks: { 4} (detected by 7, t=420638 jiffies) [ 671.670550] INFO: Stall ended before state dump start[ 763.240862] INFO: rcu_bh detected stalls on CPUs/tasks: { 1 4} (detected by 5, t=63547 jiffies) [ 763.240867] INFO: Stall ended before state dump start[ 853.438186] INFO: rcu_preempt detected stalls on CPUs/tasks: { 4} (detected by 7, t=602410 jiffies) [ 853.438190] INFO: Stall ended before state dump start[ 943.632087] INFO: rcu_bh detected stalls on CPUs/tasks: { 1 4} (detected by 0, t=243935 jiffies) [ 943.632092] INFO: Stall ended before state dump start[ 1033.828726] INFO: rcu_preempt detected stalls on CPUs/tasks: { 4} (detected by 7, t=782798 jiffies) [ 1033.828729] INFO: Stall ended before state dump start Now Dom0 still reacts, but mostly unusable sluggish... 
Am 05.09.2012 20:54, schrieb Konrad Rzeszutek Wilk: > > > And it's due to a patch I added in v3.4 > > > (cd9db80e5257682a7f7ab245a2459648b3c8d268) > > > - which did not work properly in v3.4, but with v3.5 got it working > > > (977f857ca566a1e68045fcbb7cfc9c4acb077cf0) which causes v3.5 to not > > > work > > > anymore. > > > > > > Anyhow, for right now just revert > > > cd9db80e5257682a7f7ab245a2459648b3c8d268 > > > and it should work for you. > > > Confirmed, after reverting that commit, VT-d will work fine. Will you fix this and push it to upstream Linux, Konrad? > > Also, our team reported a VT-d bug 2 months ago. > > http://bugzilla.xen.org/bugzilla/show_bug.cgi?id=1824 > Can either one of you please test this patch, please: diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 097e536..425bd0b 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -4,6 +4,8 @@ * Ryan Wilson <hap9@xxxxxxxxxxxxxx> * Chris Bookholt <hap10@xxxxxxxxxxxxxx> */ +#define DEBUG 1 + #include <linux/module.h> #include <linux/init.h> #include <linux/rwsem.h>@@ -97,13 +99,15 @@ static void pcistub_device_release(struct kref *kref)/* Call the reset function which does not take lock as this * is called from "unbind" which takes a device_lock mutex. */ + dev_dbg(&psdev->dev->dev, "FLR locked..\n"); __pci_reset_function_locked(psdev->dev); if (pci_load_and_free_saved_state(psdev->dev, &dev_data->pci_saved_state)) { dev_dbg(&psdev->dev->dev, "Could not reload PCI state\n"); - } else + } else { + dev_dbg(&psdev->dev->dev, "Reloading PCI state..\n"); pci_restore_state(psdev->dev); - + } /* Disable the device */ xen_pcibk_reset_device(psdev->dev); @@ -353,16 +357,16 @@ static int __devinit pcistub_init_device(struct pci_dev *dev) if (err) goto config_release; - dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n"); - __pci_reset_function_locked(dev); - /* We need the device active to save the state. 
*/ dev_dbg(&dev->dev, "save state of device\n"); pci_save_state(dev); dev_data->pci_saved_state = pci_store_saved_state(dev); if (!dev_data->pci_saved_state) dev_err(&dev->dev, "Could not store PCI conf saved state!\n"); - + else { + dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n"); + __pci_reset_function_locked(dev); + } /* Now disable the device (this also ensures some private device * data is setup before we export) */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |