[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1/2] libxl: Wait until QEMU removed the device before tearing it down



When a device is hot-unplugged libxl sends QEMU a "device-del" message
(via QMP). This call returns after QEMU has initiated device removal by
sending an interrupt to the guest. At some point later QEMU is expected
to clean up after the device (such as unbind/unmap MSIs), which will
occur when the guest signals that the device has been ejected.

Before this happens, however, it is likely that libxl will try to unmap
the pirq itself (without first unbinding it) and then reset the device. The
unmapping will result in the hypervisor having to perform a forced unbind
and that, in turn, will cause any future binds (such as when we try to
hotplug the device back) to fail.

In addition, if the guest kernel had not had a chance to respond to QEMU's
interrupt before libxl resets the device it may fail to properly clean
up, e.g. with Linux igb driver:

[   26.976212] WARNING: CPU: 0 PID: 6 at lib/iomap.c:43 
bad_io_access+0x3d/0x40()
[   26.985855] Bad IO access at port 0x0 ()
[   26.990303] Modules linked in: xt_REDIRECT iptable_nat nf_conntrack_ipv4 
nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack scsi_transport_iscsi igb 
i2c_algo_bit ptp pps_core i2c_core hwmon dca floppy
[   27.016289] CPU: 0 PID: 6 Comm: kworker/u256:0 Not tainted 3.18.0-rc2 #66
[   27.025154] Hardware name: Xen HVM domU, BIOS 4.5-unstable 10/24/2014
[   27.033357] Workqueue: kacpi_hotplug acpi_hotplug_work_fn
[   27.040638]  0000000000000009 ffff88003c2f3b38 ffffffff8169dbd1 
0000000000000001
[   27.050884]  ffff88003c2f3b88 ffff88003c2f3b78 ffffffff8109149c 
ffff88003c2f3b88
[   27.060959]  ffff880039eb6000 ffff88003b8a8000 ffff880039eb6880 
ffff88003b8a8098
[   27.071173] Call Trace:
[   27.074525]  [<ffffffff8169dbd1>] dump_stack+0x46/0x58
[   27.081411]  [<ffffffff8109149c>] warn_slowpath_common+0x8c/0xc0
[   27.089290]  [<ffffffff81091586>] warn_slowpath_fmt+0x46/0x50
[   27.096756]  [<ffffffffa00461c4>] ? igb_reset_interrupt_capability+0x64/0x80 
[igb]
[   27.106651]  [<ffffffff8137bded>] bad_io_access+0x3d/0x40
[   27.113785]  [<ffffffff8137be1c>] pci_iounmap+0x2c/0x40
[   27.120619]  [<ffffffffa00489cf>] igb_remove+0xbf/0x160 [igb]
[   27.128161]  [<ffffffff813a0276>] pci_device_remove+0x46/0xc0
[   27.135895]  [<ffffffff81479f8f>] __device_release_driver+0x7f/0xf0
[   27.144748]  [<ffffffff8147a34c>] device_release_driver+0x2c/0x40
[   27.153123]  [<ffffffff81399b4c>] pci_stop_bus_device+0x9c/0xb0
[   27.160951]  [<ffffffff81399cc6>] pci_stop_and_remove_bus_device+0x16/0x30
[   27.170290]  [<ffffffff813b64a7>] disable_slot+0x57/0xb0
[   27.177610]  [<ffffffff813b6521>] acpiphp_disable_and_eject_slot+0x21/0x90
[   27.187063]  [<ffffffff813b7031>] acpiphp_hotplug_notify+0x141/0x220
[   27.195588]  [<ffffffff813b6ef0>] ? acpiphp_post_dock_fixup+0xd0/0xd0
[   27.204205]  [<ffffffff813deb6d>] acpi_device_hotplug+0x3a1/0x3f0
[   27.212677]  [<ffffffff813d8871>] acpi_hotplug_work_fn+0x1f/0x2b
[   27.220853]  [<ffffffff810a8b8c>] process_one_work+0x15c/0x410
[   27.228802]  [<ffffffff810a8f9d>] worker_thread+0x11d/0x520
[   27.238591]  [<ffffffff810a8e80>] ? process_scheduled_works+0x40/0x40
[   27.251911]  [<ffffffff810aed49>] kthread+0xc9/0xe0
[   27.262033]  [<ffffffff810aec80>] ? flush_kthread_worker+0x70/0x70
[   27.274774]  [<ffffffff816a62bc>] ret_from_fork+0x7c/0xb0
[   27.285985]  [<ffffffff810aec80>] ? flush_kthread_worker+0x70/0x70
[   27.298839] ---[ end trace e8838cc146d5f64c ]---

To avoid this we should wait until QEMU has completed device teardown (which, 
at least
for Linux, happens after the guest is done with its own cleanup).

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Tested-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
---
 tools/libxl/libxl_internal.h |    1 +
 tools/libxl/libxl_pci.c      |   22 ++++++++++++++
 tools/libxl/libxl_qmp.c      |   64 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 0 deletions(-)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index f61673c..139da3a 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1589,6 +1589,7 @@ _hidden libxl__qmp_handler 
*libxl__qmp_initialize(libxl__gc *gc,
                                                   uint32_t domid);
 /* ask to QEMU the serial port information and store it in xenstore. */
 _hidden int libxl__qmp_query_serial(libxl__qmp_handler *qmp);
+_hidden int libxl__qmp_pci_test(libxl__gc *gc, int d, libxl_device_pci 
*pcidev);
 _hidden int libxl__qmp_pci_add(libxl__gc *gc, int d, libxl_device_pci *pcidev);
 _hidden int libxl__qmp_pci_del(libxl__gc *gc, int domid,
                                libxl_device_pci *pcidev);
diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c
index 9f40100..4c6a733 100644
--- a/tools/libxl/libxl_pci.c
+++ b/tools/libxl/libxl_pci.c
@@ -1246,6 +1246,28 @@ static int do_pci_remove(libxl__gc *gc, uint32_t domid,
             break;
         case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
             rc = libxl__qmp_pci_del(gc, domid, pcidev);
+            if (rc < 0)
+                LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "libxl__qmp_pci_del");
+            else {
+                unsigned total_us = 0, wait_us = 100000;
+
+                rc = -ETIMEDOUT;
+                /* Wait (at most ~6 seconds) for device to disappear */
+                do {
+                    int err = libxl__qmp_pci_test(gc, domid, pcidev);
+                    if (err) {
+                        if (err == -ENODEV)
+                            rc = 0;
+                        else
+                            rc = err;
+                        break;
+                    }
+                    usleep(wait_us);
+                    total_us += wait_us;
+                    wait_us *= 2;
+                } while (total_us < 5000000);
+            }
+
             break;
         default:
             rc = ERROR_INVAL;
diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c
index c7324e6..3235c83 100644
--- a/tools/libxl/libxl_qmp.c
+++ b/tools/libxl/libxl_qmp.c
@@ -739,6 +739,41 @@ static int qmp_query_vnc(libxl__qmp_handler *qmp)
                                 NULL, qmp->timeout);
 }
 
+static int pci_exists_callback(libxl__qmp_handler *qmp,
+                            const libxl__json_object *response, void *opaque)
+{
+    libxl_device_pci *pcidev = opaque;
+    const libxl__json_object *bus;
+    GC_INIT(qmp->ctx);
+    int i, j, rc = -ENODEV;
+    char *asked_id = GCSPRINTF(PCI_PT_QDEV_ID,
+                               pcidev->bus, pcidev->dev, pcidev->func);
+
+    for (i = 0; (bus = libxl__json_array_get(response, i)); i++) {
+        const libxl__json_object *devices = NULL;
+        const libxl__json_object *device = NULL;
+        const libxl__json_object *o = NULL;
+        const char *id = NULL;
+
+        devices = libxl__json_map_get("devices", bus, JSON_ARRAY);
+
+        for (j = 0; (device = libxl__json_array_get(devices, j)); j++) {
+             o = libxl__json_map_get("qdev_id", device, JSON_STRING);
+             id = libxl__json_object_get_string(o);
+
+             if (id && strcmp(asked_id, id) == 0) {
+                 rc = 0;
+                 goto out;
+             }
+        }
+    }
+
+out:
+    GC_FREE;
+    return rc;
+
+}
+
 static int pci_add_callback(libxl__qmp_handler *qmp,
                             const libxl__json_object *response, void *opaque)
 {
@@ -804,6 +839,35 @@ static int qmp_run_command(libxl__gc *gc, int domid,
     return rc;
 }
 
+int libxl__qmp_pci_test(libxl__gc *gc, int domid, libxl_device_pci *pcidev)
+{
+    libxl__qmp_handler *qmp = NULL;
+    libxl__json_object *args = NULL;
+    char *hostaddr = NULL;
+    int rc = 0;
+
+    qmp = libxl__qmp_initialize(gc, domid);
+    if (!qmp)
+        return -1;
+
+    hostaddr = GCSPRINTF("%04x:%02x:%02x.%01x", pcidev->domain,
+                         pcidev->bus, pcidev->dev, pcidev->func);
+    if (!hostaddr)
+        return -1;
+
+    qmp_parameters_add_string(gc, &args, "driver", "xen-pci-passthrough");
+    QMP_PARAMETERS_SPRINTF(&args, "id", PCI_PT_QDEV_ID,
+                           pcidev->bus, pcidev->dev, pcidev->func);
+    qmp_parameters_add_string(gc, &args, "hostaddr", hostaddr);
+
+    rc = qmp_synchronous_send(qmp, "query-pci", NULL,
+                             pci_exists_callback, pcidev, qmp->timeout);
+
+    libxl__qmp_close(qmp);
+    return rc;
+
+}
+
 int libxl__qmp_pci_add(libxl__gc *gc, int domid, libxl_device_pci *pcidev)
 {
     libxl__qmp_handler *qmp = NULL;
-- 
1.7.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.