[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 2/2] VT-d: Fix vt-d flush timeout issue.



If an IOTLB/Context/IEC flush times out, we should assume
that all devices under this IOMMU cannot function correctly.
So for each device under this IOMMU we'll mark it as
unassignable and kill the domain owning the device.

If a Device-TLB flush times out, we'll mark the target
ATS device as unassignable and kill the domain owning
this device.

If the impacted domain is the hardware domain, we just print
a warning. It remains an open question whether we should kill
the hardware domain (or directly panic the hypervisor) instead.
Comments are welcome.

A device marked as unassignable will no longer be allowed
to be assigned to any domain.

Signed-off-by: Quan Xu <quan.xu@xxxxxxxxx>
---
 xen/drivers/passthrough/vtd/extern.h  |  5 +++
 xen/drivers/passthrough/vtd/iommu.c   |  6 +++
 xen/drivers/passthrough/vtd/qinval.c  | 78 ++++++++++++++++++++++++++++++++++-
 xen/drivers/passthrough/vtd/x86/ats.c | 16 +++++++
 xen/include/xen/pci.h                 | 11 +++++
 5 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/extern.h 
b/xen/drivers/passthrough/vtd/extern.h
index 8acf889..96c1a28 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -62,6 +62,11 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
 int qinval_device_iotlb(struct iommu *iommu,
                         u32 max_invs_pend, u16 sid, u16 size, u64 addr);
 
+void invalidate_timeout(struct iommu *iommu);
+void device_tlb_invalidate_timeout(struct iommu *iommu, u16 did,
+                                   u16 seg, u8 bus, u8 devfn);
+int invalidate_sync(struct iommu *iommu);
+
 unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
 void flush_all_cache(void);
diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index dd13865..f9a5d66 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1890,6 +1890,9 @@ static int intel_iommu_add_device(u8 devfn, struct 
pci_dev *pdev)
     if ( !pdev->domain )
         return -EINVAL;
 
+    if ( is_pdev_unassignable(pdev) )
+        return -EACCES;
+
     ret = domain_context_mapping(pdev->domain, devfn, pdev);
     if ( ret )
     {
@@ -2301,6 +2304,9 @@ static int intel_iommu_assign_device(
     if ( list_empty(&acpi_drhd_units) )
         return -ENODEV;
 
+    if ( is_pdev_unassignable(pdev) )
+        return -EACCES;
+
     seg = pdev->seg;
     bus = pdev->bus;
     /*
diff --git a/xen/drivers/passthrough/vtd/qinval.c 
b/xen/drivers/passthrough/vtd/qinval.c
index 990baf2..c514f99 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -27,12 +27,58 @@
 #include "dmar.h"
 #include "vtd.h"
 #include "extern.h"
+#include "../ats.h"
 
 static int __read_mostly iommu_qi_timeout_ms = 1;
 integer_param("iommu_qi_timeout_ms", iommu_qi_timeout_ms);
 
 #define IOMMU_QI_TIMEOUT (iommu_qi_timeout_ms * MILLISECS(1))
 
+void invalidate_timeout(struct iommu *iommu)
+{
+    struct domain *d;
+    unsigned long nr_dom, i;
+    struct pci_dev *pdev;
+
+    nr_dom = cap_ndoms(iommu->cap);
+    i = find_first_bit(iommu->domid_bitmap, nr_dom);
+    while ( i < nr_dom ) {
+        d = rcu_lock_domain_by_id(iommu->domid_map[i]);
+        ASSERT(d);
+
+        /* Mark the devices as unassignable. */
+        for_each_pdev(d, pdev)
+            mark_pdev_unassignable(pdev);
+        if ( !is_hardware_domain(d) )
+            domain_kill(d);
+
+        rcu_unlock_domain(d);
+        i = find_next_bit(iommu->domid_bitmap, nr_dom, i + 1);
+    }
+}
+
+void device_tlb_invalidate_timeout(struct iommu *iommu, u16 did,
+                                   u16 seg, u8 bus, u8 devfn)
+{
+    struct domain *d;
+    struct pci_dev *pdev;
+
+        d = rcu_lock_domain_by_id(iommu->domid_map[did]);
+        ASSERT(d);
+        for_each_pdev(d, pdev)
+            if ( (pdev->seg == seg) &&
+                 (pdev->bus == bus) &&
+                 (pdev->devfn == devfn) )
+            {
+                mark_pdev_unassignable(pdev);
+                break;
+            }
+
+        if ( !is_hardware_domain(d) )
+            domain_kill(d);
+        rcu_unlock_domain(d);
+}
+
 static void print_qi_regs(struct iommu *iommu)
 {
     u64 val;
@@ -187,7 +233,7 @@ static int queue_invalidate_wait(struct iommu *iommu,
     return -EOPNOTSUPP;
 }
 
-static int invalidate_sync(struct iommu *iommu)
+int invalidate_sync(struct iommu *iommu)
 {
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
@@ -262,6 +308,14 @@ static int __iommu_flush_iec(struct iommu *iommu, u8 
granu, u8 im, u16 iidx)
 
     queue_invalidate_iec(iommu, granu, im, iidx);
     ret = invalidate_sync(iommu);
+
+    if ( ret == -ETIMEDOUT )
+    {
+        invalidate_timeout(iommu);
+        dprintk(XENLOG_WARNING VTDPREFIX,
+                "IEC flush timeout.\n");
+        return ret;
+    }
     /*
      * reading vt-d architecture register will ensure
      * draining happens in implementation independent way.
@@ -308,6 +362,13 @@ static int flush_context_qi(
         queue_invalidate_context(iommu, did, sid, fm,
                                  type >> DMA_CCMD_INVL_GRANU_OFFSET);
         ret = invalidate_sync(iommu);
+        if ( ret == -ETIMEDOUT )
+        {
+            invalidate_timeout(iommu);
+            dprintk(XENLOG_WARNING  VTDPREFIX,
+                    "Context flush timeout.\n");
+            return ret;
+        }
     }
     return ret;
 }
@@ -349,9 +410,22 @@ static int flush_iotlb_qi(
         queue_invalidate_iotlb(iommu,
                                type >> DMA_TLB_FLUSH_GRANU_OFFSET, dr,
                                dw, did, size_order, 0, addr);
+
+        /*
+         * Synchronize with hardware for invalidation request descriptors
+         * submitted before Device-TLB invalidate descriptor.
+         */
+        rc = invalidate_sync(iommu);
+        if ( rc == -ETIMEDOUT )
+        {
+            invalidate_timeout(iommu);
+            dprintk(XENLOG_WARNING VTDPREFIX, "IOTLB flush timeout.\n");
+            return rc;
+        }
+
         if ( flush_dev_iotlb )
             ret = dev_invalidate_iotlb(iommu, did, addr, size_order, type);
-        rc = invalidate_sync(iommu);
+
         if ( !ret )
             ret = rc;
     }
diff --git a/xen/drivers/passthrough/vtd/x86/ats.c 
b/xen/drivers/passthrough/vtd/x86/ats.c
index 7c797f6..973dde7 100644
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -156,6 +156,22 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
 
             rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
                                      sid, sbit, addr);
+
+            /*
+             * Synchronize with hardware for Device-TLB invalidate
+             * descriptor.
+             */
+            rc = invalidate_sync(iommu);
+
+            if ( rc == -ETIMEDOUT )
+            {
+                device_tlb_invalidate_timeout(iommu, did, pdev->seg, pdev->bus,
+                                              pdev->devfn);
+                dprintk(XENLOG_WARNING VTDPREFIX,
+                        "Device-TLB flush timeout.\n");
+                return rc;
+            }
+
             break;
         default:
             dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index a5aef55..21a0af6 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -41,6 +41,7 @@
 struct pci_dev_info {
     bool_t is_extfn;
     bool_t is_virtfn;
+    bool_t is_unassignable;
     struct {
         u8 bus;
         u8 devfn;
@@ -88,6 +89,16 @@ struct pci_dev {
 #define for_each_pdev(domain, pdev) \
     list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list)
 
+static inline void mark_pdev_unassignable(struct pci_dev *pdev)
+{
+    pdev->info.is_unassignable = 1;
+}
+
+static inline bool_t is_pdev_unassignable(const struct pci_dev *pdev)
+{
+    return pdev->info.is_unassignable;
+}
+
 /*
  * The pcidevs_lock protect alldevs_list, and the assignment for the 
  * devices, it also sync the access to the msi capability that is not
-- 
1.8.1.2


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.