
[Xen-changelog] [xen-unstable] Re-enable MSI support

# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1228996099 0
# Node ID 2941b1a97c6015aa5618373e4b280dfc88c16784
# Parent  c15244125a693d2a1ae5e5745a649467394d8dac
Re-enable MSI support

Currently MSI support is disabled because of a locking issue. This
patch cleans up the locking around MSI so that it can be re-enabled.

Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
---
 xen/arch/x86/domctl.c                       |   17 --
 xen/arch/x86/irq.c                          |   59 +++++---
 xen/arch/x86/msi.c                          |  175 ++++++++++-------------
 xen/arch/x86/physdev.c                      |    6 
 xen/arch/x86/x86_64/asm-offsets.c           |    2 
 xen/common/domain.c                         |    8 -
 xen/drivers/passthrough/amd/pci_amd_iommu.c |   16 +-
 xen/drivers/passthrough/iommu.c             |   58 +++++--
 xen/drivers/passthrough/pci.c               |   73 ++++-----
 xen/drivers/passthrough/vtd/iommu.c         |  206 ++++++++++++++++------------
 xen/include/asm-x86/msi.h                   |    8 -
 xen/include/xen/iommu.h                     |    2 
 xen/include/xen/pci.h                       |    3 
 13 files changed, 337 insertions(+), 296 deletions(-)
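
The core of the cleanup is a fixed lock ordering: the global
pcidevs_lock (a read-write lock) is taken first, then the per-domain
event_lock, and the per-vector irq_desc lock last, held only around
irq_desc updates. A minimal illustrative sketch of that ordering,
distilled from the physdev.c hunks below (d, pirq, vector, msi and ret
are assumed locals, not code from the tree):

    /* Lock order: pcidevs_lock -> d->event_lock -> irq_desc[].lock */
    read_lock(&pcidevs_lock);
    spin_lock(&d->event_lock);
    ret = map_domain_pirq(d, pirq, vector, MAP_PIRQ_TYPE_MSI, &msi);
    spin_unlock(&d->event_lock);
    read_unlock(&pcidevs_lock);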

diff -r c15244125a69 -r 2941b1a97c60 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/arch/x86/domctl.c     Thu Dec 11 11:48:19 2008 +0000
@@ -665,14 +665,6 @@ long arch_do_domctl(
         }
 
         ret = -EINVAL;
-        if ( device_assigned(bus, devfn) )
-        {
-            gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: "
-                     "%x:%x:%x already assigned, or non-existent\n",
-                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-            put_domain(d);
-            break;
-        }
 
         ret = assign_device(d, bus, devfn);
         if ( ret )
@@ -715,15 +707,8 @@ long arch_do_domctl(
             put_domain(d);
             break;
         }
-
-        if ( !device_assigned(bus, devfn) )
-        {
-            put_domain(d);
-            break;
-        }
-
         ret = 0;
-        deassign_device(d, bus, devfn);
+        ret = deassign_device(d, bus, devfn);
         gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 
diff -r c15244125a69 -r 2941b1a97c60 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/arch/x86/irq.c        Thu Dec 11 11:48:19 2008 +0000
@@ -847,12 +847,11 @@ int map_domain_pirq(
     int old_vector, old_pirq;
     irq_desc_t *desc;
     unsigned long flags;
-
+    struct msi_desc *msi_desc;
+    struct pci_dev *pdev = NULL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
     ASSERT(spin_is_locked(&d->event_lock));
-
-    /* XXX Until pcidev and msi locking is fixed. */
-    if ( type == MAP_PIRQ_TYPE_MSI )
-        return -EINVAL;
 
     if ( !IS_PRIV(current->domain) )
         return -EPERM;
@@ -884,25 +883,35 @@ int map_domain_pirq(
     }
 
     desc = &irq_desc[vector];
-    spin_lock_irqsave(&desc->lock, flags);
 
     if ( type == MAP_PIRQ_TYPE_MSI )
     {
         struct msi_info *msi = (struct msi_info *)data;
+
+        pdev = pci_get_pdev(msi->bus, msi->devfn);
+        ret = pci_enable_msi(msi, &msi_desc);
+        if ( ret )
+            goto done;
+
+        spin_lock_irqsave(&desc->lock, flags);
+
         if ( desc->handler != &no_irq_type )
             dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n",
-                    d->domain_id, vector);
+              d->domain_id, vector);
         desc->handler = &pci_msi_type;
-        ret = pci_enable_msi(msi);
-        if ( ret )
-            goto done;
-    }
-
-    d->arch.pirq_vector[pirq] = vector;
-    d->arch.vector_pirq[vector] = pirq;
+        d->arch.pirq_vector[pirq] = vector;
+        d->arch.vector_pirq[vector] = pirq;
+        setup_msi_irq(pdev, msi_desc);
+        spin_unlock_irqrestore(&desc->lock, flags);
+    } else
+    {
+        spin_lock_irqsave(&desc->lock, flags);
+        d->arch.pirq_vector[pirq] = vector;
+        d->arch.vector_pirq[vector] = pirq;
+        spin_unlock_irqrestore(&desc->lock, flags);
+    }
 
  done:
-    spin_unlock_irqrestore(&desc->lock, flags);
     return ret;
 }
 
@@ -913,6 +922,7 @@ int unmap_domain_pirq(struct domain *d, 
     irq_desc_t *desc;
     int vector, ret = 0;
     bool_t forced_unbind;
+    struct msi_desc *msi_desc = NULL;
 
     if ( (pirq < 0) || (pirq >= NR_IRQS) )
         return -EINVAL;
@@ -920,6 +930,7 @@ int unmap_domain_pirq(struct domain *d, 
     if ( !IS_PRIV(current->domain) )
         return -EINVAL;
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
     ASSERT(spin_is_locked(&d->event_lock));
 
     vector = d->arch.pirq_vector[pirq];
@@ -937,18 +948,19 @@ int unmap_domain_pirq(struct domain *d, 
                 d->domain_id, pirq);
 
     desc = &irq_desc[vector];
+
+    if ( (msi_desc = desc->msi_desc) != NULL )
+        pci_disable_msi(msi_desc);
+
     spin_lock_irqsave(&desc->lock, flags);
 
     BUG_ON(vector != d->arch.pirq_vector[pirq]);
 
-    if ( desc->msi_desc )
-        pci_disable_msi(vector);
+    if ( msi_desc )
+        teardown_msi_vector(vector);
 
     if ( desc->handler == &pci_msi_type )
-    {
         desc->handler = &no_irq_type;
-        free_irq_vector(vector);
-    }
 
     if ( !forced_unbind )
     {
@@ -962,6 +974,11 @@ int unmap_domain_pirq(struct domain *d, 
     }
 
     spin_unlock_irqrestore(&desc->lock, flags);
+    if (msi_desc)
+    {
+        msi_free_vector(msi_desc);
+        free_irq_vector(vector);
+    }
 
     ret = irq_deny_access(d, pirq);
     if ( ret )
@@ -976,6 +993,7 @@ void free_domain_pirqs(struct domain *d)
 {
     int i;
 
+    read_lock(&pcidevs_lock);
     spin_lock(&d->event_lock);
 
     for ( i = 0; i < NR_IRQS; i++ )
@@ -983,6 +1001,7 @@ void free_domain_pirqs(struct domain *d)
             unmap_domain_pirq(d, i);
 
     spin_unlock(&d->event_lock);
+    read_unlock(&pcidevs_lock);
 }
 
 extern void dump_ioapic_irq_info(void);
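
The unmap path above now keeps PCI config-space accesses outside the
irq_desc lock: pci_disable_msi() runs before the lock is taken, and
msi_free_vector()/free_irq_vector() run after it is dropped. A
condensed, illustrative restatement of that order (the handler check
and error handling are omitted for brevity):

    if ( (msi_desc = desc->msi_desc) != NULL )
        pci_disable_msi(msi_desc);      /* config space; no irq_desc lock */

    spin_lock_irqsave(&desc->lock, flags);
    teardown_msi_vector(vector);        /* clears irq_desc[].msi_desc */
    desc->handler = &no_irq_type;
    spin_unlock_irqrestore(&desc->lock, flags);

    msi_free_vector(msi_desc);          /* free descriptor, then vector */
    free_irq_vector(vector);
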
diff -r c15244125a69 -r 2941b1a97c60 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/arch/x86/msi.c        Thu Dec 11 11:48:19 2008 +0000
@@ -153,6 +153,8 @@ static int set_vector_msi(struct msi_des
 
 static int unset_vector_msi(int vector)
 {
+    ASSERT(spin_is_locked(&irq_desc[vector].lock));
+
     if ( vector >= NR_VECTORS )
     {
         dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n",
@@ -161,6 +163,7 @@ static int unset_vector_msi(int vector)
     }
 
     irq_desc[vector].msi_desc = NULL;
+
     return 0;
 }
 
@@ -228,14 +231,12 @@ void set_msi_affinity(unsigned int vecto
         return;
 
     ASSERT(spin_is_locked(&irq_desc[vector].lock));
-    spin_lock(&desc->dev->lock);
     read_msi_msg(desc, &msg);
 
     msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
     msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
     write_msi_msg(desc, &msg);
-    spin_unlock(&desc->dev->lock);
 }
 
 static void msi_set_enable(struct pci_dev *dev, int enable)
@@ -369,7 +370,7 @@ static struct msi_desc* alloc_msi_entry(
     return entry;
 }
 
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
     struct msi_msg msg;
 
@@ -380,19 +381,13 @@ static int setup_msi_irq(struct pci_dev 
     return 0;
 }
 
-static void teardown_msi_vector(int vector)
+void teardown_msi_vector(int vector)
 {
     unset_vector_msi(vector);
 }
 
-static void msi_free_vector(int vector)
-{
-    struct msi_desc *entry;
-
-    ASSERT(spin_is_locked(&irq_desc[vector].lock));
-    entry = irq_desc[vector].msi_desc;
-    teardown_msi_vector(vector);
-
+int msi_free_vector(struct msi_desc *entry)
+{
     if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
     {
         unsigned long start;
@@ -407,6 +402,7 @@ static void msi_free_vector(int vector)
     }
     list_del(&entry->list);
     xfree(entry);
+    return 0;
 }
 
 static struct msi_desc *find_msi_entry(struct pci_dev *dev,
@@ -433,15 +429,18 @@ static struct msi_desc *find_msi_entry(s
  * multiple messages. A return of zero indicates the successful setup
  * of an entry zero with the new MSI irq or non-zero for otherwise.
  **/
-static int msi_capability_init(struct pci_dev *dev, int vector)
+static int msi_capability_init(struct pci_dev *dev,
+                               int vector,
+                               struct msi_desc **desc)
 {
     struct msi_desc *entry;
-    int pos, ret;
+    int pos;
     u16 control;
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
     u8 func = PCI_FUNC(dev->devfn);
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
     pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSI);
     control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
     /* MSI Entry Initialization */
@@ -477,14 +476,7 @@ static int msi_capability_init(struct pc
     }
     list_add_tail(&entry->list, &dev->msi_list);
 
-    /* Configure MSI capability structure */
-    ret = setup_msi_irq(dev, entry);
-    if ( ret )
-    {
-        msi_free_vector(vector);
-        return ret;
-    }
-
+    *desc = entry;
     /* Restore the original MSI enabled bits  */
     pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
 
@@ -501,7 +493,9 @@ static int msi_capability_init(struct pc
  * single MSI-X irq. A return of zero indicates the successful setup of
  * requested MSI-X entries with allocated irqs or non-zero for otherwise.
  **/
-static int msix_capability_init(struct pci_dev *dev, struct msi_info *msi)
+static int msix_capability_init(struct pci_dev *dev,
+                                struct msi_info *msi,
+                                struct msi_desc **desc)
 {
     struct msi_desc *entry;
     int pos;
@@ -515,6 +509,9 @@ static int msix_capability_init(struct p
     u8 slot = PCI_SLOT(dev->devfn);
     u8 func = PCI_FUNC(dev->devfn);
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    ASSERT(desc);
+
     pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
     control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
     msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
@@ -550,9 +547,13 @@ static int msix_capability_init(struct p
 
     list_add_tail(&entry->list, &dev->msi_list);
 
-    setup_msi_irq(dev, entry);
-
-    /* Set MSI-X enabled bits */
+    /* Mask interrupt here */
+    writel(1, entry->mask_base + entry->msi_attrib.entry_nr
+                * PCI_MSIX_ENTRY_SIZE
+                + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+
+    *desc = entry;
+    /* Restore MSI-X enabled bits */
     pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
 
     return 0;
@@ -568,45 +569,35 @@ static int msix_capability_init(struct p
  * indicates the successful setup of an entry zero with the new MSI
  * irq or non-zero for otherwise.
  **/
-static int __pci_enable_msi(struct msi_info *msi)
+static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
 {
     int status;
     struct pci_dev *pdev;
 
-    pdev = pci_lock_pdev(msi->bus, msi->devfn);
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    pdev = pci_get_pdev(msi->bus, msi->devfn);
     if ( !pdev )
         return -ENODEV;
 
     if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) )
     {
-        spin_unlock(&pdev->lock);
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on "
                 "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
                 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         return 0;
     }
 
-    status = msi_capability_init(pdev, msi->vector);
-    spin_unlock(&pdev->lock);
+    status = msi_capability_init(pdev, msi->vector, desc);
     return status;
 }
 
-static void __pci_disable_msi(int vector)
-{
-    struct msi_desc *entry;
+static void __pci_disable_msi(struct msi_desc *entry)
+{
     struct pci_dev *dev;
     int pos;
     u16 control;
     u8 bus, slot, func;
 
-    entry = irq_desc[vector].msi_desc;
-    if ( !entry )
-        return;
-    /*
-     * Lock here is safe.  msi_desc can not be removed without holding
-     * both irq_desc[].lock (which we do) and pdev->lock.
-     */
-    spin_lock(&entry->dev->lock);
     dev = entry->dev;
     bus = dev->bus;
     slot = PCI_SLOT(dev->devfn);
@@ -618,10 +609,6 @@ static void __pci_disable_msi(int vector
 
     BUG_ON(list_empty(&dev->msi_list));
 
-    msi_free_vector(vector);
-
-    pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
-    spin_unlock(&dev->lock);
 }
 
 /**
@@ -639,7 +626,7 @@ static void __pci_disable_msi(int vector
  * of irqs available. Driver should use the returned value to re-send
  * its request.
  **/
-static int __pci_enable_msix(struct msi_info *msi)
+static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
 {
     int status, pos, nr_entries;
     struct pci_dev *pdev;
@@ -647,7 +634,8 @@ static int __pci_enable_msix(struct msi_
     u8 slot = PCI_SLOT(msi->devfn);
     u8 func = PCI_FUNC(msi->devfn);
 
-    pdev = pci_lock_pdev(msi->bus, msi->devfn);
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    pdev = pci_get_pdev(msi->bus, msi->devfn);
     if ( !pdev )
         return -ENODEV;
 
@@ -655,41 +643,27 @@ static int __pci_enable_msix(struct msi_
     control = pci_conf_read16(msi->bus, slot, func, msi_control_reg(pos));
     nr_entries = multi_msix_capable(control);
     if (msi->entry_nr >= nr_entries)
-    {
-        spin_unlock(&pdev->lock);
         return -EINVAL;
-    }
 
     if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX) )
     {
-        spin_unlock(&pdev->lock);
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on "
                 "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
                 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         return 0;
     }
 
-    status = msix_capability_init(pdev, msi);
-    spin_unlock(&pdev->lock);
+    status = msix_capability_init(pdev, msi, desc);
     return status;
 }
 
-static void __pci_disable_msix(int vector)
-{
-    struct msi_desc *entry;
+static void __pci_disable_msix(struct msi_desc *entry)
+{
     struct pci_dev *dev;
     int pos;
     u16 control;
     u8 bus, slot, func;
 
-    entry = irq_desc[vector].msi_desc;
-    if ( !entry )
-        return;
-    /*
-     * Lock here is safe.  msi_desc can not be removed without holding
-     * both irq_desc[].lock (which we do) and pdev->lock.
-     */
-    spin_lock(&entry->dev->lock);
     dev = entry->dev;
     bus = dev->bus;
     slot = PCI_SLOT(dev->devfn);
@@ -701,50 +675,51 @@ static void __pci_disable_msix(int vecto
 
     BUG_ON(list_empty(&dev->msi_list));
 
-    msi_free_vector(vector);
+    writel(1, entry->mask_base + entry->msi_attrib.entry_nr
+      * PCI_MSIX_ENTRY_SIZE
+      + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
 
     pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
-    spin_unlock(&dev->lock);
-}
-
-int pci_enable_msi(struct msi_info *msi)
-{
-    ASSERT(spin_is_locked(&irq_desc[msi->vector].lock));
-
-    return  msi->table_base ? __pci_enable_msix(msi) :
-        __pci_enable_msi(msi);
-}
-
-void pci_disable_msi(int vector)
-{
-    irq_desc_t *desc = &irq_desc[vector];
-    ASSERT(spin_is_locked(&desc->lock));
-    if ( !desc->msi_desc )
-        return;
-
-    if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
-        __pci_disable_msi(vector);
-    else if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
-        __pci_disable_msix(vector);
+}
+
+/*
+ * Note: this only constructs the msi_desc; irq_desc is not
+ * changed here, and the interrupt is left masked.
+ */
+int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
+{
+    ASSERT(spin_is_locked(&pcidevs_lock));
+
+    return  msi->table_base ? __pci_enable_msix(msi, desc) :
+        __pci_enable_msi(msi, desc);
+}
+
+/*
+ * Device only, no irq_desc
+ */
+void pci_disable_msi(struct msi_desc *msi_desc)
+{
+    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+        __pci_disable_msi(msi_desc);
+    else if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
+        __pci_disable_msix(msi_desc);
 }
 
 static void msi_free_vectors(struct pci_dev* dev)
 {
     struct msi_desc *entry, *tmp;
     irq_desc_t *desc;
-    unsigned long flags;
-
- retry:
+    unsigned long flags, vector;
+
     list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
     {
-        desc = &irq_desc[entry->vector];
-
-        local_irq_save(flags);
-        if ( !spin_trylock(&desc->lock) )
-        {
-            local_irq_restore(flags);
-            goto retry;
-        }
+        vector = entry->vector;
+        desc = &irq_desc[vector];
+        pci_disable_msi(entry);
+
+        spin_lock_irqsave(&desc->lock, flags);
+
+        teardown_msi_vector(vector);
 
         if ( desc->handler == &pci_msi_type )
         {
@@ -753,8 +728,8 @@ static void msi_free_vectors(struct pci_
             desc->handler = &no_irq_type;
         }
 
-        msi_free_vector(entry->vector);
         spin_unlock_irqrestore(&desc->lock, flags);
+        msi_free_vector(entry);
     }
 }
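
With this change pci_enable_msi() only constructs the msi_desc and
leaves the interrupt masked; binding it to a vector is the caller's
job, done under the irq_desc lock via setup_msi_irq(). An illustrative
caller pattern matching the map_domain_pirq() hunk earlier (pdev, desc,
ret and flags are assumed locals; pcidevs_lock and d->event_lock are
assumed held):

    struct msi_desc *msi_desc;

    ret = pci_enable_msi(msi, &msi_desc);  /* no irq_desc change yet */
    if ( !ret )
    {
        spin_lock_irqsave(&desc->lock, flags);
        setup_msi_irq(pdev, msi_desc);     /* write message, bind vector */
        spin_unlock_irqrestore(&desc->lock, flags);
    }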
 
diff -r c15244125a69 -r 2941b1a97c60 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/arch/x86/physdev.c    Thu Dec 11 11:48:19 2008 +0000
@@ -100,6 +100,7 @@ static int physdev_map_pirq(struct physd
             goto free_domain;
     }
 
+    read_lock(&pcidevs_lock);
     /* Verify or get pirq. */
     spin_lock(&d->event_lock);
     if ( map->pirq < 0 )
@@ -147,6 +148,7 @@ static int physdev_map_pirq(struct physd
 
 done:
     spin_unlock(&d->event_lock);
+    read_unlock(&pcidevs_lock);
     if ( (ret != 0) && (map->type == MAP_PIRQ_TYPE_MSI) && (map->index == -1) )
         free_irq_vector(vector);
 free_domain:
@@ -170,9 +172,11 @@ static int physdev_unmap_pirq(struct phy
     if ( d == NULL )
         return -ESRCH;
 
+    read_lock(&pcidevs_lock);
     spin_lock(&d->event_lock);
     ret = unmap_domain_pirq(d, unmap->pirq);
     spin_unlock(&d->event_lock);
+    read_unlock(&pcidevs_lock);
 
     rcu_unlock_domain(d);
 
@@ -341,10 +345,12 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
 
         irq_op.vector = assign_irq_vector(irq);
 
+        read_lock(&pcidevs_lock);
         spin_lock(&dom0->event_lock);
         ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector,
                               MAP_PIRQ_TYPE_GSI, NULL);
         spin_unlock(&dom0->event_lock);
+        read_unlock(&pcidevs_lock);
 
         if ( copy_to_guest(arg, &irq_op, 1) != 0 )
             ret = -EFAULT;
diff -r c15244125a69 -r 2941b1a97c60 xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/arch/x86/x86_64/asm-offsets.c Thu Dec 11 11:48:19 2008 +0000
@@ -60,6 +60,8 @@ void __dummy__(void)
     DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs));
     BLANK();
 
+    OFFSET(irq_caps_offset, struct domain, irq_caps);
+    OFFSET(next_in_list_offset, struct domain, next_in_list);
     OFFSET(VCPU_processor, struct vcpu, processor);
     OFFSET(VCPU_domain, struct vcpu, domain);
     OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
diff -r c15244125a69 -r 2941b1a97c60 xen/common/domain.c
--- a/xen/common/domain.c       Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/common/domain.c       Thu Dec 11 11:48:19 2008 +0000
@@ -558,11 +558,11 @@ static void complete_domain_destroy(stru
         sched_destroy_vcpu(v);
     }
 
+    grant_table_destroy(d);
+
+    arch_domain_destroy(d);
+
     rangeset_domain_destroy(d);
-
-    grant_table_destroy(d);
-
-    arch_domain_destroy(d);
 
     sched_destroy_domain(d);
 
diff -r c15244125a69 -r 2941b1a97c60 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Thu Dec 11 11:48:19 2008 +0000
@@ -282,9 +282,13 @@ static int reassign_device( struct domai
     struct amd_iommu *iommu;
     int bdf;
 
-    pdev = pci_lock_domain_pdev(source, bus, devfn);
+    read_lock(&pcidevs_lock);
+    pdev = pci_get_pdev_by_domain(source, bus, devfn);
     if ( !pdev )
-       return -ENODEV;
+    {
+        read_unlock(&pcidevs_lock);
+        return -ENODEV;
+    }
 
     bdf = (bus << 8) | devfn;
     /* supported device? */
@@ -293,8 +297,8 @@ static int reassign_device( struct domai
 
     if ( !iommu )
     {
-       spin_unlock(&pdev->lock);
-       amd_iov_error("Fail to find iommu."
+        read_unlock(&pcidevs_lock);
+        amd_iov_error("Fail to find iommu."
                      " %x:%x.%x cannot be assigned to domain %d\n", 
                      bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
        return -ENODEV;
@@ -302,9 +306,7 @@ static int reassign_device( struct domai
 
     amd_iommu_disable_domain_device(source, iommu, bdf);
 
-    write_lock(&pcidevs_lock);
     list_move(&pdev->domain_list, &target->arch.pdev_list);
-    write_unlock(&pcidevs_lock);
     pdev->domain = target;
 
     amd_iommu_setup_domain_device(target, iommu, bdf);
@@ -312,7 +314,7 @@ static int reassign_device( struct domai
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                  source->domain_id, target->domain_id);
 
-    spin_unlock(&pdev->lock);
+    read_unlock(&pcidevs_lock);
     return 0;
 }
 
diff -r c15244125a69 -r 2941b1a97c60 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/drivers/passthrough/iommu.c   Thu Dec 11 11:48:19 2008 +0000
@@ -83,8 +83,11 @@ int iommu_add_device(struct pci_dev *pde
 int iommu_add_device(struct pci_dev *pdev)
 {
     struct hvm_iommu *hd;
+
     if ( !pdev->domain )
         return -EINVAL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
 
     hd = domain_hvm_iommu(pdev->domain);
     if ( !iommu_enabled || !hd->platform_ops )
@@ -109,20 +112,24 @@ int assign_device(struct domain *d, u8 b
 int assign_device(struct domain *d, u8 bus, u8 devfn)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
-    int rc;
-
-    if ( !iommu_enabled || !hd->platform_ops )
-        return 0;
-
+    int rc = 0;
+
+    if ( !iommu_enabled || !hd->platform_ops )
+        return 0;
+
+    read_lock(&pcidevs_lock);
     if ( (rc = hd->platform_ops->assign_device(d, bus, devfn)) )
-        return rc;
+        goto done;
 
     if ( has_arch_pdevs(d) && !is_hvm_domain(d) && !need_iommu(d) )
     {
         d->need_iommu = 1;
-        return iommu_populate_page_table(d);
-    }
-    return 0;
+        rc = iommu_populate_page_table(d);
+        goto done;
+    }
+done:    
+    read_unlock(&pcidevs_lock);
+    return rc;
 }
 
 static int iommu_populate_page_table(struct domain *d)
@@ -204,12 +211,29 @@ int iommu_unmap_page(struct domain *d, u
     return hd->platform_ops->unmap_page(d, gfn);
 }
 
-void deassign_device(struct domain *d, u8 bus, u8 devfn)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(d);
-
-    if ( !iommu_enabled || !hd->platform_ops )
-        return;
+int  deassign_device(struct domain *d, u8 bus, u8 devfn)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct pci_dev *pdev = NULL;
+
+    if ( !iommu_enabled || !hd->platform_ops )
+        return -EINVAL;
+
+    read_lock(&pcidevs_lock);
+    pdev = pci_get_pdev(bus, devfn);
+    if (!pdev)
+    {
+        read_unlock(&pcidevs_lock);
+        return -ENODEV;
+    }
+
+    if (pdev->domain != d)
+    {
+        read_unlock(&pcidevs_lock);
+        gdprintk(XENLOG_ERR VTDPREFIX,
+                "IOMMU: deassigning a device not owned by the domain\n");
+       return -EINVAL;
+    }
 
     hd->platform_ops->reassign_device(d, dom0, bus, devfn);
 
@@ -218,6 +242,10 @@ void deassign_device(struct domain *d, u
         d->need_iommu = 0;
         hd->platform_ops->teardown(d);
     }
+
+    read_unlock(&pcidevs_lock);
+
+    return 0;
 }
 
 static int iommu_setup(void)
diff -r c15244125a69 -r 2941b1a97c60 xen/drivers/passthrough/pci.c
--- a/xen/drivers/passthrough/pci.c     Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/drivers/passthrough/pci.c     Thu Dec 11 11:48:19 2008 +0000
@@ -41,11 +41,11 @@ struct pci_dev *alloc_pdev(u8 bus, u8 de
     pdev = xmalloc(struct pci_dev);
     if ( !pdev )
         return NULL;
+    memset(pdev, 0, sizeof(struct pci_dev));
 
     *((u8*) &pdev->bus) = bus;
     *((u8*) &pdev->devfn) = devfn;
     pdev->domain = NULL;
-    spin_lock_init(&pdev->lock);
     INIT_LIST_HEAD(&pdev->msi_list);
     list_add(&pdev->alldevs_list, &alldevs_list);
 
@@ -58,42 +58,35 @@ void free_pdev(struct pci_dev *pdev)
     xfree(pdev);
 }
 
-struct pci_dev *pci_lock_pdev(int bus, int devfn)
-{
-    struct pci_dev *pdev;
-
-    read_lock(&pcidevs_lock);
+struct pci_dev *pci_get_pdev(int bus, int devfn)
+{
+    struct pci_dev *pdev = NULL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
+
     list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
         if ( (pdev->bus == bus || bus == -1) &&
              (pdev->devfn == devfn || devfn == -1) )
-    {
-        spin_lock(&pdev->lock);
-        read_unlock(&pcidevs_lock);
-        return pdev;
-    }
-    read_unlock(&pcidevs_lock);
+        {
+            return pdev;
+        }
 
     return NULL;
 }
 
-struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn)
-{
-    struct pci_dev *pdev;
-
-    read_lock(&pcidevs_lock);
-    list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
-    {
-        spin_lock(&pdev->lock);
-        if ( (pdev->bus == bus || bus == -1) &&
-             (pdev->devfn == devfn || devfn == -1) &&
-             (pdev->domain == d) )
-        {
-            read_unlock(&pcidevs_lock);
-            return pdev;
-        }
-        spin_unlock(&pdev->lock);
-    }
-    read_unlock(&pcidevs_lock);
+struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn)
+{
+    struct pci_dev *pdev = NULL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
+
+    list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+         if ( (pdev->bus == bus || bus == -1) &&
+              (pdev->devfn == devfn || devfn == -1) &&
+              (pdev->domain == d) )
+         {
+             return pdev;
+         }
 
     return NULL;
 }
@@ -109,24 +102,20 @@ int pci_add_device(u8 bus, u8 devfn)
         goto out;
 
     ret = 0;
-    spin_lock(&pdev->lock);
     if ( !pdev->domain )
     {
         pdev->domain = dom0;
         ret = iommu_add_device(pdev);
         if ( ret )
-        {
-            spin_unlock(&pdev->lock);
             goto out;
-        }
+
         list_add(&pdev->domain_list, &dom0->arch.pdev_list);
     }
-    spin_unlock(&pdev->lock);
+
+out:
+    write_unlock(&pcidevs_lock);
     printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
            PCI_SLOT(devfn), PCI_FUNC(devfn));
-
-out:
-    write_unlock(&pcidevs_lock);
     return ret;
 }
 
@@ -139,7 +128,6 @@ int pci_remove_device(u8 bus, u8 devfn)
     list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
         if ( pdev->bus == bus && pdev->devfn == devfn )
         {
-            spin_lock(&pdev->lock);
             ret = iommu_remove_device(pdev);
             if ( pdev->domain )
                 list_del(&pdev->domain_list);
@@ -199,14 +187,15 @@ void pci_release_devices(struct domain *
     struct pci_dev *pdev;
     u8 bus, devfn;
 
+    read_lock(&pcidevs_lock);
     pci_clean_dpci_irqs(d);
-    while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
+    while ( (pdev = pci_get_pdev_by_domain(d, -1, -1)) )
     {
         pci_cleanup_msi(pdev);
         bus = pdev->bus; devfn = pdev->devfn;
-        spin_unlock(&pdev->lock);
         deassign_device(d, bus, devfn);
     }
+    read_unlock(&pcidevs_lock);
 }
 
 #ifdef SUPPORT_MSI_REMAPPING
@@ -220,14 +209,12 @@ static void dump_pci_devices(unsigned ch
 
     list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
     {
-        spin_lock(&pdev->lock);
         printk("%02x:%02x.%x - dom %-3d - MSIs < ",
                pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                pdev->domain ? pdev->domain->domain_id : -1);
         list_for_each_entry ( msi, &pdev->msi_list, list )
                printk("%d ", msi->vector);
         printk(">\n");
-        spin_unlock(&pdev->lock);
     }
 
     read_unlock(&pcidevs_lock);
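
pci_lock_pdev()/pci_lock_domain_pdev() used to return a device with
its per-device lock held; their replacements pci_get_pdev() and
pci_get_pdev_by_domain() are plain lookups that ASSERT() the caller
holds pcidevs_lock. The resulting caller-side pattern (illustrative;
bus and devfn are assumed locals):

    read_lock(&pcidevs_lock);
    pdev = pci_get_pdev(bus, devfn);
    if ( pdev != NULL )
    {
        /* safe to use pdev: it cannot go away while pcidevs_lock is held */
    }
    read_unlock(&pcidevs_lock);
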
diff -r c15244125a69 -r 2941b1a97c60 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/drivers/passthrough/vtd/iommu.c       Thu Dec 11 11:48:19 2008 +0000
@@ -49,15 +49,14 @@ static void context_set_domain_id(struct
 static void context_set_domain_id(struct context_entry *context,
                                   struct domain *d)
 {
-    unsigned long flags;
     domid_t iommu_domid = domain_iommu_domid(d);
 
     if ( iommu_domid == 0 )
     {
-        spin_lock_irqsave(&domid_bitmap_lock, flags);
+        spin_lock(&domid_bitmap_lock);
         iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
         set_bit(iommu_domid, domid_bitmap);
-        spin_unlock_irqrestore(&domid_bitmap_lock, flags);
+        spin_unlock(&domid_bitmap_lock);
         d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
     }
 
@@ -140,10 +139,9 @@ static u64 bus_to_context_maddr(struct i
 static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
 {
     struct root_entry *root, *root_entries;
-    unsigned long flags;
     u64 maddr;
 
-    spin_lock_irqsave(&iommu->lock, flags);
+    ASSERT(spin_is_locked(&iommu->lock));
     root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
     root = &root_entries[bus];
     if ( !root_present(*root) )
@@ -152,7 +150,6 @@ static u64 bus_to_context_maddr(struct i
         if ( maddr == 0 )
         {
             unmap_vtd_domain_page(root_entries);
-            spin_unlock_irqrestore(&iommu->lock, flags);
             return 0;
         }
         set_root_value(*root, maddr);
@@ -161,34 +158,7 @@ static u64 bus_to_context_maddr(struct i
     }
     maddr = (u64) get_context_addr(*root);
     unmap_vtd_domain_page(root_entries);
-    spin_unlock_irqrestore(&iommu->lock, flags);
     return maddr;
-}
-
-static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
-{
-    struct root_entry *root, *root_entries;
-    struct context_entry *context;
-    u64 context_maddr;
-    int ret;
-    unsigned long flags;
-
-    spin_lock_irqsave(&iommu->lock, flags);
-    root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
-    root = &root_entries[bus];
-    if ( !root_present(*root) )
-    {
-        ret = 0;
-        goto out;
-    }
-    context_maddr = get_context_addr(*root);
-    context = (struct context_entry *)map_vtd_domain_page(context_maddr);
-    ret = context_present(context[devfn]);
-    unmap_vtd_domain_page(context);
- out:
-    unmap_vtd_domain_page(root_entries);
-    spin_unlock_irqrestore(&iommu->lock, flags);
-    return ret;
 }
 
 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
@@ -198,12 +168,11 @@ static u64 addr_to_dma_page_maddr(struct
     struct dma_pte *parent, *pte = NULL;
     int level = agaw_to_level(hd->agaw);
     int offset;
-    unsigned long flags;
     u64 pte_maddr = 0, maddr;
     u64 *vaddr = NULL;
 
     addr &= (((u64)1) << addr_width) - 1;
-    spin_lock_irqsave(&hd->mapping_lock, flags);
+    ASSERT(spin_is_locked(&hd->mapping_lock));
     if ( hd->pgd_maddr == 0 )
         if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain)) == 0) )
             goto out;
@@ -252,7 +221,6 @@ static u64 addr_to_dma_page_maddr(struct
 
     unmap_vtd_domain_page(parent);
  out:
-    spin_unlock_irqrestore(&hd->mapping_lock, flags);
     return pte_maddr;
 }
 
@@ -536,22 +504,30 @@ static void dma_pte_clear_one(struct dom
     struct dma_pte *page = NULL, *pte = NULL;
     u64 pg_maddr;
 
+    spin_lock(&hd->mapping_lock);
     /* get last level pte */
     pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
     if ( pg_maddr == 0 )
+    {
+        spin_unlock(&hd->mapping_lock);
         return;
+    }
+
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
     pte = page + address_level_offset(addr, 1);
 
     if ( !dma_pte_present(*pte) )
     {
+        spin_unlock(&hd->mapping_lock);
         unmap_vtd_domain_page(page);
         return;
     }
 
     dma_clear_pte(*pte); 
+    spin_unlock(&hd->mapping_lock);
     iommu_flush_cache_entry(pte);
 
+    /* No need for pcidevs_lock here; it is taken on the assign/deassign paths. */
     for_each_drhd_unit ( drhd )
     {
         iommu = drhd->iommu;
@@ -598,16 +574,18 @@ static int iommu_set_root_entry(struct i
     unsigned long flags;
     s_time_t start_time;
 
-    spin_lock_irqsave(&iommu->register_lock, flags);
+    spin_lock(&iommu->lock);
 
     if ( iommu->root_maddr == 0 )
         iommu->root_maddr = alloc_pgtable_maddr(NULL);
     if ( iommu->root_maddr == 0 )
     {
-        spin_unlock_irqrestore(&iommu->register_lock, flags);
+        spin_unlock(&iommu->lock);
         return -ENOMEM;
     }
 
+    spin_unlock(&iommu->lock);
+    spin_lock_irqsave(&iommu->register_lock, flags);
     dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
     cmd = iommu->gcmd | DMA_GCMD_SRTP;
     dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
@@ -742,9 +720,7 @@ static void iommu_page_fault(int vector,
     dprintk(XENLOG_WARNING VTDPREFIX,
             "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
 
-    spin_lock_irqsave(&iommu->register_lock, flags);
     fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
-    spin_unlock_irqrestore(&iommu->register_lock, flags);
 
     iommu_fault_status(fault_status);
 
@@ -1057,21 +1033,30 @@ static int domain_context_mapping_one(
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct context_entry *context, *context_entries;
-    unsigned long flags;
     u64 maddr, pgd_maddr;
+    struct pci_dev *pdev = NULL;
     int agaw;
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    spin_lock(&iommu->lock);
     maddr = bus_to_context_maddr(iommu, bus);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
     context = &context_entries[devfn];
 
     if ( context_present(*context) )
     {
+        int res = 0;
+
+        pdev = pci_get_pdev(bus, devfn);
+        if (!pdev)
+            res = -ENODEV;
+        else if (pdev->domain != domain)
+            res = -EINVAL;
         unmap_vtd_domain_page(context_entries);
-        return 0;
-    }
-
-    spin_lock_irqsave(&iommu->lock, flags);
+        spin_unlock(&iommu->lock);
+        return res;
+    }
+
     if ( iommu_passthrough &&
          ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
     {
@@ -1080,6 +1065,8 @@ static int domain_context_mapping_one(
     }
     else
     {
+        spin_lock(&hd->mapping_lock);
+
         /* Ensure we have pagetables allocated down to leaf PTE. */
         if ( hd->pgd_maddr == 0 )
         {
@@ -1087,8 +1074,9 @@ static int domain_context_mapping_one(
             if ( hd->pgd_maddr == 0 )
             {
             nomem:
+                spin_unlock(&hd->mapping_lock);
+                spin_unlock(&iommu->lock);
                 unmap_vtd_domain_page(context_entries);
-                spin_unlock_irqrestore(&iommu->lock, flags);
                 return -ENOMEM;
             }
         }
@@ -1108,6 +1096,7 @@ static int domain_context_mapping_one(
 
         context_set_address_root(*context, pgd_maddr);
         context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+        spin_unlock(&hd->mapping_lock);
     }
 
     /*
@@ -1119,8 +1108,7 @@ static int domain_context_mapping_one(
     context_set_fault_enable(*context);
     context_set_present(*context);
     iommu_flush_cache_entry(context);
-
-    unmap_vtd_domain_page(context_entries);
+    spin_unlock(&iommu->lock);
 
     /* Context entry was previously non-present (with domid 0). */
     if ( iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
@@ -1130,7 +1118,8 @@ static int domain_context_mapping_one(
         iommu_flush_iotlb_dsi(iommu, 0, 1);
 
     set_bit(iommu->index, &hd->iommu_bitmap);
-    spin_unlock_irqrestore(&iommu->lock, flags);
+
+    unmap_vtd_domain_page(context_entries);
 
     return 0;
 }
@@ -1174,17 +1163,15 @@ int pdev_type(u8 bus, u8 devfn)
 }
 
 #define MAX_BUSES 256
+static DEFINE_SPINLOCK(bus2bridge_lock);
 static struct { u8 map, bus, devfn; } bus2bridge[MAX_BUSES];
 
-static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
+static int _find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
 {
     int cnt = 0;
     *secbus = *bus;
 
-    if ( *bus == 0 )
-        /* assume integrated PCI devices in RC have valid requester-id */
-        return 1;
-
+    ASSERT(spin_is_locked(&bus2bridge_lock));
     if ( !bus2bridge[*bus].map )
         return 0;
 
@@ -1200,6 +1187,21 @@ static int find_pcie_endpoint(u8 *bus, u
     return 1;
 }
 
+static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
+{
+    int ret = 0;
+
+    if ( *bus == 0 )
+        /* assume integrated PCI devices in RC have valid requester-id */
+        return 1;
+
+    spin_lock(&bus2bridge_lock);
+    ret = _find_pcie_endpoint(bus, devfn, secbus);
+    spin_unlock(&bus2bridge_lock);
+
+    return ret;
+}
+
 static int domain_context_mapping(struct domain *domain, u8 bus, u8 devfn)
 {
     struct acpi_drhd_unit *drhd;
@@ -1211,6 +1213,8 @@ static int domain_context_mapping(struct
     drhd = acpi_find_matched_drhd_unit(bus, devfn);
     if ( !drhd )
         return -ENODEV;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
 
     type = pdev_type(bus, devfn);
     switch ( type )
@@ -1226,12 +1230,14 @@ static int domain_context_mapping(struct
         if ( type == DEV_TYPE_PCIe_BRIDGE )
             break;
 
+        spin_lock(&bus2bridge_lock);
         for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ )
         {
             bus2bridge[sec_bus].map = 1;
             bus2bridge[sec_bus].bus =  bus;
             bus2bridge[sec_bus].devfn =  devfn;
         }
+        spin_unlock(&bus2bridge_lock);
         break;
 
     case DEV_TYPE_PCIe_ENDPOINT:
@@ -1290,8 +1296,10 @@ static int domain_context_unmap_one(
     u8 bus, u8 devfn)
 {
     struct context_entry *context, *context_entries;
-    unsigned long flags;
     u64 maddr;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    spin_lock(&iommu->lock);
 
     maddr = bus_to_context_maddr(iommu, bus);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
@@ -1299,11 +1307,11 @@ static int domain_context_unmap_one(
 
     if ( !context_present(*context) )
     {
+        spin_unlock(&iommu->lock);
         unmap_vtd_domain_page(context_entries);
         return 0;
     }
 
-    spin_lock_irqsave(&iommu->lock, flags);
     context_clear_present(*context);
     context_clear_entry(*context);
     iommu_flush_cache_entry(context);
@@ -1315,8 +1323,8 @@ static int domain_context_unmap_one(
     else
         iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
 
+    spin_unlock(&iommu->lock);
     unmap_vtd_domain_page(context_entries);
-    spin_unlock_irqrestore(&iommu->lock, flags);
 
     return 0;
 }
@@ -1380,7 +1388,10 @@ static int reassign_device_ownership(
     struct iommu *pdev_iommu;
     int ret, found = 0;
 
-    if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) )
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    pdev = pci_get_pdev_by_domain(source, bus, devfn);
+
+    if (!pdev)
         return -ENODEV;
 
     drhd = acpi_find_matched_drhd_unit(bus, devfn);
@@ -1391,14 +1402,9 @@ static int reassign_device_ownership(
     if ( ret )
         return ret;
 
-    write_lock(&pcidevs_lock);
     list_move(&pdev->domain_list, &target->arch.pdev_list);
-    write_unlock(&pcidevs_lock);
     pdev->domain = target;
 
-    spin_unlock(&pdev->lock);
-
-    read_lock(&pcidevs_lock);
     for_each_pdev ( source, pdev )
     {
         drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
@@ -1408,7 +1414,6 @@ static int reassign_device_ownership(
             break;
         }
     }
-    read_unlock(&pcidevs_lock);
 
     if ( !found )
         clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
@@ -1423,20 +1428,13 @@ void iommu_domain_teardown(struct domain
     if ( list_empty(&acpi_drhd_units) )
         return;
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    spin_lock(&hd->mapping_lock);
     iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
     hd->pgd_maddr = 0;
+    spin_unlock(&hd->mapping_lock);
+
     iommu_domid_release(d);
-}
-
-static int domain_context_mapped(u8 bus, u8 devfn)
-{
-    struct acpi_drhd_unit *drhd;
-
-    for_each_drhd_unit ( drhd )
-        if ( device_context_mapped(drhd->iommu, bus, devfn) )
-            return 1;
-
-    return 0;
 }
 
 int intel_iommu_map_page(
@@ -1457,17 +1455,27 @@ int intel_iommu_map_page(
          ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
         return 0;
 
+    spin_lock(&hd->mapping_lock);
+
     pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
     if ( pg_maddr == 0 )
+    {
+        spin_unlock(&hd->mapping_lock);
         return -ENOMEM;
+    }
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
     pte = page + (gfn & LEVEL_MASK);
     pte_present = dma_pte_present(*pte);
     dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
     dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
     iommu_flush_cache_entry(pte);
+    spin_unlock(&hd->mapping_lock);
     unmap_vtd_domain_page(page);
 
+    /*
+     * No need for pcidevs_lock here because we flush
+     * when assigning/deassigning a device.
+     */
     for_each_drhd_unit ( drhd )
     {
         iommu = drhd->iommu;
@@ -1510,6 +1518,7 @@ static int iommu_prepare_rmrr_dev(struct
     u64 base, end;
     unsigned long base_pfn, end_pfn;
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
     ASSERT(rmrr->base_address < rmrr->end_address);
     
     base = rmrr->base_address & PAGE_MASK_4K;
@@ -1523,8 +1532,7 @@ static int iommu_prepare_rmrr_dev(struct
         base_pfn++;
     }
 
-    if ( domain_context_mapped(bus, devfn) == 0 )
-        ret = domain_context_mapping(d, bus, devfn);
+    ret = domain_context_mapping(d, bus, devfn);
 
     return ret;
 }
@@ -1534,6 +1542,8 @@ static int intel_iommu_add_device(struct
     struct acpi_rmrr_unit *rmrr;
     u16 bdf;
     int ret, i;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
 
     if ( !pdev->domain )
         return -EINVAL;
@@ -1689,6 +1699,7 @@ static void setup_dom0_rmrr(struct domai
     u16 bdf;
     int ret, i;
 
+    read_lock(&pcidevs_lock);
     for_each_rmrr_device ( rmrr, bdf, i )
     {
         ret = iommu_prepare_rmrr_dev(d, rmrr, PCI_BUS(bdf), PCI_DEVFN2(bdf));
@@ -1696,6 +1707,7 @@ static void setup_dom0_rmrr(struct domai
             gdprintk(XENLOG_ERR VTDPREFIX,
                      "IOMMU: mapping reserved region failed\n");
     }
+    read_unlock(&pcidevs_lock);
 }
 
 int intel_vtd_setup(void)
@@ -1748,27 +1760,43 @@ int device_assigned(u8 bus, u8 devfn)
 {
     struct pci_dev *pdev;
 
-    if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) )
-    {
-        spin_unlock(&pdev->lock);
-        return 0;
-    }
-
-    return 1;
+    read_lock(&pcidevs_lock);
+    pdev = pci_get_pdev_by_domain(dom0, bus, devfn);
+    if (!pdev)
+    {
+        read_unlock(&pcidevs_lock);
+        return -1;
+    }
+
+    read_unlock(&pcidevs_lock);
+    return 0;
 }
 
 int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
 {
     struct acpi_rmrr_unit *rmrr;
     int ret = 0, i;
+    struct pci_dev *pdev;
     u16 bdf;
 
     if ( list_empty(&acpi_drhd_units) )
         return -ENODEV;
 
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    pdev = pci_get_pdev(bus, devfn);
+    if (!pdev)
+        return -ENODEV;
+
+    if (pdev->domain != dom0)
+    {
+        gdprintk(XENLOG_ERR VTDPREFIX,
+                "IOMMU: assigning an already assigned device\n");
+       return -EBUSY;
+    }
+
     ret = reassign_device_ownership(dom0, d, bus, devfn);
     if ( ret )
-        return ret;
+        goto done;
 
     /* Setup rmrr identity mapping */
     for_each_rmrr_device( rmrr, bdf, i )
@@ -1779,16 +1807,20 @@ int intel_iommu_assign_device(struct dom
              * ignore USB RMRR temporarily.
              */
             if ( is_usb_device(bus, devfn) )
-                return 0;
+            {
+                ret = 0;
+                goto done;
+            }
 
             ret = iommu_prepare_rmrr_dev(d, rmrr, bus, devfn);
             if ( ret )
                 gdprintk(XENLOG_ERR VTDPREFIX,
                          "IOMMU: mapping reserved region failed\n");
-            return ret;
+            goto done; 
         }
     }
 
+done:
     return ret;
 }
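
The VT-d changes follow the same pattern: helpers such as
bus_to_context_maddr() and addr_to_dma_page_maddr() no longer take
iommu->lock or hd->mapping_lock themselves but ASSERT() that the
caller holds them. An illustrative caller shape, as in
domain_context_mapping_one() above (maddr is an assumed local):

    ASSERT(spin_is_locked(&pcidevs_lock));
    spin_lock(&iommu->lock);
    maddr = bus_to_context_maddr(iommu, bus);  /* asserts iommu->lock */
    /* ... read or update the context entry ... */
    spin_unlock(&iommu->lock);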
 
diff -r c15244125a69 -r 2941b1a97c60 xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/include/asm-x86/msi.h Thu Dec 11 11:48:19 2008 +0000
@@ -68,13 +68,17 @@ struct msi_msg {
        u32     data;           /* 16 bits of msi message data */
 };
 
+struct msi_desc;
 /* Helper functions */
 extern void mask_msi_vector(unsigned int vector);
 extern void unmask_msi_vector(unsigned int vector);
 extern void set_msi_affinity(unsigned int vector, cpumask_t mask);
-extern int pci_enable_msi(struct msi_info *msi);
-extern void pci_disable_msi(int vector);
+extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
+extern void pci_disable_msi(struct msi_desc *desc);
 extern void pci_cleanup_msi(struct pci_dev *pdev);
+extern int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
+extern void teardown_msi_vector(int vector);
+extern int msi_free_vector(struct msi_desc *entry);
 
 struct msi_desc {
        struct {
diff -r c15244125a69 -r 2941b1a97c60 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/include/xen/iommu.h   Thu Dec 11 11:48:19 2008 +0000
@@ -62,7 +62,7 @@ void iommu_domain_destroy(struct domain 
 void iommu_domain_destroy(struct domain *d);
 int device_assigned(u8 bus, u8 devfn);
 int assign_device(struct domain *d, u8 bus, u8 devfn);
-void deassign_device(struct domain *d, u8 bus, u8 devfn);
+int deassign_device(struct domain *d, u8 bus, u8 devfn);
 int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
     XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
diff -r c15244125a69 -r 2941b1a97c60 xen/include/xen/pci.h
--- a/xen/include/xen/pci.h     Thu Dec 11 11:40:10 2008 +0000
+++ b/xen/include/xen/pci.h     Thu Dec 11 11:48:19 2008 +0000
@@ -36,7 +36,6 @@ struct pci_dev {
     struct domain *domain;
     const u8 bus;
     const u8 devfn;
-    spinlock_t lock;
 };
 
 #define for_each_pdev(domain, pdev) \
@@ -59,6 +58,8 @@ void pci_release_devices(struct domain *
 void pci_release_devices(struct domain *d);
 int pci_add_device(u8 bus, u8 devfn);
 int pci_remove_device(u8 bus, u8 devfn);
+struct pci_dev *pci_get_pdev(int bus, int devfn);
+struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);
 
 uint8_t pci_conf_read8(
     unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg);
