|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [RFC PATCH 01/10] xen: pci: add per-domain pci list lock
On Wed, 31 Aug 2022, Volodymyr Babchuk wrote:
> domain->pdevs_lock protects access to domain->pdev_list.
> As this, it should be used when we are adding, removing on enumerating
> PCI devices assigned to a domain.
>
> This enables more granular locking instead of one huge pcidevs_lock that
> locks entire PCI subsystem. Please note that pcidevs_lock() is still
> used, we are going to remove it in subsequent patches.
>
> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@xxxxxxxx>
I reviewed the patch, and made sure to pay extra attention to:
- error paths
- missing locks
- lock ordering
- interruptions
Here is what I found:
1) iommu.c:reassign_device_ownership and pci_amd_iommu.c:reassign_device
Both functions without any pdevs_lock locking do:
list_move(&pdev->domain_list, &target->pdev_list);
It seems to be it would need pdevs_lock. Maybe we need to change
list_move into list_del (protected by the pdevs_lock of the old domain)
and list_add (protected by the pdev_lock of the new domain).
2) has_arch_pdevs
has_arch_pdevs is implemented as list_empty and needs locking as well,
however no domain->pdevs_lock are added to protect has_arch_pdevs in
this patch. I think we need pdevs_lock around has_arch_pdevs.
Two more comments below about lock inversion and taking the same lock
twice
> ---
> xen/common/domain.c | 1 +
> xen/drivers/passthrough/amd/iommu_cmd.c | 4 ++-
> xen/drivers/passthrough/amd/pci_amd_iommu.c | 7 ++++-
> xen/drivers/passthrough/pci.c | 29 ++++++++++++++++++++-
> xen/drivers/passthrough/vtd/iommu.c | 9 +++++--
> xen/drivers/vpci/header.c | 3 +++
> xen/drivers/vpci/msi.c | 7 ++++-
> xen/drivers/vpci/vpci.c | 4 +--
> xen/include/xen/pci.h | 2 +-
> xen/include/xen/sched.h | 1 +
> 10 files changed, 58 insertions(+), 9 deletions(-)
>
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 7062393e37..4611141b87 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -618,6 +618,7 @@ struct domain *domain_create(domid_t domid,
>
> #ifdef CONFIG_HAS_PCI
> INIT_LIST_HEAD(&d->pdev_list);
> + spin_lock_init(&d->pdevs_lock);
> #endif
>
> /* All error paths can depend on the above setup. */
> diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c
> b/xen/drivers/passthrough/amd/iommu_cmd.c
> index 40ddf366bb..47c45398d4 100644
> --- a/xen/drivers/passthrough/amd/iommu_cmd.c
> +++ b/xen/drivers/passthrough/amd/iommu_cmd.c
> @@ -308,11 +308,12 @@ void amd_iommu_flush_iotlb(u8 devfn, const struct
> pci_dev *pdev,
> flush_command_buffer(iommu, iommu_dev_iotlb_timeout);
> }
>
> -static void amd_iommu_flush_all_iotlbs(const struct domain *d, daddr_t daddr,
> +static void amd_iommu_flush_all_iotlbs(struct domain *d, daddr_t daddr,
> unsigned int order)
> {
> struct pci_dev *pdev;
>
> + spin_lock(&d->pdevs_lock);
> for_each_pdev( d, pdev )
> {
> u8 devfn = pdev->devfn;
> @@ -323,6 +324,7 @@ static void amd_iommu_flush_all_iotlbs(const struct
> domain *d, daddr_t daddr,
> } while ( devfn != pdev->devfn &&
> PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
> }
> + spin_unlock(&d->pdevs_lock);
> }
>
> /* Flush iommu cache after p2m changes. */
> diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> index 4ba8e764b2..64c016491d 100644
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> @@ -96,20 +96,25 @@ static int __must_check allocate_domain_resources(struct
> domain *d)
> return rc;
> }
>
> -static bool any_pdev_behind_iommu(const struct domain *d,
> +static bool any_pdev_behind_iommu(struct domain *d,
> const struct pci_dev *exclude,
> const struct amd_iommu *iommu)
> {
> const struct pci_dev *pdev;
>
> + spin_lock(&d->pdevs_lock);
> for_each_pdev ( d, pdev )
> {
> if ( pdev == exclude )
> continue;
>
> if ( find_iommu_for_device(pdev->seg, pdev->sbdf.bdf) == iommu )
> + {
> + spin_unlock(&d->pdevs_lock);
> return true;
> + }
code style: tabs instead of spaces
> }
> + spin_unlock(&d->pdevs_lock);
>
> return false;
> }
> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
> index cdaf5c247f..4366f8f965 100644
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -523,7 +523,9 @@ static void __init _pci_hide_device(struct pci_dev *pdev)
> if ( pdev->domain )
> return;
> pdev->domain = dom_xen;
> + spin_lock(&dom_xen->pdevs_lock);
> list_add(&pdev->domain_list, &dom_xen->pdev_list);
> + spin_unlock(&dom_xen->pdevs_lock);
> }
>
> int __init pci_hide_device(unsigned int seg, unsigned int bus,
> @@ -595,7 +597,7 @@ struct pci_dev *pci_get_real_pdev(pci_sbdf_t sbdf)
> return pdev;
> }
>
> -struct pci_dev *pci_get_pdev(const struct domain *d, pci_sbdf_t sbdf)
> +struct pci_dev *pci_get_pdev(struct domain *d, pci_sbdf_t sbdf)
> {
> struct pci_dev *pdev;
>
> @@ -620,9 +622,16 @@ struct pci_dev *pci_get_pdev(const struct domain *d,
> pci_sbdf_t sbdf)
> return pdev;
> }
> else
> + {
> + spin_lock(&d->pdevs_lock);
> list_for_each_entry ( pdev, &d->pdev_list, domain_list )
> if ( pdev->sbdf.bdf == sbdf.bdf )
> + {
> + spin_unlock(&d->pdevs_lock);
> return pdev;
> + }
> + spin_unlock(&d->pdevs_lock);
> + }
>
> return NULL;
> }
> @@ -817,7 +826,9 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
> if ( !pdev->domain )
> {
> pdev->domain = hardware_domain;
> + spin_lock(&hardware_domain->pdevs_lock);
> list_add(&pdev->domain_list, &hardware_domain->pdev_list);
> + spin_unlock(&hardware_domain->pdevs_lock);
>
> /*
> * For devices not discovered by Xen during boot, add vPCI handlers
> @@ -827,7 +838,9 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
> if ( ret )
> {
> printk(XENLOG_ERR "Setup of vPCI failed: %d\n", ret);
> + spin_lock(&pdev->domain->pdevs_lock);
> list_del(&pdev->domain_list);
> + spin_unlock(&pdev->domain->pdevs_lock);
> pdev->domain = NULL;
> goto out;
> }
> @@ -835,7 +848,9 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
> if ( ret )
> {
> vpci_remove_device(pdev);
> + spin_lock(&pdev->domain->pdevs_lock);
> list_del(&pdev->domain_list);
> + spin_unlock(&pdev->domain->pdevs_lock);
> pdev->domain = NULL;
> goto out;
> }
> @@ -885,7 +900,11 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
> pci_cleanup_msi(pdev);
> ret = iommu_remove_device(pdev);
> if ( pdev->domain )
> + {
> + spin_lock(&pdev->domain->pdevs_lock);
> list_del(&pdev->domain_list);
> + spin_unlock(&pdev->domain->pdevs_lock);
> + }
> printk(XENLOG_DEBUG "PCI remove device %pp\n", &pdev->sbdf);
> free_pdev(pseg, pdev);
> break;
> @@ -967,12 +986,14 @@ int pci_release_devices(struct domain *d)
> pcidevs_unlock();
> return ret;
> }
> + spin_lock(&d->pdevs_lock);
> list_for_each_entry_safe ( pdev, tmp, &d->pdev_list, domain_list )
> {
> bus = pdev->bus;
> devfn = pdev->devfn;
> ret = deassign_device(d, pdev->seg, bus, devfn) ?: ret;
This causes pdevs_lock to be taken twice. deassign_device also takes a
pdevs_lock. Probably we need to change all the
spin_lock(&d->pdevs_lock) into spin_lock_recursive.
> }
> + spin_unlock(&d->pdevs_lock);
> pcidevs_unlock();
>
> return ret;
> @@ -1194,7 +1215,9 @@ static int __hwdom_init cf_check
> _setup_hwdom_pci_devices(
> if ( !pdev->domain )
> {
> pdev->domain = ctxt->d;
> + spin_lock(&pdev->domain->pdevs_lock);
> list_add(&pdev->domain_list, &ctxt->d->pdev_list);
> + spin_unlock(&pdev->domain->pdevs_lock);
> setup_one_hwdom_device(ctxt, pdev);
> }
> else if ( pdev->domain == dom_xen )
> @@ -1556,6 +1579,7 @@ static int iommu_get_device_group(
> return group_id;
>
> pcidevs_lock();
> + spin_lock(&d->pdevs_lock);
> for_each_pdev( d, pdev )
> {
> unsigned int b = pdev->bus;
> @@ -1571,6 +1595,7 @@ static int iommu_get_device_group(
> if ( sdev_id < 0 )
> {
> pcidevs_unlock();
> + spin_unlock(&d->pdevs_lock);
lock inversion
> return sdev_id;
> }
>
> @@ -1581,6 +1606,7 @@ static int iommu_get_device_group(
> if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
> {
> pcidevs_unlock();
> + spin_unlock(&d->pdevs_lock);
lock inversion
> return -EFAULT;
> }
> i++;
> @@ -1588,6 +1614,7 @@ static int iommu_get_device_group(
> }
>
> pcidevs_unlock();
> + spin_unlock(&d->pdevs_lock);
lock inversion
> return i;
> }
> diff --git a/xen/drivers/passthrough/vtd/iommu.c
> b/xen/drivers/passthrough/vtd/iommu.c
> index 62e143125d..fff1442265 100644
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -183,12 +183,13 @@ static void cleanup_domid_map(domid_t domid, struct
> vtd_iommu *iommu)
> }
> }
>
> -static bool any_pdev_behind_iommu(const struct domain *d,
> +static bool any_pdev_behind_iommu(struct domain *d,
> const struct pci_dev *exclude,
> const struct vtd_iommu *iommu)
> {
> const struct pci_dev *pdev;
>
> + spin_lock(&d->pdevs_lock);
> for_each_pdev ( d, pdev )
> {
> const struct acpi_drhd_unit *drhd;
> @@ -198,8 +199,12 @@ static bool any_pdev_behind_iommu(const struct domain *d,
>
> drhd = acpi_find_matched_drhd_unit(pdev);
> if ( drhd && drhd->iommu == iommu )
> + {
> + spin_unlock(&d->pdevs_lock);
> return true;
> + }
> }
> + spin_unlock(&d->pdevs_lock);
>
> return false;
> }
> @@ -208,7 +213,7 @@ static bool any_pdev_behind_iommu(const struct domain *d,
> * If no other devices under the same iommu owned by this domain,
> * clear iommu in iommu_bitmap and clear domain_id in domid_bitmap.
> */
> -static void check_cleanup_domid_map(const struct domain *d,
> +static void check_cleanup_domid_map(struct domain *d,
> const struct pci_dev *exclude,
> struct vtd_iommu *iommu)
> {
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index a1c928a0d2..a59aa7ad0b 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -267,6 +267,7 @@ static int modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> * Check for overlaps with other BARs. Note that only BARs that are
> * currently mapped (enabled) are checked for overlaps.
> */
> + spin_lock(&pdev->domain->pdevs_lock);
> for_each_pdev ( pdev->domain, tmp )
> {
> if ( tmp == pdev )
> @@ -306,11 +307,13 @@ static int modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> printk(XENLOG_G_WARNING "Failed to remove [%lx, %lx]: %d\n",
> start, end, rc);
> rangeset_destroy(mem);
> + spin_unlock( &pdev->domain->pdevs_lock);
> return rc;
> }
> }
> }
>
> + spin_unlock( &pdev->domain->pdevs_lock);
> ASSERT(dev);
>
> if ( system_state < SYS_STATE_active )
> diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
> index 8f2b59e61a..8969c335b0 100644
> --- a/xen/drivers/vpci/msi.c
> +++ b/xen/drivers/vpci/msi.c
> @@ -265,7 +265,7 @@ REGISTER_VPCI_INIT(init_msi, VPCI_PRIORITY_LOW);
>
> void vpci_dump_msi(void)
> {
> - const struct domain *d;
> + struct domain *d;
>
> rcu_read_lock(&domlist_read_lock);
> for_each_domain ( d )
> @@ -277,6 +277,9 @@ void vpci_dump_msi(void)
>
> printk("vPCI MSI/MSI-X d%d\n", d->domain_id);
>
> + if ( !spin_trylock(&d->pdevs_lock) )
> + continue;
> +
> for_each_pdev ( d, pdev )
> {
> const struct vpci_msi *msi;
> @@ -326,6 +329,8 @@ void vpci_dump_msi(void)
> spin_unlock(&pdev->vpci->lock);
> process_pending_softirqs();
> }
> + spin_unlock(&d->pdevs_lock);
> +
> }
> rcu_read_unlock(&domlist_read_lock);
> }
> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> index 3467c0de86..7d1f9fd438 100644
> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -312,7 +312,7 @@ static uint32_t merge_result(uint32_t data, uint32_t new,
> unsigned int size,
>
> uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size)
> {
> - const struct domain *d = current->domain;
> + struct domain *d = current->domain;
> const struct pci_dev *pdev;
> const struct vpci_register *r;
> unsigned int data_offset = 0;
> @@ -415,7 +415,7 @@ static void vpci_write_helper(const struct pci_dev *pdev,
> void vpci_write(pci_sbdf_t sbdf, unsigned int reg, unsigned int size,
> uint32_t data)
> {
> - const struct domain *d = current->domain;
> + struct domain *d = current->domain;
> const struct pci_dev *pdev;
> const struct vpci_register *r;
> unsigned int data_offset = 0;
> diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
> index 5975ca2f30..19047b4b20 100644
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -177,7 +177,7 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
> int pci_remove_device(u16 seg, u8 bus, u8 devfn);
> int pci_ro_device(int seg, int bus, int devfn);
> int pci_hide_device(unsigned int seg, unsigned int bus, unsigned int devfn);
> -struct pci_dev *pci_get_pdev(const struct domain *d, pci_sbdf_t sbdf);
> +struct pci_dev *pci_get_pdev(struct domain *d, pci_sbdf_t sbdf);
> struct pci_dev *pci_get_real_pdev(pci_sbdf_t sbdf);
> void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
>
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 1cf629e7ec..0775228ba9 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -457,6 +457,7 @@ struct domain
>
> #ifdef CONFIG_HAS_PCI
> struct list_head pdev_list;
> + spinlock_t pdevs_lock;
I think it would be better called "pdev_lock" but OK either way
> #endif
>
> #ifdef CONFIG_HAS_PASSTHROUGH
> --
> 2.36.1
>
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |