[RFC PATCH] pci: introduce per-domain PCI rwlock
Add a per-domain d->pci_lock that protects access to
d->pdev_list. The purpose of this lock is to guarantee to the vPCI
code that the underlying pdev will not disappear from under its feet.
Later it will also protect the pdev->vpci structure and pdev accesses
in modify_bars().
Suggested-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
Suggested-by: Jan Beulich <jbeulich@xxxxxxxx>
Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@xxxxxxxx>
---
This patch should be part of the vPCI series, but I am posting it as a
single-patch RFC to discuss the changes to the x86 MM and IOMMU code.

I opted to factor out part of the changes from the "vpci: introduce
per-domain lock to protect vpci structure" commit to ease the review
process.
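
To illustrate the intended locking discipline, here is a minimal
sketch (not part of the patch; both helper names are invented for
illustration). Code that merely traverses d->pdev_list takes the new
lock in read mode, while code that moves a device between two domains
takes each domain's lock in write mode, one after the other:

    /* Hypothetical read-side helper: traversal only, no list changes. */
    static bool domain_has_pdev(struct domain *d, pci_sbdf_t sbdf)
    {
        const struct pci_dev *pdev;
        bool found = false;

        read_lock(&d->pci_lock);
        for_each_pdev ( d, pdev )
            if ( pdev->sbdf.sbdf == sbdf.sbdf )
            {
                found = true;
                break;
            }
        read_unlock(&d->pci_lock);

        return found;
    }

    /* Hypothetical write-side helper: mirrors the reassign paths below. */
    static void pdev_move_to_domain(struct pci_dev *pdev,
                                    struct domain *target)
    {
        /* Remove from the source domain's list under its write lock. */
        write_lock(&pdev->domain->pci_lock);
        list_del(&pdev->domain_list);
        write_unlock(&pdev->domain->pci_lock);

        /* Insert into the target domain's list under its write lock. */
        write_lock(&target->pci_lock);
        list_add(&pdev->domain_list, &target->pdev_list);
        write_unlock(&target->pci_lock);

        pdev->domain = target;
    }

In this patch pcidevs_lock() is still held around device (de)assignment,
so taking the two write locks one after the other cannot race with
another move of the same device; the new lock primarily shields readers
of the list.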
---
xen/arch/x86/hvm/hvm.c | 2 +
xen/arch/x86/hvm/vmx/vmcs.c | 2 +
xen/arch/x86/mm.c | 6 ++
xen/arch/x86/mm/p2m-pod.c | 7 +++
xen/arch/x86/mm/paging.c | 6 ++
xen/common/domain.c | 1 +
xen/drivers/passthrough/amd/iommu_cmd.c | 4 +-
xen/drivers/passthrough/amd/pci_amd_iommu.c | 17 +++++-
xen/drivers/passthrough/pci.c | 70 +++++++++++++++++----
xen/drivers/passthrough/vtd/iommu.c | 19 +++++-
xen/include/xen/sched.h | 1 +
11 files changed, 119 insertions(+), 16 deletions(-)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index a67ef79dc0..089fbe38a7 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2381,12 +2381,14 @@ int hvm_set_cr0(unsigned long value, bool may_defer)
}
}
+ read_lock(&d->pci_lock);
if ( ((value ^ old_value) & X86_CR0_CD) &&
is_iommu_enabled(d) && hvm_funcs.handle_cd &&
(!rangeset_is_empty(d->iomem_caps) ||
!rangeset_is_empty(d->arch.ioport_caps) ||
has_arch_pdevs(d)) )
alternative_vcall(hvm_funcs.handle_cd, v, value);
+ read_unlock(&d->pci_lock);
hvm_update_cr(v, 0, value);
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index b209563625..88bbcbbd99 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1889,6 +1889,7 @@ void cf_check vmx_do_resume(void)
* 2: execute wbinvd on all dirty pCPUs when guest wbinvd exits.
* If VT-d engine can force snooping, we don't need to do these.
*/
+ read_lock(&v->domain->pci_lock);
if ( has_arch_pdevs(v->domain) && !iommu_snoop
&& !cpu_has_wbinvd_exiting )
{
@@ -1896,6 +1897,7 @@ void cf_check vmx_do_resume(void)
if ( cpu != -1 )
flush_mask(cpumask_of(cpu), FLUSH_CACHE);
}
+ read_unlock(&v->domain->pci_lock);
vmx_clear_vmcs(v);
vmx_load_vmcs(v);
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index be2b10a391..f1e882a980 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -858,12 +858,15 @@ get_page_from_l1e(
return 0;
}
+ read_lock(&l1e_owner->pci_lock);
if ( unlikely(l1f & l1_disallow_mask(l1e_owner)) )
{
gdprintk(XENLOG_WARNING, "Bad L1 flags %x\n",
l1f & l1_disallow_mask(l1e_owner));
+ read_unlock(&l1e_owner->pci_lock);
return -EINVAL;
}
+ read_unlock(&l1e_owner->pci_lock);
valid = mfn_valid(_mfn(mfn));
@@ -2142,12 +2145,15 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
{
struct page_info *page = NULL;
+ read_lock(&pt_dom->pci_lock);
if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom)) )
{
gdprintk(XENLOG_WARNING, "Bad L1 flags %x\n",
l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom));
+ read_unlock(&pt_dom->pci_lock);
return -EINVAL;
}
+ read_unlock(&pt_dom->pci_lock);
/* Translate foreign guest address. */
if ( cmd != MMU_PT_UPDATE_NO_TRANSLATE &&
diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
index 9969eb45fa..07e0bedad7 100644
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -349,10 +349,12 @@ p2m_pod_set_mem_target(struct domain *d, unsigned long target)
ASSERT( pod_target >= p2m->pod.count );
+ read_lock(&d->pci_lock);
if ( has_arch_pdevs(d) || cache_flush_permitted(d) )
ret = -ENOTEMPTY;
else
ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/);
+ read_unlock(&d->pci_lock);
out:
pod_unlock(p2m);
@@ -1401,8 +1403,13 @@ guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
if ( !paging_mode_translate(d) )
return -EINVAL;
+ read_lock(&d->pci_lock);
if ( has_arch_pdevs(d) || cache_flush_permitted(d) )
+ {
+ read_unlock(&d->pci_lock);
return -ENOTEMPTY;
+ }
+ read_unlock(&d->pci_lock);
do {
rc = mark_populate_on_demand(d, gfn, chunk_order);
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index 34d833251b..fb8f7ff7cf 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -205,21 +205,27 @@ static int paging_log_dirty_enable(struct domain *d)
{
int ret;
+ read_lock(&d->pci_lock);
if ( has_arch_pdevs(d) )
{
/*
* Refuse to turn on global log-dirty mode
* if the domain is sharing the P2M with the IOMMU.
*/
+ read_unlock(&d->pci_lock);
return -EINVAL;
}
if ( paging_mode_log_dirty(d) )
+ {
+ read_unlock(&d->pci_lock);
return -EINVAL;
+ }
domain_pause(d);
ret = d->arch.paging.log_dirty.ops->enable(d);
domain_unpause(d);
+ read_unlock(&d->pci_lock);
return ret;
}
diff --git a/xen/common/domain.c b/xen/common/domain.c
index caaa402637..5d8a8836da 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -645,6 +645,7 @@ struct domain *domain_create(domid_t domid,
#ifdef CONFIG_HAS_PCI
INIT_LIST_HEAD(&d->pdev_list);
+ rwlock_init(&d->pci_lock);
#endif
/* All error paths can depend on the above setup. */
diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c
index 40ddf366bb..b67aee31f6 100644
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -308,11 +308,12 @@ void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
flush_command_buffer(iommu, iommu_dev_iotlb_timeout);
}
-static void amd_iommu_flush_all_iotlbs(const struct domain *d, daddr_t daddr,
+static void amd_iommu_flush_all_iotlbs(struct domain *d, daddr_t daddr,
unsigned int order)
{
struct pci_dev *pdev;
+ read_lock(&d->pci_lock);
for_each_pdev( d, pdev )
{
u8 devfn = pdev->devfn;
@@ -323,6 +324,7 @@ static void amd_iommu_flush_all_iotlbs(const struct domain *d, daddr_t daddr,
} while ( devfn != pdev->devfn &&
PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
}
+ read_unlock(&d->pci_lock);
}
/* Flush iommu cache after p2m changes. */
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 94e3775506..8541b66a93 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -102,6 +102,8 @@ static bool any_pdev_behind_iommu(const struct domain *d,
{
const struct pci_dev *pdev;
+ ASSERT(rw_is_locked(&d->pci_lock));
+
for_each_pdev ( d, pdev )
{
if ( pdev == exclude )
@@ -467,17 +469,26 @@ static int cf_check reassign_device(
if ( !QUARANTINE_SKIP(target, pdev) )
{
+ read_lock(&target->pci_lock);
rc = amd_iommu_setup_domain_device(target, iommu, devfn, pdev);
+ read_unlock(&target->pci_lock);
if ( rc )
return rc;
}
else
amd_iommu_disable_domain_device(source, iommu, devfn, pdev);
if ( devfn == pdev->devfn && pdev->domain != target )
{
- list_move(&pdev->domain_list, &target->pdev_list);
- pdev->domain = target;
+ write_lock(&pdev->domain->pci_lock);
+ list_del(&pdev->domain_list);
+ write_unlock(&pdev->domain->pci_lock);
+
+ write_lock(&target->pci_lock);
+ list_add(&pdev->domain_list, &target->pdev_list);
+ write_unlock(&target->pci_lock);
+
+ pdev->domain = target;
}
/*
@@ -628,12 +637,14 @@ static int cf_check amd_iommu_add_device(u8 devfn, struct pci_dev *pdev)
fresh_domid = true;
}
+ read_lock(&pdev->domain->pci_lock);
ret = amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
if ( ret && fresh_domid )
{
iommu_free_domid(pdev->arch.pseudo_domid, iommu->domid_map);
pdev->arch.pseudo_domid = DOMID_INVALID;
}
+ read_unlock(&pdev->domain->pci_lock);
return ret;
}
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 07d1986d33..1831e1b0c0 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -454,7 +454,9 @@ static void __init _pci_hide_device(struct pci_dev *pdev)
if ( pdev->domain )
return;
pdev->domain = dom_xen;
+ write_lock(&dom_xen->pci_lock);
list_add(&pdev->domain_list, &dom_xen->pdev_list);
+ write_unlock(&dom_xen->pci_lock);
}
int __init pci_hide_device(unsigned int seg, unsigned int bus,
@@ -530,7 +532,7 @@ struct pci_dev *pci_get_pdev(const struct domain *d, pci_sbdf_t sbdf)
{
struct pci_dev *pdev;
- ASSERT(d || pcidevs_locked());
+ ASSERT((d && rw_is_locked(&d->pci_lock)) || pcidevs_locked());
/*
* The hardware domain owns the majority of the devices in the system.
@@ -748,7 +750,9 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
if ( !pdev->domain )
{
pdev->domain = hardware_domain;
+ write_lock(&hardware_domain->pci_lock);
list_add(&pdev->domain_list, &hardware_domain->pdev_list);
+ write_unlock(&hardware_domain->pci_lock);
/*
* For devices not discovered by Xen during boot, add vPCI handlers
@@ -887,26 +891,62 @@ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
int pci_release_devices(struct domain *d)
{
- struct pci_dev *pdev, *tmp;
- u8 bus, devfn;
- int ret;
+ int combined_ret;
+ LIST_HEAD(failed_pdevs);
pcidevs_lock();
- ret = arch_pci_clean_pirqs(d);
- if ( ret )
+ write_lock(&d->pci_lock);
+ combined_ret = arch_pci_clean_pirqs(d);
+ if ( combined_ret )
{
pcidevs_unlock();
- return ret;
+ write_unlock(&d->pci_lock);
+ return combined_ret;
}
- list_for_each_entry_safe ( pdev, tmp, &d->pdev_list, domain_list )
+
+ while ( !list_empty(&d->pdev_list) )
{
- bus = pdev->bus;
- devfn = pdev->devfn;
- ret = deassign_device(d, pdev->seg, bus, devfn) ?: ret;
+ struct pci_dev *pdev = list_first_entry(&d->pdev_list,
+ struct pci_dev,
+ domain_list);
+ uint16_t seg = pdev->seg;
+ uint8_t bus = pdev->bus;
+ uint8_t devfn = pdev->devfn;
+ int ret;
+
+ write_unlock(&d->pci_lock);
+ ret = deassign_device(d, seg, bus, devfn);
+ write_lock(&d->pci_lock);
+ if ( ret )
+ {
+ bool still_present = false;
+ const struct pci_dev *tmp;
+
+ /*
+ * We need to check whether deassign_device() left our pdev in
+ * the domain's list. As we dropped the lock, we can't be sure
+ * that the list wasn't reordered in some arbitrary way, so we
+ * need to traverse the whole list.
+ */
+ for_each_pdev ( d, tmp )
+ {
+ if ( tmp == pdev )
+ {
+ still_present = true;
+ break;
+ }
+ }
+ if ( still_present )
+ list_move(&pdev->domain_list, &failed_pdevs);
+ combined_ret = ret;
+ }
}
+
+ list_splice(&failed_pdevs, &d->pdev_list);
+ write_unlock(&d->pci_lock);
pcidevs_unlock();
- return ret;
+ return combined_ret;
}
#define PCI_CLASS_BRIDGE_HOST 0x0600
@@ -1125,7 +1165,9 @@ static int __hwdom_init cf_check _setup_hwdom_pci_devices(
if ( !pdev->domain )
{
pdev->domain = ctxt->d;
+ write_lock(&ctxt->d->pci_lock);
list_add(&pdev->domain_list, &ctxt->d->pdev_list);
+ write_unlock(&ctxt->d->pci_lock);
setup_one_hwdom_device(ctxt, pdev);
}
else if ( pdev->domain == dom_xen )
@@ -1487,6 +1529,7 @@ static int iommu_get_device_group(
return group_id;
pcidevs_lock();
+ read_lock(&d->pci_lock);
for_each_pdev( d, pdev )
{
unsigned int b = pdev->bus;
@@ -1501,6 +1544,7 @@ static int iommu_get_device_group(
sdev_id = iommu_call(ops, get_device_group_id, seg, b, df);
if ( sdev_id < 0 )
{
+ read_unlock(&d->pci_lock);
pcidevs_unlock();
return sdev_id;
}
@@ -1511,6 +1555,7 @@ static int iommu_get_device_group(
if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
{
+ read_unlock(&d->pci_lock);
pcidevs_unlock();
return -EFAULT;
}
@@ -1518,6 +1563,7 @@ static int iommu_get_device_group(
}
}
+ read_unlock(&d->pci_lock);
pcidevs_unlock();
return i;
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 0e3062c820..6a36cc18fe 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -186,6 +186,8 @@ static bool any_pdev_behind_iommu(const struct domain *d,
{
const struct pci_dev *pdev;
+ ASSERT(rw_is_locked(&d->pci_lock));
+
for_each_pdev ( d, pdev )
{
const struct acpi_drhd_unit *drhd;
@@ -2765,6 +2767,7 @@ static int cf_check reassign_device_ownership(
if ( !QUARANTINE_SKIP(target, pdev->arch.vtd.pgd_maddr) )
{
+ read_lock(&target->pci_lock);
if ( !has_arch_pdevs(target) )
vmx_pi_hooks_assign(target);
@@ -2780,21 +2783,26 @@ static int cf_check reassign_device_ownership(
#endif
ret = domain_context_mapping(target, devfn, pdev);
+ read_unlock(&target->pci_lock);
if ( !ret && pdev->devfn == devfn &&
!QUARANTINE_SKIP(source, pdev->arch.vtd.pgd_maddr) )
{
const struct acpi_drhd_unit *drhd =
acpi_find_matched_drhd_unit(pdev);
+ read_lock(&source->pci_lock);
if ( drhd )
check_cleanup_domid_map(source, pdev, drhd->iommu);
+ read_unlock(&source->pci_lock);
}
}
else
{
const struct acpi_drhd_unit *drhd;
+ read_lock(&source->pci_lock);
drhd = domain_context_unmap(source, devfn, pdev);
+ read_unlock(&source->pci_lock);
ret = IS_ERR(drhd) ? PTR_ERR(drhd) : 0;
}
if ( ret )
@@ -2806,12 +2814,21 @@ static int cf_check reassign_device_ownership(
if ( devfn == pdev->devfn && pdev->domain != target )
{
- list_move(&pdev->domain_list, &target->pdev_list);
+ write_lock(&pdev->domain->pci_lock);
+ list_del(&pdev->domain_list);
+ write_unlock(&pdev->domain->pci_lock);
+
+ write_lock(&target->pci_lock);
+ list_add(&pdev->domain_list, &target->pdev_list);
+ write_unlock(&target->pci_lock);
+
pdev->domain = target;
}
+ read_lock(&source->pci_lock);
if ( !has_arch_pdevs(source) )
vmx_pi_hooks_deassign(source);
+ read_unlock(&source->pci_lock);
/*
* If the device belongs to the hardware domain, and it has RMRR, don't
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 85242a73d3..80dd150bbf 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -460,6 +460,7 @@ struct domain
#ifdef CONFIG_HAS_PCI
struct list_head pdev_list;
+ rwlock_t pci_lock;
#endif
#ifdef CONFIG_HAS_PASSTHROUGH
--
2.41.0