[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH, v2] IOMMU: don't immediately disable bus mastering on faults
At 16:05 +0000 on 07 Nov (1352304352), Jan Beulich wrote: > Instead, give the owning domain at least a small opportunity of fixing > things up, and allow for rare faults to not bring down the device at > all. The amount of faults tolerated within a given time period (all > numbers are made up with no specific rationale) is higher for Dom0 than > for DomU-s. > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Looks good to me, though I don't think dom0 should have any special treatment here. I'd be inclined to set the threshold to 10 for everyone. Tim. > --- a/xen/drivers/passthrough/amd/iommu_init.c > +++ b/xen/drivers/passthrough/amd/iommu_init.c > @@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_ > > static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[]) > { > - u16 domain_id, device_id, bdf, cword, flags; > + u16 domain_id, device_id, bdf, flags; > u32 code; > u64 *addr; > int count = 0; > @@ -620,18 +620,10 @@ static void parse_event_log_entry(struct > "fault address = %#"PRIx64", flags = %#x\n", > event_str[code-1], domain_id, device_id, *addr, flags); > > - /* Tell the device to stop DMAing; we can't rely on the guest to > - * control it for us. */ > for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) > if ( get_dma_requestor_id(iommu->seg, bdf) == device_id ) > - { > - cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf), > - PCI_SLOT(bdf), PCI_FUNC(bdf), > - PCI_COMMAND); > - pci_conf_write16(iommu->seg, PCI_BUS(bdf), PCI_SLOT(bdf), > - PCI_FUNC(bdf), PCI_COMMAND, > - cword & ~PCI_COMMAND_MASTER); > - } > + pci_check_disable_device(iommu->seg, PCI_BUS(bdf), > + PCI_DEVFN2(bdf)); > } > else > { > --- a/xen/drivers/passthrough/iommu.c > +++ b/xen/drivers/passthrough/iommu.c > @@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b > static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) > { > struct hvm_iommu *hd = domain_hvm_iommu(d); > + struct pci_dev *pdev; > int rc = 0; > > if ( !iommu_enabled || !hd->platform_ops ) > @@ -228,6 +229,13 @@ static int assign_device(struct domain * > return -EXDEV; > > spin_lock(&pcidevs_lock); > + pdev = pci_get_pdev(seg, bus, devfn); > + if ( pdev ) > + { > + pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV; > + pdev->fault.count = 0; > + } > + > if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) ) > goto done; > > @@ -239,6 +247,8 @@ static int assign_device(struct domain * > goto done; > } > done: > + if ( pdev && pdev->domain != d ) > + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV; > spin_unlock(&pcidevs_lock); > return rc; > } > @@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1 > return ret; > } > > + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV; > + pdev->fault.count = 0; > + > if ( !has_arch_pdevs(d) && need_iommu(d) ) > { > d->need_iommu = 0; > --- a/xen/drivers/passthrough/pci.c > +++ b/xen/drivers/passthrough/pci.c > @@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct > *((u8*) &pdev->bus) = bus; > *((u8*) &pdev->devfn) = devfn; > pdev->domain = NULL; > + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV; > INIT_LIST_HEAD(&pdev->msi_list); > list_add(&pdev->alldevs_list, &pseg->alldevs_list); > spin_lock_init(&pdev->msix_table_lock); > @@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8 > return 1; > } > > +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn) > +{ > + struct pci_dev *pdev; > + s_time_t now = NOW(); > + u16 cword; > + > + spin_lock(&pcidevs_lock); > + pdev = pci_get_pdev(seg, bus, devfn); > + if ( pdev ) > + { > + if ( now < pdev->fault.time || > + now - pdev->fault.time > MILLISECS(10) ) > + pdev->fault.count >>= 1; > + pdev->fault.time = now; > + if ( ++pdev->fault.count < pdev->fault.threshold ) > + pdev = NULL; > + } > + spin_unlock(&pcidevs_lock); > + > + if ( !pdev ) > + return; > + > + /* Tell the device to stop DMAing; we can't rely on the guest to > + * control it for us. */ > + cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), > + PCI_COMMAND); > + pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), > + PCI_COMMAND, cword & ~PCI_COMMAND_MASTER); > +} > + > /* > * scan pci devices to add all existed PCI devices to alldevs_list, > * and setup pci hierarchy in array bus2bridge. > --- a/xen/drivers/passthrough/vtd/iommu.c > +++ b/xen/drivers/passthrough/vtd/iommu.c > @@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct > while (1) > { > u8 fault_reason; > - u16 source_id, cword; > + u16 source_id; > u32 data; > u64 guest_addr; > int type; > @@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct > iommu_page_fault_do_one(iommu, type, fault_reason, > source_id, guest_addr); > > - /* Tell the device to stop DMAing; we can't rely on the guest to > - * control it for us. */ > - cword = pci_conf_read16(iommu->intel->drhd->segment, > - PCI_BUS(source_id), PCI_SLOT(source_id), > - PCI_FUNC(source_id), PCI_COMMAND); > - pci_conf_write16(iommu->intel->drhd->segment, PCI_BUS(source_id), > - PCI_SLOT(source_id), PCI_FUNC(source_id), > - PCI_COMMAND, cword & ~PCI_COMMAND_MASTER); > + pci_check_disable_device(iommu->intel->drhd->segment, > + PCI_BUS(source_id), PCI_DEVFN2(source_id)); > > fault_index++; > if ( fault_index > cap_num_fault_regs(iommu->cap) ) > --- a/xen/include/xen/pci.h > +++ b/xen/include/xen/pci.h > @@ -64,6 +64,13 @@ struct pci_dev { > const u8 devfn; > struct pci_dev_info info; > struct arch_pci_dev arch; > + struct { > + s_time_t time; > + unsigned int threshold; > +#define PT_FAULT_THRESHOLD_PRIV 10 > +#define PT_FAULT_THRESHOLD_UNPRIV 3 > + unsigned int count; > + } fault; > u64 vf_rlen[6]; > }; > > @@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn); > struct pci_dev *pci_get_pdev(int seg, int bus, int devfn); > struct pci_dev *pci_get_pdev_by_domain( > struct domain *, int seg, int bus, int devfn); > +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn); > > uint8_t pci_conf_read8( > unsigned int seg, unsigned int bus, unsigned int dev, unsigned int func, > > > IOMMU: don't immediately disable bus mastering on faults > > Instead, give the owning domain at least a small opportunity of fixing > things up, and allow for rare faults to not bring down the device at > all. The amount of faults tolerated within a given time period (all > numbers are made up with no specific rationale) is higher for Dom0 than > for DomU-s. > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> > > --- a/xen/drivers/passthrough/amd/iommu_init.c > +++ b/xen/drivers/passthrough/amd/iommu_init.c > @@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_ > > static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[]) > { > - u16 domain_id, device_id, bdf, cword, flags; > + u16 domain_id, device_id, bdf, flags; > u32 code; > u64 *addr; > int count = 0; > @@ -620,18 +620,10 @@ static void parse_event_log_entry(struct > "fault address = %#"PRIx64", flags = %#x\n", > event_str[code-1], domain_id, device_id, *addr, flags); > > - /* Tell the device to stop DMAing; we can't rely on the guest to > - * control it for us. */ > for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) > if ( get_dma_requestor_id(iommu->seg, bdf) == device_id ) > - { > - cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf), > - PCI_SLOT(bdf), PCI_FUNC(bdf), > - PCI_COMMAND); > - pci_conf_write16(iommu->seg, PCI_BUS(bdf), PCI_SLOT(bdf), > - PCI_FUNC(bdf), PCI_COMMAND, > - cword & ~PCI_COMMAND_MASTER); > - } > + pci_check_disable_device(iommu->seg, PCI_BUS(bdf), > + PCI_DEVFN2(bdf)); > } > else > { > --- a/xen/drivers/passthrough/iommu.c > +++ b/xen/drivers/passthrough/iommu.c > @@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b > static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) > { > struct hvm_iommu *hd = domain_hvm_iommu(d); > + struct pci_dev *pdev; > int rc = 0; > > if ( !iommu_enabled || !hd->platform_ops ) > @@ -228,6 +229,13 @@ static int assign_device(struct domain * > return -EXDEV; > > spin_lock(&pcidevs_lock); > + pdev = pci_get_pdev(seg, bus, devfn); > + if ( pdev ) > + { > + pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV; > + pdev->fault.count = 0; > + } > + > if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) ) > goto done; > > @@ -239,6 +247,8 @@ static int assign_device(struct domain * > goto done; > } > done: > + if ( pdev && pdev->domain != d ) > + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV; > spin_unlock(&pcidevs_lock); > return rc; > } > @@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1 > return ret; > } > > + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV; > + pdev->fault.count = 0; > + > if ( !has_arch_pdevs(d) && need_iommu(d) ) > { > d->need_iommu = 0; > --- a/xen/drivers/passthrough/pci.c > +++ b/xen/drivers/passthrough/pci.c > @@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct > *((u8*) &pdev->bus) = bus; > *((u8*) &pdev->devfn) = devfn; > pdev->domain = NULL; > + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV; > INIT_LIST_HEAD(&pdev->msi_list); > list_add(&pdev->alldevs_list, &pseg->alldevs_list); > spin_lock_init(&pdev->msix_table_lock); > @@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8 > return 1; > } > > +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn) > +{ > + struct pci_dev *pdev; > + s_time_t now = NOW(); > + u16 cword; > + > + spin_lock(&pcidevs_lock); > + pdev = pci_get_pdev(seg, bus, devfn); > + if ( pdev ) > + { > + if ( now < pdev->fault.time || > + now - pdev->fault.time > MILLISECS(10) ) > + pdev->fault.count >>= 1; > + pdev->fault.time = now; > + if ( ++pdev->fault.count < pdev->fault.threshold ) > + pdev = NULL; > + } > + spin_unlock(&pcidevs_lock); > + > + if ( !pdev ) > + return; > + > + /* Tell the device to stop DMAing; we can't rely on the guest to > + * control it for us. */ > + cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), > + PCI_COMMAND); > + pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), > + PCI_COMMAND, cword & ~PCI_COMMAND_MASTER); > +} > + > /* > * scan pci devices to add all existed PCI devices to alldevs_list, > * and setup pci hierarchy in array bus2bridge. > --- a/xen/drivers/passthrough/vtd/iommu.c > +++ b/xen/drivers/passthrough/vtd/iommu.c > @@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct > while (1) > { > u8 fault_reason; > - u16 source_id, cword; > + u16 source_id; > u32 data; > u64 guest_addr; > int type; > @@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct > iommu_page_fault_do_one(iommu, type, fault_reason, > source_id, guest_addr); > > - /* Tell the device to stop DMAing; we can't rely on the guest to > - * control it for us. */ > - cword = pci_conf_read16(iommu->intel->drhd->segment, > - PCI_BUS(source_id), PCI_SLOT(source_id), > - PCI_FUNC(source_id), PCI_COMMAND); > - pci_conf_write16(iommu->intel->drhd->segment, PCI_BUS(source_id), > - PCI_SLOT(source_id), PCI_FUNC(source_id), > - PCI_COMMAND, cword & ~PCI_COMMAND_MASTER); > + pci_check_disable_device(iommu->intel->drhd->segment, > + PCI_BUS(source_id), PCI_DEVFN2(source_id)); > > fault_index++; > if ( fault_index > cap_num_fault_regs(iommu->cap) ) > --- a/xen/include/xen/pci.h > +++ b/xen/include/xen/pci.h > @@ -64,6 +64,13 @@ struct pci_dev { > const u8 devfn; > struct pci_dev_info info; > struct arch_pci_dev arch; > + struct { > + s_time_t time; > + unsigned int threshold; > +#define PT_FAULT_THRESHOLD_PRIV 10 > +#define PT_FAULT_THRESHOLD_UNPRIV 3 > + unsigned int count; > + } fault; > u64 vf_rlen[6]; > }; > > @@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn); > struct pci_dev *pci_get_pdev(int seg, int bus, int devfn); > struct pci_dev *pci_get_pdev_by_domain( > struct domain *, int seg, int bus, int devfn); > +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn); > > uint8_t pci_conf_read8( > unsigned int seg, unsigned int bus, unsigned int dev, unsigned int func, _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |