[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH, v2] IOMMU: don't immediately disable bus mastering on faults



>>> On 08.11.12 at 13:46, Tim Deegan <tim@xxxxxxx> wrote:
> At 16:05 +0000 on 07 Nov (1352304352), Jan Beulich wrote:
>> Instead, give the owning domain at least a small opportunity of fixing
>> things up, and allow for rare faults to not bring down the device at
>> all. The amount of faults tolerated within a given time period (all
>> numbers are made up with no specific rationale) is higher for Dom0 than
>> for DomU-s.
>> 
>> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> 
> Looks good to me, though I don't think dom0 should have any special
> treatment here.  I'd be inclined to set the threshold to 10 for
> everyone. 

No problem. I'll wait for the IOMMU folks to give some feedback,
and either re-submit (if further changes are requested), or commit
with their acks (and the change as you suggest, if they're in
agreement).

Of course, if they disagree we'll have to get to a common position
anyway first.

If no other changes are requested, do I take the above as an
"ack-with-that-change"?

Jan

>> --- a/xen/drivers/passthrough/amd/iommu_init.c
>> +++ b/xen/drivers/passthrough/amd/iommu_init.c
>> @@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_
>>  
>>  static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
>>  {
>> -    u16 domain_id, device_id, bdf, cword, flags;
>> +    u16 domain_id, device_id, bdf, flags;
>>      u32 code;
>>      u64 *addr;
>>      int count = 0;
>> @@ -620,18 +620,10 @@ static void parse_event_log_entry(struct
>>                 "fault address = %#"PRIx64", flags = %#x\n",
>>                 event_str[code-1], domain_id, device_id, *addr, flags);
>>  
>> -        /* Tell the device to stop DMAing; we can't rely on the guest to
>> -         * control it for us. */
>>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
>> -            {
>> -                cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
>> -                                        PCI_SLOT(bdf), PCI_FUNC(bdf),
>> -                                        PCI_COMMAND);
>> -                pci_conf_write16(iommu->seg, PCI_BUS(bdf), PCI_SLOT(bdf),
>> -                                 PCI_FUNC(bdf), PCI_COMMAND, 
>> -                                 cword & ~PCI_COMMAND_MASTER);
>> -            }
>> +                pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
>> +                                         PCI_DEVFN2(bdf));
>>      }
>>      else
>>      {
>> --- a/xen/drivers/passthrough/iommu.c
>> +++ b/xen/drivers/passthrough/iommu.c
>> @@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b
>>  static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
>>  {
>>      struct hvm_iommu *hd = domain_hvm_iommu(d);
>> +    struct pci_dev *pdev;
>>      int rc = 0;
>>  
>>      if ( !iommu_enabled || !hd->platform_ops )
>> @@ -228,6 +229,13 @@ static int assign_device(struct domain *
>>          return -EXDEV;
>>  
>>      spin_lock(&pcidevs_lock);
>> +    pdev = pci_get_pdev(seg, bus, devfn);
>> +    if ( pdev )
>> +    {
>> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV;
>> +        pdev->fault.count     = 0;
>> +    }
>> +
>>      if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
>>          goto done;
>>  
>> @@ -239,6 +247,8 @@ static int assign_device(struct domain *
>>          goto done;
>>      }
>>  done:
>> +    if ( pdev && pdev->domain != d )
>> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>>      spin_unlock(&pcidevs_lock);
>>      return rc;
>>  }
>> @@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1
>>          return ret;
>>      }
>>  
>> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>> +    pdev->fault.count     = 0;
>> +
>>      if ( !has_arch_pdevs(d) && need_iommu(d) )
>>      {
>>          d->need_iommu = 0;
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct
>>      *((u8*) &pdev->bus) = bus;
>>      *((u8*) &pdev->devfn) = devfn;
>>      pdev->domain = NULL;
>> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>>      INIT_LIST_HEAD(&pdev->msi_list);
>>      list_add(&pdev->alldevs_list, &pseg->alldevs_list);
>>      spin_lock_init(&pdev->msix_table_lock);
>> @@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8
>>      return 1;
>>  }
>>  
>> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
>> +{
>> +    struct pci_dev *pdev;
>> +    s_time_t now = NOW();
>> +    u16 cword;
>> +
>> +    spin_lock(&pcidevs_lock);
>> +    pdev = pci_get_pdev(seg, bus, devfn);
>> +    if ( pdev )
>> +    {
>> +        if ( now < pdev->fault.time ||
>> +             now - pdev->fault.time > MILLISECS(10) )
>> +            pdev->fault.count >>= 1;
>> +        pdev->fault.time = now;
>> +        if ( ++pdev->fault.count < pdev->fault.threshold )
>> +            pdev = NULL;
>> +    }
>> +    spin_unlock(&pcidevs_lock);
>> +
>> +    if ( !pdev )
>> +        return;
>> +
>> +    /* Tell the device to stop DMAing; we can't rely on the guest to
>> +     * control it for us. */
>> +    cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
>> +                            PCI_COMMAND);
>> +    pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
>> +                     PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
>> +}
>> +
>>  /*
>>   * scan pci devices to add all existed PCI devices to alldevs_list,
>>   * and setup pci hierarchy in array bus2bridge.
>> --- a/xen/drivers/passthrough/vtd/iommu.c
>> +++ b/xen/drivers/passthrough/vtd/iommu.c
>> @@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct
>>      while (1)
>>      {
>>          u8 fault_reason;
>> -        u16 source_id, cword;
>> +        u16 source_id;
>>          u32 data;
>>          u64 guest_addr;
>>          int type;
>> @@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct
>>          iommu_page_fault_do_one(iommu, type, fault_reason,
>>                                  source_id, guest_addr);
>>  
>> -        /* Tell the device to stop DMAing; we can't rely on the guest to
>> -         * control it for us. */
>> -        cword = pci_conf_read16(iommu->intel->drhd->segment,
>> -                                PCI_BUS(source_id), PCI_SLOT(source_id),
>> -                                PCI_FUNC(source_id), PCI_COMMAND);
>> -        pci_conf_write16(iommu->intel->drhd->segment, PCI_BUS(source_id),
>> -                         PCI_SLOT(source_id), PCI_FUNC(source_id),
>> -                         PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
>> +        pci_check_disable_device(iommu->intel->drhd->segment,
>> +                                 PCI_BUS(source_id), 
> PCI_DEVFN2(source_id));
>>  
>>          fault_index++;
>>          if ( fault_index > cap_num_fault_regs(iommu->cap) )
>> --- a/xen/include/xen/pci.h
>> +++ b/xen/include/xen/pci.h
>> @@ -64,6 +64,13 @@ struct pci_dev {
>>      const u8 devfn;
>>      struct pci_dev_info info;
>>      struct arch_pci_dev arch;
>> +    struct {
>> +        s_time_t time;
>> +        unsigned int threshold;
>> +#define PT_FAULT_THRESHOLD_PRIV  10
>> +#define PT_FAULT_THRESHOLD_UNPRIV 3
>> +        unsigned int count;
>> +    } fault;
>>      u64 vf_rlen[6];
>>  };
>>  
>> @@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn);
>>  struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
>>  struct pci_dev *pci_get_pdev_by_domain(
>>      struct domain *, int seg, int bus, int devfn);
>> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
>>  
>>  uint8_t pci_conf_read8(
>>      unsigned int seg, unsigned int bus, unsigned int dev, unsigned int 
> func,
>> 
>> 
> 
>> IOMMU: don't immediately disable bus mastering on faults
>> 
>> Instead, give the owning domain at least a small opportunity of fixing
>> things up, and allow for rare faults to not bring down the device at
>> all. The amount of faults tolerated within a given time period (all
>> numbers are made up with no specific rationale) is higher for Dom0 than
>> for DomU-s.
>> 
>> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>> 
>> --- a/xen/drivers/passthrough/amd/iommu_init.c
>> +++ b/xen/drivers/passthrough/amd/iommu_init.c
>> @@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_
>>  
>>  static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
>>  {
>> -    u16 domain_id, device_id, bdf, cword, flags;
>> +    u16 domain_id, device_id, bdf, flags;
>>      u32 code;
>>      u64 *addr;
>>      int count = 0;
>> @@ -620,18 +620,10 @@ static void parse_event_log_entry(struct
>>                 "fault address = %#"PRIx64", flags = %#x\n",
>>                 event_str[code-1], domain_id, device_id, *addr, flags);
>>  
>> -        /* Tell the device to stop DMAing; we can't rely on the guest to
>> -         * control it for us. */
>>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
>> -            {
>> -                cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
>> -                                        PCI_SLOT(bdf), PCI_FUNC(bdf),
>> -                                        PCI_COMMAND);
>> -                pci_conf_write16(iommu->seg, PCI_BUS(bdf), PCI_SLOT(bdf),
>> -                                 PCI_FUNC(bdf), PCI_COMMAND, 
>> -                                 cword & ~PCI_COMMAND_MASTER);
>> -            }
>> +                pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
>> +                                         PCI_DEVFN2(bdf));
>>      }
>>      else
>>      {
>> --- a/xen/drivers/passthrough/iommu.c
>> +++ b/xen/drivers/passthrough/iommu.c
>> @@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b
>>  static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
>>  {
>>      struct hvm_iommu *hd = domain_hvm_iommu(d);
>> +    struct pci_dev *pdev;
>>      int rc = 0;
>>  
>>      if ( !iommu_enabled || !hd->platform_ops )
>> @@ -228,6 +229,13 @@ static int assign_device(struct domain *
>>          return -EXDEV;
>>  
>>      spin_lock(&pcidevs_lock);
>> +    pdev = pci_get_pdev(seg, bus, devfn);
>> +    if ( pdev )
>> +    {
>> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV;
>> +        pdev->fault.count     = 0;
>> +    }
>> +
>>      if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
>>          goto done;
>>  
>> @@ -239,6 +247,8 @@ static int assign_device(struct domain *
>>          goto done;
>>      }
>>  done:
>> +    if ( pdev && pdev->domain != d )
>> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>>      spin_unlock(&pcidevs_lock);
>>      return rc;
>>  }
>> @@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1
>>          return ret;
>>      }
>>  
>> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>> +    pdev->fault.count     = 0;
>> +
>>      if ( !has_arch_pdevs(d) && need_iommu(d) )
>>      {
>>          d->need_iommu = 0;
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct
>>      *((u8*) &pdev->bus) = bus;
>>      *((u8*) &pdev->devfn) = devfn;
>>      pdev->domain = NULL;
>> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>>      INIT_LIST_HEAD(&pdev->msi_list);
>>      list_add(&pdev->alldevs_list, &pseg->alldevs_list);
>>      spin_lock_init(&pdev->msix_table_lock);
>> @@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8
>>      return 1;
>>  }
>>  
>> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
>> +{
>> +    struct pci_dev *pdev;
>> +    s_time_t now = NOW();
>> +    u16 cword;
>> +
>> +    spin_lock(&pcidevs_lock);
>> +    pdev = pci_get_pdev(seg, bus, devfn);
>> +    if ( pdev )
>> +    {
>> +        if ( now < pdev->fault.time ||
>> +             now - pdev->fault.time > MILLISECS(10) )
>> +            pdev->fault.count >>= 1;
>> +        pdev->fault.time = now;
>> +        if ( ++pdev->fault.count < pdev->fault.threshold )
>> +            pdev = NULL;
>> +    }
>> +    spin_unlock(&pcidevs_lock);
>> +
>> +    if ( !pdev )
>> +        return;
>> +
>> +    /* Tell the device to stop DMAing; we can't rely on the guest to
>> +     * control it for us. */
>> +    cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
>> +                            PCI_COMMAND);
>> +    pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
>> +                     PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
>> +}
>> +
>>  /*
>>   * scan pci devices to add all existed PCI devices to alldevs_list,
>>   * and setup pci hierarchy in array bus2bridge.
>> --- a/xen/drivers/passthrough/vtd/iommu.c
>> +++ b/xen/drivers/passthrough/vtd/iommu.c
>> @@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct
>>      while (1)
>>      {
>>          u8 fault_reason;
>> -        u16 source_id, cword;
>> +        u16 source_id;
>>          u32 data;
>>          u64 guest_addr;
>>          int type;
>> @@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct
>>          iommu_page_fault_do_one(iommu, type, fault_reason,
>>                                  source_id, guest_addr);
>>  
>> -        /* Tell the device to stop DMAing; we can't rely on the guest to
>> -         * control it for us. */
>> -        cword = pci_conf_read16(iommu->intel->drhd->segment,
>> -                                PCI_BUS(source_id), PCI_SLOT(source_id),
>> -                                PCI_FUNC(source_id), PCI_COMMAND);
>> -        pci_conf_write16(iommu->intel->drhd->segment, PCI_BUS(source_id),
>> -                         PCI_SLOT(source_id), PCI_FUNC(source_id),
>> -                         PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
>> +        pci_check_disable_device(iommu->intel->drhd->segment,
>> +                                 PCI_BUS(source_id), 
> PCI_DEVFN2(source_id));
>>  
>>          fault_index++;
>>          if ( fault_index > cap_num_fault_regs(iommu->cap) )
>> --- a/xen/include/xen/pci.h
>> +++ b/xen/include/xen/pci.h
>> @@ -64,6 +64,13 @@ struct pci_dev {
>>      const u8 devfn;
>>      struct pci_dev_info info;
>>      struct arch_pci_dev arch;
>> +    struct {
>> +        s_time_t time;
>> +        unsigned int threshold;
>> +#define PT_FAULT_THRESHOLD_PRIV  10
>> +#define PT_FAULT_THRESHOLD_UNPRIV 3
>> +        unsigned int count;
>> +    } fault;
>>      u64 vf_rlen[6];
>>  };
>>  
>> @@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn);
>>  struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
>>  struct pci_dev *pci_get_pdev_by_domain(
>>      struct domain *, int seg, int bus, int devfn);
>> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
>>  
>>  uint8_t pci_conf_read8(
>>      unsigned int seg, unsigned int bus, unsigned int dev, unsigned int 
> func,



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.