[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU: remove global ..."



Well, this *is* the correct fix to the original problem.  It's likely
then that there's either a bug in a BIOS somewhere, or a bug in the
per-device intremap table code that was hidden by global intremap
being the default.

However, from a practical perspective, if Wei doesn't have time to
work on it, I may not for several weeks; during which time, I think
reverting this patch to un-break testing for everyone else makes
sense.

This bug will likely be affecting our upcoming XenServer as well, so
let me see if I can prioritize working on it.

 -George

On Tue, Aug 16, 2011 at 12:09 PM, Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx> wrote:
> 23733:fbf3768e5934 causes xen-unstable not to boot on several of the
> xen.org AMD test systems.  We get an endless series of these:
>
>  (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0x00a0, fault address = 
> 0xfdf8f10144
>
> I have constructed the attached patch which reverts c/s 23733
> (adjusted for conflicts due to subsequent patches).  With this
> reversion Xen once more boots on these machines.
>
> 23733 has been in the tree for some time now, causing this breakage,
> and has already been fingered by the automatic bisector and discussed
> on xen-devel as the cause of boot failures.  I think it is now time to
> revert it pending a correct fix to the original problem.
>
> Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
>
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_acpi.c
> --- a/xen/drivers/passthrough/amd/iommu_acpi.c  Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c  Tue Aug 16 11:57:01 2011 +0100
> @@ -66,8 +66,15 @@ static void __init add_ivrs_mapping_entr
>     if (ivrs_mappings[alias_id].intremap_table == NULL )
>     {
>          /* allocate per-device interrupt remapping table */
> -         ivrs_mappings[alias_id].intremap_table =
> +         if ( amd_iommu_perdev_intremap )
> +             ivrs_mappings[alias_id].intremap_table =
>                 amd_iommu_alloc_intremap_table();
> +         else
> +         {
> +             if ( shared_intremap_table == NULL  )
> +                 shared_intremap_table = amd_iommu_alloc_intremap_table();
> +             ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
> +         }
>     }
>     /* assgin iommu hardware */
>     ivrs_mappings[bdf].iommu = iommu;
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_init.c
> --- a/xen/drivers/passthrough/amd/iommu_init.c  Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/drivers/passthrough/amd/iommu_init.c  Tue Aug 16 11:57:01 2011 +0100
> @@ -500,7 +500,7 @@ static void parse_event_log_entry(u32 en
>         /* Tell the device to stop DMAing; we can't rely on the guest to
>          * control it for us. */
>         for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
> -            if ( get_requestor_id(bdf) == device_id )
> +            if ( get_dma_requestor_id(bdf) == device_id )
>             {
>                 cword = pci_conf_read16(PCI_BUS(bdf), PCI_SLOT(bdf),
>                                 PCI_FUNC(bdf), PCI_COMMAND);
> @@ -772,7 +772,8 @@ static int __init init_ivrs_mapping(void
>         ivrs_mappings[bdf].dte_ext_int_pass = IOMMU_CONTROL_DISABLED;
>         ivrs_mappings[bdf].dte_init_pass = IOMMU_CONTROL_DISABLED;
>
> -        spin_lock_init(&ivrs_mappings[bdf].intremap_lock);
> +        if ( amd_iommu_perdev_intremap )
> +            spin_lock_init(&ivrs_mappings[bdf].intremap_lock);
>     }
>     return 0;
>  }
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_intr.c
> --- a/xen/drivers/passthrough/amd/iommu_intr.c  Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/drivers/passthrough/amd/iommu_intr.c  Tue Aug 16 11:57:01 2011 +0100
> @@ -28,10 +28,20 @@
>  #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
>
>  int ioapic_bdf[MAX_IO_APICS];
> +void *shared_intremap_table;
> +static DEFINE_SPINLOCK(shared_intremap_lock);
>
>  static spinlock_t* get_intremap_lock(int req_id)
>  {
> -    return &ivrs_mappings[req_id].intremap_lock;
> +    return (amd_iommu_perdev_intremap ?
> +           &ivrs_mappings[req_id].intremap_lock:
> +           &shared_intremap_lock);
> +}
> +
> +static int get_intremap_requestor_id(int bdf)
> +{
> +    ASSERT( bdf < ivrs_bdf_entries );
> +    return ivrs_mappings[bdf].dte_requestor_id;
>  }
>
>  static int get_intremap_offset(u8 vector, u8 dm)
> @@ -115,7 +125,7 @@ static void update_intremap_entry_from_i
>     spinlock_t *lock;
>     int offset;
>
> -    req_id = get_requestor_id(bdf);
> +    req_id = get_intremap_requestor_id(bdf);
>     lock = get_intremap_lock(req_id);
>
>     delivery_mode = rte->delivery_mode;
> @@ -173,7 +183,7 @@ int __init amd_iommu_setup_ioapic_remapp
>                 continue;
>             }
>
> -            req_id = get_requestor_id(bdf);
> +            req_id = get_intremap_requestor_id(bdf);
>             lock = get_intremap_lock(req_id);
>
>             delivery_mode = rte.delivery_mode;
> @@ -273,13 +283,14 @@ static void update_intremap_entry_from_m
>  {
>     unsigned long flags;
>     u32* entry;
> -    u16 bdf, req_id;
> +    u16 bdf, req_id, alias_id;
>     u8 delivery_mode, dest, vector, dest_mode;
>     spinlock_t *lock;
>     int offset;
>
>     bdf = (pdev->bus << 8) | pdev->devfn;
> -    req_id = get_requestor_id(bdf);
> +    req_id = get_dma_requestor_id(bdf);
> +    alias_id = get_intremap_requestor_id(bdf);
>
>     if ( msg == NULL )
>     {
> @@ -288,6 +299,14 @@ static void update_intremap_entry_from_m
>         free_intremap_entry(req_id, msi_desc->remap_index);
>         spin_unlock_irqrestore(lock, flags);
>
> +        if ( ( req_id != alias_id ) &&
> +            ivrs_mappings[alias_id].intremap_table != NULL )
> +        {
> +            lock = get_intremap_lock(alias_id);
> +            spin_lock_irqsave(lock, flags);
> +            free_intremap_entry(alias_id, msi_desc->remap_index);
> +            spin_unlock_irqrestore(lock, flags);
> +        }
>         goto done;
>     }
>
> @@ -305,11 +324,30 @@ static void update_intremap_entry_from_m
>     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
>     spin_unlock_irqrestore(lock, flags);
>
> +    /*
> +     * In some special cases, a pci-e device(e.g SATA controller in IDE mode)
> +     * will use alias id to index interrupt remapping table.
> +     * We have to setup a secondary interrupt remapping entry to satisfy 
> those
> +     * devices.
> +     */
> +
> +    lock = get_intremap_lock(alias_id);
> +    if ( ( req_id != alias_id ) &&
> +        ivrs_mappings[alias_id].intremap_table != NULL )
> +    {
> +        spin_lock_irqsave(lock, flags);
> +        entry = (u32*)get_intremap_entry(alias_id, offset);
> +        update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
> +        spin_unlock_irqrestore(lock, flags);
> +    }
> +
>  done:
>     if ( iommu->enabled )
>     {
>         spin_lock_irqsave(&iommu->lock, flags);
>         invalidate_interrupt_table(iommu, req_id);
> +        if ( alias_id != req_id )
> +            invalidate_interrupt_table(iommu, alias_id);
>         flush_command_buffer(iommu);
>         spin_unlock_irqrestore(&iommu->lock, flags);
>     }
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_map.c
> --- a/xen/drivers/passthrough/amd/iommu_map.c   Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/drivers/passthrough/amd/iommu_map.c   Tue Aug 16 11:57:01 2011 +0100
> @@ -543,7 +543,7 @@ static int update_paging_mode(struct dom
>         for_each_pdev( d, pdev )
>         {
>             bdf = (pdev->bus << 8) | pdev->devfn;
> -            req_id = get_requestor_id(bdf);
> +            req_id = get_dma_requestor_id(bdf);
>             iommu = find_iommu_for_device(bdf);
>             if ( !iommu )
>             {
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/pci_amd_iommu.c
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Sat Aug 13 10:14:58 
> 2011 +0100
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Tue Aug 16 11:57:01 
> 2011 +0100
> @@ -34,10 +34,25 @@ struct amd_iommu *find_iommu_for_device(
>     return ivrs_mappings[bdf].iommu;
>  }
>
> -int get_requestor_id(u16 bdf)
> +/*
> + * Some devices will use alias id and original device id to index interrupt
> + * table and I/O page table respectively. Such devices will have
> + * both alias entry and select entry in IVRS structure.
> + *
> + * Return original device id, if device has valid interrupt remapping
> + * table setup for both select entry and alias entry.
> + */
> +int get_dma_requestor_id(u16 bdf)
>  {
> +    int req_id;
> +
>     BUG_ON ( bdf >= ivrs_bdf_entries );
> -    return ivrs_mappings[bdf].dte_requestor_id;
> +    req_id = ivrs_mappings[bdf].dte_requestor_id;
> +    if ( (ivrs_mappings[bdf].intremap_table != NULL) &&
> +         (ivrs_mappings[req_id].intremap_table != NULL) )
> +        req_id = bdf;
> +
> +    return req_id;
>  }
>
>  static int is_translation_valid(u32 *entry)
> @@ -79,7 +94,7 @@ static void amd_iommu_setup_domain_devic
>         valid = 0;
>
>     /* get device-table entry */
> -    req_id = get_requestor_id(bdf);
> +    req_id = get_dma_requestor_id(bdf);
>     dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
>
>     spin_lock_irqsave(&iommu->lock, flags);
> @@ -252,7 +267,7 @@ static void amd_iommu_disable_domain_dev
>     int req_id;
>
>     BUG_ON ( iommu->dev_table.buffer == NULL );
> -    req_id = get_requestor_id(bdf);
> +    req_id = get_dma_requestor_id(bdf);
>     dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
>
>     spin_lock_irqsave(&iommu->lock, flags);
> @@ -314,7 +329,7 @@ static int amd_iommu_assign_device(struc
>  static int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
>  {
>     int bdf = (bus << 8) | devfn;
> -    int req_id = get_requestor_id(bdf);
> +    int req_id = get_dma_requestor_id(bdf);
>
>     if ( ivrs_mappings[req_id].unity_map_enable )
>     {
> @@ -433,7 +448,7 @@ static int amd_iommu_group_id(u8 bus, u8
>     int rt;
>     int bdf = (bus << 8) | devfn;
>     rt = ( bdf < ivrs_bdf_entries ) ?
> -        get_requestor_id(bdf) :
> +        get_dma_requestor_id(bdf) :
>         bdf;
>     return rt;
>  }
> diff -r 8d6edc3d26d2 xen/drivers/passthrough/iommu.c
> --- a/xen/drivers/passthrough/iommu.c   Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/drivers/passthrough/iommu.c   Tue Aug 16 11:57:01 2011 +0100
> @@ -50,6 +50,7 @@ bool_t __read_mostly iommu_hap_pt_share;
>  bool_t __read_mostly iommu_hap_pt_share;
>  bool_t __read_mostly iommu_debug;
>  bool_t __read_mostly iommu_amd_perdev_vector_map = 1;
> +bool_t __read_mostly amd_iommu_perdev_intremap;
>
>  static void __init parse_iommu_param(char *s)
>  {
> @@ -76,6 +77,8 @@ static void __init parse_iommu_param(cha
>             iommu_intremap = 0;
>         else if ( !strcmp(s, "debug") )
>             iommu_debug = 1;
> +        else if ( !strcmp(s, "amd-iommu-perdev-intremap") )
> +            amd_iommu_perdev_intremap = 1;
>         else if ( !strcmp(s, "dom0-passthrough") )
>             iommu_passthrough = 1;
>         else if ( !strcmp(s, "dom0-strict") )
> diff -r 8d6edc3d26d2 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h     Sat Aug 13 10:14:58 
> 2011 +0100
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h     Tue Aug 16 11:57:01 
> 2011 +0100
> @@ -65,7 +65,7 @@ void amd_iommu_share_p2m(struct domain *
>  void amd_iommu_share_p2m(struct domain *d);
>
>  /* device table functions */
> -int get_requestor_id(u16 bdf);
> +int get_dma_requestor_id(u16 bdf);
>  void amd_iommu_add_dev_table_entry(
>     u32 *dte, u8 sys_mgt, u8 dev_ex, u8 lint1_pass, u8 lint0_pass,
>     u8 nmi_pass, u8 ext_int_pass, u8 init_pass);
> @@ -97,6 +97,7 @@ unsigned int amd_iommu_read_ioapic_from_
>     unsigned int apic, unsigned int reg);
>
>  extern int ioapic_bdf[MAX_IO_APICS];
> +extern void *shared_intremap_table;
>
>  /* power management support */
>  void amd_iommu_resume(void);
> diff -r 8d6edc3d26d2 xen/include/xen/iommu.h
> --- a/xen/include/xen/iommu.h   Sat Aug 13 10:14:58 2011 +0100
> +++ b/xen/include/xen/iommu.h   Tue Aug 16 11:57:01 2011 +0100
> @@ -32,6 +32,7 @@ extern bool_t iommu_snoop, iommu_qinval,
>  extern bool_t iommu_snoop, iommu_qinval, iommu_intremap;
>  extern bool_t iommu_hap_pt_share;
>  extern bool_t iommu_debug;
> +extern bool_t amd_iommu_perdev_intremap;
>
>  extern struct rangeset *mmio_ro_ranges;
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
>

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.