Re: [Xen-devel] [PATCH v4 05/12] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format
On Thu, Jul 25, 2019 at 01:31:02PM +0000, Jan Beulich wrote:
> This is in preparation for actually enabling x2APIC mode, which requires
> this wider IRTE format to be used.
>
> A specific remark regarding the first hunk changing
> amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
> i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
> tables when creating new one"). Other code introduced by that change has
> meanwhile disappeared or further changed, and I wonder if - rather than
> adding an x2apic_enabled check to the conditional - the bypass couldn't
> be deleted altogether. For now the goal is to affect the non-x2APIC
> paths as little as possible.
>
> Take the liberty and use the new "fresh" flag to suppress an unneeded
> flush in update_intremap_entry_from_ioapic().
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Brian Woods <brian.woods@xxxxxxx>
> ---
> v4: Re-base. Do away with standalone struct irte_full. Use smp_wmb().
> v3: Avoid unrelated type changes in update_intremap_entry_from_ioapic().
> Drop irte_mode enum and variable. Convert INTREMAP_TABLE_ORDER into
> a static helper. Comment barrier() uses. Switch boolean bitfields to
> bool.
> v2: Add cast in get_full_dest(). Re-base over changes earlier in the
> series. Don't use cmpxchg16b. Use barrier() instead of wmb().
> ---
> Note that AMD's doc says Lowest Priority ("Arbitrated" by their naming)
> mode is unavailable in x2APIC mode, but they've confirmed this to be a
> mistake on their part.
>
> --- a/xen/drivers/passthrough/amd/iommu_intr.c
> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
> @@ -39,12 +39,36 @@ union irte32 {
> } flds;
> };
>
> +union irte128 {
> + uint64_t raw[2];
> + struct {
> + bool remap_en:1;
> + bool sup_io_pf:1;
> + unsigned int int_type:3;
> + bool rq_eoi:1;
> + bool dm:1;
> + bool guest_mode:1; /* MBZ */
> + unsigned int dest_lo:24;
> + unsigned int :32;
> + unsigned int vector:8;
> + unsigned int :24;
> + unsigned int :24;
> + unsigned int dest_hi:8;
> + } full;
> +};
> +
> union irte_ptr {
> void *ptr;
> union irte32 *ptr32;
> + union irte128 *ptr128;
> };
>
> -#define INTREMAP_TABLE_ORDER 1
> +union irte_cptr {
> + const void *ptr;
> + const union irte32 *ptr32;
> + const union irte128 *ptr128;
> +} __transparent__;
> +
> #define INTREMAP_LENGTH 0xB
> #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
>
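For anyone cross-checking the new 128-bit layout against the spec, here is a
minimal self-contained sketch (assuming GCC's little-endian x86 bitfield
packing, i.e. what Xen is built with) of where the interesting fields land;
the example values are arbitrary:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

union irte128 {
    uint64_t raw[2];
    struct {
        bool remap_en:1;
        bool sup_io_pf:1;
        unsigned int int_type:3;
        bool rq_eoi:1;
        bool dm:1;
        bool guest_mode:1; /* MBZ */
        unsigned int dest_lo:24;
        unsigned int :32;
        unsigned int vector:8;
        unsigned int :24;
        unsigned int :24;
        unsigned int dest_hi:8;
    } full;
};

int main(void)
{
    union irte128 e = { .full = {
        .remap_en = true, .vector = 0xab,
        .dest_lo = 0x345678, .dest_hi = 0x12,
    } };

    assert(e.raw[0] & 1);                           /* RemapEn: bit 0       */
    assert((e.raw[0] >> 8 & 0xffffff) == 0x345678); /* DestLo: bits 8-31    */
    assert((e.raw[1] & 0xff) == 0xab);              /* Vector: bits 64-71   */
    assert(e.raw[1] >> 56 == 0x12);                 /* DestHi: bits 120-127 */
    return 0;
}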
> @@ -57,6 +81,13 @@ unsigned int nr_ioapic_sbdf;
>
> static void dump_intremap_tables(unsigned char key);
>
> +static unsigned int __init intremap_table_order(const struct amd_iommu *iommu)
> +{
> + return iommu->ctrl.ga_en
> + ? get_order_from_bytes(INTREMAP_ENTRIES * sizeof(union irte128))
> + : get_order_from_bytes(INTREMAP_ENTRIES * sizeof(union irte32));
> +}
> +
> unsigned int ioapic_id_to_index(unsigned int apic_id)
> {
> unsigned int idx;
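A quick back-of-the-envelope check of what the new helper returns, as a
sketch (assuming 4 KiB pages; order_from_bytes() below is a stand-in for
Xen's get_order_from_bytes()): the 2048-entry table grows from 8 KiB
(order 1) to 32 KiB (order 3) when the 128-bit format is in use.

#include <assert.h>
#include <stddef.h>

#define PAGE_SIZE        4096
#define INTREMAP_ENTRIES (1 << 0xB)   /* 2048 */

/* Stand-in for Xen's get_order_from_bytes(): smallest order such that
 * PAGE_SIZE << order covers the requested size. */
static unsigned int order_from_bytes(size_t bytes)
{
    unsigned int order = 0;

    while (((size_t)PAGE_SIZE << order) < bytes)
        order++;
    return order;
}

int main(void)
{
    assert(order_from_bytes(INTREMAP_ENTRIES * 4) == 1);  /* irte32:  8 KiB */
    assert(order_from_bytes(INTREMAP_ENTRIES * 16) == 3); /* irte128: 32 KiB */
    return 0;
}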
> @@ -131,7 +162,10 @@ static union irte_ptr get_intremap_entry
>
> ASSERT(table.ptr && (index < INTREMAP_ENTRIES));
>
> - table.ptr32 += index;
> + if ( iommu->ctrl.ga_en )
> + table.ptr128 += index;
> + else
> + table.ptr32 += index;
>
> return table;
> }
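The split matters because pointer arithmetic on the union members scales by
the entry size, 4 vs 16 bytes. A tiny illustration (types trimmed down from
the patch):

#include <assert.h>
#include <stdint.h>

union irte32  { uint32_t raw; };
union irte128 { uint64_t raw[2]; };
union irte_ptr {
    void *ptr;
    union irte32 *ptr32;
    union irte128 *ptr128;
};

int main(void)
{
    union irte_ptr a = { .ptr = (void *)0x1000 };
    union irte_ptr b = a;

    a.ptr32 += 3;   /* advances by 3 * sizeof(union irte32)  == 12 bytes */
    b.ptr128 += 3;  /* advances by 3 * sizeof(union irte128) == 48 bytes */

    assert((uintptr_t)a.ptr == 0x1000 + 3 * 4);
    assert((uintptr_t)b.ptr == 0x1000 + 3 * 16);
    return 0;
}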
> @@ -141,7 +175,22 @@ static void free_intremap_entry(const st
> {
> union irte_ptr entry = get_intremap_entry(iommu, bdf, index);
>
> - ACCESS_ONCE(entry.ptr32->raw) = 0;
> + if ( iommu->ctrl.ga_en )
> + {
> + ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
> + /*
> + * Low half (containing RemapEn) needs to be cleared first. Note that
> + * strictly speaking smp_wmb() isn't enough, as conceptually it expands
> + * to just barrier() when !CONFIG_SMP. But wmb() would be more than we
> + * need, since the IOMMU is a cache-coherent entity on the bus. And
> + * given that we don't allow CONFIG_SMP to be turned off, the SMP
> + * variant will do.
> + */
> + smp_wmb();
> + entry.ptr128->raw[1] = 0;
> + }
> + else
> + ACCESS_ONCE(entry.ptr32->raw) = 0;
>
> __clear_bit(index, get_ivrs_mappings(iommu->seg)[bdf].intremap_inuse);
> }
> @@ -151,17 +200,44 @@ static void update_intremap_entry(const
> unsigned int vector, unsigned int
> int_type,
> unsigned int dest_mode, unsigned int dest)
> {
> - union irte32 irte = {
> - .flds = {
> - .remap_en = true,
> - .int_type = int_type,
> - .dm = dest_mode,
> - .dest = dest,
> - .vector = vector,
> - },
> - };
> + if ( iommu->ctrl.ga_en )
> + {
> + union irte128 irte = {
> + .full = {
> + .remap_en = true,
> + .int_type = int_type,
> + .dm = dest_mode,
> + .dest_lo = dest,
> + .dest_hi = dest >> 24,
> + .vector = vector,
> + },
> + };
> +
> + ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
> + /*
> + * Low half, in particular RemapEn, needs to be cleared first. See
> + * comment in free_intremap_entry() regarding the choice of barrier.
> + */
> + smp_wmb();
> + entry.ptr128->raw[1] = irte.raw[1];
> + /* High half needs to be set before low one (containing RemapEn). */
> + smp_wmb();
> + ACCESS_ONCE(entry.ptr128->raw[0]) = irte.raw[0];
> + }
> + else
> + {
> + union irte32 irte = {
> + .flds = {
> + .remap_en = true,
> + .int_type = int_type,
> + .dm = dest_mode,
> + .dest = dest,
> + .vector = vector,
> + },
> + };
>
> - ACCESS_ONCE(entry.ptr32->raw) = irte.raw;
> + ACCESS_ONCE(entry.ptr32->raw) = irte.raw;
> + }
> }
>
> static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
> @@ -175,6 +251,11 @@ static inline void set_rte_index(struct
> rte->delivery_mode = offset >> 8;
> }
>
> +static inline unsigned int get_full_dest(const union irte128 *entry)
> +{
> + return entry->full.dest_lo | ((unsigned int)entry->full.dest_hi << 24);
> +}
> +
> static int update_intremap_entry_from_ioapic(
> int bdf,
> struct amd_iommu *iommu,
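A round-trip sketch of the 24/8-bit destination split that get_full_dest()
reassembles (the destination value is an arbitrary example):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t dest = 0x12345678;            /* full 32-bit x2APIC destination  */
    uint32_t dest_lo = dest & 0xffffff;    /* -> irte.full.dest_lo (24 bits)  */
    uint8_t  dest_hi = dest >> 24;         /* -> irte.full.dest_hi (8 bits)   */

    /* What get_full_dest() computes when reading the IRTE back: */
    assert((dest_lo | ((uint32_t)dest_hi << 24)) == dest);
    return 0;
}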
> @@ -184,10 +265,11 @@ static int update_intremap_entry_from_io
> {
> unsigned long flags;
> union irte_ptr entry;
> - u8 delivery_mode, dest, vector, dest_mode;
> + uint8_t delivery_mode, vector, dest_mode;
> int req_id;
> spinlock_t *lock;
> - unsigned int offset;
> + unsigned int dest, offset;
> + bool fresh = false;
>
> req_id = get_intremap_requestor_id(iommu->seg, bdf);
> lock = get_intremap_lock(iommu->seg, req_id);
> @@ -195,7 +277,7 @@ static int update_intremap_entry_from_io
> delivery_mode = rte->delivery_mode;
> vector = rte->vector;
> dest_mode = rte->dest_mode;
> - dest = rte->dest.logical.logical_dest;
> + dest = x2apic_enabled ? rte->dest.dest32 : rte->dest.logical.logical_dest;
>
> spin_lock_irqsave(lock, flags);
>
> @@ -210,25 +292,40 @@ static int update_intremap_entry_from_io
> return -ENOSPC;
> }
> *index = offset;
> - lo_update = 1;
> + fresh = true;
> }
>
> entry = get_intremap_entry(iommu, req_id, offset);
> - if ( !lo_update )
> + if ( fresh )
> + /* nothing */;
> + else if ( !lo_update )
> {
> /*
> * Low half of incoming RTE is already in remapped format,
> * so need to recover vector and delivery mode from IRTE.
> */
> ASSERT(get_rte_index(rte) == offset);
> - vector = entry.ptr32->flds.vector;
> + if ( iommu->ctrl.ga_en )
> + vector = entry.ptr128->full.vector;
> + else
> + vector = entry.ptr32->flds.vector;
> + /* The IntType fields match for both formats. */
> delivery_mode = entry.ptr32->flds.int_type;
> }
> + else if ( x2apic_enabled )
> + {
> + /*
> + * High half of incoming RTE was read from the I/O APIC and hence may
> + * not hold the full destination, so need to recover full destination
> + * from IRTE.
> + */
> + dest = get_full_dest(entry.ptr128);
> + }
> update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode,
> dest);
>
> spin_unlock_irqrestore(lock, flags);
>
> - if ( iommu->enabled )
> + if ( iommu->enabled && !fresh )
> {
> spin_lock_irqsave(&iommu->lock, flags);
> amd_iommu_flush_intremap(iommu, req_id);
> @@ -286,6 +383,18 @@ int __init amd_iommu_setup_ioapic_remapp
> dest_mode = rte.dest_mode;
> dest = rte.dest.logical.logical_dest;
>
> + if ( iommu->ctrl.xt_en )
> + {
> + /*
> + * In x2APIC mode we have no way of discovering the high 24
> + * bits of the destination of an already enabled interrupt.
> + * We come here earlier than for xAPIC mode, so no interrupts
> + * should have been set up before.
> + */
> + AMD_IOMMU_DEBUG("Unmasked IO-APIC#%u entry %u in x2APIC mode\n",
> + IO_APIC_ID(apic), pin);
> + }
> +
> spin_lock_irqsave(lock, flags);
> offset = alloc_intremap_entry(seg, req_id, 1);
> BUG_ON(offset >= INTREMAP_ENTRIES);
> @@ -320,7 +429,8 @@ void amd_iommu_ioapic_update_ire(
> struct IO_APIC_route_entry new_rte = { 0 };
> unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
> unsigned int pin = (reg - 0x10) / 2;
> - int saved_mask, seg, bdf, rc;
> + int seg, bdf, rc;
> + bool saved_mask, fresh = false;
> struct amd_iommu *iommu;
> unsigned int idx;
>
> @@ -362,12 +472,22 @@ void amd_iommu_ioapic_update_ire(
> *(((u32 *)&new_rte) + 1) = value;
> }
>
> - if ( new_rte.mask &&
> - ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
> + if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
> {
> ASSERT(saved_mask);
> - __io_apic_write(apic, reg, value);
> - return;
> +
> + /*
> + * There's nowhere except the IRTE to store a full 32-bit destination,
> + * so we may not bypass entry allocation and updating of the low RTE
> + * half in the (usual) case of the high RTE half getting written first.
> + */
> + if ( new_rte.mask && !x2apic_enabled )
> + {
> + __io_apic_write(apic, reg, value);
> + return;
> + }
> +
> + fresh = true;
> }
>
> /* mask the interrupt while we change the intremap table */
> @@ -396,8 +516,12 @@ void amd_iommu_ioapic_update_ire(
> if ( reg == rte_lo )
> return;
>
> - /* unmask the interrupt after we have updated the intremap table */
> - if ( !saved_mask )
> + /*
> + * Unmask the interrupt after we have updated the intremap table. Also
> + * write the low half if a fresh entry was allocated for a high half
> + * update in x2APIC mode.
> + */
> + if ( !saved_mask || (x2apic_enabled && fresh) )
> {
> old_rte.mask = saved_mask;
> __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
> @@ -411,31 +535,40 @@ unsigned int amd_iommu_read_ioapic_from_
> unsigned int offset;
> unsigned int val = __io_apic_read(apic, reg);
> unsigned int pin = (reg - 0x10) / 2;
> + uint16_t seg, bdf, req_id;
> + const struct amd_iommu *iommu;
> + union irte_ptr entry;
>
> idx = ioapic_id_to_index(IO_APIC_ID(apic));
> if ( idx == MAX_IO_APICS )
> return val;
>
> offset = ioapic_sbdf[idx].pin_2_idx[pin];
> + if ( offset >= INTREMAP_ENTRIES )
> + return val;
>
> - if ( !(reg & 1) && offset < INTREMAP_ENTRIES )
> - {
> - u16 bdf = ioapic_sbdf[idx].bdf;
> - u16 seg = ioapic_sbdf[idx].seg;
> - u16 req_id = get_intremap_requestor_id(seg, bdf);
> - const struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
> - union irte_ptr entry;
> + seg = ioapic_sbdf[idx].seg;
> + bdf = ioapic_sbdf[idx].bdf;
> + iommu = find_iommu_for_device(seg, bdf);
> + if ( !iommu )
> + return val;
> + req_id = get_intremap_requestor_id(seg, bdf);
> + entry = get_intremap_entry(iommu, req_id, offset);
>
> - if ( !iommu )
> - return val;
> + if ( !(reg & 1) )
> + {
> ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
> - entry = get_intremap_entry(iommu, req_id, offset);
> val &= ~(INTREMAP_ENTRIES - 1);
> + /* The IntType fields match for both formats. */
> val |= MASK_INSR(entry.ptr32->flds.int_type,
> IO_APIC_REDIR_DELIV_MODE_MASK);
> - val |= MASK_INSR(entry.ptr32->flds.vector,
> + val |= MASK_INSR(iommu->ctrl.ga_en
> + ? entry.ptr128->full.vector
> + : entry.ptr32->flds.vector,
> IO_APIC_REDIR_VECTOR_MASK);
> }
> + else if ( x2apic_enabled )
> + val = get_full_dest(entry.ptr128);
>
> return val;
> }
> @@ -447,9 +580,9 @@ static int update_intremap_entry_from_ms
> unsigned long flags;
> union irte_ptr entry;
> u16 req_id, alias_id;
> - u8 delivery_mode, dest, vector, dest_mode;
> + uint8_t delivery_mode, vector, dest_mode;
> spinlock_t *lock;
> - unsigned int offset, i;
> + unsigned int dest, offset, i;
>
> req_id = get_dma_requestor_id(iommu->seg, bdf);
> alias_id = get_intremap_requestor_id(iommu->seg, bdf);
> @@ -470,7 +603,12 @@ static int update_intremap_entry_from_ms
> dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
> delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
> vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
> - dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
> +
> + if ( x2apic_enabled )
> + dest = msg->dest32;
> + else
> + dest = MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK);
> +
> offset = *remap_index;
> if ( offset >= INTREMAP_ENTRIES )
> {
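In case the MASK_EXTR()/MASK_INSR() idiom used here is unfamiliar, a short
sketch with definitions matching Xen's (xen/include/xen/lib.h); the 0xff000
mask, standing in for the destination-ID field in MSI address bits 19:12, is
illustrative only:

#include <assert.h>
#include <stdint.h>

/* Definitions matching Xen's xen/include/xen/lib.h. */
#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))
#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))

int main(void)
{
    uint32_t mask = 0xff000;          /* dest ID lives in address bits 19:12 */
    uint32_t address_lo = 0xfee2b00c; /* example MSI address, dest ID 0x2b   */

    assert(MASK_EXTR(address_lo, mask) == 0x2b);  /* pull the field out  */
    assert(MASK_INSR(0x2bU, mask) == 0x2b000);    /* put a value back in */
    return 0;
}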
> @@ -616,10 +754,21 @@ void amd_iommu_read_msi_from_ire(
> }
>
> msg->data &= ~(INTREMAP_ENTRIES - 1);
> + /* The IntType fields match for both formats. */
> msg->data |= MASK_INSR(entry.ptr32->flds.int_type,
> MSI_DATA_DELIVERY_MODE_MASK);
> - msg->data |= MASK_INSR(entry.ptr32->flds.vector,
> - MSI_DATA_VECTOR_MASK);
> + if ( iommu->ctrl.ga_en )
> + {
> + msg->data |= MASK_INSR(entry.ptr128->full.vector,
> + MSI_DATA_VECTOR_MASK);
> + msg->dest32 = get_full_dest(entry.ptr128);
> + }
> + else
> + {
> + msg->data |= MASK_INSR(entry.ptr32->flds.vector,
> + MSI_DATA_VECTOR_MASK);
> + msg->dest32 = entry.ptr32->flds.dest;
> + }
> }
>
> int __init amd_iommu_free_intremap_table(
> @@ -631,7 +780,7 @@ int __init amd_iommu_free_intremap_table
>
> if ( tb )
> {
> - __free_amd_iommu_tables(tb, INTREMAP_TABLE_ORDER);
> + __free_amd_iommu_tables(tb, intremap_table_order(iommu));
> ivrs_mapping->intremap_table = NULL;
> }
>
> @@ -641,10 +790,10 @@ int __init amd_iommu_free_intremap_table
> void *__init amd_iommu_alloc_intremap_table(
> const struct amd_iommu *iommu, unsigned long **inuse_map)
> {
> - void *tb;
> - tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
> + void *tb = __alloc_amd_iommu_tables(intremap_table_order(iommu));
> +
> BUG_ON(tb == NULL);
> - memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
> + memset(tb, 0, PAGE_SIZE << intremap_table_order(iommu));
> *inuse_map = xzalloc_array(unsigned long,
> BITS_TO_LONGS(INTREMAP_ENTRIES));
> BUG_ON(*inuse_map == NULL);
> return tb;
> @@ -685,18 +834,29 @@ int __init amd_setup_hpet_msi(struct msi
> return rc;
> }
>
> -static void dump_intremap_table(const u32 *table)
> +static void dump_intremap_table(const struct amd_iommu *iommu,
> + union irte_cptr tbl)
> {
> - u32 count;
> + unsigned int count;
>
> - if ( !table )
> + if ( !tbl.ptr )
> return;
>
> for ( count = 0; count < INTREMAP_ENTRIES; count++ )
> {
> - if ( !table[count] )
> - continue;
> - printk(" IRTE[%03x] %08x\n", count, table[count]);
> + if ( iommu->ctrl.ga_en )
> + {
> + if ( !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1] )
> + continue;
> + printk(" IRTE[%03x] %016lx_%016lx\n",
> + count, tbl.ptr128[count].raw[1], tbl.ptr128[count].raw[0]);
> + }
> + else
> + {
> + if ( !tbl.ptr32[count].raw )
> + continue;
> + printk(" IRTE[%03x] %08x\n", count, tbl.ptr32[count].raw);
> + }
> }
> }
>
> @@ -714,7 +874,7 @@ static int dump_intremap_mapping(const s
> PCI_FUNC(ivrs_mapping->dte_requestor_id));
>
> spin_lock_irqsave(&(ivrs_mapping->intremap_lock), flags);
> - dump_intremap_table(ivrs_mapping->intremap_table);
> + dump_intremap_table(iommu, ivrs_mapping->intremap_table);
> spin_unlock_irqrestore(&(ivrs_mapping->intremap_lock), flags);
>
> process_pending_softirqs();
> @@ -733,6 +893,8 @@ static void dump_intremap_tables(unsigne
> printk("--- Dumping Shared IOMMU Interrupt Remapping Table ---\n");
>
> spin_lock_irqsave(&shared_intremap_lock, flags);
> - dump_intremap_table(shared_intremap_table);
> + dump_intremap_table(list_first_entry(&amd_iommu_head, struct amd_iommu,
> + list),
> + shared_intremap_table);
> spin_unlock_irqrestore(&shared_intremap_lock, flags);
> }
>
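One closing remark on the barrier comments above: the clear-low /
write-high / write-low discipline is the generic pattern for publishing a
128-bit descriptor to a cache-coherent agent that may sample it at any
time. A condensed standalone sketch (field names hypothetical; smp_wmb()
reduced to the compiler barrier it amounts to on x86):

#include <stdint.h>
#include <stdio.h>

/* Compiler barrier: ample here, since x86 does not reorder stores with
 * other stores and the IOMMU is a cache-coherent bus agent. */
#define smp_wmb() __asm__ __volatile__("" ::: "memory")

struct desc { uint64_t lo, hi; };   /* lo bit 0 = valid (RemapEn) */

/* Re-point a live 128-bit entry without the device ever observing the
 * valid bit set alongside a half-updated payload. */
static void publish(volatile struct desc *d, uint64_t lo, uint64_t hi)
{
    d->lo = 0;     /* 1. clear low half (drops the valid bit) first   */
    smp_wmb();     /* 2. ... and make sure it lands before the rest   */
    d->hi = hi;    /* 3. install the new high half                    */
    smp_wmb();     /* 4. high half must be visible before re-enabling */
    d->lo = lo;    /* 5. new low half, valid bit included, goes last  */
}

int main(void)
{
    struct desc e = { 0, 0 };

    publish(&e, 0x1 /* valid */ | 0xab << 8, 0x12345678);
    printf("%016llx_%016llx\n",
           (unsigned long long)e.hi, (unsigned long long)e.lo);
    return 0;
}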
--
Brian Woods