[Xen-devel] [PATCH v2 05/10] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format
This is in preparation for actually enabling x2APIC mode, which requires
this wider IRTE format to be used.

A specific remark regarding the first hunk changing
amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
tables when creating new one"). Other code introduced by that change has
meanwhile disappeared or further changed, and I wonder whether - rather
than adding an x2apic_enabled check to the conditional - the bypass
couldn't be deleted altogether. For now the goal is to affect the
non-x2APIC paths as little as possible.

Take the liberty of using the new "fresh" flag to suppress an unneeded
flush in update_intremap_entry_from_ioapic().

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v2: Add cast in get_full_dest(). Re-base over changes earlier in the
    series. Don't use cmpxchg16b. Use barrier() instead of wmb().
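[ Illustrative note, not part of the patch: the v2 remark about avoiding
  cmpxchg16b comes down to a three-step store protocol in the 128-bit
  update paths below. A minimal standalone sketch of that ordering,
  assuming a GCC-style compiler barrier; the *_demo names are invented
  here so as not to clash with the real declarations:

#include <stdint.h>

/* Compiler barrier, as the patch uses it (assumption: GCC-style asm). */
#define barrier() asm volatile ( "" ::: "memory" )

/* Demo stand-in for the patch's union irte128; bit 0 of raw[0] is RemapEn. */
union irte128_demo {
    uint64_t raw[2];
};

static void update_irte128_demo(volatile union irte128_demo *e,
                                uint64_t lo, uint64_t hi)
{
    e->raw[0] = 0;  /* 1: clear the low half - RemapEn off, entry invalid */
    barrier();      /*    keep the compiler from reordering the stores */
    e->raw[1] = hi; /* 2: the high half can be written while it's invalid */
    barrier();
    e->raw[0] = lo; /* 3: write the low half last - RemapEn comes back on */
}

  Lacking a 16-byte atomic store, this ordering is what keeps the IOMMU
  from ever consuming a half-updated entry that still has RemapEn set. ]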
---
Note that AMD's doc says Lowest Priority ("Arbitrated" by their naming)
mode is unavailable in x2APIC mode, but they've confirmed this to be a
mistake on their part.
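[ Also illustrative only: a standalone rendering of the 128-bit layout
  introduced below, with a compile-time size check (C11 _Static_assert
  standing in for Xen's BUILD_BUG_ON()), plus the destination reassembly
  that get_full_dest() performs. The *_demo names are again invented:

#include <stdint.h>

struct irte_full_demo {
    unsigned int remap_en:1;   /* entry valid for remapping */
    unsigned int sup_io_pf:1;  /* suppress I/O page faults */
    unsigned int int_type:3;   /* delivery type */
    unsigned int rq_eoi:1;     /* EOI required */
    unsigned int dm:1;         /* destination mode */
    unsigned int guest_mode:1; /* MBZ in this non-guest-APIC format */
    unsigned int dest_lo:24;   /* destination bits 0..23 */
    unsigned int :32;
    unsigned int vector:8;
    unsigned int :24;
    unsigned int :24;
    unsigned int dest_hi:8;    /* destination bits 24..31 */
};

/* Four times the size of the legacy 32-bit entry. */
_Static_assert(sizeof(struct irte_full_demo) == 16, "IRTE not 128 bits wide");

/* What get_full_dest() does: splice the 32-bit x2APIC destination back
 * together from the two separated fields. */
static inline unsigned int full_dest_demo(const struct irte_full_demo *e)
{
    return e->dest_lo | ((unsigned int)e->dest_hi << 24);
} ]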
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -40,12 +40,45 @@ union irte32 {
     struct irte_basic basic;
 };
 
+struct irte_full {
+    unsigned int remap_en:1;
+    unsigned int sup_io_pf:1;
+    unsigned int int_type:3;
+    unsigned int rq_eoi:1;
+    unsigned int dm:1;
+    unsigned int guest_mode:1; /* MBZ */
+    unsigned int dest_lo:24;
+    unsigned int :32;
+    unsigned int vector:8;
+    unsigned int :24;
+    unsigned int :24;
+    unsigned int dest_hi:8;
+};
+
+union irte128 {
+    uint64_t raw[2];
+    struct irte_full full;
+};
+
+static enum {
+    irte32,
+    irte128,
+    irteUNK,
+} irte_mode __read_mostly = irteUNK;
+
 union irte_ptr {
     void *ptr;
     union irte32 *ptr32;
+    union irte128 *ptr128;
 };
 
-#define INTREMAP_TABLE_ORDER 1
+union irte_cptr {
+    const void *ptr;
+    const union irte32 *ptr32;
+    const union irte128 *ptr128;
+} __transparent__;
+
+#define INTREMAP_TABLE_ORDER (irte_mode == irte32 ? 1 : 3)
 #define INTREMAP_LENGTH 0xB
 #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
 
@@ -132,7 +165,19 @@ static union irte_ptr get_intremap_entry
 
     ASSERT(table.ptr && (index < INTREMAP_ENTRIES));
 
-    table.ptr32 += index;
+    switch ( irte_mode )
+    {
+    case irte32:
+        table.ptr32 += index;
+        break;
+
+    case irte128:
+        table.ptr128 += index;
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     return table;
 }
@@ -142,7 +187,21 @@ static void free_intremap_entry(unsigned
 {
     union irte_ptr entry = get_intremap_entry(seg, bdf, index);
 
-    ACCESS_ONCE(entry.ptr32->raw[0]) = 0;
+    switch ( irte_mode )
+    {
+    case irte32:
+        ACCESS_ONCE(entry.ptr32->raw[0]) = 0;
+        break;
+
+    case irte128:
+        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
+        barrier();
+        entry.ptr128->raw[1] = 0;
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     __clear_bit(index, get_ivrs_mappings(seg)[bdf].intremap_inuse);
 }
@@ -160,9 +219,37 @@ static void update_intremap_entry(union
         .dest = dest,
         .vector = vector,
     };
+    struct irte_full full = {
+        .remap_en = 1,
+        .sup_io_pf = 0,
+        .int_type = int_type,
+        .rq_eoi = 0,
+        .dm = dest_mode,
+        .dest_lo = dest,
+        .dest_hi = dest >> 24,
+        .vector = vector,
+    };
+
+    switch ( irte_mode )
+    {
+    case irte32:
+        ACCESS_ONCE(entry.ptr32->raw[0]) =
+            container_of(&basic, union irte32, basic)->raw[0];
+        break;
+
+    case irte128:
+        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
+        barrier();
+        entry.ptr128->raw[1] =
+            container_of(&full, union irte128, full)->raw[1];
+        barrier();
+        ACCESS_ONCE(entry.ptr128->raw[0]) =
+            container_of(&full, union irte128, full)->raw[0];
+        break;
 
-    ACCESS_ONCE(entry.ptr32->raw[0]) =
-        container_of(&basic, union irte32, basic)->raw[0];
+    default:
+        ASSERT_UNREACHABLE();
+    }
 }
 
 static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
@@ -176,6 +263,11 @@ static inline void set_rte_index(struct
     rte->delivery_mode = offset >> 8;
 }
 
+static inline unsigned int get_full_dest(const union irte128 *entry)
+{
+    return entry->full.dest_lo | ((unsigned int)entry->full.dest_hi << 24);
+}
+
 static int update_intremap_entry_from_ioapic(
     int bdf,
     struct amd_iommu *iommu,
@@ -185,10 +277,11 @@ static int update_intremap_entry_from_io
 {
     unsigned long flags;
     union irte_ptr entry;
-    u8 delivery_mode, dest, vector, dest_mode;
+    unsigned int delivery_mode, dest, vector, dest_mode;
     int req_id;
     spinlock_t *lock;
     unsigned int offset;
+    bool fresh = false;
 
     req_id = get_intremap_requestor_id(iommu->seg, bdf);
     lock = get_intremap_lock(iommu->seg, req_id);
@@ -196,7 +289,7 @@ static int update_intremap_entry_from_io
     delivery_mode = rte->delivery_mode;
     vector = rte->vector;
     dest_mode = rte->dest_mode;
-    dest = rte->dest.logical.logical_dest;
+    dest = x2apic_enabled ? rte->dest.dest32 : rte->dest.logical.logical_dest;
 
     spin_lock_irqsave(lock, flags);
 
@@ -211,25 +304,40 @@ static int update_intremap_entry_from_io
             return -ENOSPC;
         }
         *index = offset;
-        lo_update = 1;
+        fresh = true;
     }
 
     entry = get_intremap_entry(iommu->seg, req_id, offset);
-    if ( !lo_update )
+    if ( fresh )
+        /* nothing */;
+    else if ( !lo_update )
     {
         /*
          * Low half of incoming RTE is already in remapped format,
          * so need to recover vector and delivery mode from IRTE.
         */
        ASSERT(get_rte_index(rte) == offset);
-        vector = entry.ptr32->basic.vector;
+        if ( irte_mode == irte32 )
+            vector = entry.ptr32->basic.vector;
+        else
+            vector = entry.ptr128->full.vector;
+        /* The IntType fields match for both formats. */
         delivery_mode = entry.ptr32->basic.int_type;
     }
+    else if ( x2apic_enabled )
+    {
+        /*
+         * High half of incoming RTE was read from the I/O APIC and hence may
+         * not hold the full destination, so need to recover full destination
+         * from IRTE.
+         */
+        dest = get_full_dest(entry.ptr128);
+    }
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
 
     spin_unlock_irqrestore(lock, flags);
 
-    if ( iommu->enabled )
+    if ( iommu->enabled && !fresh )
     {
         spin_lock_irqsave(&iommu->lock, flags);
         amd_iommu_flush_intremap(iommu, req_id);
@@ -253,6 +361,19 @@ int __init amd_iommu_setup_ioapic_remapp
     spinlock_t *lock;
     unsigned int offset;
 
+    for_each_amd_iommu ( iommu )
+    {
+        if ( irte_mode != irteUNK )
+        {
+            if ( iommu->ctrl.ga_en == (irte_mode == irte32) )
+                return -ENXIO;
+        }
+        else if ( iommu->ctrl.ga_en )
+            irte_mode = irte128;
+        else
+            irte_mode = irte32;
+    }
+
     /* Read ioapic entries and update interrupt remapping table accordingly */
     for ( apic = 0; apic < nr_ioapics; apic++ )
     {
@@ -287,6 +408,18 @@ int __init amd_iommu_setup_ioapic_remapp
             dest_mode = rte.dest_mode;
             dest = rte.dest.logical.logical_dest;
 
+            if ( iommu->ctrl.xt_en )
+            {
+                /*
+                 * In x2APIC mode we have no way of discovering the high 24
+                 * bits of the destination of an already enabled interrupt.
+                 * We come here earlier than for xAPIC mode, so no interrupts
+                 * should have been set up before.
+                 */
+                AMD_IOMMU_DEBUG("Unmasked IO-APIC#%u entry %u in x2APIC mode\n",
+                                IO_APIC_ID(apic), pin);
+            }
+
             spin_lock_irqsave(lock, flags);
             offset = alloc_intremap_entry(seg, req_id, 1);
             BUG_ON(offset >= INTREMAP_ENTRIES);
@@ -321,7 +454,8 @@ void amd_iommu_ioapic_update_ire(
     struct IO_APIC_route_entry new_rte = { 0 };
     unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
     unsigned int pin = (reg - 0x10) / 2;
-    int saved_mask, seg, bdf, rc;
+    int seg, bdf, rc;
+    bool saved_mask, fresh = false;
     struct amd_iommu *iommu;
     unsigned int idx;
 
@@ -363,12 +497,22 @@ void amd_iommu_ioapic_update_ire(
         *(((u32 *)&new_rte) + 1) = value;
     }
 
-    if ( new_rte.mask &&
-         ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
+    if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
     {
         ASSERT(saved_mask);
-        __io_apic_write(apic, reg, value);
-        return;
+
+        /*
+         * There's nowhere except the IRTE to store a full 32-bit destination,
+         * so we may not bypass entry allocation and updating of the low RTE
+         * half in the (usual) case of the high RTE half getting written first.
+         */
+        if ( new_rte.mask && !x2apic_enabled )
+        {
+            __io_apic_write(apic, reg, value);
+            return;
+        }
+
+        fresh = true;
     }
 
     /* mask the interrupt while we change the intremap table */
@@ -397,8 +541,12 @@ void amd_iommu_ioapic_update_ire(
     if ( reg == rte_lo )
         return;
 
-    /* unmask the interrupt after we have updated the intremap table */
-    if ( !saved_mask )
+    /*
+     * Unmask the interrupt after we have updated the intremap table. Also
+     * write the low half if a fresh entry was allocated for a high half
+     * update in x2APIC mode.
+     */
+    if ( !saved_mask || (x2apic_enabled && fresh) )
     {
         old_rte.mask = saved_mask;
         __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
@@ -412,27 +560,36 @@ unsigned int amd_iommu_read_ioapic_from_
     unsigned int offset;
     unsigned int val = __io_apic_read(apic, reg);
     unsigned int pin = (reg - 0x10) / 2;
+    uint16_t seg, req_id;
+    union irte_ptr entry;
 
     idx = ioapic_id_to_index(IO_APIC_ID(apic));
     if ( idx == MAX_IO_APICS )
         return -EINVAL;
 
     offset = ioapic_sbdf[idx].pin_2_idx[pin];
+    if ( offset >= INTREMAP_ENTRIES )
+        return val;
 
-    if ( !(reg & 1) && offset < INTREMAP_ENTRIES )
+    seg = ioapic_sbdf[idx].seg;
+    req_id = get_intremap_requestor_id(seg, ioapic_sbdf[idx].bdf);
+    entry = get_intremap_entry(seg, req_id, offset);
+
+    if ( !(reg & 1) )
    {
-        u16 bdf = ioapic_sbdf[idx].bdf;
-        u16 seg = ioapic_sbdf[idx].seg;
-        u16 req_id = get_intremap_requestor_id(seg, bdf);
-        union irte_ptr entry = get_intremap_entry(seg, req_id, offset);
         ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
         val &= ~(INTREMAP_ENTRIES - 1);
+        /* The IntType fields match for both formats. */
         val |= MASK_INSR(entry.ptr32->basic.int_type,
                          IO_APIC_REDIR_DELIV_MODE_MASK);
-        val |= MASK_INSR(entry.ptr32->basic.vector,
+        val |= MASK_INSR(irte_mode == irte32
+                         ? entry.ptr32->basic.vector
+                         : entry.ptr128->full.vector,
                          IO_APIC_REDIR_VECTOR_MASK);
     }
+    else if ( x2apic_enabled )
+        val = get_full_dest(entry.ptr128);
 
     return val;
 }
@@ -444,9 +601,9 @@ static int update_intremap_entry_from_ms
     unsigned long flags;
     union irte_ptr entry;
     u16 req_id, alias_id;
-    u8 delivery_mode, dest, vector, dest_mode;
+    uint8_t delivery_mode, vector, dest_mode;
     spinlock_t *lock;
-    unsigned int offset, i;
+    unsigned int dest, offset, i;
 
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -467,7 +624,12 @@ static int update_intremap_entry_from_ms
     dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
-    dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
+
+    if ( x2apic_enabled )
+        dest = msg->dest32;
+    else
+        dest = MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK);
+
     offset = *remap_index;
     if ( offset >= INTREMAP_ENTRIES )
     {
@@ -612,10 +774,21 @@ void amd_iommu_read_msi_from_ire(
     }
 
     msg->data &= ~(INTREMAP_ENTRIES - 1);
+    /* The IntType fields match for both formats. */
     msg->data |= MASK_INSR(entry.ptr32->basic.int_type,
                            MSI_DATA_DELIVERY_MODE_MASK);
-    msg->data |= MASK_INSR(entry.ptr32->basic.vector,
-                           MSI_DATA_VECTOR_MASK);
+    if ( irte_mode == irte32 )
+    {
+        msg->data |= MASK_INSR(entry.ptr32->basic.vector,
+                               MSI_DATA_VECTOR_MASK);
+        msg->dest32 = entry.ptr32->basic.dest;
+    }
+    else
+    {
+        msg->data |= MASK_INSR(entry.ptr128->full.vector,
+                               MSI_DATA_VECTOR_MASK);
+        msg->dest32 = get_full_dest(entry.ptr128);
+    }
 }
 
 int __init amd_iommu_free_intremap_table(
@@ -678,18 +851,28 @@ int __init amd_setup_hpet_msi(struct msi
     return rc;
 }
 
-static void dump_intremap_table(const u32 *table)
+static void dump_intremap_table(union irte_cptr tbl)
 {
-    u32 count;
+    unsigned int count;
 
-    if ( !table )
+    if ( !tbl.ptr || irte_mode == irteUNK )
         return;
 
     for ( count = 0; count < INTREMAP_ENTRIES; count++ )
     {
-        if ( !table[count] )
-            continue;
-        printk("    IRTE[%03x] %08x\n", count, table[count]);
+        if ( irte_mode == irte32 )
+        {
+            if ( !tbl.ptr32[count].raw[0] )
+                continue;
+            printk("    IRTE[%03x] %08x\n", count, tbl.ptr32[count].raw[0]);
+        }
+        else
+        {
+            if ( !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1] )
+                continue;
+            printk("    IRTE[%03x] %016lx_%016lx\n",
                   count, tbl.ptr128[count].raw[1], tbl.ptr128[count].raw[0]);
+        }
    }
 }
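[ A closing illustration of the INTREMAP_TABLE_ORDER change above: with
  INTREMAP_ENTRIES fixed at 1 << 0xB = 2048, 4-byte IRTEs fill two 4KiB
  pages (order 1) while 16-byte IRTEs need eight (order 3). The same
  arithmetic restated as a compile-time check, under the assumption of
  4KiB pages and with *_DEMO names invented here:

#define PAGE_SIZE_DEMO        4096u       /* assumption: 4KiB pages */
#define INTREMAP_ENTRIES_DEMO (1u << 0xB) /* 2048, as in iommu_intr.c */

_Static_assert(INTREMAP_ENTRIES_DEMO * 4 == (PAGE_SIZE_DEMO << 1),
               "2048 4-byte entries == 2 pages, i.e. order 1");
_Static_assert(INTREMAP_ENTRIES_DEMO * 16 == (PAGE_SIZE_DEMO << 3),
               "2048 16-byte entries == 8 pages, i.e. order 3"); ]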