[Xen-devel] [PATCH 4/9] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format
This is in preparation for actually enabling x2APIC mode, which requires
the wider IRTE format to be used.

A specific remark regarding the first hunk changing
amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
tables when creating new one"). Other code introduced by that change
has meanwhile disappeared or further changed, and I wonder whether,
rather than adding an x2apic_enabled check to the conditional, the
bypass couldn't be deleted altogether. For now the goal is to affect
the non-x2APIC paths as little as possible.

Take the liberty of also using the new "fresh" flag to suppress an
unneeded flush in update_intremap_entry_from_ioapic().

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Note that AMD's doc says Lowest Priority ("Arbitrated" by their naming)
mode is unavailable in x2APIC mode, but they've confirmed this to be a
mistake on their part.
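
For illustration, here is a minimal stand-alone user-space sketch of the
new 128-bit layout (not part of the patch): struct irte_full and
get_full_dest() mirror the first hunk of the diff below, while the
main() harness, the compile command, and the sample destination value
are invented purely for the demonstration. It assumes gcc's LSB-first
bitfield layout on x86-64, which is what the in-memory entry has to
match for the hardware anyway.

/* irte-sketch.c - build with: gcc -O2 -o irte-sketch irte-sketch.c */
#include <assert.h>
#include <stdio.h>

struct irte_full {
    unsigned int remap_en:1;
    unsigned int sup_io_pf:1;
    unsigned int int_type:3;
    unsigned int rq_eoi:1;
    unsigned int dm:1;
    unsigned int guest_mode:1; /* MBZ in the non-guest-APIC format */
    unsigned int dest_lo:24;
    unsigned int :32;
    unsigned int vector:8;
    unsigned int :24;
    unsigned int :24;
    unsigned int dest_hi:8;
};

/* Reassemble the 32-bit x2APIC destination from its 24+8 bit split. */
static unsigned int get_full_dest(const struct irte_full *entry)
{
    return entry->dest_lo | ((unsigned int)entry->dest_hi << 24);
}

int main(void)
{
    unsigned int dest = 0xabcdef01; /* too wide for the 8-bit xAPIC field */
    struct irte_full irte = {
        .remap_en = 1,
        .int_type = 0,         /* fixed delivery */
        .dm = 0,               /* physical destination mode */
        .dest_lo = dest,       /* low 24 bits (truncated by the bitfield) */
        .dest_hi = dest >> 24, /* high 8 bits */
        .vector = 0x40,
    };

    assert(sizeof(irte) == 16);           /* 128 bits per entry */
    assert(get_full_dest(&irte) == dest); /* destination round-trips */
    printf("sizeof(IRTE)=%zu dest=%#x vector=%#x\n",
           sizeof(irte), get_full_dest(&irte), irte.vector);

    return 0;
}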
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -35,12 +35,34 @@ struct irte_basic {
     unsigned int :8;
 };
 
+struct irte_full {
+    unsigned int remap_en:1;
+    unsigned int sup_io_pf:1;
+    unsigned int int_type:3;
+    unsigned int rq_eoi:1;
+    unsigned int dm:1;
+    unsigned int guest_mode:1; /* MBZ */
+    unsigned int dest_lo:24;
+    unsigned int :32;
+    unsigned int vector:8;
+    unsigned int :24;
+    unsigned int :24;
+    unsigned int dest_hi:8;
+};
+
+static enum {
+    irte_basic,
+    irte_full,
+    irte_unset,
+} irte_mode __read_mostly = irte_unset;
+
 union irte_ptr {
     void *raw;
     struct irte_basic *basic;
+    struct irte_full *full;
 };
 
-#define INTREMAP_TABLE_ORDER    1
+#define INTREMAP_TABLE_ORDER    (irte_mode == irte_basic ? 1 : 3)
 #define INTREMAP_LENGTH 0xB
 #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
 
@@ -127,7 +149,19 @@ static union irte_ptr get_intremap_entry
 
     ASSERT(table.raw && (offset < INTREMAP_ENTRIES));
 
-    table.basic += offset;
+    switch ( irte_mode )
+    {
+    case irte_basic:
+        table.basic += offset;
+        break;
+
+    case irte_full:
+        table.full += offset;
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     return table;
 }
@@ -136,7 +170,21 @@ static void free_intremap_entry(unsigned
 {
     union irte_ptr entry = get_intremap_entry(seg, bdf, offset);
 
-    *entry.basic = (struct irte_basic){};
+    switch ( irte_mode )
+    {
+    case irte_basic:
+        *entry.basic = (struct irte_basic){};
+        break;
+
+    case irte_full:
+        entry.full->remap_en = 0;
+        wmb();
+        *entry.full = (struct irte_full){};
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
 }
@@ -154,8 +202,38 @@ static void update_intremap_entry(union
         .dest = dest,
         .vector = vector,
     };
+    struct irte_full full = {
+        .remap_en = 1,
+        .sup_io_pf = 0,
+        .int_type = int_type,
+        .rq_eoi = 0,
+        .dm = dest_mode,
+        .dest_lo = dest,
+        .dest_hi = dest >> 24,
+        .vector = vector,
+    };
+
+    switch ( irte_mode )
+    {
+        __uint128_t ret;
+        union {
+            __uint128_t raw;
+            struct irte_full full;
+        } old;
+
+    case irte_basic:
+        *entry.basic = basic;
+        break;
+
+    case irte_full:
+        old.full = *entry.full;
+        ret = cmpxchg16b(entry.full, &old, &full);
+        ASSERT(ret == old.raw);
+        break;
 
-    *entry.basic = basic;
+    default:
+        ASSERT_UNREACHABLE();
+    }
 }
 
 static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
@@ -169,6 +247,11 @@ static inline void set_rte_index(struct
     rte->delivery_mode = offset >> 8;
 }
 
+static inline unsigned int get_full_dest(const struct irte_full *entry)
+{
+    return entry->dest_lo | (entry->dest_hi << 24);
+}
+
 static int
 update_intremap_entry_from_ioapic(
     int bdf, struct amd_iommu *iommu,
@@ -178,10 +261,11 @@ static int update_intremap_entry_from_io
 {
     unsigned long flags;
     union irte_ptr entry;
-    u8 delivery_mode, dest, vector, dest_mode;
+    unsigned int delivery_mode, dest, vector, dest_mode;
     int req_id;
     spinlock_t *lock;
     unsigned int offset;
+    bool fresh = false;
 
     req_id = get_intremap_requestor_id(iommu->seg, bdf);
     lock = get_intremap_lock(iommu->seg, req_id);
@@ -189,7 +273,7 @@ static int update_intremap_entry_from_io
     delivery_mode = rte->delivery_mode;
     vector = rte->vector;
     dest_mode = rte->dest_mode;
-    dest = rte->dest.logical.logical_dest;
+    dest = x2apic_enabled ? rte->dest.dest32 : rte->dest.logical.logical_dest;
 
     spin_lock_irqsave(lock, flags);
 
@@ -204,25 +288,40 @@ static int update_intremap_entry_from_io
             return -ENOSPC;
         }
         *index = offset;
-        lo_update = 1;
+        fresh = true;
     }
 
     entry = get_intremap_entry(iommu->seg, req_id, offset);
-    if ( !lo_update )
+    if ( fresh )
+        /* nothing */;
+    else if ( !lo_update )
     {
         /*
          * Low half of incoming RTE is already in remapped format,
         * so need to recover vector and delivery mode from IRTE.
          */
        ASSERT(get_rte_index(rte) == offset);
-        vector = entry.basic->vector;
+        if ( irte_mode == irte_basic )
+            vector = entry.basic->vector;
+        else
+            vector = entry.full->vector;
+        /* The IntType fields match for both formats. */
        delivery_mode = entry.basic->int_type;
    }
+    else if ( x2apic_enabled )
+    {
+        /*
+         * High half of incoming RTE was read from the I/O APIC and hence may
+         * not hold the full destination, so need to recover full destination
+         * from IRTE.
+         */
+        dest = get_full_dest(entry.full);
+    }
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
 
     spin_unlock_irqrestore(lock, flags);
 
-    if ( iommu->enabled )
+    if ( iommu->enabled && !fresh )
     {
         spin_lock_irqsave(&iommu->lock, flags);
         amd_iommu_flush_intremap(iommu, req_id);
@@ -246,6 +345,19 @@ int __init amd_iommu_setup_ioapic_remapp
     spinlock_t *lock;
     unsigned int offset;
 
+    for_each_amd_iommu ( iommu )
+    {
+        if ( irte_mode != irte_unset )
+        {
+            if ( iommu->ctrl.ga_en == (irte_mode == irte_basic) )
+                return -ENXIO;
+        }
+        else if ( iommu->ctrl.ga_en )
+            irte_mode = irte_full;
+        else
+            irte_mode = irte_basic;
+    }
+
     /* Read ioapic entries and update interrupt remapping table accordingly */
     for ( apic = 0; apic < nr_ioapics; apic++ )
     {
@@ -280,6 +392,18 @@ int __init amd_iommu_setup_ioapic_remapp
             dest_mode = rte.dest_mode;
             dest = rte.dest.logical.logical_dest;
 
+            if ( iommu->ctrl.xt_en )
+            {
+                /*
+                 * In x2APIC mode we have no way of discovering the high 24
+                 * bits of the destination of an already enabled interrupt.
+                 * We come here earlier than for xAPIC mode, so no interrupts
+                 * should have been set up before.
+                 */
+                AMD_IOMMU_DEBUG("Unmasked IO-APIC#%u entry %u in x2APIC mode\n",
+                                IO_APIC_ID(apic), pin);
+            }
+
             spin_lock_irqsave(lock, flags);
             offset = alloc_intremap_entry(seg, req_id, 1);
             BUG_ON(offset >= INTREMAP_ENTRIES);
@@ -314,7 +438,8 @@ void amd_iommu_ioapic_update_ire(
     struct IO_APIC_route_entry new_rte = { 0 };
     unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
     unsigned int pin = (reg - 0x10) / 2;
-    int saved_mask, seg, bdf, rc;
+    int seg, bdf, rc;
+    bool saved_mask, fresh = false;
     struct amd_iommu *iommu;
     unsigned int idx;
 
@@ -356,12 +481,22 @@ void amd_iommu_ioapic_update_ire(
         *(((u32 *)&new_rte) + 1) = value;
     }
 
-    if ( new_rte.mask &&
-         ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
+    if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
     {
         ASSERT(saved_mask);
-        __io_apic_write(apic, reg, value);
-        return;
+
+        /*
+         * There's nowhere except the IRTE to store a full 32-bit destination,
+         * so we may not bypass entry allocation and updating of the low RTE
+         * half in the (usual) case of the high RTE half getting written first.
+         */
+        if ( new_rte.mask && !x2apic_enabled )
+        {
+            __io_apic_write(apic, reg, value);
+            return;
+        }
+
+        fresh = true;
     }
 
     /* mask the interrupt while we change the intremap table */
@@ -390,8 +525,12 @@ void amd_iommu_ioapic_update_ire(
     if ( reg == rte_lo )
         return;
 
-    /* unmask the interrupt after we have updated the intremap table */
-    if ( !saved_mask )
+    /*
+     * Unmask the interrupt after we have updated the intremap table. Also
+     * write the low half if a fresh entry was allocated for a high half
+     * update in x2APIC mode.
+     */
+    if ( !saved_mask || (x2apic_enabled && fresh) )
     {
         old_rte.mask = saved_mask;
         __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
@@ -405,25 +544,35 @@ unsigned int amd_iommu_read_ioapic_from_
     unsigned int offset;
     unsigned int val = __io_apic_read(apic, reg);
     unsigned int pin = (reg - 0x10) / 2;
+    uint16_t seg, req_id;
+    union irte_ptr entry;
 
     idx = ioapic_id_to_index(IO_APIC_ID(apic));
     if ( idx == MAX_IO_APICS )
         return -EINVAL;
 
     offset = ioapic_sbdf[idx].pin_2_idx[pin];
+    if ( offset >= INTREMAP_ENTRIES )
+        return val;
 
-    if ( !(reg & 1) && offset < INTREMAP_ENTRIES )
+    seg = ioapic_sbdf[idx].seg;
+    req_id = get_intremap_requestor_id(seg, ioapic_sbdf[idx].bdf);
+    entry = get_intremap_entry(seg, req_id, offset);
+
+    if ( !(reg & 1) )
     {
-        u16 bdf = ioapic_sbdf[idx].bdf;
-        u16 seg = ioapic_sbdf[idx].seg;
-        u16 req_id = get_intremap_requestor_id(seg, bdf);
-        union irte_ptr entry = get_intremap_entry(seg, req_id, offset);
         ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
         val &= ~(INTREMAP_ENTRIES - 1);
+        /* The IntType fields match for both formats. */
         val |= MASK_INSR(entry.basic->int_type,
                          IO_APIC_REDIR_DELIV_MODE_MASK);
-        val |= MASK_INSR(entry.basic->vector, IO_APIC_REDIR_VECTOR_MASK);
+        if ( irte_mode == irte_basic )
+            val |= MASK_INSR(entry.basic->vector, IO_APIC_REDIR_VECTOR_MASK);
+        else
+            val |= MASK_INSR(entry.full->vector, IO_APIC_REDIR_VECTOR_MASK);
     }
+    else if ( x2apic_enabled )
+        val = get_full_dest(entry.full);
 
     return val;
 }
@@ -435,9 +584,9 @@ static int update_intremap_entry_from_ms
     unsigned long flags;
     union irte_ptr entry;
     u16 req_id, alias_id;
-    u8 delivery_mode, dest, vector, dest_mode;
+    uint8_t delivery_mode, vector, dest_mode;
     spinlock_t *lock;
-    unsigned int offset, i;
+    unsigned int dest, offset, i;
 
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -458,7 +607,12 @@ static int update_intremap_entry_from_ms
     dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
-    dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
+
+    if ( x2apic_enabled )
+        dest = msg->dest32;
+    else
+        dest = MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK);
+
     offset = *remap_index;
     if ( offset >= INTREMAP_ENTRIES )
     {
@@ -603,8 +757,18 @@ void amd_iommu_read_msi_from_ire(
     }
 
     msg->data &= ~(INTREMAP_ENTRIES - 1);
+    /* The IntType fields match for both formats. */
     msg->data |= MASK_INSR(entry.basic->int_type,
                            MSI_DATA_DELIVERY_MODE_MASK);
-    msg->data |= MASK_INSR(entry.basic->vector, MSI_DATA_VECTOR_MASK);
+    if ( irte_mode == irte_basic )
+    {
+        msg->data |= MASK_INSR(entry.basic->vector, MSI_DATA_VECTOR_MASK);
+        msg->dest32 = entry.basic->dest;
+    }
+    else
+    {
+        msg->data |= MASK_INSR(entry.full->vector, MSI_DATA_VECTOR_MASK);
+        msg->dest32 = get_full_dest(entry.full);
+    }
 }
 
 int __init amd_iommu_free_intremap_table(
@@ -667,18 +831,33 @@ int __init amd_setup_hpet_msi(struct msi
     return rc;
 }
 
-static void dump_intremap_table(const u32 *table)
+static void dump_intremap_table(const void *table)
 {
-    u32 count;
+    unsigned int count;
+    union {
+        const void *raw;
+        const uint32_t *basic;
+        const uint64_t (*full)[2];
+    } tbl = { .raw = table };
 
-    if ( !table )
+    if ( !table || irte_mode == irte_unset )
         return;
 
     for ( count = 0; count < INTREMAP_ENTRIES; count++ )
     {
-        if ( !table[count] )
-            continue;
-        printk("    IRTE[%03x] %08x\n", count, table[count]);
+        if ( irte_mode == irte_basic )
+        {
+            if ( !tbl.basic[count] )
+                continue;
+            printk("    IRTE[%03x] %08x\n", count, tbl.basic[count]);
+        }
+        else
+        {
+            if ( !tbl.full[count][0] && !tbl.full[count][1] )
+                continue;
+            printk("    IRTE[%03x] %016lx_%016lx\n",
+                   count, tbl.full[count][1], tbl.full[count][0]);
        }
    }
 }
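
A closing remark on the INTREMAP_TABLE_ORDER change at the top of the
diff: 2048 entries at 16 bytes each need 32KiB, i.e. 8 pages, i.e.
order 3, where the 4-byte entries fit in 2 pages (order 1). The
stand-alone check below illustrates the arithmetic; table_order() is a
helper invented purely for this illustration (the patch itself just
hard-codes the two orders), assuming 4KiB pages.

/* order-sketch.c - build with: gcc -O2 -o order-sketch order-sketch.c */
#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE        4096u
#define INTREMAP_LENGTH  0xB
#define INTREMAP_ENTRIES (1u << INTREMAP_LENGTH)

/* Smallest order such that (PAGE_SIZE << order) covers the table. */
static unsigned int table_order(unsigned int entry_size)
{
    unsigned int order = 0;

    while ( (PAGE_SIZE << order) < INTREMAP_ENTRIES * entry_size )
        ++order;

    return order;
}

int main(void)
{
    assert(table_order(4) == 1);   /* 32-bit IRTEs:   8KiB -> 2 pages */
    assert(table_order(16) == 3);  /* 128-bit IRTEs: 32KiB -> 8 pages */
    printf("basic: order %u, full: order %u\n",
           table_order(4), table_order(16));

    return 0;
}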
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel