[Xen-devel] [PATCH 4/9] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format
This is in preparation for actually enabling x2APIC mode, which requires
this wider IRTE format to be used.
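To illustrate why the wider format is needed, here is a standalone
sketch (dest_full and recombine_dest are made-up names; the patch's real
counterparts are struct irte_full and get_full_dest() below): x2APIC
destination IDs are a full 32 bits wide and hence cannot fit the 8-bit
destination field of the 32-bit IRTE, so the 128-bit format splits them
into 24 low and 8 high bits.

    /* Standalone sketch, not part of the patch. */
    #include <assert.h>

    struct dest_full {
        unsigned int dest_lo:24;
        unsigned int dest_hi:8;
    };

    /* Mirrors what the patch's get_full_dest() does. */
    static unsigned int recombine_dest(const struct dest_full *e)
    {
        return e->dest_lo | (e->dest_hi << 24);
    }

    int main(void)
    {
        struct dest_full d = { .dest_lo = 0xabcdef, .dest_hi = 0x12 };
        assert(recombine_dest(&d) == 0x12abcdefU);
        return 0;
    }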
A specific remark regarding the first hunk changing
amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
tables when creating new one"). Other code introduced by that change has
meanwhile disappeared or been further changed, and I wonder whether -
rather than adding an x2apic_enabled check to the conditional - the
bypass couldn't be deleted altogether. For now the goal is to affect the
non-x2APIC paths as little as possible.
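For context, the reshaped bypass (see the corresponding hunk further
down) boils down to the following excerpt; the comment is mine, not
part of the patch:

    /*
     * Sketch: only a masked RTE without an allocated IRTE may skip
     * remapping, and only outside of x2APIC mode, since in x2APIC mode
     * the full 32-bit destination can live nowhere but in the
     * (128-bit) IRTE.
     */
    if ( new_rte.mask && !x2apic_enabled )
    {
        __io_apic_write(apic, reg, value);
        return;
    }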
Take the liberty of using the new "fresh" flag to suppress an unneeded
flush in update_intremap_entry_from_ioapic().
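In code terms (mirroring the final hunk of that function below), the
suppression amounts to the following; the rationale comment is mine:

    /*
     * Sketch: a freshly allocated IRTE was not visible to the IOMMU
     * before, so - presumably - no stale copy of it can be cached and
     * the invalidation can safely be skipped.
     */
    if ( iommu->enabled && !fresh )
    {
        spin_lock_irqsave(&iommu->lock, flags);
        amd_iommu_flush_intremap(iommu, req_id);
        spin_unlock_irqrestore(&iommu->lock, flags);
    }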
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Note that AMD's doc says Lowest Priority ("Arbitrated" by their naming)
mode is unavailable in x2APIC mode, but they've confirmed this to be a
mistake on their part.
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -35,12 +35,34 @@ struct irte_basic {
unsigned int :8;
};
+struct irte_full {
+ unsigned int remap_en:1;
+ unsigned int sup_io_pf:1;
+ unsigned int int_type:3;
+ unsigned int rq_eoi:1;
+ unsigned int dm:1;
+ unsigned int guest_mode:1; /* MBZ */
+ unsigned int dest_lo:24;
+ unsigned int :32;
+ unsigned int vector:8;
+ unsigned int :24;
+ unsigned int :24;
+ unsigned int dest_hi:8;
+};
+
+static enum {
+ irte_basic,
+ irte_full,
+ irte_unset,
+} irte_mode __read_mostly = irte_unset;
+
union irte_ptr {
void *raw;
struct irte_basic *basic;
+ struct irte_full *full;
};
-#define INTREMAP_TABLE_ORDER 1
+#define INTREMAP_TABLE_ORDER (irte_mode == irte_basic ? 1 : 3)
#define INTREMAP_LENGTH 0xB
#define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
@@ -127,7 +149,19 @@ static union irte_ptr get_intremap_entry
ASSERT(table.raw && (offset < INTREMAP_ENTRIES));
- table.basic += offset;
+ switch ( irte_mode )
+ {
+ case irte_basic:
+ table.basic += offset;
+ break;
+
+ case irte_full:
+ table.full += offset;
+ break;
+
+ default:
+ ASSERT_UNREACHABLE();
+ }
return table;
}
@@ -136,7 +170,21 @@ static void free_intremap_entry(unsigned
{
union irte_ptr entry = get_intremap_entry(seg, bdf, offset);
- *entry.basic = (struct irte_basic){};
+ switch ( irte_mode )
+ {
+ case irte_basic:
+ *entry.basic = (struct irte_basic){};
+ break;
+
+ case irte_full:
+ entry.full->remap_en = 0;
+ wmb();
+ *entry.full = (struct irte_full){};
+ break;
+
+ default:
+ ASSERT_UNREACHABLE();
+ }
__clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
}
@@ -154,8 +202,38 @@ static void update_intremap_entry(union
.dest = dest,
.vector = vector,
};
+ struct irte_full full = {
+ .remap_en = 1,
+ .sup_io_pf = 0,
+ .int_type = int_type,
+ .rq_eoi = 0,
+ .dm = dest_mode,
+ .dest_lo = dest,
+ .dest_hi = dest >> 24,
+ .vector = vector,
+ };
+
+ switch ( irte_mode )
+ {
+ __uint128_t ret;
+ union {
+ __uint128_t raw;
+ struct irte_full full;
+ } old;
+
+ case irte_basic:
+ *entry.basic = basic;
+ break;
+
+ case irte_full:
+ old.full = *entry.full;
+ ret = cmpxchg16b(entry.full, &old, &full);
+ ASSERT(ret == old.raw);
+ break;
- *entry.basic = basic;
+ default:
+ ASSERT_UNREACHABLE();
+ }
}
static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
@@ -169,6 +247,11 @@ static inline void set_rte_index(struct
rte->delivery_mode = offset >> 8;
}
+static inline unsigned int get_full_dest(const struct irte_full *entry)
+{
+ return entry->dest_lo | (entry->dest_hi << 24);
+}
+
static int update_intremap_entry_from_ioapic(
int bdf,
struct amd_iommu *iommu,
@@ -178,10 +261,11 @@ static int update_intremap_entry_from_io
{
unsigned long flags;
union irte_ptr entry;
- u8 delivery_mode, dest, vector, dest_mode;
+ unsigned int delivery_mode, dest, vector, dest_mode;
int req_id;
spinlock_t *lock;
unsigned int offset;
+ bool fresh = false;
req_id = get_intremap_requestor_id(iommu->seg, bdf);
lock = get_intremap_lock(iommu->seg, req_id);
@@ -189,7 +273,7 @@ static int update_intremap_entry_from_io
delivery_mode = rte->delivery_mode;
vector = rte->vector;
dest_mode = rte->dest_mode;
- dest = rte->dest.logical.logical_dest;
+ dest = x2apic_enabled ? rte->dest.dest32 : rte->dest.logical.logical_dest;
spin_lock_irqsave(lock, flags);
@@ -204,25 +288,40 @@ static int update_intremap_entry_from_io
return -ENOSPC;
}
*index = offset;
- lo_update = 1;
+ fresh = true;
}
entry = get_intremap_entry(iommu->seg, req_id, offset);
- if ( !lo_update )
+ if ( fresh )
+ /* nothing */;
+ else if ( !lo_update )
{
/*
* Low half of incoming RTE is already in remapped format,
* so need to recover vector and delivery mode from IRTE.
*/
ASSERT(get_rte_index(rte) == offset);
- vector = entry.basic->vector;
+ if ( irte_mode == irte_basic )
+ vector = entry.basic->vector;
+ else
+ vector = entry.full->vector;
+ /* The IntType fields match for both formats. */
delivery_mode = entry.basic->int_type;
}
+ else if ( x2apic_enabled )
+ {
+ /*
+ * High half of incoming RTE was read from the I/O APIC and hence may
+ * not hold the full destination, so need to recover full destination
+ * from IRTE.
+ */
+ dest = get_full_dest(entry.full);
+ }
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
- if ( iommu->enabled )
+ if ( iommu->enabled && !fresh )
{
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_flush_intremap(iommu, req_id);
@@ -246,6 +345,19 @@ int __init amd_iommu_setup_ioapic_remapp
spinlock_t *lock;
unsigned int offset;
+ for_each_amd_iommu ( iommu )
+ {
+ if ( irte_mode != irte_unset )
+ {
+ if ( iommu->ctrl.ga_en == (irte_mode == irte_basic) )
+ return -ENXIO;
+ }
+ else if ( iommu->ctrl.ga_en )
+ irte_mode = irte_full;
+ else
+ irte_mode = irte_basic;
+ }
+
/* Read ioapic entries and update interrupt remapping table accordingly */
for ( apic = 0; apic < nr_ioapics; apic++ )
{
@@ -280,6 +392,18 @@ int __init amd_iommu_setup_ioapic_remapp
dest_mode = rte.dest_mode;
dest = rte.dest.logical.logical_dest;
+ if ( iommu->ctrl.xt_en )
+ {
+ /*
+ * In x2APIC mode we have no way of discovering the high 24
+ * bits of the destination of an already enabled interrupt.
+ * We come here earlier than for xAPIC mode, so no interrupts
+ * should have been set up before.
+ */
+ AMD_IOMMU_DEBUG("Unmasked IO-APIC#%u entry %u in x2APIC mode\n",
+ IO_APIC_ID(apic), pin);
+ }
+
spin_lock_irqsave(lock, flags);
offset = alloc_intremap_entry(seg, req_id, 1);
BUG_ON(offset >= INTREMAP_ENTRIES);
@@ -314,7 +438,8 @@ void amd_iommu_ioapic_update_ire(
struct IO_APIC_route_entry new_rte = { 0 };
unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
unsigned int pin = (reg - 0x10) / 2;
- int saved_mask, seg, bdf, rc;
+ int seg, bdf, rc;
+ bool saved_mask, fresh = false;
struct amd_iommu *iommu;
unsigned int idx;
@@ -356,12 +481,22 @@ void amd_iommu_ioapic_update_ire(
*(((u32 *)&new_rte) + 1) = value;
}
- if ( new_rte.mask &&
- ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
+ if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
{
ASSERT(saved_mask);
- __io_apic_write(apic, reg, value);
- return;
+
+ /*
+ * There's nowhere except the IRTE to store a full 32-bit destination,
+ * so we may not bypass entry allocation and updating of the low RTE
+ * half in the (usual) case of the high RTE half getting written first.
+ */
+ if ( new_rte.mask && !x2apic_enabled )
+ {
+ __io_apic_write(apic, reg, value);
+ return;
+ }
+
+ fresh = true;
}
/* mask the interrupt while we change the intremap table */
@@ -390,8 +525,12 @@ void amd_iommu_ioapic_update_ire(
if ( reg == rte_lo )
return;
- /* unmask the interrupt after we have updated the intremap table */
- if ( !saved_mask )
+ /*
+ * Unmask the interrupt after we have updated the intremap table. Also
+ * write the low half if a fresh entry was allocated for a high half
+ * update in x2APIC mode.
+ */
+ if ( !saved_mask || (x2apic_enabled && fresh) )
{
old_rte.mask = saved_mask;
__io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
@@ -405,25 +544,35 @@ unsigned int amd_iommu_read_ioapic_from_
unsigned int offset;
unsigned int val = __io_apic_read(apic, reg);
unsigned int pin = (reg - 0x10) / 2;
+ uint16_t seg, req_id;
+ union irte_ptr entry;
idx = ioapic_id_to_index(IO_APIC_ID(apic));
if ( idx == MAX_IO_APICS )
return -EINVAL;
offset = ioapic_sbdf[idx].pin_2_idx[pin];
+ if ( offset >= INTREMAP_ENTRIES )
+ return val;
- if ( !(reg & 1) && offset < INTREMAP_ENTRIES )
+ seg = ioapic_sbdf[idx].seg;
+ req_id = get_intremap_requestor_id(seg, ioapic_sbdf[idx].bdf);
+ entry = get_intremap_entry(seg, req_id, offset);
+
+ if ( !(reg & 1) )
{
- u16 bdf = ioapic_sbdf[idx].bdf;
- u16 seg = ioapic_sbdf[idx].seg;
- u16 req_id = get_intremap_requestor_id(seg, bdf);
- union irte_ptr entry = get_intremap_entry(seg, req_id, offset);
ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
val &= ~(INTREMAP_ENTRIES - 1);
+ /* The IntType fields match for both formats. */
val |= MASK_INSR(entry.basic->int_type, IO_APIC_REDIR_DELIV_MODE_MASK);
- val |= MASK_INSR(entry.basic->vector, IO_APIC_REDIR_VECTOR_MASK);
+ if ( irte_mode == irte_basic )
+ val |= MASK_INSR(entry.basic->vector, IO_APIC_REDIR_VECTOR_MASK);
+ else
+ val |= MASK_INSR(entry.full->vector, IO_APIC_REDIR_VECTOR_MASK);
}
+ else if ( x2apic_enabled )
+ val = get_full_dest(entry.full);
return val;
}
@@ -435,9 +584,9 @@ static int update_intremap_entry_from_ms
unsigned long flags;
union irte_ptr entry;
u16 req_id, alias_id;
- u8 delivery_mode, dest, vector, dest_mode;
+ uint8_t delivery_mode, vector, dest_mode;
spinlock_t *lock;
- unsigned int offset, i;
+ unsigned int dest, offset, i;
req_id = get_dma_requestor_id(iommu->seg, bdf);
alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -458,7 +607,12 @@ static int update_intremap_entry_from_ms
dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
- dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
+
+ if ( x2apic_enabled )
+ dest = msg->dest32;
+ else
+ dest = MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK);
+
offset = *remap_index;
if ( offset >= INTREMAP_ENTRIES )
{
@@ -603,8 +757,18 @@ void amd_iommu_read_msi_from_ire(
}
msg->data &= ~(INTREMAP_ENTRIES - 1);
+ /* The IntType fields match for both formats. */
msg->data |= MASK_INSR(entry.basic->int_type, MSI_DATA_DELIVERY_MODE_MASK);
- msg->data |= MASK_INSR(entry.basic->vector, MSI_DATA_VECTOR_MASK);
+ if ( irte_mode == irte_basic )
+ {
+ msg->data |= MASK_INSR(entry.basic->vector, MSI_DATA_VECTOR_MASK);
+ msg->dest32 = entry.basic->dest;
+ }
+ else
+ {
+ msg->data |= MASK_INSR(entry.full->vector, MSI_DATA_VECTOR_MASK);
+ msg->dest32 = get_full_dest(entry.full);
+ }
}
int __init amd_iommu_free_intremap_table(
@@ -667,18 +831,33 @@ int __init amd_setup_hpet_msi(struct msi
return rc;
}
-static void dump_intremap_table(const u32 *table)
+static void dump_intremap_table(const void *table)
{
- u32 count;
+ unsigned int count;
+ union {
+ const void *raw;
+ const uint32_t *basic;
+ const uint64_t (*full)[2];
+ } tbl = { .raw = table };
- if ( !table )
+ if ( !table || irte_mode == irte_unset )
return;
for ( count = 0; count < INTREMAP_ENTRIES; count++ )
{
- if ( !table[count] )
- continue;
- printk(" IRTE[%03x] %08x\n", count, table[count]);
+ if ( irte_mode == irte_basic )
+ {
+ if ( !tbl.basic[count] )
+ continue;
+ printk(" IRTE[%03x] %08x\n", count, tbl.basic[count]);
+ }
+ else
+ {
+ if ( !tbl.full[count][0] && !tbl.full[count][1] )
+ continue;
+ printk(" IRTE[%03x] %016lx_%016lx\n",
+ count, tbl.full[count][1], tbl.full[count][0]);
+ }
}
}