On 11/11/11 16:00, Jan Beulich wrote:
Following the prevention of vector sharing for MSIs, this change
enforces the same within IO-APICs: Pin based interrupts use the IO-APIC
as their identifying device under the AMD IOMMU (and just like for
MSIs, only the identifying device is used to remap interrupts here,
with no regard to an interrupt's destination).
Additionally, LAPIC initiated EOIs (for level triggered interrupts) too
use only the vector for identifying which interrupts to end. While this
generally causes no significant problem (at worst an interrupt would be
re-raised without a new interrupt event actually having occurred)
At worst, hardware asserts a line interrupt, deasserts it later, and
an EOI broadcast gets rid of any record that the IRQ was ever
raised. While I would classify this as buggy behavior, I believe I
have seen some hardware doing this when investigating the line level
IRQ migration bug, as clearing the IRR did not immediately cause
another interrupt to be generated.
, it
still seems better to avoid the situation.
For this second aspect, a distinction is being made between the
traditional and the directed-EOI cases: In the former, vectors should
not be shared throughout all IO-APICs in the system, while in the
latter case only individual IO-APICs need to be contrained (or, if the
firmware indicates so, sub- groups of them having the same GSI appear
at multiple pins).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Provisional nack because it is my understanding that under all
circumstances, you must maintain a vector exclusivity map across all
IO-APICs because of the broadcast problem. Or have I made a mistake
in my reasoning?
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -395,6 +395,11 @@ static vmask_t *irq_get_used_vector_mask
}
}
}
+ else if ( IO_APIC_IRQ(irq) &&
+ opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_NONE )
+ {
+ ret = io_apic_get_used_vector_map(irq);
+ }
return ret;
}
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -72,6 +72,33 @@ int __read_mostly nr_ioapics;
#define io_apic_eoi_vector(apic, vector) io_apic_eoi((apic), (vector), -1)
#define io_apic_eoi_pin(apic, pin) io_apic_eoi((apic), -1, (pin))
+static int apic_pin_2_gsi_irq(int apic, int pin);
+
+static vmask_t *__read_mostly vector_map[MAX_IO_APICS];
+
+static void share_vector_maps(unsigned int src, unsigned int dst)
+{
+ unsigned int pin;
+
+ if (vector_map[src] == vector_map[dst])
+ return;
+
+ bitmap_or(vector_map[src]->_bits, vector_map[src]->_bits,
+ vector_map[dst]->_bits, NR_VECTORS);
+
+ for (pin = 0; pin < nr_ioapic_entries[dst]; ++pin) {
+ int irq = apic_pin_2_gsi_irq(dst, pin);
+ struct irq_desc *desc;
+
+ if (irq < 0)
+ continue;
+ desc = irq_to_desc(irq);
+ if (desc->arch.used_vectors == vector_map[dst])
+ desc->arch.used_vectors = vector_map[src];
+ }
+
+ vector_map[dst] = vector_map[src];
+}
/*
* This is performance-critical, we want to do it O(1)
@@ -113,6 +140,7 @@ static void add_pin_to_irq(unsigned int
}
entry->apic = apic;
entry->pin = pin;
+ share_vector_maps(irq_2_pin[irq].apic, apic);
}
/*
@@ -128,6 +156,7 @@ static void __init replace_pin_at_irq(un
if (entry->apic == oldapic && entry->pin == oldpin) {
entry->apic = newapic;
entry->pin = newpin;
+ share_vector_maps(oldapic, newapic);
}
if (!entry->next)
break;
@@ -135,6 +164,16 @@ static void __init replace_pin_at_irq(un
}
}
+vmask_t *io_apic_get_used_vector_map(unsigned int irq)
+{
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ if (entry->pin == -1)
+ return NULL;
+
+ return vector_map[entry->apic];
+}
+
struct IO_APIC_route_entry **alloc_ioapic_entries(void)
{
int apic;
@@ -1244,6 +1283,18 @@ static void __init enable_IO_APIC(void)
for (i = irq_2_pin_free_entry = nr_irqs_gsi; i < PIN_MAP_SIZE; i++)
irq_2_pin[i].next = i + 1;
+ if (directed_eoi_enabled) {
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ vector_map[apic] = xzalloc(vmask_t);
+ BUG_ON(!vector_map[apic]);
+ }
+ } else {
+ vector_map[0] = xzalloc(vmask_t);
+ BUG_ON(!vector_map[0]);
+ for (apic = 1; apic < nr_ioapics; apic++)
+ vector_map[apic] = vector_map[0];
+ }
+
for(apic = 0; apic < nr_ioapics; apic++) {
int pin;
/* See if any of the pins is in ExtINT mode */
@@ -2345,13 +2396,12 @@ int ioapic_guest_write(unsigned long phy
}
if ( desc->arch.vector <= 0 || desc->arch.vector > LAST_DYNAMIC_VECTOR ) {
+ add_pin_to_irq(irq, apic, pin);
vector = assign_irq_vector(irq);
if ( vector < 0 )
return vector;
printk(XENLOG_INFO "allocated vector %02x for irq %d\n", vector, irq);
-
- add_pin_to_irq(irq, apic, pin);
}
spin_lock(&dom0->event_lock);
ret = map_domain_pirq(dom0, pirq, irq,
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -116,6 +116,7 @@ int i8259A_resume(void);
void setup_IO_APIC(void);
void disable_IO_APIC(void);
void setup_ioapic_dest(void);
+vmask_t *io_apic_get_used_vector_map(unsigned int irq);
extern unsigned int io_apic_irqs;
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
|