VT-d: bind IRQs to CPUs local to the node the IOMMU is on This extends create_irq() to take a node parameter, allowing the resulting IRQ to have its destination set to a CPU on that node right away, which is more natural than having to adjust it afterwards (and, e.g., getting a new IRQ vector assigned even though a fresh one had just been obtained). All other callers of create_irq() pass NUMA_NO_NODE for the time being. Signed-off-by: Jan Beulich --- 2011-12-12.orig/xen/arch/x86/hpet.c 2011-11-11 09:40:55.000000000 +0100 +++ 2011-12-12/xen/arch/x86/hpet.c 2011-12-06 16:36:24.000000000 +0100 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -334,7 +335,7 @@ static int __init hpet_assign_irq(unsign { int irq; - if ( (irq = create_irq()) < 0 ) + if ( (irq = create_irq(NUMA_NO_NODE)) < 0 ) return irq; if ( hpet_setup_msi_irq(irq, hpet_events + idx) ) --- 2011-12-12.orig/xen/arch/x86/io_apic.c 2011-11-22 10:57:09.000000000 +0100 +++ 2011-12-12/xen/arch/x86/io_apic.c 2011-12-12 15:24:10.000000000 +0100 @@ -995,7 +995,7 @@ static void __init setup_IO_APIC_irqs(vo continue; if (IO_APIC_IRQ(irq)) { - vector = assign_irq_vector(irq); + vector = assign_irq_vector(irq, NULL); BUG_ON(vector < 0); entry.vector = vector; ioapic_register_intr(irq, IOAPIC_AUTO); @@ -2188,7 +2188,7 @@ int io_apic_set_pci_routing (int ioapic, if (!platform_legacy_irq(irq)) add_pin_to_irq(irq, ioapic, pin); - vector = assign_irq_vector(irq); + vector = assign_irq_vector(irq, NULL); if (vector < 0) return vector; entry.vector = vector; @@ -2340,7 +2340,7 @@ int ioapic_guest_write(unsigned long phy if ( desc->arch.vector <= 0 || desc->arch.vector > LAST_DYNAMIC_VECTOR ) { add_pin_to_irq(irq, apic, pin); - vector = assign_irq_vector(irq); + vector = assign_irq_vector(irq, NULL); if ( vector < 0 ) return vector; --- 2011-12-12.orig/xen/arch/x86/irq.c 2011-12-12 09:01:34.000000000 +0100 +++ 2011-12-12/xen/arch/x86/irq.c 2011-12-12 15:24:15.000000000 +0100 @@ -151,7 +151,7 @@ int __init 
bind_irq_vector(int irq, int /* * Dynamic irq allocate and deallocation for MSI */ -int create_irq(void) +int create_irq(int node) { int irq, ret; struct irq_desc *desc; @@ -168,7 +168,17 @@ int create_irq(void) ret = init_one_irq_desc(desc); if (!ret) - ret = assign_irq_vector(irq); + { + cpumask_t *mask = NULL; + + if (node != NUMA_NO_NODE && node >= 0) + { + mask = &node_to_cpumask(node); + if (cpumask_empty(mask)) + mask = NULL; + } + ret = assign_irq_vector(irq, mask); + } if (ret < 0) { desc->arch.used = IRQ_UNUSED; @@ -514,7 +524,7 @@ next: return err; } -int assign_irq_vector(int irq) +int assign_irq_vector(int irq, const cpumask_t *mask) { int ret; unsigned long flags; @@ -523,7 +533,7 @@ int assign_irq_vector(int irq) BUG_ON(irq >= nr_irqs || irq <0); spin_lock_irqsave(&vector_lock, flags); - ret = __assign_irq_vector(irq, desc, TARGET_CPUS); + ret = __assign_irq_vector(irq, desc, mask ?: TARGET_CPUS); if (!ret) { ret = desc->arch.vector; cpumask_copy(desc->affinity, desc->arch.cpu_mask); --- 2011-12-12.orig/xen/arch/x86/physdev.c 2011-12-12 09:01:34.000000000 +0100 +++ 2011-12-12/xen/arch/x86/physdev.c 2011-12-06 16:28:10.000000000 +0100 @@ -132,7 +132,7 @@ int physdev_map_pirq(domid_t domid, int case MAP_PIRQ_TYPE_MSI: irq = *index; if ( irq == -1 ) - irq = create_irq(); + irq = create_irq(NUMA_NO_NODE); if ( irq < 0 || irq >= nr_irqs ) { --- 2011-12-12.orig/xen/drivers/passthrough/amd/iommu_init.c 2011-11-29 09:19:05.000000000 +0100 +++ 2011-12-12/xen/drivers/passthrough/amd/iommu_init.c 2011-12-06 16:27:49.000000000 +0100 @@ -551,7 +551,7 @@ static int __init set_iommu_interrupt_ha { int irq, ret; - irq = create_irq(); + irq = create_irq(NUMA_NO_NODE); if ( irq <= 0 ) { dprintk(XENLOG_ERR, "IOMMU: no irqs\n"); --- 2011-12-12.orig/xen/drivers/passthrough/vtd/iommu.c 2011-11-22 10:57:10.000000000 +0100 +++ 2011-12-12/xen/drivers/passthrough/vtd/iommu.c 2011-12-06 16:35:42.000000000 +0100 @@ -1096,11 +1096,13 @@ static hw_irq_controller dma_msi_type = 
.set_affinity = dma_msi_set_affinity, }; -static int __init iommu_set_interrupt(struct iommu *iommu) +static int __init iommu_set_interrupt(struct acpi_drhd_unit *drhd) { int irq, ret; + struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd); - irq = create_irq(); + irq = create_irq(rhsa ? pxm_to_node(rhsa->proximity_domain) + : NUMA_NO_NODE); if ( irq <= 0 ) { dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no irq available!\n"); @@ -1109,9 +1111,9 @@ static int __init iommu_set_interrupt(st irq_desc[irq].handler = &dma_msi_type; #ifdef CONFIG_X86 - ret = request_irq(irq, iommu_page_fault, 0, "dmar", iommu); + ret = request_irq(irq, iommu_page_fault, 0, "dmar", drhd->iommu); #else - ret = request_irq_vector(irq, iommu_page_fault, 0, "dmar", iommu); + ret = request_irq_vector(irq, iommu_page_fault, 0, "dmar", drhd->iommu); #endif if ( ret ) { @@ -2133,7 +2135,7 @@ int __init intel_vtd_setup(void) if ( !vtd_ept_page_compatible(iommu) ) iommu_hap_pt_share = 0; - ret = iommu_set_interrupt(iommu); + ret = iommu_set_interrupt(drhd); if ( ret < 0 ) { dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n"); --- 2011-12-12.orig/xen/include/asm-x86/io_apic.h 2011-10-04 08:22:17.000000000 +0200 +++ 2011-12-12/xen/include/asm-x86/io_apic.h 2011-12-06 16:24:22.000000000 +0100 @@ -213,9 +213,6 @@ static inline void ioapic_suspend(void) static inline void ioapic_resume(void) {} #endif -extern int assign_irq_vector(int irq); -extern int free_irq_vector(int vector); - unsigned highest_gsi(void); int ioapic_guest_read( unsigned long physbase, unsigned int reg, u32 *pval); --- 2011-12-12.orig/xen/include/asm-x86/irq.h 2011-11-22 10:57:10.000000000 +0100 +++ 2011-12-12/xen/include/asm-x86/irq.h 2011-12-06 16:25:22.000000000 +0100 @@ -155,8 +155,9 @@ int init_irq_data(void); void clear_irq_vector(int irq); int irq_to_vector(int irq); -int create_irq(void); +int create_irq(int node); void destroy_irq(unsigned int irq); +int assign_irq_vector(int irq, const cpumask_t *); extern void 
irq_complete_move(struct irq_desc *);