[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[XenPPC] [PATCH] SMP and IPI support



Enable SMP and IPI support, including remote function invocation.

There are a number of subtle issues fixed in this patch.  I believe it
is a candidate for merging.

I have tested this extensively on JS21 and model 884221X JS20 blades,
and to a degree on Maple.  I would appreciate further testing on Maple
and model 884241X JS20 blades.

Signed-off-by: Amos Waterland <apw@xxxxxxxxxx>

---

 changeset   : 25c51961bd3f
 machines    : kpblade1 cso91 cso103
 pass        : 162
 fail        : 0
 transient   : 1
 total       : 163
 reliability : 100%

---

 arch/powerpc/external.c                        |   32 +++++
 arch/powerpc/mpic.c                            |    9 -
 arch/powerpc/mpic_init.c                       |   48 ++++++++
 arch/powerpc/setup.c                           |   49 +++++----
 arch/powerpc/smp.c                             |  135 +++++++++++++++++++++++--
 include/asm-powerpc/mach-default/irq_vectors.h |   22 ----
 include/asm-powerpc/smp.h                      |   17 +++
 7 files changed, 256 insertions(+), 56 deletions(-)

diff -r 3dfeb3e4a03f xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c       Fri Oct 13 11:00:32 2006 -0400
+++ b/xen/arch/powerpc/external.c       Thu Oct 26 16:49:44 2006 -0400
@@ -82,7 +82,16 @@ void do_external(struct cpu_user_regs *r
 
     vec = xen_mpic_get_irq(regs);
 
-    if (vec != -1) {
+    if (vector_is_ipi(vec)) {
+       /* do_IRQ is fundamentally broken for reliable IPI delivery.  */
+       irq_desc_t *desc = &irq_desc[vec];
+       regs->entry_vector = vec;
+       spin_lock(&desc->lock);
+       desc->handler->ack(vec);
+       desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs);
+       desc->handler->end(vec);
+       spin_unlock(&desc->lock);
+    } else if (vec != -1) {
         DBG("EE:0x%lx isrc: %d\n", regs->msr, vec);
         regs->entry_vector = vec;
         do_IRQ(regs);
@@ -253,3 +262,24 @@ int ioapic_guest_write(unsigned long phy
     BUG_ON(val != val);
     return 0;
 }
+
+void send_IPI_mask(cpumask_t mask, int vector)
+{
+    unsigned int cpus;
+    int const bits = 8 * sizeof(cpus);
+
+    switch(vector) {
+    case CALL_FUNCTION_VECTOR:
+    case EVENT_CHECK_VECTOR:
+       break;
+    default:
+       BUG();
+       return;
+    }
+
+    BUG_ON(NR_CPUS > bits);
+    BUG_ON(fls(mask.bits[0]) > bits);
+
+    cpus = mask.bits[0];
+    mpic_send_ipi(vector, cpus);
+}
diff -r 3dfeb3e4a03f xen/arch/powerpc/mpic.c
--- a/xen/arch/powerpc/mpic.c   Fri Oct 13 11:00:32 2006 -0400
+++ b/xen/arch/powerpc/mpic.c   Mon Oct 23 22:57:51 2006 -0400
@@ -27,10 +27,6 @@
 
 
 #define alloc_bootmem(x) xmalloc_bytes(x)
-#define request_irq(irq, handler, f, devname, dev_id) \
-    panic("IPI requested: %d: %p: %s: %p\n", irq, handler, devname, dev_id)
-
-typedef int irqreturn_t;
 
 #define IRQ_NONE       (0)
 #define IRQ_HANDLED    (1)
@@ -96,11 +92,6 @@ typedef int irqreturn_t;
 #endif
 #include <asm/mpic.h>
 #include <asm/smp.h>
-
-static inline void smp_message_recv(int msg, struct pt_regs *regs)
-{
-    return;
-}
 
 #ifdef DEBUG
 #define DBG(fmt...) printk(fmt)
diff -r 3dfeb3e4a03f xen/arch/powerpc/mpic_init.c
--- a/xen/arch/powerpc/mpic_init.c      Fri Oct 13 11:00:32 2006 -0400
+++ b/xen/arch/powerpc/mpic_init.c      Thu Oct 26 16:48:19 2006 -0400
@@ -22,6 +22,7 @@
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <asm/mpic.h>
+#include <errno.h>
 #include "mpic_init.h"
 #include "oftree.h"
 #include "of-devtree.h"
@@ -358,6 +359,42 @@ static struct hw_interrupt_type *share_m
 
 #endif
 
+static unsigned int mpic_startup_ipi(unsigned int irq)
+{
+    mpic->hc_ipi.enable(irq);
+    return 0;
+}
+
+int request_irq(unsigned int irq,
+               irqreturn_t (*handler)(int, void *, struct cpu_user_regs *),
+               unsigned long irqflags, const char * devname, void *dev_id)
+{
+    int retval;
+    struct irqaction *action;
+    void (*func)(int, void *, struct cpu_user_regs *);
+
+    action = xmalloc(struct irqaction);
+    if (!action) {
+       BUG();
+       return -ENOMEM;
+    }
+
+    /* Xen's handler prototype is slightly different than Linux's.  */
+    func = (void (*)(int, void *, struct cpu_user_regs *))handler;
+
+    action->handler = func;
+    action->name = devname;
+    action->dev_id = dev_id;
+
+    retval = setup_irq(irq, action);
+    if (retval) {
+       BUG();
+       xfree(action);
+    }
+
+    return retval;
+}
+
 struct hw_interrupt_type *xen_mpic_init(struct hw_interrupt_type *xen_irq)
 {
     unsigned int isu_size;
@@ -397,6 +434,11 @@ struct hw_interrupt_type *xen_mpic_init(
     hit = share_mpic(&mpic->hc_irq, xen_irq);
 
     printk("%s: success\n", __func__);
+
+    mpic->hc_ipi.ack = xen_irq->ack;
+    mpic->hc_ipi.startup = mpic_startup_ipi;
+    mpic_request_ipis();
+
     return hit;
 }
 
@@ -406,3 +448,9 @@ int xen_mpic_get_irq(struct cpu_user_reg
 
        return mpic_get_one_irq(mpic, regs);
 }
+
+int vector_is_ipi(int vector)
+{
+    BUG_ON(!mpic);
+    return (mpic->ipi_offset <= vector) && (vector < mpic->ipi_offset + 4);
+}
diff -r 3dfeb3e4a03f xen/arch/powerpc/setup.c
--- a/xen/arch/powerpc/setup.c  Fri Oct 13 11:00:32 2006 -0400
+++ b/xen/arch/powerpc/setup.c  Thu Oct 26 18:29:38 2006 -0400
@@ -37,6 +37,7 @@
 #include <xen/keyhandler.h>
 #include <acm/acm_hooks.h>
 #include <public/version.h>
+#include <asm/mpic.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
 #include <asm/cache.h>
@@ -88,6 +89,8 @@ struct ns16550_defaults ns16550;
 
 extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
 
+static struct domain *idle_domain;
+
 volatile struct processor_area * volatile global_cpu_table[NR_CPUS];
 
 int is_kernel_text(unsigned long addr)
@@ -159,8 +162,6 @@ static void percpu_free_unused_areas(voi
 
 static void __init start_of_day(void)
 {
-    struct domain *idle_domain;
-
     init_IRQ();
 
     scheduler_init();
@@ -175,23 +176,6 @@ static void __init start_of_day(void)
     /* for some reason we need to set our own bit in the thread map */
     cpu_set(0, cpu_sibling_map[0]);
 
-    percpu_free_unused_areas();
-
-    {
-        /* FIXME: Xen assumes that an online CPU is a schedualable
-         * CPU, but we just are not there yet. Remove this fragment when
-         * scheduling processors actually works. */
-        int cpuid;
-
-        printk("WARNING!: Taking all secondary CPUs offline\n");
-
-        for_each_online_cpu(cpuid) {
-            if (cpuid == 0)
-                continue;
-            cpu_clear(cpuid, cpu_online_map);
-        }
-    }
-
     initialize_keytable();
     /* Register another key that will allow for the the Harware Probe
      * to be contacted, this works with RiscWatch probes and should
@@ -201,7 +185,6 @@ static void __init start_of_day(void)
     timer_init();
     serial_init_postirq();
     do_initcalls();
-    schedulers_start();
 }
 
 void startup_cpu_idle_loop(void)
@@ -263,9 +246,22 @@ static int kick_secondary_cpus(int maxcp
 /* This is the first C code that secondary processors invoke.  */
 int secondary_cpu_init(int cpuid, unsigned long r4)
 {
+    struct vcpu *vcpu;
+
     cpu_initialize(cpuid);
     smp_generic_take_timebase();
+
+    /* If we are online, we must be able to ACK IPIs.  */
+    mpic_setup_this_cpu();
     cpu_set(cpuid, cpu_online_map);
+
+    vcpu = alloc_vcpu(idle_domain, cpuid, cpuid);
+    BUG_ON(vcpu == NULL);
+
+    set_current(idle_domain->vcpu[cpuid]);
+    idle_vcpu[cpuid] = current;
+    startup_cpu_idle_loop();
+
     while(1);
 }
 
@@ -340,6 +336,10 @@ static void __init __start_xen(multiboot
         debugger_trap_immediate();
 #endif
 
+    start_of_day();
+
+    mpic_setup_this_cpu();
+
     /* Deal with secondary processors.  */
     if (opt_nosmp || ofd_boot_cpu == -1) {
         printk("nosmp: leaving secondary processors spinning forever\n");
@@ -348,7 +348,11 @@ static void __init __start_xen(multiboot
         kick_secondary_cpus(max_cpus);
     }
 
-    start_of_day();
+    /* Secondary processors must be online before we call this.  */
+    schedulers_start();
+
+    /* This cannot be called before secondary cpus are marked online.  */
+    percpu_free_unused_areas();
 
     /* Create initial domain 0. */
     dom0 = domain_create(0);
@@ -406,6 +410,9 @@ static void __init __start_xen(multiboot
     console_end_sync();
 
     domain_unpause_by_systemcontroller(dom0);
+#ifdef DEBUG_IPI
+    ipi_torture_test();
+#endif
     startup_cpu_idle_loop();
 }
 
diff -r 3dfeb3e4a03f xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.c    Fri Oct 13 11:00:32 2006 -0400
+++ b/xen/arch/powerpc/smp.c    Thu Oct 26 19:19:18 2006 -0400
@@ -22,6 +22,8 @@
 #include <xen/smp.h>
 #include <asm/flushtlb.h>
 #include <asm/debugger.h>
+#include <asm/mpic.h>
+#include <asm/mach-default/irq_vectors.h>
 
 int smp_num_siblings = 1;
 int smp_num_cpus = 1;
@@ -50,7 +52,7 @@ void smp_send_event_check_mask(cpumask_t
 {
     cpu_clear(smp_processor_id(), mask);
     if (!cpus_empty(mask))
-        unimplemented();
+        send_IPI_mask(mask, EVENT_CHECK_VECTOR);
 }
 
 
@@ -65,8 +67,20 @@ int smp_call_function(void (*func) (void
 
 void smp_send_stop(void)
 {
-    unimplemented();
-}
+    BUG();
+}
+
+struct call_data_struct {
+    void (*func) (void *info);
+    void *info;
+    int wait;
+    atomic_t started;
+    atomic_t finished;
+    cpumask_t selected;
+};
+
+static DEFINE_SPINLOCK(call_lock);
+static struct call_data_struct call_data;
 
 int on_selected_cpus(
     cpumask_t selected,
@@ -75,6 +89,115 @@ int on_selected_cpus(
     int retry,
     int wait)
 {
-    unimplemented();
-    return 0;
-}
+    int t, retval = 0, nr_cpus = cpus_weight(selected);
+
+    spin_lock(&call_lock);
+
+    call_data.func = func;
+    call_data.info = info;
+    call_data.wait = wait;
+    call_data.wait = 1;  /* Until we get RCU around call_data.  */
+    atomic_set(&call_data.started, 0);
+    atomic_set(&call_data.finished, 0);
+    mb();
+
+    send_IPI_mask(selected, CALL_FUNCTION_VECTOR);
+
+    /* We always wait for an initiation ACK from remote CPU.  */
+    for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) {
+       if (t && t % timebase_freq == 0) {
+           printk("IPI start stall: %d ACKS to %d SYNS\n", 
+                  atomic_read(&call_data.started), nr_cpus);
+       }
+    }
+
+    /* If told to, we wait for a completion ACK from remote CPU.  */
+    if (wait) {
+       for (t = 0; atomic_read(&call_data.finished) != nr_cpus; t++) {
+           if (t && t % timebase_freq == 0) {
+               printk("IPI finish stall: %d ACKS to %d SYNS\n", 
+                      atomic_read(&call_data.finished), nr_cpus);
+           }
+       }
+    }
+
+    spin_unlock(&call_lock);
+
+    return retval;
+}
+
+void smp_call_function_interrupt(struct cpu_user_regs *regs)
+{
+
+    void (*func)(void *info) = call_data.func;
+    void *info = call_data.info;
+    int wait = call_data.wait;
+
+    atomic_inc(&call_data.started);
+    mb();
+    (*func)(info);
+    mb();
+
+    if (wait)
+       atomic_inc(&call_data.finished);
+
+    return;
+}
+
+void smp_event_check_interrupt(void)
+{
+    /* We are knocked out of NAP state at least.  */
+    return;
+}
+
+void smp_message_recv(int msg, struct cpu_user_regs *regs)
+{
+    switch(msg) {
+    case CALL_FUNCTION_VECTOR:
+       smp_call_function_interrupt(regs);
+       break;
+    case EVENT_CHECK_VECTOR:
+        smp_event_check_interrupt();
+       break;
+    default:
+       BUG();
+       break;
+    }
+}
+
+#ifdef DEBUG_IPI
+static void debug_ipi_ack(void *info)
+{
+    return;
+}
+
+void ipi_torture_test(void)
+{
+    int cpu;
+    unsigned long before, after, delta;
+    unsigned long min = ~0, max = 0, mean = 0, sum = 0, tick = 0;
+    cpumask_t mask;
+
+    cpus_clear(mask);
+
+    while (tick < 1000000) {
+       for_each_online_cpu(cpu) {
+           cpu_set(cpu, mask);
+           before = mftb();
+           on_selected_cpus(mask, debug_ipi_ack, NULL, 1, 1);
+           after = mftb();
+           cpus_clear(mask);
+
+           delta = after - before;
+           if (delta > max) max = delta;
+           if (delta < min) min = delta;
+           sum += delta;
+           tick++;
+       }
+    }
+
+    mean = sum / tick;
+
+    printk("IPI tb ticks: min = %ld max = %ld mean = %ld\n", min, max, mean);
+}
+#endif
diff -r 3dfeb3e4a03f xen/include/asm-powerpc/mach-default/irq_vectors.h
--- a/xen/include/asm-powerpc/mach-default/irq_vectors.h        Fri Oct 13 11:00:32 2006 -0400
+++ b/xen/include/asm-powerpc/mach-default/irq_vectors.h        Wed Oct 25 23:44:46 2006 -0400
@@ -37,26 +37,10 @@
 #define FAST_TRAP -1 /* 0x80 */
 #define FIRST_SYSTEM_VECTOR    -1
 
+#define CALL_FUNCTION_VECTOR   0x0
+#define EVENT_CHECK_VECTOR     0x1
+
 #if 0
-
-/*
- * Vectors 0-16 in some cases are used for ISA interrupts.
- */
-
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- *  some of the following vectors are 'rare', they are merged
- *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- *  TLB, reschedule and local APIC vectors are performance-critical.
- *
- *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
- */
-#define SPURIOUS_APIC_VECTOR   0xff
-#define ERROR_APIC_VECTOR      0xfe
-#define INVALIDATE_TLB_VECTOR  0xfd
-#define EVENT_CHECK_VECTOR     0xfc
-#define CALL_FUNCTION_VECTOR   0xfb
 
 #define THERMAL_APIC_VECTOR    0xf0
 /*
diff -r 3dfeb3e4a03f xen/include/asm-powerpc/smp.h
--- a/xen/include/asm-powerpc/smp.h     Fri Oct 13 11:00:32 2006 -0400
+++ b/xen/include/asm-powerpc/smp.h     Thu Oct 26 20:56:44 2006 -0400
@@ -35,4 +35,21 @@ extern cpumask_t cpu_core_map[];
 extern cpumask_t cpu_core_map[];
 extern void __devinit smp_generic_take_timebase(void);
 extern void __devinit smp_generic_give_timebase(void);
+
+#define SA_INTERRUPT   0x20000000u
+typedef int irqreturn_t;
+extern int request_irq(unsigned int irq,
+    irqreturn_t (*handler)(int, void *, struct cpu_user_regs *),
+    unsigned long irqflags, const char * devname, void *dev_id);
+void smp_message_recv(int msg, struct cpu_user_regs *regs);
+void smp_call_function_interrupt(struct cpu_user_regs *regs);
+void smp_event_check_interrupt(void);
+void send_IPI_mask(cpumask_t mask, int vector);
+int vector_is_ipi(int vector);
+
+#undef DEBUG_IPI
+#ifdef DEBUG_IPI
+void ipi_torture_test(void);
 #endif
+
+#endif

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.