[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[XenPPC] [PATCH] Enable smp_processor_id and present/online cpu maps



Handshake with secondary processors, move all of them out of their OF
spinloop even if there are more of them than NR_CPUS, give them a stack,
make them run C code to initialize their HIDs, initialize their r13
registers to a per-processor pointer, enable smp_processor_id and and
properly set the maps of present and online CPUs.

Note that this patch needs the "Linker script causes SMP memory corruption" 
patch to work properly on large SMP systems.  It supercedes
"Init secondary processors up to assigning r13 (respin)".  Several
subtle MP issues are fixed in this patch versus previous versions.

Tested on 2-way Maple, 2-way JS20, 4-way JS21, and 16-way systemsim-gpul, 
with NR_CPUS from 1 to 64.

Signed-off-by: Amos Waterland <apw@xxxxxxxxxx>

---

 arch/powerpc/boot_of.c                   |   67 +++++++++++++++++++++++++++++--
 arch/powerpc/powerpc64/exceptions.S      |   36 ++++++++++++++++
 arch/powerpc/powerpc64/ppc970.c          |   22 +++++-----
 arch/powerpc/setup.c                     |   56 +++++++++++++++++++++++++
 include/asm-powerpc/config.h             |    2 
 include/asm-powerpc/current.h            |    2 
 include/asm-powerpc/powerpc64/procarea.h |    1 
 include/asm-powerpc/processor.h          |    2 
 include/asm-powerpc/smp.h                |    4 -
 9 files changed, 172 insertions(+), 20 deletions(-)

diff -r 539a1e666982 xen/arch/powerpc/boot_of.c
--- a/xen/arch/powerpc/boot_of.c        Fri Aug 18 14:07:50 2006 -0400
+++ b/xen/arch/powerpc/boot_of.c        Fri Aug 18 21:58:14 2006 -0400
@@ -31,6 +31,9 @@
 #include <asm/io.h>
 #include "exceptions.h"
 #include "of-devtree.h"
+
+/* Secondary processors use this for handshaking with main processor.  */
+volatile unsigned int __spin_ack;
 
 static ulong of_vec;
 static ulong of_msr;
@@ -955,7 +958,7 @@ static int __init boot_of_cpus(void)
 static int __init boot_of_cpus(void)
 {
     int cpus;
-    int cpu;
+    int cpu, bootcpu, logical;
     int result;
     u32 cpu_clock[2];
 
@@ -980,10 +983,65 @@ static int __init boot_of_cpus(void)
     cpu_khz /= 1000;
     of_printf("OF: clock-frequency = %ld KHz\n", cpu_khz);
 
-    /* FIXME: should not depend on the boot CPU bring the first child */
+    /* Look up which CPU we are running on right now.  */
+    result = of_getprop(bof_chosen, "cpu", &bootcpu, sizeof (bootcpu));
+    if (result == OF_FAILURE)
+        of_panic("Failed to look up boot cpu\n");
+
     cpu = of_getpeer(cpu);
-    while (cpu > 0) {
-        of_start_cpu(cpu, (ulong)spin_start, 0);
+
+    /* We want a continuous logical cpu number space.  */
+    cpu_set(0, cpu_present_map);
+    cpu_set(0, cpu_online_map);
+
+    /* Spin up all CPUS, even if there are more than NR_CPUS, because
+     * Open Firmware has them spinning on cache lines which will
+     * eventually be scrubbed, which could lead to random CPU activation.
+     */
+    for (logical = 1; cpu > 0; logical++) {
+        unsigned int cpuid, ping, pong;
+        unsigned long now, then, timeout;
+
+        if (cpu == bootcpu) {
+            of_printf("skipping boot cpu!\n");
+            continue;
+        }
+
+        result = of_getprop(cpu, "reg", &cpuid, sizeof(cpuid));
+        if (result == OF_FAILURE)
+            of_panic("cpuid lookup failed\n");
+
+        of_printf("spinning up secondary processor #%d: ", logical);
+
+        __spin_ack = ~0x0;
+        ping = __spin_ack;
+        pong = __spin_ack;
+        of_printf("ping = 0x%x: ", ping);
+
+        mb();
+        result = of_start_cpu(cpu, (ulong)spin_start, logical);
+        if (result == OF_FAILURE)
+            of_panic("start cpu failed\n");
+
+        /* We will give the secondary processor five seconds to reply.  */
+        then = mftb();
+        timeout = then + (5 * timebase_freq);
+
+        do {
+            now = mftb();
+            if (now >= timeout) {
+                of_printf("BROKEN: ");
+                break;
+            }
+
+            mb();
+            pong = __spin_ack;
+        } while (pong == ping);
+        of_printf("pong = 0x%x\n", pong);
+
+        if (pong != ping)
+            cpu_set(logical, cpu_present_map);
+
         cpu = of_getpeer(cpu);
     }
     return 1;
@@ -1031,6 +1089,7 @@ multiboot_info_t __init *boot_of_init(
     boot_of_rtas();
 
     /* end of OF */
+    of_printf("Quiescing Open Firmware ...\n");
     of_call("quiesce", 0, 0, NULL);
 
     return &mbi;
diff -r 539a1e666982 xen/arch/powerpc/powerpc64/exceptions.S
--- a/xen/arch/powerpc/powerpc64/exceptions.S   Fri Aug 18 14:07:50 2006 -0400
+++ b/xen/arch/powerpc/powerpc64/exceptions.S   Fri Aug 18 21:06:13 2006 -0400
@@ -514,6 +514,42 @@ _GLOBAL(sleep)
     mtmsrd r3
     blr
 
+/* The primary processor issues a firmware call to spin us up at this
+ * address, passing our CPU number in r3.  We only need a function
+ * entry point instead of a descriptor since this is never called from
+ * C code.
+ */    
     .globl spin_start
 spin_start:
+    /* Write our processor number as an acknowledgment that we're alive.  */
+    LOADADDR(r14, __spin_ack)
+    stw r3, 0(r14)
+    sync
+    /* If NR_CPUS is too small, we should just spin forever.  */
+    LOADADDR(r15, NR_CPUS)
+    cmpd r3, r15
+    blt 2f     
     b .
+    /* Find our index in the array of processor_area struct pointers.  */
+2:  LOADADDR(r14, global_cpu_table)
+    muli r15, r3, 8
+    add r14, r14, r15
+    /* Spin until the pointer for our processor goes valid.  */
+1:  ld r15, 0(r14)
+    cmpldi r15, 0
+    beq 1b
+    /* Dereference the pointer and load our stack pointer.  */
+    isync
+    ld r1, PAREA_stack(r15)
+    li r14, STACK_FRAME_OVERHEAD
+    sub r1, r1, r14
+    /* We must not speculatively execute beyond these loads.  */
+    LOADADDR(r14, secondary_cpu_init)
+    ld r2, 8(r14)
+    ld r11, 0(r14)
+    mtctr r11
+    isync
+    /* Jump into C code now.  */
+    bctrl
+    nop
+    b .
diff -r 539a1e666982 xen/arch/powerpc/powerpc64/ppc970.c
--- a/xen/arch/powerpc/powerpc64/ppc970.c       Fri Aug 18 14:07:50 2006 -0400
+++ b/xen/arch/powerpc/powerpc64/ppc970.c       Fri Aug 18 17:28:04 2006 -0400
@@ -31,6 +31,8 @@
 
 #undef SERIALIZE
 
+extern volatile struct processor_area * volatile global_cpu_table[];
+
 unsigned int cpu_rma_order(void)
 {
     /* XXX what about non-HV mode? */
@@ -38,18 +40,15 @@ unsigned int cpu_rma_order(void)
     return rma_log_size - PAGE_SHIFT;
 }
 
-void cpu_initialize(void)
+void cpu_initialize(int cpuid)
 {
-    ulong stack;
+    ulong r1, r2;
+    __asm__ __volatile__ ("mr %0, 1" : "=r" (r1));
+    __asm__ __volatile__ ("mr %0, 2" : "=r" (r2));
 
-    parea = xmalloc(struct processor_area);
+    /* This is SMP safe because the compiler must use r13 for it.  */
+    parea = global_cpu_table[cpuid];
     ASSERT(parea != NULL);
-
-    stack = (ulong)alloc_xenheap_pages(STACK_ORDER);
-
-    ASSERT(stack != 0);
-    parea->hyp_stack_base = (void *)(stack + STACK_SIZE);
-    printk("stack is here: %p\n", parea->hyp_stack_base);
 
     mthsprg0((ulong)parea); /* now ready for exceptions */
 
@@ -79,7 +78,10 @@ void cpu_initialize(void)
     s |= 1UL << (63-3);     /* ser-gp */
     hid0.word |= s;
 #endif
-    printk("hid0: 0x%016lx\n", hid0.word);
+
+    printk("CPU #%d: Hello World! SP = %lx TOC = %lx HID0 = %lx\n", 
+           smp_processor_id(), r1, r2, hid0.word);
+
     mthid0(hid0.word);
 
     union hid1 hid1;
diff -r 539a1e666982 xen/arch/powerpc/setup.c
--- a/xen/arch/powerpc/setup.c  Fri Aug 18 14:07:50 2006 -0400
+++ b/xen/arch/powerpc/setup.c  Sat Aug 19 02:24:40 2006 -0400
@@ -65,6 +65,7 @@ ulong oftree_end;
 
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 cpumask_t cpu_online_map; /* missing ifdef in schedule.c */
+cpumask_t cpu_present_map;
 
 /* XXX get this from ISA node in device tree */
 void *vgabase;
@@ -76,6 +77,13 @@ extern void idle_loop(void);
 
 /* move us to a header file */
 extern void initialize_keytable(void);
+
+static void init_parea(int cpuid);
+
+volatile struct processor_area * volatile global_cpu_table[NR_CPUS];
+
+static int kick_secondary_cpus(void);
+int secondary_cpu_init(int cpuid, unsigned long);
 
 int is_kernel_text(unsigned long addr)
 {
@@ -362,13 +370,16 @@ static void __init __start_xen(multiboot
 
     percpu_init_areas();
 
-    cpu_initialize();
+    init_parea(0);
+    cpu_initialize(0);
 
 #ifdef CONFIG_GDB
     initialise_gdb();
     if (opt_earlygdb)
         debugger_trap_immediate();
 #endif
+
+    kick_secondary_cpus();
 
     start_of_day();
 
@@ -445,6 +456,49 @@ extern void arch_get_xen_caps(xen_capabi
 extern void arch_get_xen_caps(xen_capabilities_info_t info);
 void arch_get_xen_caps(xen_capabilities_info_t info)
 {
+}
+
+static void init_parea(int cpuid)
+{
+    /* Be careful not to shadow the global variable.  */
+    volatile struct processor_area *pa;
+    void *stack;
+
+    pa = xmalloc(struct processor_area);
+    if (pa == NULL)
+        panic("failed to allocate parea\n");
+
+    stack = alloc_xenheap_pages(STACK_ORDER);
+    if (stack == NULL)
+        panic("failed to allocate stack\n");
+
+    pa->whoami = cpuid;
+    pa->hyp_stack_base = (void *)((ulong)stack + STACK_SIZE);
+
+    /* This store has the effect of invoking secondary_cpu_init.  */
+    global_cpu_table[cpuid] = pa;
+    mb();
+}
+
+static int kick_secondary_cpus()
+{
+    int cpuid;
+
+    for_each_present_cpu(cpuid) {
+        if (cpuid == 0)
+            continue;
+        init_parea(cpuid);
+        cpu_set(cpuid, cpu_online_map);
+    }
+
+    return 0;
+}
+
+/* This is the first C code that secondary processors invoke.  */
+int secondary_cpu_init(int cpuid, unsigned long r4)
+{
+    cpu_initialize(cpuid);
+    while(1);
 }
 
 /*
diff -r 539a1e666982 xen/include/asm-powerpc/config.h
--- a/xen/include/asm-powerpc/config.h  Fri Aug 18 14:07:50 2006 -0400
+++ b/xen/include/asm-powerpc/config.h  Sat Aug 19 02:26:18 2006 -0400
@@ -51,7 +51,7 @@ extern char __bss_start[];
 #define CONFIG_GDB 1
 #define CONFIG_SMP 1
 #define CONFIG_PCI 1
-#define NR_CPUS 1
+#define NR_CPUS 16
 
 #ifndef ELFSIZE
 #define ELFSIZE 64
diff -r 539a1e666982 xen/include/asm-powerpc/current.h
--- a/xen/include/asm-powerpc/current.h Fri Aug 18 14:07:50 2006 -0400
+++ b/xen/include/asm-powerpc/current.h Fri Aug 18 15:08:29 2006 -0400
@@ -27,7 +27,7 @@
 
 struct vcpu;
 
-register struct processor_area *parea asm("r13");
+register volatile struct processor_area *parea asm("r13");
 
 static inline struct vcpu *get_current(void)
 {
diff -r 539a1e666982 xen/include/asm-powerpc/powerpc64/procarea.h
--- a/xen/include/asm-powerpc/powerpc64/procarea.h      Fri Aug 18 14:07:50 
2006 -0400
+++ b/xen/include/asm-powerpc/powerpc64/procarea.h      Fri Aug 18 21:01:52 
2006 -0400
@@ -28,6 +28,7 @@ struct gdb_state;
 
 struct processor_area
 {
+    unsigned int whoami;
     struct vcpu *cur_vcpu;
     void *hyp_stack_base;
     ulong saved_regs[2];
diff -r 539a1e666982 xen/include/asm-powerpc/processor.h
--- a/xen/include/asm-powerpc/processor.h       Fri Aug 18 14:07:50 2006 -0400
+++ b/xen/include/asm-powerpc/processor.h       Fri Aug 18 15:08:29 2006 -0400
@@ -40,7 +40,7 @@ extern void show_registers(struct cpu_us
 extern void show_registers(struct cpu_user_regs *);
 extern void show_execution_state(struct cpu_user_regs *);
 extern unsigned int cpu_rma_order(void);
-extern void cpu_initialize(void);
+extern void cpu_initialize(int cpuid);
 extern void cpu_init_vcpu(struct vcpu *);
 extern void save_cpu_sprs(struct vcpu *);
 extern void load_cpu_sprs(struct vcpu *);
diff -r 539a1e666982 xen/include/asm-powerpc/smp.h
--- a/xen/include/asm-powerpc/smp.h     Fri Aug 18 14:07:50 2006 -0400
+++ b/xen/include/asm-powerpc/smp.h     Fri Aug 18 22:44:11 2006 -0400
@@ -28,8 +28,8 @@ extern int smp_num_siblings;
 
 /* revisit when we support SMP */
 #define get_hard_smp_processor_id(i) i
-#define hard_smp_processor_id() 0
-#define raw_smp_processor_id() 0
+#define raw_smp_processor_id() (parea->whoami)
+#define hard_smp_processor_id() raw_smp_processor_id()
 extern cpumask_t cpu_sibling_map[];
 extern cpumask_t cpu_core_map[];
 

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.