[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH RFC 5/7] xen: arm: rewrite start of day page table and cpu bring up



On 09/17/2013 02:40 AM, Ian Campbell wrote:
> This is unfortunately a rather large monolithic patch.
> 
> Rather than bringing up all CPUs in lockstep as we set up paging and relocate
> Xen, create a simplified set of dedicated boot-time pagetables instead.
> 
> This allows secondary CPUs to remain powered down or in the firmware until we
> actually want to enable them. The bringup is now done later on in C and can be
> driven by DT etc. I have included code for the vexpress platform, but other
> platforms will need to be added.
> 
> The mechanism for deciding how to bring up a CPU differs between arm32 and
> arm64. On arm32 it is essentially a per-platform property, with the exception
> of PSCI which can be implemented globally (but isn't here). On arm64 there is
> a per-cpu property in the device tree.
> 
> Secondary CPUs are brought up directly into the relocated Xen image, instead
> of relying on being able to launch on the unrelocated Xen and hoping that it
> hasn't been clobbered.
> 
> As part of this change drop support for switching from secure mode to NS HYP
> as well as the early CPU kick. Xen now requires that it is launched in NS HYP
> mode and that firmware configure things such that secondary CPUs can be woken
> up by a primary CPU in HYP mode. This may require fixes to bootloaders or the
> use of a boot wrapper.
> 
> The changes done here (re)exposed an issue with relocating Xen and the
> compiler spilling values to the stack between the copy and the actual switch
> to the relocated copy of Xen in setup_pagetables. Therefore switch to doing
> the copy and switch in a single asm function where we can control precisely
> what gets spilled to the stack etc.
> 
> Since we now have a separate set of boot pagetables it is much easier to build
> the real Xen pagetables in place before relocating rather than the more complex
> approach of rewriting the pagetables in the relocated copy before switching.
> 
> This will also enable Xen to be loaded above the 4GB boundary on 64-bit.
> Still TODO:
>   - integrate with Julien's "Dissociate logical and hardware CPU ID"
>   - cpu initialisation for other platforms
> 
> Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> ---
>  xen/arch/arm/arm32/Makefile              |   2 +-
>  xen/arch/arm/arm32/head.S                | 349 ++++++++++++++++++-----------
>  xen/arch/arm/arm32/mode_switch.S         | 158 -------------
>  xen/arch/arm/arm32/smpboot.c             |  24 ++
>  xen/arch/arm/arm64/Makefile              |   2 +-
>  xen/arch/arm/arm64/head.S                | 367 ++++++++++++++++++++++---------
>  xen/arch/arm/arm64/mode_switch.S         |  89 --------
>  xen/arch/arm/arm64/smpboot.c             |  80 +++++++
>  xen/arch/arm/mm.c                        | 184 +++++++++-------
>  xen/arch/arm/platform.c                  |  18 ++
>  xen/arch/arm/platforms/vexpress.c        |  39 ++++
>  xen/arch/arm/setup.c                     |   1 -
>  xen/arch/arm/smpboot.c                   |  51 ++---
>  xen/include/asm-arm/platform.h           |   9 +
>  xen/include/asm-arm/platforms/exynos5.h  |  14 --
>  xen/include/asm-arm/platforms/vexpress.h |  11 -
>  xen/include/asm-arm/smp.h                |  12 +-
>  17 files changed, 786 insertions(+), 624 deletions(-)
>  delete mode 100644 xen/arch/arm/arm32/mode_switch.S
>  create mode 100644 xen/arch/arm/arm32/smpboot.c
>  delete mode 100644 xen/arch/arm/arm64/mode_switch.S
>  create mode 100644 xen/arch/arm/arm64/smpboot.c
> 
> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
> index 18522dc..aacdcb9 100644
> --- a/xen/arch/arm/arm32/Makefile
> +++ b/xen/arch/arm/arm32/Makefile
> @@ -1,11 +1,11 @@
>  subdir-y += lib
>  
>  obj-y += entry.o
> -obj-y += mode_switch.o
>  obj-y += proc-v7.o
>  
>  obj-y += traps.o
>  obj-y += domain.o
>  obj-y += vfp.o
> +obj-y += smpboot.o
>  
>  obj-$(EARLY_PRINTK) += debug.o
> diff --git a/xen/arch/arm/arm32/head.S b/xen/arch/arm/arm32/head.S
> index b8334e2..abaacfd 100644
> --- a/xen/arch/arm/arm32/head.S
> +++ b/xen/arch/arm/arm32/head.S
> @@ -37,6 +37,25 @@
>  #include EARLY_PRINTK_INC
>  #endif
>  
> +/*
> + * Common register usage in this file:
> + *   r0  -
> + *   r1  -
> + *   r2  -
> + *   r3  -
> + *   r4  -
> + *   r5  -
> + *   r6  -
> + *   r7  - CPUID
> + *   r8  - DTB address (boot CPU only)
> + *   r9  - paddr(start)
> + *   r10 - phys offset
> + *   r11 - UART address
> + *   r12 - !!is_boot_cpu
> + *   r13 - SP
> + *   r14 - LR
> + *   r15 - PC
> + */
>  /* Macro to print a string to the UART, if there is one.
>   * Clobbers r0-r3. */
>  #ifdef EARLY_PRINTK
> @@ -77,7 +96,6 @@ past_zImage:
>          cpsid aif                    /* Disable all interrupts */
>  
>          /* Save the bootloader arguments in less-clobberable registers */
> -        mov   r5, r1                 /* r5: ARM-linux machine type */
>          mov   r8, r2                 /* r8 := DTB base address */
>  
>          /* Find out where we are */
> @@ -91,53 +109,39 @@ past_zImage:
>          add   r8, r10                /* r8 := paddr(DTB) */
>  #endif
>  
> -        /* Are we the boot CPU? */
> -        mov   r12, #0                /* r12 := CPU ID */
> -        mrc   CP32(r0, MPIDR)
> -        tst   r0, #(1<<31)           /* Multiprocessor extension supported? 
> */
> -        beq   boot_cpu
> -        tst   r0, #(1<<30)           /* Uniprocessor system? */
> -        bne   boot_cpu
> -        bics  r12, r0, #(0xff << 24) /* Mask out flags to get CPU ID */
> -        beq   boot_cpu               /* If we're CPU 0, boot now */
> -
> -        /* Non-boot CPUs wait here to be woken up one at a time. */
> -1:      dsb
> -        ldr   r0, =smp_up_cpu        /* VA of gate */
> -        add   r0, r0, r10            /* PA of gate */
> -        ldr   r1, [r0]               /* Which CPU is being booted? */
> -        teq   r1, r12                /* Is it us? */
> -        wfene
> -        bne   1b
> +        mov   r12, #0                /* r12 := !!is_boot_cpu */

Do you mean !is_boot_cpu?

> +
> +        b     common_start
> +
> +GLOBAL(init_secondary)
> +        cpsid aif                    /* Disable all interrupts */
> +
> +        /* Find out where we are */
> +        ldr   r0, =start
> +        adr   r9, start              /* r9  := paddr (start) */
> +        sub   r10, r9, r0            /* r10 := phys-offset */
> +
> +        mov   r12, #1                /* r12 := !!is_boot_cpu */

Same here.

> +
> +common_start:
> +        mov   r7, #0                 /* r7 := CPU ID */
> +        mrc   CP32(r1, MPIDR)
> +        tst   r1, #(1<<31)           /* Multiprocessor extension supported? 
> */
> +        beq   1f
> +        tst   r1, #(1<<30)           /* Uniprocessor system? */
> +        bne   1f
> +        bic   r7, r1, #(0xff << 24)  /* Mask out flags to get CPU ID */
> +1:
>  
> -boot_cpu:
>  #ifdef EARLY_PRINTK
>          ldr   r11, =EARLY_UART_BASE_ADDRESS  /* r11 := UART base address */
> -        teq   r12, #0                   /* CPU 0 sets up the UART too */
> +        teq   r12, #0                /* Boot CPU sets up the UART too */
>          bleq  init_uart
>          PRINT("- CPU ")
> -        mov   r0, r12
> +        mov   r0, r7
>          bl    putn
>          PRINT(" booting -\r\n")
>  #endif
> -        /* Secondary CPUs doesn't have machine ID
> -         *  - Store machine ID on boot CPU
> -         *  - Load machine ID on secondary CPUs
> -         * Machine ID is needed in kick_cpus and enter_hyp_mode */
> -        ldr   r0, =machine_id           /* VA of machine_id */
> -        add   r0, r0, r10               /* PA of machine_id */
> -        teq   r12, #0
> -        streq r5, [r0]                  /* On boot CPU save machine ID */
> -        ldrne r5, [r0]                  /* If non boot cpu r5 := machine ID 
> */
> -
> -        /* Wake up secondary cpus */
> -        teq   r12, #0
> -        bleq  kick_cpus
> -
> -        PRINT("- Machine ID ")
> -        mov   r0, r5
> -        bl    putn
> -        PRINT(" -\r\n")
>  
>          /* Check that this CPU has Hyp mode */
>          mrc   CP32(r0, ID_PFR1)
> @@ -146,29 +150,19 @@ boot_cpu:
>          beq   1f
>          PRINT("- CPU doesn't support the virtualization extensions -\r\n")
>          b     fail
> -1:
> -        /* Check if we're already in it */
> -        mrs   r0, cpsr
> +
> +        /* Check that we're already in Hyp mode */
> +1:      mrs   r0, cpsr
>          and   r0, r0, #0x1f          /* Mode is in the low 5 bits of CPSR */
>          teq   r0, #0x1a              /* Hyp Mode? */
> -        bne   1f
> -        PRINT("- Started in Hyp mode -\r\n")
> -        b     hyp
> -1:
> -        /* Otherwise, it must have been Secure Supervisor mode */
> -        mrc   CP32(r0, SCR)
> -        tst   r0, #0x1               /* Not-Secure bit set? */
> -        beq   1f
> -        PRINT("- CPU is not in Hyp mode or Secure state -\r\n")
> +        beq   hyp
> +
> +        /* OK, we're boned. */
> +        PRINT("- Xen must be entered in NS Hyp mode -\r\n" \
> +              "- Please update the bootloader -\r\n")
>          b     fail
> -1:
> -        /* OK, we're in Secure state. */
> -        PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n")
> -        ldr   r0, =enter_hyp_mode    /* VA of function */
> -        adr   lr, hyp                /* Set return address for call */
> -        add   pc, r0, r10            /* Call PA of function */
>  
> -hyp:
> +hyp:    PRINT("- Xen starting in Hyp mode -\r\n")
>  
>          /* Zero BSS On the boot CPU to avoid nasty surprises */
>          teq   r12, #0
> @@ -242,18 +236,12 @@ cpu_init_done:
>          ldr   r0, =(HSCTLR_BASE|SCTLR_A)
>          mcr   CP32(r0, HSCTLR)
>  
> -        /* Write Xen's PT's paddr into the HTTBR */
> -        ldr   r4, =boot_pgtable
> -        add   r4, r4, r10            /* r4 := paddr (xen_pagetable) */
> -        mov   r5, #0                 /* r4:r5 is paddr (xen_pagetable) */
> -        mcrr  CP64(r4, r5, HTTBR)
> -
> -        /* Non-boot CPUs don't need to rebuild the pagetable */
> -        teq   r12, #0
> -        bne   pt_ready
> -
>          /* console fixmap */
>  #if defined(EARLY_PRINTK)
> +        /* Non-boot CPUs don't need to rebuild the fixmap */
> +        teq   r12, #0
> +        bne   1f
> +
>          ldr   r1, =xen_fixmap
>          add   r1, r1, r10            /* r1 := paddr (xen_fixmap) */
>          mov   r3, #0
> @@ -262,48 +250,77 @@ cpu_init_done:
>          orr   r2, r2, #PT_UPPER(DEV_L3)
>          orr   r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including 
> UART */
>          strd  r2, r3, [r1, #(FIXMAP_CONSOLE*8)] /* Map it in the first 
> fixmap's slot */
> +1:
>  #endif
>  
> -        /* Build the baseline idle pagetable's first-level entries */
> -        ldr   r1, =xen_second
> -        add   r1, r1, r10            /* r1 := paddr (xen_second) */
> +        /*
> +         * Rebuild the boot pagetable's first-level entries. The structure
> +         * is described in mm.c.
> +         *
> +         * After the CPU enables paging it will add the fixmap mapping
> +         * to these page tables, however this may clash with the 1:1
> +         * mapping. So each CPU must rebuild the page tables here with
> +         * the 1:1 in place.
> +         */
> +
> +        /* Write Xen's PT's paddr into the HTTBR */
> +        ldr   r4, =boot_pgtable
> +        add   r4, r4, r10            /* r4 := paddr (xen_pagetable) */

s/xen_pagetable/boot_pgtable/?

> +        mov   r5, #0                 /* r4:r5 is paddr (xen_pagetable) */

Same here.

> +        mcrr  CP64(r4, r5, HTTBR)
> +
> +        /* Setup boot_pgtable: */
> +        ldr   r1, =boot_second
> +        add   r1, r1, r10            /* r1 := paddr (boot_second) */
>          mov   r3, #0x0
> +
> +        /* ... map boot_second in boot_pgtable[0] */
>          orr   r2, r1, #PT_UPPER(PT)  /* r2:r3 := table map of xen_second */
s/xen_second/boot_second/?

>          orr   r2, r2, #PT_LOWER(PT)  /* (+ rights for linear PT) */
>          strd  r2, r3, [r4, #0]       /* Map it in slot 0 */
> -        add   r2, r2, #0x1000
> -        strd  r2, r3, [r4, #8]       /* Map 2nd page in slot 1 */
> -        add   r2, r2, #0x1000
> -        strd  r2, r3, [r4, #16]      /* Map 3rd page in slot 2 */
> -        add   r2, r2, #0x1000
> -        strd  r2, r3, [r4, #24]      /* Map 4th page in slot 3 */
> -
> -        /* Now set up the second-level entries */
> +
> +        /* ... map of paddr(start) in boot_pgtable */
> +        lsrs  r1, r9, #30            /* Offset of base paddr in boot_pgtable 
> */
> +        beq   1f                     /* If it is in slot 0 then map in 
> xen_second

Same here.

> +                                      * later on */
> +        lsl   r2, r1, #30            /* Base address for 1GB mapping */
> +        orr   r2, r2, #PT_UPPER(MEM) /* r2:r3 := section map */
> +        orr   r2, r2, #PT_LOWER(MEM)
> +        lsl   r1, r1, #3             /* r1 := Slot offset */
> +        strd  r2, r3, [r4, r1]       /* Mapping of paddr(start) */
> +
> +1:      /* Setup boot_second: */
> +        ldr   r4, =boot_second
> +        add   r4, r4, r10            /* r1 := paddr (xen_second) */

Same here.

> +
> +        lsr   r2, r9, #20            /* Base address for 2MB mapping */
> +        lsl   r2, r2, #20
> +        orr   r2, r2, #PT_UPPER(MEM) /* r2:r3 := section map */
> +        orr   r2, r2, #PT_LOWER(MEM)
> +
> +        /* ... map of vaddr(start) in boot_second */
> +        ldr   r1, =start
> +        lsr   r1, #18                /* Slot for vaddr(start) */
> +        strd  r2, r3, [r4, r1]       /* Map vaddr(start) */
> +
> +        /* ... map of paddr(start) in boot_second */
>          orr   r2, r9, #PT_UPPER(MEM)
>          orr   r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB normal map of Xen */
> -        mov   r4, r9, lsr #18        /* Slot for paddr(start) */
> -        strd  r2, r3, [r1, r4]       /* Map Xen there */
> -        ldr   r4, =start
> -        lsr   r4, #18                /* Slot for vaddr(start) */
> -        strd  r2, r3, [r1, r4]       /* Map Xen there too */
>  
> -        /* xen_fixmap pagetable */
> -        ldr   r2, =xen_fixmap
> -        add   r2, r2, r10            /* r2 := paddr (xen_fixmap) */
> -        orr   r2, r2, #PT_UPPER(PT)
> -        orr   r2, r2, #PT_LOWER(PT)  /* r2:r3 := table map of xen_fixmap */
> -        add   r4, r4, #8
> -        strd  r2, r3, [r1, r4]       /* Map it in the fixmap's slot */
> +        lsrs  r1, r9, #30            /* Base paddr */
> +        bne   1f                     /* If paddr(start) is not in slot 0
> +                                      * then the mapping was done in
> +                                      * boot_pgtable above */
>  
> -        mov   r3, #0x0
> -        lsr   r2, r8, #21
> -        lsl   r2, r2, #21            /* 2MB-aligned paddr of DTB */
> -        orr   r2, r2, #PT_UPPER(MEM)
> -        orr   r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB RAM incl. DTB */
> -        add   r4, r4, #8
> -        strd  r2, r3, [r1, r4]       /* Map it in the early boot slot */
> +        mov   r1, r9, lsr #18        /* Slot for paddr(start) */
> +        strd  r2, r3, [r4, r1]       /* Map Xen there */
> +
> +        /* Defer fixmap and dtb mapping until after paging enabled, to
> +         * avoid them clashing with the 1:1 mapping.
> +         */
> +
> +1:      /* boot pagetable setup complete */
>  
> -pt_ready:
>          PRINT("- Turning on paging -\r\n")
>  
>          ldr   r1, =paging            /* Explicit vaddr, not RIP-relative */
> @@ -315,22 +332,47 @@ pt_ready:
>          mov   pc, r1                 /* Get a proper vaddr into PC */
>  paging:
>  
> -
> -#ifdef EARLY_PRINTK
> +        /* Now we can install the fixmap and dtb mappings, since we
> +         * don't need the 1:1 map any more */
> +        dsb   sy
> +        ldr   r1, =boot_second
> +#if defined(EARLY_PRINTK)
> +        /* xen_fixmap pagetable */

Can you add a comment to explain why we don't need to map the fixmap
when early printk is not enabled?

> +        ldr   r2, =xen_fixmap
> +        add   r2, r2, r10            /* r2 := paddr (xen_fixmap) */
> +        orr   r2, r2, #PT_UPPER(PT)
> +        orr   r2, r2, #PT_LOWER(PT)  /* r2:r3 := table map of xen_fixmap */
> +        ldr   r4, =FIXMAP_ADDR(0)
> +        mov   r4, r4, lsr #18        /* r4 := Slot for FIXMAP(0) */
> +        strd  r2, r3, [r1, r4]       /* Map it in the fixmap's slot */
>          /* Use a virtual address to access the UART. */
>          ldr   r11, =FIXMAP_ADDR(FIXMAP_CONSOLE)
>  #endif
> +        /* Map the DTB in the boot misc slot */
> +        teq   r12, #0                /* Only on boot CPU */
> +        bne   1f
> +
> +        mov   r3, #0x0
> +        lsr   r2, r8, #21
> +        lsl   r2, r2, #21            /* r2: 2MB-aligned paddr of DTB */
> +        orr   r2, r2, #PT_UPPER(MEM)
> +        orr   r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB RAM incl. DTB */
> +        ldr   r4, =BOOT_MISC_VIRT_START
> +        mov   r4, r4, lsr #18        /* Slot for BOOT_MISC_VIRT_START */
> +        strd  r2, r3, [r1, r4]       /* Map it in the early boot slot */
> +        dsb   sy
>  
> -        PRINT("- Ready -\r\n")
> +1:      PRINT("- Ready -\r\n")
>  
>          /* The boot CPU should go straight into C now */
>          teq   r12, #0
>          beq   launch
>  
> -        /* Non-boot CPUs need to move on to the relocated pagetables */
> -        mov   r0, #0
> -        ldr   r4, =boot_ttbr         /* VA of HTTBR value stashed by CPU 0 */
> -        add   r4, r4, r10            /* PA of it */
> +        /* Non-boot CPUs need to move on to proper pagetables,
> +         * temporarily use cpu0's table and switch to our own in
> +         * mmu_init_secondary_cpu.
> +         */
> +        ldr   r4, =init_ttbr         /* VA of HTTBR value stashed by CPU 0 */
>          ldrd  r4, r5, [r4]           /* Actual value */
>          dsb
>          mcrr  CP64(r4, r5, HTTBR)
> @@ -353,18 +395,6 @@ paging:
>          mcr   CP32(r0, DCCMVAC)      /* flush D-Cache */
>          dsb
>  
> -        /* Here, the non-boot CPUs must wait again -- they're now running on
> -         * the boot CPU's pagetables so it's safe for the boot CPU to
> -         * overwrite the non-relocated copy of Xen.  Once it's done that,
> -         * and brought up the memory allocator, non-boot CPUs can get their
> -         * own stacks and enter C. */
> -1:      wfe
> -        dsb
> -        ldr   r0, =smp_up_cpu
> -        ldr   r1, [r0]               /* Which CPU is being booted? */
> -        teq   r1, r12                /* Is it us? */
> -        bne   1b
> -
>  launch:
>          ldr   r0, =init_stack        /* Find the boot-time stack */
>          ldr   sp, [r0]
> @@ -372,7 +402,7 @@ launch:
>          sub   sp, #CPUINFO_sizeof    /* Make room for CPU save record */
>          mov   r0, r10                /* Marshal args: - phys_offset */
>          mov   r1, r8                 /*               - DTB address */
> -        movs  r2, r12                /*               - CPU ID */
> +        movs  r2, r7                 /*               - CPU ID */
>          beq   start_xen              /* and disappear into the land of C */
>          b     start_secondary        /* (to the appropriate entry point) */
>  
> @@ -382,6 +412,82 @@ fail:   PRINT("- Boot failed -\r\n")
>  1:      wfe
>          b     1b
>  
> +/*
> + * Copy Xen to new location and switch TTBR
> + * r1:r0       ttbr
> + * r2          source address
> + * r3          destination address
> + * [sp]=>r4    length
> + *
> + * Source and destination must be word aligned, length is rounded up
> + * to a 16 byte boundary.
> + *
> + * MUST BE VERY CAREFUL when saving things to RAM over the copy
> + */
> +ENTRY(relocate_xen)
> +        push {r4,r5,r6,r7,r8,r9,r10,r11}
> +
> +        ldr   r4, [sp, #8*4]                     /* Get 4th argument from 
> stack */
> +
> +        /* Copy 16 bytes at a time using:
> +         * r5:  counter
> +         * r6:  data
> +         * r7:  data
> +         * r8:  data
> +         * r9:  data
> +         * r10: source
> +         * r11: destination
> +         */
> +        mov   r5, r4
> +        mov   r10, r2
> +        mov   r11, r3
> +1:      ldmia r10!, {r6, r7, r8, r9}
> +        stmia r11!, {r6, r7, r8, r9}
> +
> +        subs    r5, r5, #16
> +        bgt   1b
> +
> +        /* Flush destination from dcache using:
> +         * r5: counter
> +         * r6: step
> +         * r7: vaddr
> +         */
> +        dsb   sy        /* So the CPU issues all writes to the range */
> +
> +        mov   r5, r4
> +        ldr   r6, =cacheline_bytes /* r6 := step */
> +        ldr   r6, [r6]
> +        mov   r7, r3
> +
> +1:      mcr   CP32(r7, DCCMVAC)
> +
> +        add   r7, r7, r6
> +        subs  r5, r5, r6
> +        bgt   1b
> +
> +        dsb   sy        /* So we know the flushes happen before continuing */
> +
> +        isb             /* Ensure synchronization with previous changes to 
> text */
> +        mcr   CP32(r0, TLBIALLH)       /* Flush hypervisor TLB */
> +        mcr   CP32(r0, ICIALLU)        /* Flush I-cache */
> +        mcr   CP32(r0, BPIALL)         /* Flush branch predictor */
> +        dsb                            /* Ensure completion of TLB+BP flush 
> */
> +        isb
> +
> +        mcrr  CP64(r0, r1, HTTBR)
> +
> +        dsb   sy /* ensure memory accesses do not cross over the TTBR0 write 
> */
> +
> +        isb             /* Ensure synchronization with previous changes to 
> text */
> +        mcr   CP32(r0, TLBIALLH)       /* Flush hypervisor TLB */
> +        mcr   CP32(r0, ICIALLU)        /* Flush I-cache */
> +        mcr   CP32(r0, BPIALL)         /* Flush branch predictor */
> +        dsb                      /* Ensure completion of TLB+BP flush */
> +        isb
> +
> +        pop {r4, r5,r6,r7,r8,r9,r10,r11}
> +
> +        mov pc, lr
>  
>  #ifdef EARLY_PRINTK
>  /* Bring up the UART.
> @@ -438,9 +544,6 @@ putn:   mov   pc, lr
>  
>  #endif /* !EARLY_PRINTK */
>  
> -/* Place holder for machine ID */
> -machine_id: .word 0x0
> -
>  /*
>   * Local variables:
>   * mode: ASM
> diff --git a/xen/arch/arm/arm32/mode_switch.S 
> b/xen/arch/arm/arm32/mode_switch.S
> deleted file mode 100644
> index 2cd5888..0000000
> --- a/xen/arch/arm/arm32/mode_switch.S
> +++ /dev/null
> @@ -1,158 +0,0 @@
> -/*
> - * xen/arch/arm/mode_switch.S
> - *
> - * Start-of day code to take a CPU from Secure mode to Hyp mode.
> - *
> - * Tim Deegan <tim@xxxxxxx>
> - * Copyright (c) 2011-2012 Citrix Systems.
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - *
> - * This program is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> - * GNU General Public License for more details.
> - */
> -
> -#include <asm/config.h>
> -#include <asm/page.h>
> -#include <asm/platforms/vexpress.h>
> -#include <asm/platforms/exynos5.h>
> -#include <asm/asm_defns.h>
> -#include <asm/gic.h>
> -
> -/* Wake up secondary cpus
> - * This code relies on Machine ID and only works for Vexpress and the Arndale
> - * TODO: Move this code either later (via platform specific desc) or in a 
> bootwrapper
> - * r5: Machine ID
> - * Clobber r0 r2 */
> -GLOBAL(kick_cpus)
> -        ldr   r0, =MACH_TYPE_SMDK5250
> -        teq   r5, r0                          /* Are we running on the 
> arndale? */
> -        beq   kick_cpus_arndale
> -        /* otherwise versatile express */
> -        /* write start paddr to v2m sysreg FLAGSSET register */
> -        ldr   r0, =(V2M_SYS_MMIO_BASE)        /* base V2M sysreg MMIO 
> address */
> -        dsb
> -        mov   r2, #0xffffffff
> -        str   r2, [r0, #(V2M_SYS_FLAGSCLR)]
> -        dsb
> -        ldr   r2, =start
> -        add   r2, r2, r10
> -        str   r2, [r0, #(V2M_SYS_FLAGSSET)]
> -        dsb
> -        ldr   r2, =V2M_GIC_BASE_ADDRESS       /* r2 := VE gic base address */
> -        b     kick_cpus_sgi
> -kick_cpus_arndale:
> -        /* write start paddr to CPU 1 sysreg register */
> -        ldr   r0, =(S5P_PA_SYSRAM)
> -        ldr   r2, =start
> -        add   r2, r2, r10
> -        str   r2, [r0]
> -        dsb
> -        ldr   r2, =EXYNOS5_GIC_BASE_ADDRESS   /* r2 := Exynos5 gic base 
> address */
> -kick_cpus_sgi:
> -        /* send an interrupt */
> -        ldr   r0, =GIC_DR_OFFSET              /* GIC distributor offset */
> -        add   r0, r2                          /* r0 := r0 + gic base address 
> */
> -        mov   r2, #0x1
> -        str   r2, [r0, #(GICD_CTLR * 4)]      /* enable distributor */
> -        mov   r2, #0xfe0000
> -        str   r2, [r0, #(GICD_SGIR * 4)]      /* send IPI to everybody, SGI0 
> = Event check */
> -        dsb
> -        str   r2, [r0, #(GICD_CTLR * 4)]      /* disable distributor */
> -        mov   pc, lr
> -
> -
> -/* Get up a CPU into Hyp mode.  Clobbers r0-r3.
> - *
> - * r5: Machine ID
> - * r12: CPU number
> - *
> - * This code is specific to the VE model/Arndale, and not intended to be used
> - * on production systems.  As such it's a bit hackier than the main
> - * boot code in head.S.  In future it will be replaced by better
> - * integration with the bootloader/firmware so that Xen always starts
> - * in Hyp mode.
> - * Clobber r0 - r4 */
> -GLOBAL(enter_hyp_mode)
> -        mov   r3, lr                 /* Put return address in non-banked reg 
> */
> -        cpsid aif, #0x16             /* Enter Monitor mode */
> -        mrc   CP32(r0, SCR)
> -        orr   r0, r0, #0x100         /* Set HCE */
> -        orr   r0, r0, #0xb1          /* Set SCD, AW, FW and NS */
> -        bic   r0, r0, #0xe           /* Clear EA, FIQ and IRQ */
> -        mcr   CP32(r0, SCR)
> -
> -        ldr   r2, =MACH_TYPE_SMDK5250   /* r4 := Arndale machine ID */
> -        /* By default load Arndale defaults values */
> -        ldr   r0, =EXYNOS5_TIMER_FREQUENCY  /* r0 := timer's frequency */
> -        ldr   r1, =EXYNOS5_GIC_BASE_ADDRESS /* r1 := GIC base address */
> -        /* If it's not the Arndale machine ID, load VE values */
> -        teq   r5, r2
> -        ldrne r0, =V2M_TIMER_FREQUENCY
> -        ldrne r1, =V2M_GIC_BASE_ADDRESS
> -
> -        /* Ugly: the system timer's frequency register is only
> -         * programmable in Secure state.  Since we don't know where its
> -         * memory-mapped control registers live, we can't find out the
> -         * right frequency. */
> -        mcr   CP32(r0, CNTFRQ)
> -
> -        mrc   CP32(r0,NSACR)
> -        ldr   r4, =0x3fff            /* Allow access to all co-processors in 
> NS mode */
> -        orr   r0, r0, r4
> -        orr   r0, r0, #(1<<18)       /* CA7/CA15: Allow access to ACTLR.SMP 
> in NS mode */
> -        mcr   CP32(r0, NSACR)
> -
> -        add   r0, r1, #GIC_DR_OFFSET
> -        /* Disable the GIC distributor, on the boot CPU only */
> -        mov   r4, #0
> -        teq   r12, #0                /* Is this the boot CPU? */
> -        streq r4, [r0]
> -        /* Continuing ugliness: Set up the GIC so NS state owns interrupts,
> -         * The first 32 interrupts (SGIs & PPIs) must be configured on all
> -         * CPUs while the remainder are SPIs and only need to be done one, on
> -         * the boot CPU. */
> -        add   r0, r0, #0x80          /* GICD_IGROUP0 */
> -        mov   r2, #0xffffffff        /* All interrupts to group 1 */
> -        str   r2, [r0]               /* Interrupts  0-31 (SGI & PPI) */
> -        teq   r12, #0                /* Boot CPU? */
> -        bne   skip_spis              /* Don't route SPIs on secondary CPUs */
> -
> -        add   r4, r1, #GIC_DR_OFFSET
> -        ldr   r4, [r4, #4]            /* r4 := Interrupt Controller Type Reg 
> */
> -        and   r4, r4, #GICD_TYPE_LINES /* r4 := number of SPIs */
> -1:      teq   r4, #0
> -        beq   skip_spis
> -        add   r0, r0, #4             /* Go to the new group */
> -        str   r2, [r0]               /* Update the group */
> -        sub  r4, r4, #1
> -        b     1b
> -skip_spis:
> -        /* Disable the GIC CPU interface on all processors */
> -        add   r0, r1, #GIC_CR_OFFSET
> -        mov   r1, #0
> -        str   r1, [r0]
> -        /* Must drop priority mask below 0x80 before entering NS state */
> -        ldr   r1, =0xff
> -        str   r1, [r0, #0x4]         /* -> GICC_PMR */
> -        /* Reset a few config registers */
> -        mov   r0, #0
> -        mcr   CP32(r0, FCSEIDR)
> -        mcr   CP32(r0, CONTEXTIDR)
> -
> -        mrs   r0, cpsr               /* Copy the CPSR */
> -        add   r0, r0, #0x4           /* 0x16 (Monitor) -> 0x1a (Hyp) */
> -        msr   spsr_cxsf, r0          /* into the SPSR */
> -        movs  pc, r3                 /* Exception-return into Hyp mode */
> -
> -/*
> - * Local variables:
> - * mode: ASM
> - * indent-tabs-mode: nil
> - * End:
> - */
> diff --git a/xen/arch/arm/arm32/smpboot.c b/xen/arch/arm/arm32/smpboot.c
> new file mode 100644
> index 0000000..5eb1028
> --- /dev/null
> +++ b/xen/arch/arm/arm32/smpboot.c
> @@ -0,0 +1,24 @@
> +#include <xen/device_tree.h>
> +#include <xen/init.h>
> +#include <xen/smp.h>
> +#include <asm/platform.h>
> +
> +void __init arch_cpu_init(int cpu, struct dt_device_node *dn)
> +{
> +    /* TODO: look for compatible = arm,psci etc and initialise psci */
> +    platform_cpu_init(cpu);
> +}
> +
> +int __init arch_cpu_up(int cpu)
> +{
> +    return platform_cpu_up(cpu);
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile
> index e06a0a9..5d28bad 100644
> --- a/xen/arch/arm/arm64/Makefile
> +++ b/xen/arch/arm/arm64/Makefile
> @@ -1,10 +1,10 @@
>  subdir-y += lib
>  
>  obj-y += entry.o
> -obj-y += mode_switch.o
>  
>  obj-y += traps.o
>  obj-y += domain.o
>  obj-y += vfp.o
> +obj-y += smpboot.o
>  
>  obj-$(EARLY_PRINTK) += debug.o
> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> index 21b7e4d..6406562 100644
> --- a/xen/arch/arm/arm64/head.S
> +++ b/xen/arch/arm/arm64/head.S
> @@ -33,6 +33,41 @@
>  #include EARLY_PRINTK_INC
>  #endif
>  
> +/*
> + * Common register usage in this file:
> + *  x0  -
> + *  x1  -
> + *  x2  -
> + *  x3  -
> + *  x4  -
> + *  x5  -
> + *  x6  -
> + *  x7  -
> + *  x8  -
> + *  x9  -
> + *  x10 -
> + *  x11 -
> + *  x12 -
> + *  x13 -
> + *  x14 -
> + *  x15 -
> + *  x16 -
> + *  x17 -
> + *  x18 -
> + *  x19 - paddr(start)
> + *  x20 - phys offset
> + *  x21 - DTB address (boot cpu only)
> + *  x22 - !!is_boot_cpu
> + *  x23 - UART address
> + *  x24 - cpuid
> + *  x25 -
> + *  x26 -
> + *  x27 -
> + *  x28 -
> + *  x29 -
> + *  x30 - lr
> + */
> +
>  /* Macro to print a string to the UART, if there is one.
>   * Clobbers x0-x3. */
>  #ifdef EARLY_PRINTK
> @@ -91,36 +126,36 @@ real_start:
>          add   x21, x21, x20          /* x21 := paddr(DTB) */
>  #endif
>  
> -        /* Are we the boot CPU? */
> -        mov   x22, #0                /* x22 := CPU ID */
> +        mov   x22, #0                /* x22 := 0: this is the boot CPU */
> +
> +        b     common_start
> +
> +GLOBAL(init_secondary)
> +        msr   DAIFSet, 0xf           /* Disable all interrupts */
> +
> +        /* Find out where we are */
> +        ldr   x0, =start             /* x0  := vaddr (start) */
> +        adr   x19, start             /* x19 := paddr (start) */
> +        sub   x20, x19, x0           /* x20 := phys-offset */
> +
> +        mov   x22, #1                /* x22 := 1: this is a secondary CPU */
> +
> +common_start:
> +        mov   x24, #0                /* x24 := CPU ID */
>          mrs   x0, mpidr_el1
> -        tbz   x0, 31, boot_cpu       /* Multiprocessor extension supported? 
> */
> -        tbnz  x0, 30, boot_cpu       /* Uniprocessor system? */
> +        tbz   x0, 31, 1f             /* Multiprocessor extension not 
> supported? */
> +        tbnz  x0, 30, 1f             /* Uniprocessor system? */
>  
>          mov   x13, #(0xff << 24)
> -        bics  x22, x0, x13           /* Mask out flags to get CPU ID */
> -        b.eq  boot_cpu               /* If we're CPU 0, boot now */
> -
> -        /* Non-boot CPUs wait here to be woken up one at a time. */
> -1:      dsb   sy
> -        ldr   x0, =smp_up_cpu        /* VA of gate */
> -        add   x0, x0, x20            /* PA of gate */
> -        ldr   x1, [x0]               /* Which CPU is being booted? */
> -        cmp   x1, x22                /* Is it us? */
> -        b.eq  2f
> -        wfe
> -        b     1b
> -2:
> +        bic   x24, x0, x13           /* Mask out flags to get CPU ID */
> +1:
>  
> -boot_cpu:
>  #ifdef EARLY_PRINTK
>          ldr   x23, =EARLY_UART_BASE_ADDRESS /* x23 := UART base address */
>          cbnz  x22, 1f
> -#ifdef EARLY_PRINTK_INIT_UART
> -        bl    init_uart                 /* CPU 0 sets up the UART too */
> -#endif
> +        bl    init_uart                 /* Boot CPU sets up the UART too */
>  1:      PRINT("- CPU ")
> -        mov   x0, x22
> +        mov   x0, x24
>          bl    putn
>          PRINT(" booting -\r\n")
>  #endif
> @@ -130,30 +165,19 @@ boot_cpu:
>          bl    putn
>          PRINT(" -\r\n")
>  
> -        /* Are we in EL3 */
> -        mrs   x0, CurrentEL
> -        cmp   x0, #PSR_MODE_EL3t
> -        ccmp  x0, #PSR_MODE_EL3h, #0x4, ne
> -        b.eq  1f /* Yes */
> -
>          /* Are we in EL2 */
> +        mrs   x0, CurrentEL          /* Reload: PRINT clobbers x0-x3 */
>          cmp   x0, #PSR_MODE_EL2t
> -        ccmp  x0, #PSR_MODE_EL2h, #0x4, ne
> -        b.eq  2f /* Yes */
> -
> -        /* Otherwise, it must have been EL0 or EL1 */
> -        PRINT("- CPU is not in EL3 or EL2 -\r\n")
> -        b     fail
> +        ccmp  x0, #PSR_MODE_EL2h, #0x4, ne /* If not EL2t, test EL2h; else Z := 1 */
> +        b.eq  el2 /* Yes */
>  
> -1:      PRINT("- Started in EL3 -\r\n- Entering EL2 -\r\n")
> -        ldr   x1, =enter_el2_mode    /* VA of function */
> -        add   x1, x1, x20            /* PA of function */
> -        adr   x30, el2               /* Set return address for call */
> -        br    x1                     /* Call function */
> +        /* OK, we're boned. */
> +        PRINT("- Xen must be entered in NS EL2 mode -\r\n" \
> +              "- Please update the bootloader -\r\n")
> +        b fail
>  
> -2:      PRINT("- Started in EL2 mode -\r\n")
> +el2:    PRINT("- Xen starting at EL2 -\r\n")
>  
> -el2:
>          /* Zero BSS On the boot CPU to avoid nasty surprises */
>          cbnz  x22, skip_bss
>  
> @@ -168,9 +191,10 @@ el2:
>          b.lo  1b
>  
>  skip_bss:
> -
>          PRINT("- Setting up control registers -\r\n")
>  
> +        /* XXXX call PROCINFO_cpu_init here */
> +
>          /* Set up memory attribute type tables */
>          ldr   x0, =MAIRVAL
>          msr   mair_el2, x0
> @@ -184,7 +208,7 @@ skip_bss:
>          ldr   x0, =0x80802500
>          msr   tcr_el2, x0
>  
> -        /* Set up the HSCTLR:
> +        /* Set up the SCTLR_EL2:
>           * Exceptions in LE ARM,
>           * Low-latency IRQs disabled,
>           * Write-implies-XN disabled (for now),
> @@ -195,23 +219,11 @@ skip_bss:
>          ldr   x0, =(HSCTLR_BASE|SCTLR_A)
>          msr   SCTLR_EL2, x0
>  
> -        /* Write Xen's PT's paddr into the HTTBR */
> -        ldr   x4, =boot_pgtable
> -        add   x4, x4, x20            /* x4 := paddr (xen_pagetable) */
> -        msr   TTBR0_EL2, x4
> -
> -        /* Non-boot CPUs don't need to rebuild the pagetable */
> -        cbnz  x22, pt_ready
> -
> -        ldr   x1, =boot_first
> -        add   x1, x1, x20            /* x1 := paddr (xen_first) */
> -        mov   x3, #PT_PT             /* x2 := table map of xen_first */
> -        orr   x2, x1, x3             /* (+ rights for linear PT) */
> -        str   x2, [x4, #0]           /* Map it in slot 0 */
> -
> -        mov   x4, x1                 /* Next level into xen_first */
> +        /* console fixmap */
> +#if defined(EARLY_PRINTK)
> +        /* Non-boot CPUs don't need to rebuild the fixmap */
> +        cbnz  x22, 1f
>  
> -       /* console fixmap */
>          ldr   x1, =xen_fixmap
>          add   x1, x1, x20            /* x1 := paddr (xen_fixmap) */
>          lsr   x2, x23, #12
> @@ -219,45 +231,98 @@ skip_bss:
>          mov   x3, #PT_DEV_L3
>          orr   x2, x2, x3             /* x2 := 4K dev map including UART */
>          str   x2, [x1, #(FIXMAP_CONSOLE*8)] /* Map it in the first fixmap's 
> slot */
> +1:
> +#endif
> +
> +        /*
> +         * Rebuild the boot pagetable's first-level entries. The structure
> +         * is described in mm.c.
> +         *
> +         * After the CPU enables paging it will add the fixmap mapping
> +         * to these page tables, however this may clash with the 1:1
> +         * mapping. So each CPU must rebuild the page tables here with
> +         * the 1:1 in place.
> +         */
> +
> +        /* Write Xen's PT's paddr into TTBR0_EL2 */
> +        ldr   x4, =boot_pgtable
> +        add   x4, x4, x20            /* x4 := paddr (boot_pagetable) */
> +        msr   TTBR0_EL2, x4
>  
> -        /* Build the baseline idle pagetable's first-level entries */
> -        ldr   x1, =xen_second
> -        add   x1, x1, x20            /* x1 := paddr (xen_second) */
> -        mov   x3, #PT_PT             /* x2 := table map of xen_second */
> -        orr   x2, x1, x3             /* (+ rights for linear PT) */
> +        /* Setup boot_pgtable: */
> +        ldr   x1, =boot_first
> +        add   x1, x1, x20            /* x1 := paddr (boot_first) */
> +
> +        /* ... map boot_first in boot_pgtable[0] */
> +        mov   x3, #PT_PT             /* x2 := table map of boot_first */
> +        orr   x2, x1, x3             /*       + rights for linear PT */
>          str   x2, [x4, #0]           /* Map it in slot 0 */
> -        add   x2, x2, #0x1000
> -        str   x2, [x4, #8]           /* Map 2nd page in slot 1 */
> -        add   x2, x2, #0x1000
> -        str   x2, [x4, #16]          /* Map 3rd page in slot 2 */
> -        add   x2, x2, #0x1000
> -        str   x2, [x4, #24]          /* Map 4th page in slot 3 */
> -
> -        /* Now set up the second-level entries */
> -        mov   x3, #PT_MEM
> -        orr   x2, x19, x3            /* x2 := 2MB normal map of Xen */
> -        orr   x4, xzr, x19, lsr #18
> -        str   x2, [x1, x4]           /* Map Xen there */
> -        ldr   x4, =start
> -        lsr   x4, x4, #18            /* Slot for vaddr(start) */
> -        str   x2, [x1, x4]           /* Map Xen there too */
>  
> -        /* xen_fixmap pagetable */
> -        ldr   x2, =xen_fixmap
> -        add   x2, x2, x20            /* x2 := paddr (xen_fixmap) */
> -        mov   x3, #PT_PT
> -        orr   x2, x2, x3             /* x2 := table map of xen_fixmap */
> -        add   x4, x4, #8
> -        str   x2, [x1, x4]           /* Map it in the fixmap's slot */
> +        /* ... map of paddr(start) in boot_pgtable */
> +        lsr   x1, x19, #39           /* Offset of base paddr in boot_pgtable 
> */
> +        cbz   x1, 1f                 /* It's in slot 0, map in boot_first
> +                                      * or boot_second later on */
>  
> -        lsr   x2, x21, #21
> -        lsl   x2, x2, #21            /* 2MB-aligned paddr of DTB */
> -        mov   x3, #PT_MEM            /* x2 := 2MB RAM incl. DTB */
> +        lsl   x2, x1, #39            /* Base address for 512GB mapping */
> +        mov   x3, #PT_MEM            /* x2 := Section mapping */
> +        orr   x2, x2, x3
> +        lsl   x1, x1, #3             /* x1 := Slot offset */
> +        str   x2, [x4, x1]           /* Mapping of paddr(start)*/
> +
> +1:      /* Setup boot_first: */
> +        ldr   x4, =boot_first        /* Next level into boot_first */
> +        add   x4, x4, x20            /* x4 := paddr(boot_first) */
> +
> +        /* ... map boot_second in boot_first[0] */
> +        ldr   x1, =boot_second
> +        add   x1, x1, x20            /* x1 := paddr(boot_second) */
> +        mov   x3, #PT_PT             /* x2 := table map of boot_second */
> +        orr   x2, x1, x3             /*       + rights for linear PT */
> +        str   x2, [x4, #0]           /* Map it in slot 0 */
> +
> +        /* ... map of paddr(start) in boot_first */
> +        lsr   x2, x19, #30           /* x2 := Offset of base paddr in 
> boot_first */
> +        and   x1, x2, 0x1ff          /* x1 := Slot to use */
> +        cbz   x1, 1f                 /* It's in slot 0, map in boot_second */
> +
> +        lsl   x2, x2, #30            /* Base address for 1GB mapping */
> +        mov   x3, #PT_MEM            /* x2 := Section map */
>          orr   x2, x2, x3
> -        add   x4, x4, #8
> -        str   x2, [x1, x4]           /* Map it in the early boot slot */
> +        lsl   x1, x1, #3             /* x1 := Slot offset */
> +        str   x2, [x4, x1]           /* Create mapping of paddr(start)*/
> +
> +1:      /* Setup boot_second: */
> +        ldr   x4, =boot_second
> +        add   x4, x4, x20            /* x4 := paddr (boot_second) */
> +
> +        lsr   x2, x19, #20           /* Base address for 2MB mapping */
> +        lsl   x2, x2, #20
> +        mov   x3, #PT_MEM            /* x2 := Section map */
> +        orr   x2, x2, x3
> +
> +        /* ... map of vaddr(start) in boot_second */
> +        ldr   x1, =start
> +        lsr   x1, x1, #18            /* Slot for vaddr(start) */
> +        str   x2, [x4, x1]           /* Map vaddr(start) */
> +
> +        /* ... map of paddr(start) in boot_second */
> +        /* x2 still holds the 2MB section entry for paddr(start) built
> +         * above: the 1:1 map reuses it and must not be a table entry. */
> +
> +        lsr   x1, x19, #30           /* Base paddr */
> +        cbnz  x1, 1f                 /* If paddr(start) is not in slot 0
> +                                      * then the mapping was done in
> +                                      * boot_pgtable or boot_first above */
> +
> +        lsr   x1, x19, #18           /* Slot for paddr(start) */
> +        str   x2, [x4, x1]           /* Map Xen there */
> +
> +        /* Defer fixmap and dtb mapping until after paging enabled, to
> +         * avoid them clashing with the 1:1 mapping.
> +         */
> +
> +1:      /* boot pagetable setup complete */
>  
> -pt_ready:
>          PRINT("- Turning on paging -\r\n")
>  
>          ldr   x1, =paging            /* Explicit vaddr, not RIP-relative */
> @@ -270,17 +335,44 @@ pt_ready:
>          br    x1                     /* Get a proper vaddr into PC */
>  paging:
>  
> +        /* Now we can install the fixmap and dtb mappings, since we
> +         * don't need the 1:1 map any more */
> +        dsb   sy
> +        ldr   x4, =boot_second
> +#if defined(EARLY_PRINTK)
> +        /* xen_fixmap pagetable */
> +        ldr   x2, =xen_fixmap
> +        add   x2, x2, x20            /* x2 := paddr (xen_fixmap) */
> +        mov   x3, #PT_PT
> +        orr   x2, x2, x3             /* x2 := table map of xen_fixmap */
> +        ldr   x1, =FIXMAP_ADDR(0)
> +        lsr   x1, x1, #18            /* x1 := Slot for FIXMAP(0) */
> +        str   x2, [x4, x1]           /* Map it in the fixmap's slot */
> +
>          /* Use a virtual address to access the UART. */
>          ldr   x23, =FIXMAP_ADDR(FIXMAP_CONSOLE)
> +#endif
>  
> -        PRINT("- Ready -\r\n")
> +        /* Map the DTB in the boot misc slot */
> +        cbnz  x22, 1f                /* Only on boot CPU */
> +
> +        lsr   x2, x21, #21
> +        lsl   x2, x2, #21            /* x2 := 2MB-aligned paddr of DTB */
> +        mov   x3, #PT_MEM            /* x2 := 2MB RAM incl. DTB */
> +        orr   x2, x2, x3
> +        ldr   x1, =BOOT_MISC_VIRT_START
> +        lsr   x1, x1, #18            /* x1 := Slot for BOOT_MISC_VIRT_START */
> +        str   x2, [x4, x1]           /* Map it in the early boot slot */
> +        dsb   sy
> +
> +1:      PRINT("- Ready -\r\n")
>  
>          /* The boot CPU should go straight into C now */
>          cbz   x22, launch
>  
> -        /* Non-boot CPUs need to move on to the relocated pagetables */
> -        ldr   x4, =boot_ttbr         /* VA of TTBR0_EL2 stashed by CPU 0 */
> -        add   x4, x4, x20            /* PA of it */
> +        /* Non-boot CPUs need to move on to the relocated pagetables,
> +           temporarily use cpu0's table and switch */
> +        ldr   x4, =init_ttbr         /* VA of TTBR0_EL2 stashed by CPU 0 */
>          ldr   x4, [x4]               /* Actual value */
>          dsb   sy
>          msr   TTBR0_EL2, x4
> @@ -300,18 +392,6 @@ paging:
>          dc    cvac, x0               /* Flush D-Cache */
>          dsb   sy
>  
> -        /* Here, the non-boot CPUs must wait again -- they're now running on
> -         * the boot CPU's pagetables so it's safe for the boot CPU to
> -         * overwrite the non-relocated copy of Xen.  Once it's done that,
> -         * and brought up the memory allocator, non-boot CPUs can get their
> -         * own stacks and enter C. */
> -1:      wfe
> -        dsb   sy
> -        ldr   x0, =smp_up_cpu
> -        ldr   x1, [x0]               /* Which CPU is being booted? */
> -        cmp   x1, x22                /* Is it us? */
> -        b.ne  1b
> -
>  launch:
>          ldr   x0, =init_stack        /* Find the boot-time stack */
>          ldr   x0, [x0]
> @@ -321,7 +401,7 @@ launch:
>  
>          mov   x0, x20                /* Marshal args: - phys_offset */
>          mov   x1, x21                /*               - FDT */
> -        mov   x2, x22                /*               - CPU ID */
> +        mov   x2, x24                /*               - CPU ID */
>          cbz   x22, start_xen         /* and disappear into the land of C */
>          b     start_secondary        /* (to the appropriate entry point) */
>  
> @@ -331,13 +411,80 @@ fail:   PRINT("- Boot failed -\r\n")
>  1:      wfe
>          b     1b
>  
> -#ifdef EARLY_PRINTK
> +/*
> + * Copy Xen to new location and switch TTBR
> + * x0    ttbr
> + * x1    source address
> + * x2    destination address
> + * x3    length
> + *
> + * Source and destination must be word aligned, length is rounded up
> + * to a 16 byte boundary.
> + *
> + * MUST BE VERY CAREFUL when saving things to RAM over the copy
> + */
> +ENTRY(relocate_xen)
> +        /* Copy 16 bytes at a time using:
> +         *   x9: counter
> +         *   x10: data
> +         *   x11: data
> +         *   x12: source
> +         *   x13: destination
> +         */
> +        mov     x9, x3
> +        mov     x12, x1
> +        mov     x13, x2
> +
> +1:      ldp     x10, x11, [x12], #16
> +        stp     x10, x11, [x13], #16
> +
> +        subs    x9, x9, #16
> +        bgt     1b
> +
> +        /* Flush destination from dcache using:
> +         * x9: counter
> +         * x10: step
> +         * x11: vaddr
> +         */
> +        dsb   sy        /* So the CPU issues all writes to the range */
> +
> +        mov   x9, x3
> +        ldr   x10, =cacheline_bytes /* x10 := step */
> +        ldr   x10, [x10]
> +        mov   x11, x2
>  
> +1:      dc    cvac, x11
> +
> +        add   x11, x11, x10
> +        subs  x9, x9, x10
> +        bgt   1b
> +
> +        dsb   sy        /* So we know the flushes happen before continuing */
> +
> +        isb       /* Ensure synchronization with previous changes to text */
> +        tlbi   alle2                 /* Flush hypervisor TLB */
> +        ic     iallu                 /* Flush I-cache */
> +        dsb    sy                    /* Ensure completion of TLB flush */
> +        isb
> +
> +        msr    TTBR0_EL2, x0
> +
> +        isb       /* Ensure synchronization with previous changes to text */
> +        tlbi   alle2                 /* Flush hypervisor TLB */
> +        ic     iallu                 /* Flush I-cache */
> +        dsb    sy                    /* Ensure completion of TLB flush */
> +        isb
> +
> +        ret
> +
> +#ifdef EARLY_PRINTK
>  /* Bring up the UART.
>   * x23: Early UART base address
>   * Clobbers x0-x1 */
>  init_uart:
> +#ifdef EARLY_PRINTK_INIT_UART
>          early_uart_init x23, 0
> +#endif
>          adr   x0, 1f
>          b     puts
>  1:      .asciz "- UART enabled -\r\n"
> diff --git a/xen/arch/arm/arm64/mode_switch.S 
> b/xen/arch/arm/arm64/mode_switch.S
> deleted file mode 100644
> index ea64f22..0000000
> --- a/xen/arch/arm/arm64/mode_switch.S
> +++ /dev/null
> @@ -1,89 +0,0 @@
> -/*
> - * xen/arch/arm/arm64/mode_switch.S
> - *
> - * Start-of day code to take a CPU from EL3 to EL2. Largely taken from
> - *       bootwrapper.
> - *
> - * Ian Campbell <ian.campbell@xxxxxxxxxx>
> - * Copyright (c) 2012 Citrix Systems.
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - *
> - * This program is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> - * GNU General Public License for more details.
> - */
> -
> -#include <asm/config.h>
> -#include <asm/page.h>
> -#include <asm/asm_defns.h>
> -#include <asm/platforms/vexpress.h>
> -
> -/* Get up a CPU into EL2.  Clobbers x0-x3.
> - *
> - * Expects x22 == CPU number
> - * Expects x30  == EL2 entry point
> - *
> - * This code is specific to the VE model, and not intended to be used
> - * on production systems.  As such it's a bit hackier than the main
> - * boot code in head.S.  In future it will be replaced by better
> - * integration with the bootloader/firmware so that Xen always starts
> - * at EL2.
> - */
> -GLOBAL(enter_el2_mode)
> -        mov     x0, #0x30                       // RES1
> -        orr     x0, x0, #(1 << 0)               // Non-secure EL1
> -        orr     x0, x0, #(1 << 8)               // HVC enable
> -        orr     x0, x0, #(1 << 10)              // 64-bit EL2
> -        msr     scr_el3, x0
> -
> -        msr     cptr_el3, xzr                   // Disable copro. traps to 
> EL3
> -
> -        ldr     x0, =0x01800000                 // 24Mhz
> -        msr     cntfrq_el0, x0
> -
> -        /*
> -         * Check for the primary CPU to avoid a race on the distributor
> -         * registers.
> -         */
> -        cbnz    x22, 1f
> -
> -        ldr     x1, =(V2M_GIC_BASE_ADDRESS+GIC_DR_OFFSET) // GICD_CTLR
> -        mov     w0, #3                          // EnableGrp0 | EnableGrp1
> -        str     w0, [x1]
> -
> -1:      ldr     x1, =(V2M_GIC_BASE_ADDRESS+GIC_DR_OFFSET+0x80) // 
> GICD_IGROUPR
> -        mov     w0, #~0                         // Grp1 interrupts
> -        str     w0, [x1], #4
> -        b.ne    2f                              // Only local interrupts for 
> secondary CPUs
> -        str     w0, [x1], #4
> -        str     w0, [x1], #4
> -
> -2:      ldr     x1, =(V2M_GIC_BASE_ADDRESS+GIC_CR_OFFSET) // GICC_CTLR
> -        ldr     w0, [x1]
> -        mov     w0, #3                          // EnableGrp0 | EnableGrp1
> -        str     w0, [x1]
> -
> -        mov     w0, #1 << 7                     // allow NS access to 
> GICC_PMR
> -        str     w0, [x1, #4]                    // GICC_PMR
> -
> -        msr     sctlr_el2, xzr
> -
> -        /*
> -         * Prepare the switch to the EL2_SP1 mode from EL3
> -         */
> -        msr     elr_el3, x30                    // Return to desired function
> -        mov     x1, #0x3c9                      // EL2_SP1 | D | A | I | F
> -        msr     spsr_el3, x1
> -        eret
> -
> -/*
> - * Local variables:
> - * mode: ASM
> - * indent-tabs-mode: nil
> - * End:
> - */
> diff --git a/xen/arch/arm/arm64/smpboot.c b/xen/arch/arm/arm64/smpboot.c
> new file mode 100644
> index 0000000..4d1b7f8
> --- /dev/null
> +++ b/xen/arch/arm/arm64/smpboot.c
> @@ -0,0 +1,80 @@
> +#include <xen/cpu.h>
> +#include <xen/lib.h>
> +#include <xen/init.h>
> +#include <xen/errno.h>
> +#include <xen/mm.h>
> +#include <xen/smp.h>
> +
> +struct smp_enable_ops {
> +//         const char      *name;
> +//        int             (*init_cpu)(struct dt_device_node *, int);
> +        int             (*prepare_cpu)(int);
> +};
> +
> +static paddr_t cpu_release_addr[NR_CPUS];
> +static struct smp_enable_ops smp_enable_ops[NR_CPUS];
> +
> +static int __init smp_spin_table_cpu_up(int cpu)
> +{
> +    paddr_t *release;
> +
> +    if (!cpu_release_addr[cpu])
> +    {
> +        printk("CPU%d: No release addr\n", cpu);
> +        return -ENODEV;
> +    }
> +
> +    release = __va(cpu_release_addr[cpu]);
> +
> +    release[0] = __pa(init_secondary);
> +    flush_xen_data_tlb_range_va((vaddr_t)release, sizeof(*release));
> +
> +    sev();
> +    return 0;
> +}
> +
> +static void __init smp_spin_table_init(int cpu, struct dt_device_node *dn)
> +{
> +    if ( !dt_property_read_u64(dn, "cpu-release-addr", 
> &cpu_release_addr[cpu]) )
> +    {
> +        printk("CPU%d has no cpu-release-addr\n", cpu);
> +        return;
> +    }
> +
> +    smp_enable_ops[cpu].prepare_cpu = smp_spin_table_cpu_up;
> +}
> +
> +void __init arch_cpu_init(int cpu, struct dt_device_node *dn)
> +{
> +    const char *enable_method;
> +
> +    enable_method = dt_get_property(dn, "enable-method", NULL);
> +    if (!enable_method)
> +    {
> +        printk("CPU%d has no enable method\n", cpu);
> +        return;
> +    }
> +
> +    if ( !strcmp(enable_method, "spin-table") )
> +        smp_spin_table_init(cpu, dn);
> +    /* TODO: method "psci" */
> +    else
> +        printk("CPU%d has unknown enable method \"%s\"\n", cpu, 
> enable_method);
> +}
> +
> +int __init arch_cpu_up(int cpu)
> +{
> +    if ( !smp_enable_ops[cpu].prepare_cpu )
> +        return -ENODEV;
> +
> +    return smp_enable_ops[cpu].prepare_cpu(cpu);
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index cdd5cba..efac619 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -43,40 +43,70 @@
>  
>  struct domain *dom_xen, *dom_io, *dom_cow;
>  
> -/* Static start-of-day pagetables that we use before the
> - * allocators are up. These go on to become the boot CPU's real pagetables.
> +/* Static start-of-day pagetables that we use before the allocators
> + * are up. These are used by all CPUs during bringup before switching
> + * to the CPU's own pagetables.
> + *
> + * These pagetables have a very simple structure. They include:
> + *  - a 2MB mapping of xen at XEN_VIRT_START, boot_first and
> + *    boot_second are used to populate the trie down to that mapping.
> + *  - a 1:1 mapping of xen at its current physical address. This uses a
> + *    section mapping at whichever of boot_{pgtable,first,second}
> + *    covers that physical address.
> + *
> + * For the boot CPU these mappings point to the address where Xen was
> + * loaded by the bootloader. For secondary CPUs they point to the
> + * relocated copy of Xen.
> + *
> + * In addition to the above for the boot CPU the device-tree is
> + * initially mapped in the boot misc slot. This mapping is not present
> + * for secondary CPUs.
> + *
> + * Finally, if EARLY_PRINTK is enabled then xen_fixmap will be mapped
> + * by the CPU once it has moved off the 1:1 mapping.
>   */
>  lpae_t boot_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
>  #ifdef CONFIG_ARM_64
>  lpae_t boot_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> -/* The first page of the first level mapping of the xenheap. The
> - * subsequent xenheap first level pages are dynamically allocated, but
> - * we need this one to bootstrap ourselves. */
> -lpae_t xenheap_first_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> -/* The zeroeth level slot which uses xenheap_first_first. Used because
> - * setup_xenheap_mappings otherwise relies on mfn_to_virt which isn't
> - * valid for a non-xenheap mapping. */
> -static __initdata int xenheap_first_first_slot = -1;
>  #endif
> +lpae_t boot_second[LPAE_ENTRIES]  __attribute__((__aligned__(4096)));
> +
> +/* Main runtime page tables */
>  
>  /*
> - * xen_pgtable and xen_dommap are per-PCPU and are allocated before
> - * bringing up each CPU. On 64-bit a first level table is also allocated.
> + * For arm32 xen_pgtable and xen_dommap are per-PCPU and are allocated before
> + * bringing up each CPU. For arm64 xen_pgtable is common to all PCPUs.
>   *
> - * xen_second, xen_fixmap and xen_xenmap are shared between all PCPUs.
> + * xen_second, xen_fixmap and xen_xenmap are always shared between all
> + * PCPUs.
>   */
>  
>  #ifdef CONFIG_ARM_64
> -#define THIS_CPU_PGTABLE boot_pgtable
> +lpae_t xen_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> +lpae_t xen_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> +#define THIS_CPU_PGTABLE xen_pgtable
>  #else
>  /* Per-CPU pagetable pages */
>  /* xen_pgtable == root of the trie (zeroeth level on 64-bit, first on 
> 32-bit) */
>  static DEFINE_PER_CPU(lpae_t *, xen_pgtable);
>  #define THIS_CPU_PGTABLE this_cpu(xen_pgtable)
>  /* xen_dommap == pages used by map_domain_page, these pages contain
> - * the second level pagetables which mapp the domheap region
> + * the second level pagetables which map the domheap region
>   * DOMHEAP_VIRT_START...DOMHEAP_VIRT_END in 2MB chunks. */
>  static DEFINE_PER_CPU(lpae_t *, xen_dommap);
> +/* Root of the trie for cpu0 */
> +lpae_t cpu0_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> +#endif
> +
> +#ifdef CONFIG_ARM_64
> +/* The first page of the first level mapping of the xenheap. The
> + * subsequent xenheap first level pages are dynamically allocated, but
> + * we need this one to bootstrap ourselves. */
> +lpae_t xenheap_first_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> +/* The zeroeth level slot which uses xenheap_first_first. Used because
> + * setup_xenheap_mappings otherwise relies on mfn_to_virt which isn't
> + * valid for a non-xenheap mapping. */
> +static __initdata int xenheap_first_first_slot = -1;
>  #endif
>  
>  /* Common pagetable leaves */
> @@ -104,9 +134,8 @@ lpae_t xen_fixmap[LPAE_ENTRIES] 
> __attribute__((__aligned__(4096)));
>   * as appropriate. */
>  static lpae_t xen_xenmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
>  
> -
>  /* Non-boot CPUs use this to find the correct pagetables. */
> -uint64_t boot_ttbr;
> +uint64_t init_ttbr;
>  
>  static paddr_t phys_offset;
>  
> @@ -131,6 +160,12 @@ static inline void 
> check_memory_layout_alignment_constraints(void) {
>      BUILD_BUG_ON(BOOT_MISC_VIRT_START & ~SECOND_MASK);
>      /* 1GB aligned regions */
>      BUILD_BUG_ON(XENHEAP_VIRT_START & ~FIRST_MASK);
> +    /* Page table structure constraints */
> +#ifdef CONFIG_ARM_64
> +    BUILD_BUG_ON(zeroeth_table_offset(XEN_VIRT_START));
> +#endif
> +    BUILD_BUG_ON(first_table_offset(XEN_VIRT_START));
> +    BUILD_BUG_ON(second_linear_offset(XEN_VIRT_START) >= LPAE_ENTRIES);
>  #ifdef CONFIG_DOMAIN_PAGE
>      BUILD_BUG_ON(DOMHEAP_VIRT_START & ~FIRST_MASK);
>  #endif
> @@ -361,16 +396,6 @@ void __cpuinit setup_virt_paging(void)
>      WRITE_SYSREG32(0x80002558, VTCR_EL2); isb();
>  }
>  
> -/* This needs to be a macro to stop the compiler spilling to the stack
> - * which will change when we change pagetables */
> -#define WRITE_TTBR(ttbr)                                                \
> -    flush_xen_text_tlb();                                               \
> -    WRITE_SYSREG64(ttbr, TTBR0_EL2);                                    \
> -    dsb(); /* ensure memory accesses do not cross over the TTBR0 write */ \
> -    /* flush_xen_text_tlb contains an initial isb which ensures the     \
> -     * write to TTBR0 has completed. */                                 \
> -    flush_xen_text_tlb()
> -
>  static inline lpae_t pte_of_xenaddr(vaddr_t va)
>  {
>      paddr_t ma = va + phys_offset;
> @@ -378,69 +403,67 @@ static inline lpae_t pte_of_xenaddr(vaddr_t va)
>      return mfn_to_xen_entry(mfn);
>  }
>  
> +extern void relocate_xen(uint64_t ttbr, void *src, void *dst, size_t len);
> +
>  /* Boot-time pagetable setup.
>   * Changes here may need matching changes in head.S */
>  void __init setup_pagetables(unsigned long boot_phys_offset, paddr_t 
> xen_paddr)
>  {
> +    uint64_t ttbr;
>      unsigned long dest_va;
>      lpae_t pte, *p;
>      int i;
>  
> -    /* Map the destination in the boot misc area. */
> -    dest_va = BOOT_MISC_VIRT_START;
> -    pte = mfn_to_xen_entry(xen_paddr >> PAGE_SHIFT);
> -    write_pte(xen_second + second_table_offset(dest_va), pte);
> -    flush_xen_data_tlb_range_va(dest_va, SECOND_SIZE);
> -
>      /* Calculate virt-to-phys offset for the new location */
>      phys_offset = xen_paddr - (unsigned long) _start;
>  
> -    /* Copy */
> -    memcpy((void *) dest_va, _start, _end - _start);
> -
> -    /* Beware!  Any state we modify between now and the PT switch may be
> -     * discarded when we switch over to the copy. */
> -
> -    /* Update the copy of boot_pgtable to use the new paddrs */
> -    p = (void *) boot_pgtable + dest_va - (unsigned long) _start;
>  #ifdef CONFIG_ARM_64
> -    p[0].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;
> -    p = (void *) boot_first + dest_va - (unsigned long) _start;
> +    p = (void *) xen_pgtable;
> +    p[0] = pte_of_xenaddr((uintptr_t)xen_first);
> +    p[0].pt.table = 1;
> +    p[0].pt.xn = 0;
> +    p = (void *) xen_first;
> +#else
> +    p = (void *) cpu0_pgtable;
>  #endif
> -    for ( i = 0; i < 4; i++)
> -        p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;
>  
> -    p = (void *) xen_second + dest_va - (unsigned long) _start;
> -    if ( boot_phys_offset != 0 )
> +    /* Initialise first level entries, to point to second level entries */
> +    for ( i = 0; i < 4; i++)
>      {
> -        /* Remove the old identity mapping of the boot paddr */
> -        vaddr_t va = (vaddr_t)_start + boot_phys_offset;
> -        p[second_linear_offset(va)].bits = 0;
> +        p[i] = pte_of_xenaddr((uintptr_t)(xen_second+i*LPAE_ENTRIES));
> +        p[i].pt.table = 1;
> +        p[i].pt.xn = 0;
>      }
> -    for ( i = 0; i < 4 * LPAE_ENTRIES; i++)
> -        if ( p[i].pt.valid )
> -            p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;
>  
> -    /* Change pagetables to the copy in the relocated Xen */
> -    boot_ttbr = (uintptr_t) boot_pgtable + phys_offset;
> -    flush_xen_dcache(boot_ttbr);
> -    flush_xen_dcache_va_range((void*)dest_va, _end - _start);
> +    /* Initialise xen second level entries ... */
> +    /* ... Xen's text etc */
>  
> -    WRITE_TTBR(boot_ttbr);
> -
> -    /* Undo the temporary map */
> -    pte.bits = 0;
> -    write_pte(xen_second + second_table_offset(dest_va), pte);
> -    flush_xen_text_tlb();
> +    pte = mfn_to_xen_entry(xen_paddr>>PAGE_SHIFT);
> +    pte.pt.xn = 0;/* Contains our text mapping! */
> +    xen_second[second_table_offset(XEN_VIRT_START)] = pte;
>  
> -    /* Link in the fixmap pagetable */
> +    /* ... Fixmap */
>      pte = pte_of_xenaddr((vaddr_t)xen_fixmap);
>      pte.pt.table = 1;
> -    write_pte(xen_second + second_table_offset(FIXMAP_ADDR(0)), pte);
> -    /*
> -     * No flush required here. Individual flushes are done in
> -     * set_fixmap as entries are used.
> -     */
> +    xen_second[second_table_offset(FIXMAP_ADDR(0))] = pte;
> +
> +    /* Map the destination in the boot misc area. */
> +    dest_va = BOOT_MISC_VIRT_START;
> +    pte = mfn_to_xen_entry(xen_paddr >> PAGE_SHIFT);
> +    write_pte(boot_second + second_table_offset(dest_va), pte);
> +    flush_xen_data_tlb_range_va(dest_va, SECOND_SIZE);
> +#ifdef CONFIG_ARM_64
> +    ttbr = (uintptr_t) xen_pgtable + phys_offset;
> +#else
> +    ttbr = (uintptr_t) cpu0_pgtable + phys_offset;
> +#endif
> +
> +    relocate_xen(ttbr, _start, (void*)dest_va, _end - _start);
> +
> +    /* Clear the copy of the boot pagetables. Each secondary CPU
> +     * rebuilds these itself (see head.S) */
> +    memset(boot_pgtable, 0x0, PAGE_SIZE);
> +    memset(boot_second, 0x0, PAGE_SIZE);
>  
>      /* Break up the Xen mapping into 4k pages and protect them separately. */
>      for ( i = 0; i < LPAE_ENTRIES; i++ )
> @@ -461,6 +484,7 @@ void __init setup_pagetables(unsigned long 
> boot_phys_offset, paddr_t xen_paddr)
>          write_pte(xen_xenmap + i, pte);
>          /* No flush required here as page table is not hooked in yet. */
>      }
> +
>      pte = pte_of_xenaddr((vaddr_t)xen_xenmap);
>      pte.pt.table = 1;
>      write_pte(xen_second + second_linear_offset(XEN_VIRT_START), pte);
> @@ -472,7 +496,7 @@ void __init setup_pagetables(unsigned long 
> boot_phys_offset, paddr_t xen_paddr)
>      flush_xen_text_tlb();
>  
>  #ifdef CONFIG_ARM_32
> -    per_cpu(xen_pgtable, 0) = boot_pgtable;
> +    per_cpu(xen_pgtable, 0) = cpu0_pgtable;
>      per_cpu(xen_dommap, 0) = xen_second +
>          second_linear_offset(DOMHEAP_VIRT_START);
>  
> @@ -482,7 +506,13 @@ void __init setup_pagetables(unsigned long 
> boot_phys_offset, paddr_t xen_paddr)
>      flush_xen_dcache_va_range(this_cpu(xen_dommap),
>                                DOMHEAP_SECOND_PAGES*PAGE_SIZE);
>  #endif
> +
> +    /* All cpus start on boot page tables, then switch to cpu0's (both
> +     * in head.S), finally onto their own in mmu_init_secondary_cpu. */
> +    init_ttbr = (uintptr_t) THIS_CPU_PGTABLE + phys_offset;
> +    flush_xen_dcache(init_ttbr);
>  }
> +
>  #ifdef CONFIG_ARM_64
>  int init_secondary_pagetables(int cpu)
>  {
> @@ -507,7 +537,7 @@ int init_secondary_pagetables(int cpu)
>      }
>  
>      /* Initialise root pagetable from root of boot tables */
> -    memcpy(first, boot_pgtable, PAGE_SIZE);
> +    memcpy(first, cpu0_pgtable, PAGE_SIZE);
>  
>      /* Ensure the domheap has no stray mappings */
>      memset(domheap, 0, DOMHEAP_SECOND_PAGES*PAGE_SIZE);
> @@ -538,7 +568,13 @@ void __cpuinit mmu_init_secondary_cpu(void)
>  
>      /* Change to this CPU's pagetables */
>      ttbr = (uintptr_t)virt_to_maddr(THIS_CPU_PGTABLE);
> -    WRITE_TTBR(ttbr);
> +
> +    flush_xen_text_tlb();
> +    WRITE_SYSREG64(ttbr, TTBR0_EL2);
> +    dsb(); /* ensure memory accesses do not cross over the TTBR0 write */
> +    /* flush_xen_text_tlb contains an initial isb which ensures the
> +     * write to TTBR0 has completed. */
> +    flush_xen_text_tlb();
>  
>      /* From now on, no mapping may be both writable and executable. */
>      WRITE_SYSREG32(READ_SYSREG32(SCTLR_EL2) | SCTLR_WXN, SCTLR_EL2);
> @@ -612,7 +648,7 @@ void __init setup_xenheap_mappings(unsigned long base_mfn,
>      while ( base_mfn < end_mfn )
>      {
>          int slot = zeroeth_table_offset(vaddr);
> -        lpae_t *p = &boot_pgtable[slot];
> +        lpae_t *p = &xen_pgtable[slot];
>  
>          if ( p->pt.valid )
>          {
> @@ -679,7 +715,7 @@ void __init setup_frametable_mappings(paddr_t ps, paddr_t 
> pe)
>      {
>          pte = mfn_to_xen_entry(second_base + i);
>          pte.pt.table = 1;
> -        write_pte(&boot_first[first_table_offset(FRAMETABLE_VIRT_START)+i], 
> pte);
> +        write_pte(&xen_first[first_table_offset(FRAMETABLE_VIRT_START)+i], 
> pte);
>      }
>      create_32mb_mappings(second, 0, base_mfn, frametable_size >> PAGE_SHIFT);
>  #else
> diff --git a/xen/arch/arm/platform.c b/xen/arch/arm/platform.c
> index afda302..0060b8a 100644
> --- a/xen/arch/arm/platform.c
> +++ b/xen/arch/arm/platform.c
> @@ -105,6 +105,24 @@ int __init platform_specific_mapping(struct domain *d)
>      return res;
>  }
>  
> +#ifdef CONFIG_ARM_32
> +int platform_cpu_up(int cpu)
> +{
> +    if ( platform && platform->cpu_up )
> +        return platform->cpu_up(cpu);
> +
> +    return -EAGAIN;
> +}
> +
> +int platform_cpu_init(int cpu)
> +{
> +    if ( platform && platform->cpu_init )
> +        return platform->cpu_init(cpu);
> +
> +    return 0;
> +}
> +#endif
> +
>  void platform_reset(void)
>  {
>      if ( platform && platform->reset )
> diff --git a/xen/arch/arm/platforms/vexpress.c 
> b/xen/arch/arm/platforms/vexpress.c
> index 6f7dc2c..2d8d905 100644
> --- a/xen/arch/arm/platforms/vexpress.c
> +++ b/xen/arch/arm/platforms/vexpress.c
> @@ -21,6 +21,7 @@
>  #include <asm/platform.h>
>  #include <xen/mm.h>
>  #include <xen/vmap.h>
> +#include <asm/gic.h>
>  
>  #define DCC_SHIFT      26
>  #define FUNCTION_SHIFT 20
> @@ -119,6 +120,41 @@ static void vexpress_reset(void)
>      iounmap(sp810);
>  }
>  
> +#ifdef CONFIG_ARM_32
> +static int vexpress_cpu_up(int cpu)
> +{
> +    static int have_set_sysflags = 0;
> +
> +    /* XXX separate init hook? */
> +    if ( !have_set_sysflags )
> +    {
> +        void __iomem *sysflags;
> +
> +        sysflags = ioremap_nocache(V2M_SYS_MMIO_BASE, PAGE_SIZE);
> +        if ( !sysflags )
> +        {
> +            dprintk(XENLOG_ERR, "Unable to map vexpress MMIO\n");
> +            return -EFAULT;
> +        }
> +
> +        printk("Set SYS_FLAGS to %"PRIpaddr" (%p)\n",
> +               __pa(init_secondary), init_secondary);
> +        iowritel(sysflags + V2M_SYS_FLAGSCLR, ~0);
> +        iowritel(sysflags + V2M_SYS_FLAGSSET,
> +                 __pa(init_secondary));
> +
> +        iounmap(sysflags);
> +
> +        have_set_sysflags  = 1;
> +    }
> +
> +    printk("Waking CPU%d\n", cpu);
> +    send_SGI_mask(cpumask_of(cpu), GIC_SGI_EVENT_CHECK);
> +
> +    return 0;
> +}
> +#endif
> +
>  static const char * const vexpress_dt_compat[] __initdata =
>  {
>      "arm,vexpress",
> @@ -127,6 +163,9 @@ static const char * const vexpress_dt_compat[] __initdata 
> =
>  
>  PLATFORM_START(vexpress, "VERSATILE EXPRESS")
>      .compatible = vexpress_dt_compat,
> +#ifdef CONFIG_ARM_32
> +    .cpu_up = vexpress_cpu_up,
> +#endif
>      .reset = vexpress_reset,
>  PLATFORM_END
>  
> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> index 94b1362..3a3c360 100644
> --- a/xen/arch/arm/setup.c
> +++ b/xen/arch/arm/setup.c
> @@ -548,7 +548,6 @@ void __init start_xen(unsigned long boot_phys_offset,
>      init_xen_time();
>  
>      gic_init();
> -    make_cpus_ready(cpus, boot_phys_offset);
>  
>      set_current((struct vcpu *)0xfffff000); /* debug sanity */
>      idle_vcpu[0] = current;
> diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c
> index b6aea63..40e48a1 100644
> --- a/xen/arch/arm/smpboot.c
> +++ b/xen/arch/arm/smpboot.c
> @@ -49,8 +49,7 @@ static unsigned char __initdata cpu0_boot_stack[STACK_SIZE]
>  /* Pointer to the stack, used by head.S when entering C */
>  unsigned char *init_stack = cpu0_boot_stack;
>  
> -/* Shared state for coordinating CPU bringup */
> -unsigned long smp_up_cpu = 0;
> +/* Shared state for coordinating CPU teardown */
>  static bool_t cpu_is_dead = 0;
>  
>  /* Number of non-boot CPUs ready to enter C */
> @@ -96,40 +95,20 @@ smp_get_max_cpus (void)
>      return max_cpus;
>  }
>  
> -
>  void __init
>  smp_prepare_cpus (unsigned int max_cpus)
>  {
> +    struct dt_device_node *dn = NULL;
> +    int cpu = 0;
>      cpumask_copy(&cpu_present_map, &cpu_possible_map);
>  
>      setup_cpu_sibling_map(0);
> -}
>  
> -void __init
> -make_cpus_ready(unsigned int max_cpus, unsigned long boot_phys_offset)
> -{
> -    unsigned long *gate;
> -    paddr_t gate_pa;
> -    int i;
> -
> -    printk("Waiting for %i other CPUs to be ready\n", max_cpus - 1);
> -    /* We use the unrelocated copy of smp_up_cpu as that's the one the
> -     * others can see. */ 
> -    gate_pa = ((paddr_t) (unsigned long) &smp_up_cpu) + boot_phys_offset;
> -    gate = map_domain_page(gate_pa >> PAGE_SHIFT) + (gate_pa & ~PAGE_MASK); 
> -    for ( i = 1; i < max_cpus; i++ )
> -    {
> -        /* Tell the next CPU to get ready */
> -        /* TODO: handle boards where CPUIDs are not contiguous */
> -        *gate = i;
> -        flush_xen_dcache(*gate);
> -        isb();
> -        sev();
> -        /* And wait for it to respond */
> -        while ( ready_cpus < i )
> -            smp_rmb();
> +    while ((dn = dt_find_node_by_type(dn, "cpu"))) {
> +        /* TODO: replace using code from Juliens MIDR parsing patch */
> +        arch_cpu_init(cpu++, dn);
>      }
> -    unmap_domain_page(gate);
> +
>  }
>  
>  /* Boot the current CPU */
> @@ -226,14 +205,13 @@ int __cpu_up(unsigned int cpu)
>      /* Tell the remote CPU which stack to boot on. */
>      init_stack = idle_vcpu[cpu]->arch.stack;
>  
> -    /* Unblock the CPU.  It should be waiting in the loop in head.S
> -     * for an event to arrive when smp_up_cpu matches its cpuid. */
> -    smp_up_cpu = cpu;
> -    /* we need to make sure that the change to smp_up_cpu is visible to
> -     * secondary cpus with D-cache off */
> -    flush_xen_dcache(smp_up_cpu);
> -    isb();
> -    sev();
> +    rc = arch_cpu_up(cpu);
> +
> +    if ( rc < 0 )
> +    {
> +        printk("Failed to bring up CPU%d\n", cpu);
> +        return rc;
> +    }
>  
>      while ( !cpu_online(cpu) )
>      {
> @@ -262,7 +240,6 @@ void __cpu_die(unsigned int cpu)
>      mb();
>  }
>  
> -
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/include/asm-arm/platform.h b/xen/include/asm-arm/platform.h
> index f460e9c..f616807 100644
> --- a/xen/include/asm-arm/platform.h
> +++ b/xen/include/asm-arm/platform.h
> @@ -14,6 +14,11 @@ struct platform_desc {
>      /* Platform initialization */
>      int (*init)(void);
>      int (*init_time)(void);
> +#ifdef CONFIG_ARM_32
> +    /* SMP */
> +    int (*cpu_init)(int cpu);

I don't think a cpu_init callback is useful. An smp_init callback would
be better.

This would allow you to move the sys_flags check for the Versatile
Express into smp_init.

> +    int (*cpu_up)(int cpu);
> +#endif
>      /* Specific mapping for dom0 */
>      int (*specific_mapping)(struct domain *d);
>      /* Platform reset */
> @@ -37,6 +42,10 @@ struct platform_desc {
>  int __init platform_init(void);
>  int __init platform_init_time(void);
>  int __init platform_specific_mapping(struct domain *d);
> +#ifdef CONFIG_ARM_32
> +int platform_cpu_init(int cpu);
> +int platform_cpu_up(int cpu);
> +#endif
>  void platform_reset(void);
>  void platform_poweroff(void);
>  bool_t platform_has_quirk(uint32_t quirk);
> diff --git a/xen/include/asm-arm/platforms/exynos5.h 
> b/xen/include/asm-arm/platforms/exynos5.h
> index ee5bdfa..af30608 100644
> --- a/xen/include/asm-arm/platforms/exynos5.h
> +++ b/xen/include/asm-arm/platforms/exynos5.h
> @@ -14,20 +14,6 @@
>  
>  #define S5P_PA_SYSRAM   0x02020000
>  
> -/* Constants below is only used in assembly because the DTS is not yet 
> parsed */
> -#ifdef __ASSEMBLY__
> -
> -/* GIC Base Address */
> -#define EXYNOS5_GIC_BASE_ADDRESS    0x10480000
> -
> -/* Timer's frequency */
> -#define EXYNOS5_TIMER_FREQUENCY     (24 * 1000 * 1000) /* 24 MHz */
> -
> -/* Arndale machine ID */
> -#define MACH_TYPE_SMDK5250          3774
> -
> -#endif /* __ASSEMBLY__ */
> -
>  #endif /* __ASM_ARM_PLATFORMS_EXYNOS5_H */
>  /*
>   * Local variables:
> diff --git a/xen/include/asm-arm/platforms/vexpress.h 
> b/xen/include/asm-arm/platforms/vexpress.h
> index 982a293..5cf3aba 100644
> --- a/xen/include/asm-arm/platforms/vexpress.h
> +++ b/xen/include/asm-arm/platforms/vexpress.h
> @@ -32,17 +32,6 @@
>  int vexpress_syscfg(int write, int function, int device, uint32_t *data);
>  #endif
>  
> -/* Constants below is only used in assembly because the DTS is not yet 
> parsed */
> -#ifdef __ASSEMBLY__
> -
> -/* GIC base address */
> -#define V2M_GIC_BASE_ADDRESS        0x2c000000
> -
> -/* Timer's frequency */
> -#define V2M_TIMER_FREQUENCY         0x5f5e100 /* 100 Mhz */
> -
> -#endif /* __ASSEMBLY__ */
> -
>  #endif /* __ASM_ARM_PLATFORMS_VEXPRESS_H */
>  /*
>   * Local variables:
> diff --git a/xen/include/asm-arm/smp.h b/xen/include/asm-arm/smp.h
> index 1c2746b..d57a088 100644
> --- a/xen/include/asm-arm/smp.h
> +++ b/xen/include/asm-arm/smp.h
> @@ -4,6 +4,7 @@
>  #ifndef __ASSEMBLY__
>  #include <xen/config.h>
>  #include <xen/cpumask.h>
> +#include <xen/device_tree.h>
>  #include <asm/current.h>
>  #endif
>  
> @@ -16,15 +17,16 @@ DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
>  
>  extern void stop_cpu(void);
>  
> -/* Bring the non-boot CPUs up to paging and ready to enter C.  
> - * Must be called after Xen is relocated but before the original copy of
> - * .text gets overwritten. */
> -extern void
> -make_cpus_ready(unsigned int max_cpus, unsigned long boot_phys_offset);
> +extern void arch_cpu_init(int cpu, struct dt_device_node *dn);
> +extern int arch_cpu_up(int cpu);
> +
> +/* Secondary CPU entry point */
> +extern void init_secondary(void);
>  
>  extern void smp_clear_cpu_maps (void);
>  extern int smp_get_max_cpus (void);
>  #endif
> +
>  /*
>   * Local variables:
>   * mode: C
> 


-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.