[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Minios-devel] [PATCH 03/40] arm64: add the boot code
Hi Shijie, On 03/11/17 03:11, Huang Shijie wrote: This patch adds the boot code for arm64: 0.) add the header which contains all the macros to setup the page table 1.) init the MAIR/TCR for 39 bit virtual address. 2.) setup the page table. 3.) enable the MMU This patch refers to Chen Baozi's patch: "Initial codes for arm64" Change-Id: I5371640c3f58d2659b31a1009352fdb7b1b02833 Jira: ENTOS-247 Signed-off-by: Huang Shijie <shijie.huang@xxxxxxx> --- arch/arm/arm64/arm64.S | 307 ++++++++++++++++++++++++++++++++++++++++++ arch/arm/arm64/asm.h | 18 +++ include/arm/arm64/pagetable.h | 106 +++++++++++++++ 3 files changed, 431 insertions(+) create mode 100644 arch/arm/arm64/arm64.S create mode 100644 arch/arm/arm64/asm.h create mode 100644 include/arm/arm64/pagetable.h diff --git a/arch/arm/arm64/arm64.S b/arch/arm/arm64/arm64.S new file mode 100644 index 0000000..61c14a6 --- /dev/null +++ b/arch/arm/arm64/arm64.S @@ -0,0 +1,307 @@ +#include "asm.h" +#include <arm64/pagetable.h> +#include <xen/xen.h> + +/* This macro will use the x0/x1/x2/x16 */ +#define PRINT(_s) \ + adr x2, 97f; \ + adr x1, 98f; \ + sub x1, x1, x2; \ + mov x0, #CONSOLEIO_write; \ + mov x16, #__HYPERVISOR_console_io; \ + hvc #XEN_HYPERCALL_TAG; \ + b 99f; \ +97: .asciz _s; \ +98: ; \ + .align 2; \ +99: \ + + .macro mem_clear, addr, len + adr x0, \addr + add x1, x0, \len +1: stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + cmp x0, x1 + b.lo 1b + .endm + + .data If you move all those information in bss you avoid to worry about the initialization later on. This is also assuming that clearing the memory is done much earlier than doing it in C. In any case, clearing BSS much earlier would also avoid potential screw up if a global is been used in the code. + .globl _boot_stack + .globl boot_l1_pgtable, boot_l2_pgtable, boot_l2_pgtable1 + .globl idmap_pgtable Please put the .globl before each symbols. + + .align 12 +boot_l1_pgtable: + .space PAGE_SIZE +boot_l2_pgtable: + .space PAGE_SIZE +boot_l2_pgtable1: + .space PAGE_SIZE +idmap_pgtable: + .space PAGE_SIZE + + .align 12 +_boot_stack: + .space (4 * PAGE_SIZE) I thought here was a define about the size of the stack? +stack_end: + +/* + * Kernel startup entry point. + * + * Please refer to kernel file Documentation/arm64/booting.txt Mini-OS is a kernel. So which kernel is it? + * for the header format. + */ + .text + + b _start /* branch to kernel start, magic */ + .long 0 /* reserved */ + .quad 0x0 /* Image load offset from start of RAM */ + .quad _end - _start /* Effective Image size */ + .quad 2 /* kernel flages: LE, 4K page size */ s/flages/flags/Also do we really care to have the kernel placed as close as possible to the base of the DRAM? + .quad 0 /* reserved */ + .quad 0 /* reserved */ + .quad 0 /* reserved */ + .byte 0x41 /* Magic number, "ARM\x64" */ + .byte 0x52 + .byte 0x4d + .byte 0x64 + .long 0 /* reserved */ + +/* + * Primary CPU general-purpose register settings + * x0 = physical address of device tree blob (dtb) in system RAM. + * x1 = 0 (reserved for future use) + * x2 = 0 (reserved for future use) + * x3 = 0 (reserved for future use) + * + * The registers used by _start: + * x20 - FDT pointer + * x22 - offset between PA and VA + */ +ENTRY(_start) + /* Save the FDT pointer */ + mov x20, x0 + + /* Calculate where we are */ + bl _calc_offset + + PRINT("- Mini-OS booting -\n") + + PRINT("- Setup CPU -\n") + /* Setup CPU for turning on the MMU. */ + bl _setup_cpu + + PRINT("- Setup booting pagetable -\n") + /* Setup the initial page table. */ + bl _setup_initial_pgtable + + /* Setup the identity mapping */ + bl _setup_idmap_pgtable + + /* Load TTBRx */ + msr ttbr1_el1, x4 + msr ttbr0_el1, x5 + isb + + /* Turning on MMU */ + tlbi vmalle1 + dsb nsh + isb + ldr x1, =(SCTLR_M | SCTLR_C | SCTLR_I) + msr sctlr_el1, x1 + isb + + PRINT("- MMU on -\n") + ldr x0, =mmu_on + br x0 + +mmu_on: + /* Setup stack */ + PRINT("- Setup stack -\n") + mem_clear _boot_stack,#(4*PAGE_SIZE) + + ldr x1, =stack_end + mov sp, x1 + + PRINT("- Jumping to C entry -\n") + add x20, x20, x22 + mov x0, x20 /* x0 <- device tree (virtual address) */ + mov x1, x22 /* x1 <- phys_offset */ + + b arch_init +ENDPROC(_start) + +/* + * Get the phys-offset, and save it in x22 + */ +ENTRY(_calc_offset) ENTRY will export the symbol. Can you explain why you need to do that? + ldr x22, =_start /* x0 := vaddr(_start) */ + adr x21, _start /* x21 := paddr(_start) */ + sub x22, x22, x21 /* x22 := phys-offset (vaddr - paddr) */ Now, I understand why you switch from + to - in patch #5. Arm32 is calculating the offset using paddr - vaddr. Please stay consistent. I believe the negative offset will badly screw arm32 so it might be worth to stick to paddr - vaddr. + ret +ENDPROC(_calc_offset) + +/* + * Setup the memory region attribute; + * Setup the TCR. + */ +ENTRY(_setup_cpu) Ditto for exporting the symbol. + /* + * Setup memory attribute type tables + * + * Memory region attributes for LPAE: + * + * n = AttrIndx[2:0] + * n MAIR + * DEVICE_nGnRnE 000 00000000 (0x00) + * DEVICE_nGnRE 001 00000100 (0x04) + * DEVICE_GRE 010 00001100 (0x0c) + * NORMAL_NC 011 01000100 (0x44) + * NORMAL 100 11111111 (0xff) + */ + ldr x0, =(SET_MAIR(0x00, MEM_DEVICE_nGnRnE) | \ + SET_MAIR(0x04, MEM_DEVICE_nGnRE) | \ + SET_MAIR(0x0c, MEM_DEVICE_GRE) | \ + SET_MAIR(0x44, MEM_NORMAL_NC) | \ + SET_MAIR(0xff, MEM_NORMAL)) + msr mair_el1, x0 + + /* + * Setup translation control register (TCR) + */ + ldr x0, =(TCR_TxSZ(VA_BITS) | TCR_ASID16 | TCR_TG1_4K | TCR_FLAGS ) Please remove one space after TCR_TG1_4K and before ).Also, can you explain why you define some flags in TCR_FLAGS and other directly here? + msr tcr_el1, x0 + + ret +ENDPROC(_setup_cpu) + + +/* + * Setup the mapping for code section and device tree + * + * => x20 = device tree address + * <= x4 -> for TTBR1_EL1 + */ +ENTRY(_setup_initial_pgtable) Ditto for the exporting the symbol. + /* Clear page tables */ + mem_clear boot_l1_pgtable,#PAGE_SIZE space before and after , + mem_clear boot_l2_pgtable,#PAGE_SIZE Ditto + mem_clear boot_l2_pgtable1,#PAGE_SIZE Ditto I believe, this algo will result to sometimes map 2MB more than necessary. This would happen if the code is exactly 2MB.+ + adr x4, boot_l1_pgtable /* x4 := paddr (boot_l1_pgtable) */ + adr x5, boot_l2_pgtable /* x5 := paddr (boot_l2_pgtable) */ + + /* Find the size of the kernel */ + ldr x0, =_text /* x0 := vaddr(_text) */ + ldr x1, =_end /* x1 := vaddr(_end) */ + sub x2, x1, x0 + /* Get the number of l2 pages to allocate, rounded down */ + lsr x2, x2, #L2_SHIFT + /* Add 2 MiB for rounding above */ + add x2, x2, #1 /* x2 := total number of entries */ + + /* Find the table index */ + lsr x3, x0, #L2_SHIFT /* L2_SHIFT = 21 */ + and x3, x3, #Ln_ADDR_MASK /* x3 := index of l2 table */ + + Spurious line. + /* Build the L2 block entries */ + sub x6, x0, x22 /* x6 := paddr(_text) */ + lsr x6, x6, #L2_SHIFT /* L2_SHIFT = 21 */ + mov x7, #PT_MEM +1: orr x7, x7, x6, lsl #L2_SHIFT /* x7 := l2 pgtbl entry content */ + + /* Store the entry */ + str x7, [x5, x3, lsl #3] + + /* Clear the address bits */ + and x7, x7, #ATTR_MASK_L + + sub x2, x2, #1 + add x3, x3, #1 + add x6, x6, #1 + cbnz x2, 1b + + /* Link the l1 -> l2 table */ + /* Find the table index */ + lsr x3, x0, #L1_SHIFT + and x3, x3, #Ln_ADDR_MASK /* x3 := index of l1 table */ + + /* Build the L1 page table entry */ + ldr x7, =PT_PT + lsr x9, x5, #12 Sometimes you use define, other plain value. Can you please stay consistent and always use define? + orr x7, x7, x9, lsl #12 + + /* Store the L1 entry */ + str x7, [x4, x3, lsl #3] + + /* Start to map the Device-Tree */ Per the kernel protocol: "The device tree blob (dtb) must be placed on an 8-byte boundary and must not exceed 2 megabytes in size. Since the dtb will be mapped cacheable using blocks of up to 2 megabytes in size, it must not be placed within any 2M region which must be mapped with any specific attributes. NOTE: versions prior to v4.2 also require that the DTB be placed within the 512 MB region starting at text_offset bytes below the kernel Image." So the DTB may span over two 2MB regions. But do you really need to map the DT from assembly code? + lsr x3, x20, #L1_SHIFT + and x3, x3, #Ln_ADDR_MASK /* x3 := index of l1 table */ + ldr x0, [x4, x3, lsl #3] + cbz x0, 2f Please document that test. + + /* Setup the new l2 page table */ + ldr x7, =PT_PT + adr x6, boot_l2_pgtable1 + lsr x9, x6, #12 + orr x7, x7, x9, lsl #12 + + /* Store the L1 entry */ + str x7, [x4, x3, lsl #3] + + mov x5, x6 +2: + mov x8, x5 /* x8 := the l2 page table */ + + lsr x3, x20, #L2_SHIFT + and x3, x3, #Ln_ADDR_MASK /* x3 := index of l2 table */ + + /* Build the L2 block entries */ AFAICT, you only build one. So s/entries/entry/ + mov x6, x20 + lsr x6, x6, #L2_SHIFT + mov x7, #PT_MEM + orr x7, x7, x6, lsl #L2_SHIFT /* x7 := l2 pgtbl entry content */ + + /* Store the entry */ + str x7, [x8, x3, lsl #3] + + dsb sy + ret +ENDPROC(_setup_initial_pgtable) + +/* + * Setup the page table for TTBR0_EL1: + * Mapping the page table for the code section. + * We use 39bit address, and just use level 1 + * for the mapping (we do not use level 0, level 2 and level 3). + * + * => none + * <= x5 : save the page table pointer for TTBR0_EL1. + */ +ENTRY(_setup_idmap_pgtable) + /* Clear identity mapping page table */ + mem_clear idmap_pgtable,#PAGE_SIZE + + adr x5, idmap_pgtable + + /* Create the VA = PA map */ + adr x6, _text + + /* Find the table index */ + lsr x0, x6, #L1_SHIFT + and x0, x0, #Ln_ADDR_MASK /* x0 := index of l1 table */ + + /* Build the L1 block entry */ + ldr x1, =PT_MEM + lsr x2, x6, #L1_SHIFT + orr x1, x1, x2, lsl #L1_SHIFT + + /* Store the entry */ + str x1, [x5, x0, lsl #3] + + dsb sy + ret +ENDPROC(_setup_idmap_pgtable) diff --git a/arch/arm/arm64/asm.h b/arch/arm/arm64/asm.h new file mode 100644 index 0000000..3a498c4 --- /dev/null +++ b/arch/arm/arm64/asm.h Nothing in this header look arm64 specific. @@ -0,0 +1,18 @@ +#ifndef __ASM_H__ +#define __ASM_H__ + +#define ALIGN .align 4 + +#define ENTRY(name) \ + .globl name; \ + ALIGN; \ + name: + +#define END(name) \ + .size name, .-name + +#define ENDPROC(name) \ + .type name, @function; \ + END(name) + +#endif /* __ASM_H__ */ diff --git a/include/arm/arm64/pagetable.h b/include/arm/arm64/pagetable.h new file mode 100644 index 0000000..1e3a472 --- /dev/null +++ b/include/arm/arm64/pagetable.h @@ -0,0 +1,106 @@ +#ifndef __ARM64_PAGE_TABLE__ + +#define __ARM64_PAGE_TABLE__ + +/* TCR flags */ +#define TCR_TxSZ(x) ((((64) - (x)) << 16) | (((64) - (x)) << 0)) +#define TCR_IRGN_WBWA (((1) << 8) | ((1) << 24)) +#define TCR_ORGN_WBWA (((1) << 10) | ((1) << 26)) +#define TCR_SHARED (((3) << 12) | ((3) << 28)) +#define TCR_ASID16 ((1) << 36) This is not going to work well if you use them in C code. As pretty much all the values within this header. +#define TCR_IPS_40BIT ((2) << 32) +#define TCR_TG1_4K ((2) << 30) + +#define TCR_FLAGS (TCR_IRGN_WBWA | TCR_ORGN_WBWA | TCR_SHARED | TCR_IPS_40BIT) Please add a comment explain the flags used. + +/* Number of virtual address bits */ +#define VA_BITS 39 + +/* + * Memory types available. + */ +#define MEM_DEVICE_nGnRnE 0 +#define MEM_DEVICE_nGnRE 1 +#define MEM_DEVICE_GRE 2 +#define MEM_NORMAL_NC 3 +#define MEM_NORMAL 4 + +#define SET_MAIR(attr, mt) ((attr) << ((mt) * 8)) + +/* SCTLR_EL1 - System Control Register */ +#define SCTLR_M 0x00000001 +#define SCTLR_C 0x00000004 +#define SCTLR_I 0x00001000 Can you please use shift as everywhere else? + +/* Level 0 table, 512GiB per entry */ +#define L0_SHIFT 39 +#define L0_INVAL 0x0 /* An invalid address */ +#define L0_TABLE 0x3 /* A next-level table */ + +/* Level 1 table, 1GiB per entry */ +#define L1_SHIFT 30 +#define L1_SIZE (1 << L1_SHIFT) +#define L1_OFFSET (L1_SIZE - 1) +#define L1_INVAL L0_INVAL +#define L1_BLOCK 0x1 +#define L1_TABLE L0_TABLE +#define L1_MASK (~(L1_SIZE-1)) + +/* Level 2 table, 2MiB per entry */ +#define L2_SHIFT 21 +#define L2_SIZE (1 << L2_SHIFT) +#define L2_OFFSET (L2_SIZE - 1) +#define L2_INVAL L0_INVAL +#define L2_BLOCK L1_BLOCK +#define L2_TABLE L0_TABLE +#define L2_MASK (~(L2_SIZE-1)) + +/* Level 3 table, 4KiB per entry */ +#define L3_SHIFT 12 +#define L3_SIZE (1 << L3_SHIFT) +#define L3_OFFSET (L3_SIZE - 1) +#define L3_INVAL 0x0 +#define L3_PAGE 0x3 +#define L3_MASK (~(L3_SIZE-1)) + +#define Ln_ENTRIES (1 << 9) +#define Ln_ADDR_MASK (Ln_ENTRIES - 1) + +#define ATTR_MASK_L 0xfff + +#define l1_pgt_idx(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) +#define l2_pgt_idx(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) +#define l3_pgt_idx(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) + +/* + * Lower attributes fields in Stage 1 VMSAv8-A Block and Page descriptor + */ +#define ATTR_nG (1 << 11) +#define ATTR_AF (1 << 10) +#define ATTR_SH(x) ((x) << 8) +#define ATTR_SH_MASK ATTR_SH(3) +#define ATTR_SH_NS 0 /* Non-shareable */ +#define ATTR_SH_OS 2 /* Outer-shareable */ +#define ATTR_SH_IS 3 /* Inner-shareable */ +#define ATTR_AP_RW_BIT (1 << 7) +#define ATTR_AP(x) ((x) << 6) +#define ATTR_AP_MASK ATTR_AP(3) +#define ATTR_AP_RW (0 << 1) +#define ATTR_AP_RO (1 << 1) +#define ATTR_AP_USER (1 << 0) +#define ATTR_NS (1 << 5) +#define ATTR_IDX(x) ((x) << 2) +#define ATTR_IDX_MASK (7 << 2) + +#define BLOCK_DEF_ATTR (ATTR_AF|ATTR_SH(ATTR_SH_IS)|ATTR_IDX(MEM_NORMAL)) +#define BLOCK_NC_ATTR (ATTR_AF|ATTR_SH(ATTR_SH_IS)|ATTR_IDX(MEM_NORMAL_NC)) +#define BLOCK_DEV_ATTR (ATTR_AF|ATTR_SH(ATTR_SH_IS)|ATTR_IDX(MEM_DEVICE_nGnRnE)) + +#define PT_PT (L0_TABLE) +#define PT_MEM (BLOCK_DEF_ATTR | L1_BLOCK) + +#ifndef PAGE_SIZE This looks quite wrong. How can it be possible to define multiple PAGE_SIZE? What if they are not the same size? +#define PAGE_SIZE L3_SIZE +#endif + +#endif Cheers, -- Julien Grall _______________________________________________ Minios-devel mailing list Minios-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/cgi-bin/mailman/listinfo/minios-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |