[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Minios-devel] [PATCH 03/40] arm64: add the boot code
On Fri, Nov 03, 2017 at 04:42:05PM +0000, Julien Grall wrote: > > + .macro mem_clear, addr, len > > + adr x0, \addr > > + add x1, x0, \len > > +1: stp xzr, xzr, [x0], #16 > > + stp xzr, xzr, [x0], #16 > > + stp xzr, xzr, [x0], #16 > > + stp xzr, xzr, [x0], #16 > > + cmp x0, x1 > > + b.lo 1b > > + .endm > > + > > + .data > > If you move all those information in bss you avoid to worry about the > initialization later on. A good idea. I will do it in the next version. > > This is also assuming that clearing the memory is done much earlier than > doing it in C. In any case, clearing BSS much earlier would also avoid > potential screw up if a global is been used in the code. > > > + .globl _boot_stack > > + .globl boot_l1_pgtable, boot_l2_pgtable, boot_l2_pgtable1 > > + .globl idmap_pgtable > > Please put the .globl before each symbols. okay. > > > + > > + .align 12 > > +boot_l1_pgtable: > > + .space PAGE_SIZE > > +boot_l2_pgtable: > > + .space PAGE_SIZE > > +boot_l2_pgtable1: > > + .space PAGE_SIZE > > +idmap_pgtable: > > + .space PAGE_SIZE > > + > > + .align 12 > > +_boot_stack: > > + .space (4 * PAGE_SIZE) > > I thought here was a define about the size of the stack? Yes. we can add a macro for the stack size. > > > +stack_end: > > + > > +/* > > + * Kernel startup entry point. > > + * > > + * Please refer to kernel file Documentation/arm64/booting.txt > > Mini-OS is a kernel. So which kernel is it? Fix it in next version. > > > + * for the header format. > > + */ > > + .text > > + > > + b _start /* branch to kernel start, magic */ > > + .long 0 /* reserved */ > > + .quad 0x0 /* Image load offset from start of RAM > > */ > > + .quad _end - _start /* Effective Image size */ > > + .quad 2 /* kernel flages: LE, 4K page size */ > > s/flages/flags/ okay. > > Also do we really care to have the kernel placed as close as possible to the > base of the DRAM? I have no idea about this.... > > > + .quad 0 /* reserved */ > > + .quad 0 /* reserved */ > > + .quad 0 /* reserved */ > > + .byte 0x41 /* Magic number, "ARM\x64" */ > > + .byte 0x52 > > + .byte 0x4d > > + .byte 0x64 > > + .long 0 /* reserved */ > > + > > +/* > > + * Get the phys-offset, and save it in x22 > > + */ > > +ENTRY(_calc_offset) > > ENTRY will export the symbol. Can you explain why you need to do that? The ENTRY is not needed. I will change it. > > > + ldr x22, =_start /* x0 := vaddr(_start) */ > > + adr x21, _start /* x21 := paddr(_start) */ > > + sub x22, x22, x21 /* x22 := phys-offset (vaddr - paddr) > > */ > > Now, I understand why you switch from + to - in patch #5. Arm32 is > calculating the offset using paddr - vaddr. Please stay consistent. > > I believe the negative offset will badly screw arm32 so it might be worth to > stick to paddr - vaddr. I will spend some time to think about this issue... > > > + ret > > +ENDPROC(_calc_offset) > > + > > +/* > > + * Setup the memory region attribute; > > + * Setup the TCR. > > + */ > > +ENTRY(_setup_cpu) > > Ditto for exporting the symbol. okay. > > > + /* > > + * Setup memory attribute type tables > > + * > > + * Memory region attributes for LPAE: > > + * > > + * n = AttrIndx[2:0] > > + * n MAIR > > + * DEVICE_nGnRnE 000 00000000 (0x00) > > + * DEVICE_nGnRE 001 00000100 (0x04) > > + * DEVICE_GRE 010 00001100 (0x0c) > > + * NORMAL_NC 011 01000100 (0x44) > > + * NORMAL 100 11111111 (0xff) > > + */ > > + ldr x0, =(SET_MAIR(0x00, MEM_DEVICE_nGnRnE) | \ > > + SET_MAIR(0x04, MEM_DEVICE_nGnRE) | \ > > + SET_MAIR(0x0c, MEM_DEVICE_GRE) | \ > > + SET_MAIR(0x44, MEM_NORMAL_NC) | \ > > + SET_MAIR(0xff, MEM_NORMAL)) > > + msr mair_el1, x0 > > + > > + /* > > + * Setup translation control register (TCR) > > + */ > > + ldr x0, =(TCR_TxSZ(VA_BITS) | TCR_ASID16 | TCR_TG1_4K | TCR_FLAGS > > ) > > Please remove one space after TCR_TG1_4K and before ). > > Also, can you explain why you define some flags in TCR_FLAGS and other > directly here? Emm, it really looks strange. I can remove it in next version. > > > + msr tcr_el1, x0 > > + > > + ret > > +ENDPROC(_setup_cpu) > > + > > + > > +/* > > + * Setup the mapping for code section and device tree > > + * > > + * => x20 = device tree address > > + * <= x4 -> for TTBR1_EL1 > > + */ > > +ENTRY(_setup_initial_pgtable) > > Ditto for the exporting the symbol. okay.. > > > + /* Clear page tables */ > > + mem_clear boot_l1_pgtable,#PAGE_SIZE > > space before and after , > > > + mem_clear boot_l2_pgtable,#PAGE_SIZE > > Ditto > > > + mem_clear boot_l2_pgtable1,#PAGE_SIZE > > Ditto okay Ditto. > > > + > > + adr x4, boot_l1_pgtable /* x4 := paddr (boot_l1_pgtable) */ > > + adr x5, boot_l2_pgtable /* x5 := paddr (boot_l2_pgtable) */ > > + > > + /* Find the size of the kernel */ > > + ldr x0, =_text /* x0 := vaddr(_text) */ > > + ldr x1, =_end /* x1 := vaddr(_end) */ > > + sub x2, x1, x0 > > + /* Get the number of l2 pages to allocate, rounded down */ > > + lsr x2, x2, #L2_SHIFT > > + /* Add 2 MiB for rounding above */ > > + add x2, x2, #1 /* x2 := total number of entries */ > I believe, this algo will result to sometimes map 2MB more than necessary. > This would happen if the code is exactly 2MB. yes. but it is okay. > > > + > > + /* Find the table index */ > > + lsr x3, x0, #L2_SHIFT /* L2_SHIFT = 21 */ > > + and x3, x3, #Ln_ADDR_MASK /* x3 := index of l2 table */ > > + > > + > > Spurious line. okay. > > > + /* Build the L2 block entries */ > > + sub x6, x0, x22 /* x6 := paddr(_text) */ > > + lsr x6, x6, #L2_SHIFT /* L2_SHIFT = 21 */ > > + mov x7, #PT_MEM > > +1: orr x7, x7, x6, lsl #L2_SHIFT /* x7 := l2 pgtbl entry content */ > > + > > + /* Store the entry */ > > + str x7, [x5, x3, lsl #3] > > + > > + /* Clear the address bits */ > > + and x7, x7, #ATTR_MASK_L > > + > > + sub x2, x2, #1 > > + add x3, x3, #1 > > + add x6, x6, #1 > > + cbnz x2, 1b > > + > > + /* Link the l1 -> l2 table */ > > + /* Find the table index */ > > + lsr x3, x0, #L1_SHIFT > > + and x3, x3, #Ln_ADDR_MASK /* x3 := index of l1 table */ > > + > > + /* Build the L1 page table entry */ > > + ldr x7, =PT_PT > > + lsr x9, x5, #12 > > Sometimes you use define, other plain value. Can you please stay consistent > and always use define? I will change it in next version. > > > + orr x7, x7, x9, lsl #12 > > + > > + /* Store the L1 entry */ > > + str x7, [x4, x3, lsl #3] > > + > > + /* Start to map the Device-Tree */ > > Per the kernel protocol: > > "The device tree blob (dtb) must be placed on an 8-byte boundary and must > not exceed 2 megabytes in size. Since the dtb will be mapped cacheable > using blocks of up to 2 megabytes in size, it must not be placed within > any 2M region which must be mapped with any specific attributes. > > NOTE: versions prior to v4.2 also require that the DTB be placed within > the 512 MB region starting at text_offset bytes below the kernel Image." > > So the DTB may span over two 2MB regions. > > But do you really need to map the DT from assembly code? Yes, we cannot allocate pages for the page tables in C language. > > > + lsr x3, x20, #L1_SHIFT > > + and x3, x3, #Ln_ADDR_MASK /* x3 := index of l1 table */ > > + ldr x0, [x4, x3, lsl #3] > > + cbz x0, 2f > > Please document that test. okay, I will post the text above in the next version. > > > + > > + /* Setup the new l2 page table */ > > + ldr x7, =PT_PT > > + adr x6, boot_l2_pgtable1 > > + lsr x9, x6, #12 > > + orr x7, x7, x9, lsl #12 > > + > > + /* Store the L1 entry */ > > + str x7, [x4, x3, lsl #3] > > + > > + mov x5, x6 > > +2: > > + mov x8, x5 /* x8 := the l2 page table */ > > + > > + lsr x3, x20, #L2_SHIFT > > + and x3, x3, #Ln_ADDR_MASK /* x3 := index of l2 table */ > > + > > + /* Build the L2 block entries */ > > AFAICT, you only build one. So s/entries/entry/ okay. > > > + mov x6, x20 > > + lsr x6, x6, #L2_SHIFT > > + mov x7, #PT_MEM > > + orr x7, x7, x6, lsl #L2_SHIFT /* x7 := l2 pgtbl entry content */ > > + > > + /* Store the entry */ > > + str x7, [x8, x3, lsl #3] > > + > > + dsb sy > > + ret > > +ENDPROC(_setup_initial_pgtable) > > + > > +/* > > + * Setup the page table for TTBR0_EL1: > > + * Mapping the page table for the code section. > > + * We use 39bit address, and just use level 1 > > + * for the mapping (we do not use level 0, level 2 and level 3). > > + * > > + * => none > > + * <= x5 : save the page table pointer for TTBR0_EL1. > > + */ > > +ENTRY(_setup_idmap_pgtable) > > + /* Clear identity mapping page table */ > > + mem_clear idmap_pgtable,#PAGE_SIZE > > + > > + adr x5, idmap_pgtable > > + > > + /* Create the VA = PA map */ > > + adr x6, _text > > + > > + /* Find the table index */ > > + lsr x0, x6, #L1_SHIFT > > + and x0, x0, #Ln_ADDR_MASK /* x0 := index of l1 table */ > > + > > + /* Build the L1 block entry */ > > + ldr x1, =PT_MEM > > + lsr x2, x6, #L1_SHIFT > > + orr x1, x1, x2, lsl #L1_SHIFT > > + > > + /* Store the entry */ > > + str x1, [x5, x0, lsl #3] > > + > > + dsb sy > > + ret > > +ENDPROC(_setup_idmap_pgtable) > > diff --git a/arch/arm/arm64/asm.h b/arch/arm/arm64/asm.h > > new file mode 100644 > > index 0000000..3a498c4 > > --- /dev/null > > +++ b/arch/arm/arm64/asm.h > > Nothing in this header look arm64 specific. yes. I can try to move it the arch/arm folder. > > > @@ -0,0 +1,18 @@ > > +#ifndef __ASM_H__ > > +#define __ASM_H__ > > + > > +#define ALIGN .align 4 > > + > > +#define ENTRY(name) \ > > + .globl name; \ > > + ALIGN; \ > > + name: > > + > > +#define END(name) \ > > + .size name, .-name > > + > > +#define ENDPROC(name) \ > > + .type name, @function; \ > > + END(name) > > + > > +#endif /* __ASM_H__ */ > > diff --git a/include/arm/arm64/pagetable.h b/include/arm/arm64/pagetable.h > > new file mode 100644 > > index 0000000..1e3a472 > > --- /dev/null > > +++ b/include/arm/arm64/pagetable.h > > @@ -0,0 +1,106 @@ > > +#ifndef __ARM64_PAGE_TABLE__ > > + > > +#define __ARM64_PAGE_TABLE__ > > + > > +/* TCR flags */ > > +#define TCR_TxSZ(x) ((((64) - (x)) << 16) | (((64) - (x)) << 0)) > > +#define TCR_IRGN_WBWA (((1) << 8) | ((1) << 24)) > > +#define TCR_ORGN_WBWA (((1) << 10) | ((1) << 26)) > > +#define TCR_SHARED (((3) << 12) | ((3) << 28)) > > +#define TCR_ASID16 ((1) << 36) > > This is not going to work well if you use them in C code. As pretty much all > the values within this header. These flags are not used in C code, only used in assembly code. > > > +#define TCR_IPS_40BIT ((2) << 32) > > +#define TCR_TG1_4K ((2) << 30) > > + > > +#define TCR_FLAGS (TCR_IRGN_WBWA | TCR_ORGN_WBWA | TCR_SHARED | > > TCR_IPS_40BIT) > > Please add a comment explain the flags used. I will remove it. > > > + > > +/* Number of virtual address bits */ > > +#define VA_BITS 39 > > + > > +/* > > + * Memory types available. > > + */ > > +#define MEM_DEVICE_nGnRnE 0 > > +#define MEM_DEVICE_nGnRE 1 > > +#define MEM_DEVICE_GRE 2 > > +#define MEM_NORMAL_NC 3 > > +#define MEM_NORMAL 4 > > + > > +#define SET_MAIR(attr, mt) ((attr) << ((mt) * 8)) > > + > > +/* SCTLR_EL1 - System Control Register */ > > +#define SCTLR_M 0x00000001 > > +#define SCTLR_C 0x00000004 > > +#define SCTLR_I 0x00001000 > > Can you please use shift as everywhere else? okay. > > > + > > +/* Level 0 table, 512GiB per entry */ > > +#define L0_SHIFT 39 > > +#define L0_INVAL 0x0 /* An invalid address */ > > +#define L0_TABLE 0x3 /* A next-level table */ > > + > > +/* Level 1 table, 1GiB per entry */ > > +#define L1_SHIFT 30 > > +#define L1_SIZE (1 << L1_SHIFT) > > +#define L1_OFFSET (L1_SIZE - 1) > > +#define L1_INVAL L0_INVAL > > +#define L1_BLOCK 0x1 > > +#define L1_TABLE L0_TABLE > > +#define L1_MASK (~(L1_SIZE-1)) > > + > > +/* Level 2 table, 2MiB per entry */ > > +#define L2_SHIFT 21 > > +#define L2_SIZE (1 << L2_SHIFT) > > +#define L2_OFFSET (L2_SIZE - 1) > > +#define L2_INVAL L0_INVAL > > +#define L2_BLOCK L1_BLOCK > > +#define L2_TABLE L0_TABLE > > +#define L2_MASK (~(L2_SIZE-1)) > > + > > +/* Level 3 table, 4KiB per entry */ > > +#define L3_SHIFT 12 > > +#define L3_SIZE (1 << L3_SHIFT) > > +#define L3_OFFSET (L3_SIZE - 1) > > +#define L3_INVAL 0x0 > > +#define L3_PAGE 0x3 > > +#define L3_MASK (~(L3_SIZE-1)) > > + > > +#define Ln_ENTRIES (1 << 9) > > +#define Ln_ADDR_MASK (Ln_ENTRIES - 1) > > + > > +#define ATTR_MASK_L 0xfff > > + > > +#define l1_pgt_idx(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) > > +#define l2_pgt_idx(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) > > +#define l3_pgt_idx(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) > > + > > +/* > > + * Lower attributes fields in Stage 1 VMSAv8-A Block and Page descriptor > > + */ > > +#define ATTR_nG (1 << 11) > > +#define ATTR_AF (1 << 10) > > +#define ATTR_SH(x) ((x) << 8) > > +#define ATTR_SH_MASK ATTR_SH(3) > > +#define ATTR_SH_NS 0 /* Non-shareable */ > > +#define ATTR_SH_OS 2 /* Outer-shareable */ > > +#define ATTR_SH_IS 3 /* Inner-shareable */ > > +#define ATTR_AP_RW_BIT (1 << 7) > > +#define ATTR_AP(x) ((x) << 6) > > +#define ATTR_AP_MASK ATTR_AP(3) > > +#define ATTR_AP_RW (0 << 1) > > +#define ATTR_AP_RO (1 << 1) > > +#define ATTR_AP_USER (1 << 0) > > +#define ATTR_NS (1 << 5) > > +#define ATTR_IDX(x) ((x) << 2) > > +#define ATTR_IDX_MASK (7 << 2) > > + > > +#define BLOCK_DEF_ATTR > > (ATTR_AF|ATTR_SH(ATTR_SH_IS)|ATTR_IDX(MEM_NORMAL)) > > +#define BLOCK_NC_ATTR > > (ATTR_AF|ATTR_SH(ATTR_SH_IS)|ATTR_IDX(MEM_NORMAL_NC)) > > +#define BLOCK_DEV_ATTR > > (ATTR_AF|ATTR_SH(ATTR_SH_IS)|ATTR_IDX(MEM_DEVICE_nGnRnE)) > > + > > +#define PT_PT (L0_TABLE) > > +#define PT_MEM (BLOCK_DEF_ATTR | L1_BLOCK) > > + > > +#ifndef PAGE_SIZE > > This looks quite wrong. How can it be possible to define multiple PAGE_SIZE? > What if they are not the same size? okay, I will remove it. Thanks Huang Shijie _______________________________________________ Minios-devel mailing list Minios-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/cgi-bin/mailman/listinfo/minios-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |