|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v5 07/19] xen: arm: allocate dom0 memory separately from preparing the dtb
On Wed, 13 Nov 2013, Ian Campbell wrote:
> Mixing these two together is a pain, it forces us to prepare the dtb before
> processing the kernel which means we don't know whether the guest is 32- or
> 64-bit while we construct its DTB.
>
> Instead split out the memory allocation (including 1:1 workaround handling)
> and p2m setup into a separate phase and then create a memory node in the DTB
> based on the result.
>
> This allows us to move kernel parsing before DTB setup.
>
> As part of this it was also necessary to rework where the decision regarding
> the placement of the DTB and initrd in RAM was made. It is now made when
> loading the kernel, which allows it to make use of the zImage/ELF specific
> information and therefore to make decisions based on complete knowledge and do
> it right rather than guessing in prepare_dtb and relying on a later check to
> see if things worked.
>
> Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
It looks good
> v3: Also rework module placement, v2 broke boot because dtb_paddr wasn't set
> soon enough. This ends up cleaner anyway.
> v2: Fixed typo in the commit log
> Handle multiple memory nodes as well as individual nodes with several
> entries in them.
> Strip the original memory node and recreate rather than trying to modify.
> ---
> xen/arch/arm/domain_build.c | 203
> ++++++++++++++++++++++---------------------
> xen/arch/arm/kernel.c | 80 +++++++++++------
> xen/arch/arm/kernel.h | 2 -
> 3 files changed, 158 insertions(+), 127 deletions(-)
>
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index 8645aa1..edfcf14 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -81,11 +81,8 @@ struct vcpu *__init alloc_dom0_vcpu0(void)
> return alloc_vcpu(dom0, 0, 0);
> }
>
> -static int set_memory_reg_11(struct domain *d, struct kernel_info *kinfo,
> - const struct dt_property *pp,
> - const struct dt_device_node *np, __be32
> *new_cell)
> +static void allocate_memory_11(struct domain *d, struct kernel_info *kinfo)
> {
> - int reg_size = dt_cells_to_size(dt_n_addr_cells(np) +
> dt_n_size_cells(np));
> paddr_t start;
> paddr_t size;
> struct page_info *pg = NULL;
> @@ -116,53 +113,61 @@ static int set_memory_reg_11(struct domain *d, struct
> kernel_info *kinfo,
> if ( res )
> panic("Unable to add pages in DOM0: %d\n", res);
>
> - dt_set_range(&new_cell, np, start, size);
> -
> kinfo->mem.bank[0].start = start;
> kinfo->mem.bank[0].size = size;
> kinfo->mem.nr_banks = 1;
>
> - return reg_size;
> + kinfo->unassigned_mem -= size;
> }
>
> -static int set_memory_reg(struct domain *d, struct kernel_info *kinfo,
> - const struct dt_property *pp,
> - const struct dt_device_node *np, __be32 *new_cell)
> +static void allocate_memory(struct domain *d, struct kernel_info *kinfo)
> {
> - int reg_size = dt_cells_to_size(dt_n_addr_cells(np) +
> dt_n_size_cells(np));
> - int l = 0;
> +
> + struct dt_device_node *memory = NULL;
> + const void *reg;
> + u32 reg_len, reg_size;
> unsigned int bank = 0;
> - u64 start;
> - u64 size;
> - int ret;
>
> if ( platform_has_quirk(PLATFORM_QUIRK_DOM0_MAPPING_11) )
> - return set_memory_reg_11(d, kinfo, pp, np, new_cell);
> + return allocate_memory_11(d, kinfo);
>
> - while ( kinfo->unassigned_mem > 0 && l + reg_size <= pp->length
> - && kinfo->mem.nr_banks < NR_MEM_BANKS )
> + while ( (memory = dt_find_node_by_type(memory, "memory")) )
> {
> - ret = dt_device_get_address(np, bank, &start, &size);
> - if ( ret )
> - panic("Unable to retrieve the bank %u for %s\n",
> - bank, dt_node_full_name(np));
> -
> - if ( size > kinfo->unassigned_mem )
> - size = kinfo->unassigned_mem;
> - dt_set_range(&new_cell, np, start, size);
> -
> - printk("Populate P2M %#"PRIx64"->%#"PRIx64"\n", start, start + size);
> - if ( p2m_populate_ram(d, start, start + size) < 0 )
> - panic("Failed to populate P2M\n");
> - kinfo->mem.bank[kinfo->mem.nr_banks].start = start;
> - kinfo->mem.bank[kinfo->mem.nr_banks].size = size;
> - kinfo->mem.nr_banks++;
> - kinfo->unassigned_mem -= size;
> -
> - l += reg_size;
> - }
> + int l;
> +
> + DPRINT("memory node\n");
> +
> + reg_size = dt_cells_to_size(dt_n_addr_cells(memory) +
> dt_n_size_cells(memory));
> +
> + reg = dt_get_property(memory, "reg", ®_len);
> + if ( reg == NULL )
> + panic("Memory node has no reg property!\n");
> +
> + for ( l = 0;
> + kinfo->unassigned_mem > 0 && l + reg_size <= reg_len
> + && kinfo->mem.nr_banks < NR_MEM_BANKS;
> + l += reg_size )
> + {
> + paddr_t start, size;
>
> - return l;
> + if ( dt_device_get_address(memory, bank, &start, &size) )
> + panic("Unable to retrieve the bank %u for %s\n",
> + bank, dt_node_full_name(memory));
> +
> + if ( size > kinfo->unassigned_mem )
> + size = kinfo->unassigned_mem;
> +
> + printk("Populate P2M %#"PRIx64"->%#"PRIx64"\n",
> + start, start + size);
> + if ( p2m_populate_ram(d, start, start + size) < 0 )
> + panic("Failed to populate P2M\n");
> + kinfo->mem.bank[kinfo->mem.nr_banks].start = start;
> + kinfo->mem.bank[kinfo->mem.nr_banks].size = size;
> + kinfo->mem.nr_banks++;
> +
> + kinfo->unassigned_mem -= size;
> + }
> + }
> }
>
> static int write_properties(struct domain *d, struct kernel_info *kinfo,
> @@ -211,23 +216,6 @@ static int write_properties(struct domain *d, struct
> kernel_info *kinfo,
> continue;
> }
> }
> - /*
> - * In a memory node: adjust reg property.
> - * TODO: handle properly memory node (ie: device_type = "memory")
> - */
> - else if ( dt_node_name_is_equal(np, "memory") )
> - {
> - if ( dt_property_name_is_equal(pp, "reg") )
> - {
> - new_data = xzalloc_bytes(pp->length);
> - if ( new_data == NULL )
> - return -FDT_ERR_XEN(ENOMEM);
> -
> - prop_len = set_memory_reg(d, kinfo, pp, np,
> - (__be32 *)new_data);
> - prop_data = new_data;
> - }
> - }
>
> res = fdt_property(kinfo->fdt, pp->name, prop_data, prop_len);
>
> @@ -304,6 +292,46 @@ static int fdt_property_interrupts(void *fdt,
> gic_interrupt_t *intr,
> return res;
> }
>
> +static int make_memory_node(const struct domain *d,
> + void *fdt,
> + const struct kernel_info *kinfo)
> +{
> + int res, i;
> + int nr_cells = XEN_FDT_NODE_REG_SIZE*kinfo->mem.nr_banks;
> + __be32 reg[nr_cells];
> + __be32 *cells;
> +
> + DPRINT("Create memory node\n");
> +
> + /* ePAPR 3.4 */
> + res = fdt_begin_node(fdt, "memory");
> + if ( res )
> + return res;
> +
> + res = fdt_property_string(fdt, "device_type", "memory");
> + if ( res )
> + return res;
> +
> + cells = ®[0];
> + for ( i = 0 ; i < kinfo->mem.nr_banks; i++ )
> + {
> + u64 start = kinfo->mem.bank[i].start;
> + u64 size = kinfo->mem.bank[i].size;
> +
> + DPRINT(" Bank %d: %#"PRIx64"->%#"PRIx64"\n",
> + i, start, start + size);
> +
> + set_xen_range(&cells, start, size);
> + }
> +
> + res = fdt_property(fdt, "reg", reg, nr_cells);
> + if ( res )
> + return res;
> +
> + res = fdt_end_node(fdt);
> +
> + return res;
> +}
>
> static int make_hypervisor_node(void *fdt, const struct dt_device_node
> *parent)
> {
> @@ -627,7 +655,8 @@ static int make_timer_node(const struct domain *d, void
> *fdt)
> }
>
> static int make_xen_node(const struct domain *d, void *fdt,
> - const struct dt_device_node *parent)
> + const struct dt_device_node *parent,
> + const struct kernel_info *kinfo)
> {
> int res;
>
> @@ -649,6 +678,10 @@ static int make_xen_node(const struct domain *d, void
> *fdt,
> if ( res )
> return res;
>
> + res = make_memory_node(d, fdt, kinfo);
> + if ( res )
> + return res;
> +
> res = make_gic_node(d, fdt);
> if ( res )
> return res;
> @@ -750,6 +783,7 @@ static int handle_node(struct domain *d, struct
> kernel_info *kinfo,
> DT_MATCH_COMPATIBLE("xen,multiboot-module"),
> DT_MATCH_COMPATIBLE("arm,psci"),
> DT_MATCH_PATH("/cpus"),
> + DT_MATCH_TYPE("memory"),
> DT_MATCH_GIC,
> DT_MATCH_TIMER,
> { /* sentinel */ },
> @@ -826,7 +860,7 @@ static int handle_node(struct domain *d, struct
> kernel_info *kinfo,
> if ( res )
> return res;
>
> - res = make_xen_node(d, kinfo->fdt, np);
> + res = make_xen_node(d, kinfo->fdt, np, kinfo);
> if ( res )
> return res;
> }
> @@ -841,14 +875,9 @@ static int prepare_dtb(struct domain *d, struct
> kernel_info *kinfo)
> const void *fdt;
> int new_size;
> int ret;
> - paddr_t end;
> - paddr_t initrd_len;
> - paddr_t dtb_len;
>
> ASSERT(dt_host && (dt_host->sibling == NULL));
>
> - kinfo->unassigned_mem = dom0_mem;
> -
> fdt = device_tree_flattened;
>
> new_size = fdt_totalsize(fdt) + DOM0_FDT_EXTRA_SIZE;
> @@ -870,36 +899,6 @@ static int prepare_dtb(struct domain *d, struct
> kernel_info *kinfo)
> if ( ret < 0 )
> goto err;
>
> - /* Align DTB and initrd size to 2Mb. Linux only requires 4 byte
> alignment */
> - initrd_len = ROUNDUP(early_info.modules.module[MOD_INITRD].size, MB(2));
> - dtb_len = ROUNDUP(fdt_totalsize(kinfo->fdt), MB(2));
> - new_size = initrd_len + dtb_len;
> -
> - /*
> - * DTB must be loaded such that it does not conflict with the
> - * kernel decompressor. For 32-bit Linux Documentation/arm/Booting
> - * recommends just after the 128MB boundary while for 64-bit Linux
> - * the recommendation in Documentation/arm64/booting.txt is below
> - * 512MB. Place at 128MB, (or, if we have less RAM, as high as
> - * possible) in order to satisfy both.
> - * If the bootloader provides an initrd, it will be loaded just
> - * after the DTB.
> - */
> - end = kinfo->mem.bank[0].start + kinfo->mem.bank[0].size;
> - end = MIN(kinfo->mem.bank[0].start + (128<<20) + new_size, end);
> -
> - kinfo->initrd_paddr = end - initrd_len;
> - kinfo->dtb_paddr = kinfo->initrd_paddr - dtb_len;
> -
> - if ( kinfo->dtb_paddr < kinfo->mem.bank[0].start ||
> - kinfo->mem.bank[0].start + new_size > end )
> - {
> - printk(XENLOG_ERR "Not enough memory in the first bank for "
> - "the device tree.");
> - ret = -FDT_ERR_XEN(EINVAL);
> - goto err;
> - }
> -
> return 0;
>
> err:
> @@ -994,11 +993,19 @@ int construct_dom0(struct domain *d)
>
> d->max_pages = ~0U;
>
> - rc = prepare_dtb(d, &kinfo);
> + kinfo.unassigned_mem = dom0_mem;
> +
> + allocate_memory(d, &kinfo);
> +
> + rc = kernel_prepare(&kinfo);
> if ( rc < 0 )
> return rc;
>
> - rc = kernel_prepare(&kinfo);
> +#ifdef CONFIG_ARM_64
> + d->arch.type = kinfo.type;
> +#endif
> +
> + rc = prepare_dtb(d, &kinfo);
> if ( rc < 0 )
> return rc;
>
> @@ -1006,9 +1013,6 @@ int construct_dom0(struct domain *d)
> if ( rc < 0 )
> return rc;
>
> - if ( kinfo.check_overlap )
> - kinfo.check_overlap(&kinfo);
> -
> /* The following loads use the domain's p2m */
> p2m_load_VTTBR(d);
> #ifdef CONFIG_ARM_64
> @@ -1019,6 +1023,10 @@ int construct_dom0(struct domain *d)
> WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_RW, HCR_EL2);
> #endif
>
> + /*
> + * kernel_load will determine the placement of the initrd & fdt in
> + * RAM, so call it first.
> + */
> kernel_load(&kinfo);
> /* initrd_load will fix up the fdt, so call it before dtb_load */
> initrd_load(&kinfo);
> @@ -1033,7 +1041,6 @@ int construct_dom0(struct domain *d)
>
> regs->pc = (register_t)kinfo.entry;
>
> -
> if ( is_pv32_domain(d) )
> {
> regs->cpsr = PSR_GUEST32_INIT;
> diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
> index 7036d94..9046797 100644
> --- a/xen/arch/arm/kernel.c
> +++ b/xen/arch/arm/kernel.c
> @@ -68,26 +68,56 @@ void copy_from_paddr(void *dst, paddr_t paddr, unsigned
> long len, int attrindx)
> clear_fixmap(FIXMAP_MISC);
> }
>
> -static void kernel_zimage_check_overlap(struct kernel_info *info)
> +static void place_modules(struct kernel_info *info,
> + paddr_t kernel_start,
> + paddr_t kernel_end)
> {
> - paddr_t zimage_start = info->zimage.load_addr;
> - paddr_t zimage_end = info->zimage.load_addr + info->zimage.len;
> - paddr_t start = info->dtb_paddr;
> - paddr_t end;
> + /* Align DTB and initrd size to 2Mb. Linux only requires 4 byte
> alignment */
> + const paddr_t initrd_len =
> + ROUNDUP(early_info.modules.module[MOD_INITRD].size, MB(2));
> + const paddr_t dtb_len = ROUNDUP(fdt_totalsize(info->fdt), MB(2));
> + const paddr_t total = initrd_len + dtb_len;
>
> - end = info->initrd_paddr + early_info.modules.module[MOD_INITRD].size;
> + /* Convenient */
> + const paddr_t mem_start = info->mem.bank[0].start;
> + const paddr_t mem_size = info->mem.bank[0].size;
> + const paddr_t mem_end = mem_start + mem_size;
> + const paddr_t kernel_size = kernel_end - kernel_start;
> +
> + paddr_t addr;
> +
> + if ( total + kernel_size > mem_size )
> + panic("Not enough memory in the first bank for the dtb+initrd.");
>
> /*
> - * In the dom0 memory, the initrd will be just after the DTB. So we
> - * only need to check if the zImage range will overlap the
> - * DTB-initrd range.
> + * DTB must be loaded such that it does not conflict with the
> + * kernel decompressor. For 32-bit Linux Documentation/arm/Booting
> + * recommends just after the 128MB boundary while for 64-bit Linux
> + * the recommendation in Documentation/arm64/booting.txt is below
> + * 512MB.
> + *
> + * If the bootloader provides an initrd, it will be loaded just
> + * after the DTB.
> + *
> + * We try to place dtb+initrd at 128MB, (or, if we have less RAM,
> + * as high as possible). If there is no space then fallback to
> + * just after the kernel, if there is room, otherwise just before.
> */
> - if ( (start > zimage_end) || (end < zimage_start) )
> +
> + if ( kernel_end < MIN(mem_start + MB(128), mem_end - total) )
> + addr = MIN(mem_start + MB(128), mem_end - total);
> + else if ( mem_end - ROUNDUP(kernel_end, MB(2)) >= total )
> + addr = ROUNDUP(kernel_end, MB(2));
> + else if ( kernel_start - mem_start >= total )
> + addr = kernel_start - total;
> + else
> + {
> + panic("Unable to find suitable location for dtb+initrd.");
> return;
> + }
>
> - panic(XENLOG_ERR "The kernel(0x%"PRIpaddr"-0x%"PRIpaddr
> - ") is overlapping the DTB-initrd(0x%"PRIpaddr"-0x%"PRIpaddr")\n",
> - zimage_start, zimage_end, start, end);
> + info->dtb_paddr = addr;
> + info->initrd_paddr = info->dtb_paddr + dtb_len;
> }
>
> static void kernel_zimage_load(struct kernel_info *info)
> @@ -98,6 +128,8 @@ static void kernel_zimage_load(struct kernel_info *info)
> paddr_t len = info->zimage.len;
> unsigned long offs;
>
> + place_modules(info, load_addr, load_addr + len);
> +
> printk("Loading zImage from %"PRIpaddr" to %"PRIpaddr"-%"PRIpaddr"\n",
> paddr, load_addr, load_addr + len);
> for ( offs = 0; offs < len; )
> @@ -176,7 +208,6 @@ static int kernel_try_zimage64_prepare(struct kernel_info
> *info,
>
> info->entry = info->zimage.load_addr;
> info->load = kernel_zimage_load;
> - info->check_overlap = kernel_zimage_check_overlap;
>
> info->type = DOMAIN_PV64;
>
> @@ -236,17 +267,9 @@ static int kernel_try_zimage32_prepare(struct
> kernel_info *info,
> paddr_t load_end;
>
> load_end = info->mem.bank[0].start + info->mem.bank[0].size;
> - load_end = MIN(info->mem.bank[0].start + (128<<20), load_end);
> -
> - /*
> - * FDT is loaded above 128M or as high as possible, so the
> - * only way we can clash is if we have <=128MB, in which case
> - * FDT will be right at the end and so dtb_paddr will be below
> - * the proposed kernel load address. Move the kernel down if
> - * necessary.
> - */
> - if ( load_end >= info->dtb_paddr )
> - load_end = info->dtb_paddr;
> + load_end = MIN(info->mem.bank[0].start + MB(128), load_end);
> +
> + load_end += MB(2);
>
> info->zimage.load_addr = load_end - end;
> /* Align to 2MB */
> @@ -258,7 +281,6 @@ static int kernel_try_zimage32_prepare(struct kernel_info
> *info,
>
> info->entry = info->zimage.load_addr;
> info->load = kernel_zimage_load;
> - info->check_overlap = kernel_zimage_check_overlap;
>
> #ifdef CONFIG_ARM_64
> info->type = DOMAIN_PV32;
> @@ -269,10 +291,15 @@ static int kernel_try_zimage32_prepare(struct
> kernel_info *info,
>
> static void kernel_elf_load(struct kernel_info *info)
> {
> + place_modules(info,
> + info->elf.parms.virt_kstart,
> + info->elf.parms.virt_kend);
> +
> printk("Loading ELF image into guest memory\n");
> info->elf.elf.dest_base = (void*)(unsigned
> long)info->elf.parms.virt_kstart;
> info->elf.elf.dest_size =
> info->elf.parms.virt_kend - info->elf.parms.virt_kstart;
> +
> elf_load_binary(&info->elf.elf);
>
> printk("Free temporary kernel buffer\n");
> @@ -321,7 +348,6 @@ static int kernel_try_elf_prepare(struct kernel_info
> *info,
> */
> info->entry = info->elf.parms.virt_entry;
> info->load = kernel_elf_load;
> - info->check_overlap = NULL;
>
> if ( elf_check_broken(&info->elf.elf) )
> printk("Xen: warning: ELF kernel broken: %s\n",
> diff --git a/xen/arch/arm/kernel.h b/xen/arch/arm/kernel.h
> index debf590..b48c2c9 100644
> --- a/xen/arch/arm/kernel.h
> +++ b/xen/arch/arm/kernel.h
> @@ -40,8 +40,6 @@ struct kernel_info {
> };
>
> void (*load)(struct kernel_info *info);
> - /* Callback to check overlap between the kernel and the device tree */
> - void (*check_overlap)(struct kernel_info *kinfo);
> int load_attr;
> };
>
> --
> 1.7.10.4
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |