[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 5/7] xen/ppc: Enable bootfdt and boot allocator


  • To: Shawn Anastasio <sanastasio@xxxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Wed, 20 Dec 2023 12:44:38 +0100
  • Autocrypt: addr=jbeulich@xxxxxxxx; keydata= xsDiBFk3nEQRBADAEaSw6zC/EJkiwGPXbWtPxl2xCdSoeepS07jW8UgcHNurfHvUzogEq5xk hu507c3BarVjyWCJOylMNR98Yd8VqD9UfmX0Hb8/BrA+Hl6/DB/eqGptrf4BSRwcZQM32aZK 7Pj2XbGWIUrZrd70x1eAP9QE3P79Y2oLrsCgbZJfEwCgvz9JjGmQqQkRiTVzlZVCJYcyGGsD /0tbFCzD2h20ahe8rC1gbb3K3qk+LpBtvjBu1RY9drYk0NymiGbJWZgab6t1jM7sk2vuf0Py O9Hf9XBmK0uE9IgMaiCpc32XV9oASz6UJebwkX+zF2jG5I1BfnO9g7KlotcA/v5ClMjgo6Gl MDY4HxoSRu3i1cqqSDtVlt+AOVBJBACrZcnHAUSuCXBPy0jOlBhxPqRWv6ND4c9PH1xjQ3NP nxJuMBS8rnNg22uyfAgmBKNLpLgAGVRMZGaGoJObGf72s6TeIqKJo/LtggAS9qAUiuKVnygo 3wjfkS9A3DRO+SpU7JqWdsveeIQyeyEJ/8PTowmSQLakF+3fote9ybzd880fSmFuIEJldWxp Y2ggPGpiZXVsaWNoQHN1c2UuY29tPsJgBBMRAgAgBQJZN5xEAhsDBgsJCAcDAgQVAggDBBYC AwECHgECF4AACgkQoDSui/t3IH4J+wCfQ5jHdEjCRHj23O/5ttg9r9OIruwAn3103WUITZee e7Sbg12UgcQ5lv7SzsFNBFk3nEQQCACCuTjCjFOUdi5Nm244F+78kLghRcin/awv+IrTcIWF hUpSs1Y91iQQ7KItirz5uwCPlwejSJDQJLIS+QtJHaXDXeV6NI0Uef1hP20+y8qydDiVkv6l IreXjTb7DvksRgJNvCkWtYnlS3mYvQ9NzS9PhyALWbXnH6sIJd2O9lKS1Mrfq+y0IXCP10eS FFGg+Av3IQeFatkJAyju0PPthyTqxSI4lZYuJVPknzgaeuJv/2NccrPvmeDg6Coe7ZIeQ8Yj t0ARxu2xytAkkLCel1Lz1WLmwLstV30g80nkgZf/wr+/BXJW/oIvRlonUkxv+IbBM3dX2OV8 AmRv1ySWPTP7AAMFB/9PQK/VtlNUJvg8GXj9ootzrteGfVZVVT4XBJkfwBcpC/XcPzldjv+3 HYudvpdNK3lLujXeA5fLOH+Z/G9WBc5pFVSMocI71I8bT8lIAzreg0WvkWg5V2WZsUMlnDL9 mpwIGFhlbM3gfDMs7MPMu8YQRFVdUvtSpaAs8OFfGQ0ia3LGZcjA6Ik2+xcqscEJzNH+qh8V m5jjp28yZgaqTaRbg3M/+MTbMpicpZuqF4rnB0AQD12/3BNWDR6bmh+EkYSMcEIpQmBM51qM EKYTQGybRCjpnKHGOxG0rfFY1085mBDZCH5Kx0cl0HVJuQKC+dV2ZY5AqjcKwAxpE75MLFkr wkkEGBECAAkFAlk3nEQCGwwACgkQoDSui/t3IH7nnwCfcJWUDUFKdCsBH/E5d+0ZnMQi+G0A nAuWpQkjM1ASeQwSHEeAWPgskBQL
  • Cc: Timothy Pearson <tpearson@xxxxxxxxxxxxxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx
  • Delivery-date: Wed, 20 Dec 2023 11:44:43 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 15.12.2023 03:44, Shawn Anastasio wrote:
> --- /dev/null
> +++ b/xen/arch/ppc/include/asm/setup.h
> @@ -0,0 +1,123 @@
> +#ifndef __ASM_PPC_SETUP_H__
> +#define __ASM_PPC_SETUP_H__
> +
> +#define max_init_domid (0)
> +
> +#include <public/version.h>
> +#include <asm/p2m.h>
> +#include <xen/device_tree.h>
> +
> +#define MIN_FDT_ALIGN 8
> +#define MAX_FDT_SIZE SZ_2M
> +
> +#define NR_MEM_BANKS 256
> +
> +#define MAX_MODULES 32 /* Current maximum useful modules */
> +
> +typedef enum {
> +    BOOTMOD_XEN,
> +    BOOTMOD_FDT,
> +    BOOTMOD_KERNEL,
> +    BOOTMOD_RAMDISK,
> +    BOOTMOD_XSM,
> +    BOOTMOD_GUEST_DTB,
> +    BOOTMOD_UNKNOWN
> +}  bootmodule_kind;
> +
> +enum membank_type {
> +    /*
> +     * The MEMBANK_DEFAULT type refers to either reserved memory for the
> +     * device/firmware (when the bank is in 'reserved_mem') or any RAM (when
> +     * the bank is in 'mem').
> +     */
> +    MEMBANK_DEFAULT,
> +    /*
> +     * The MEMBANK_STATIC_DOMAIN type is used to indicate whether the memory
> +     * bank is bound to a static Xen domain. It is only valid when the bank
> +     * is in reserved_mem.
> +     */
> +    MEMBANK_STATIC_DOMAIN,
> +    /*
> +     * The MEMBANK_STATIC_HEAP type is used to indicate whether the memory
> +     * bank is reserved as static heap. It is only valid when the bank is
> +     * in reserved_mem.
> +     */
> +    MEMBANK_STATIC_HEAP,
> +};
> +
> +/* Indicates the maximum number of characters(\0 included) for shm_id */
> +#define MAX_SHM_ID_LENGTH 16
> +
> +struct membank {
> +    paddr_t start;
> +    paddr_t size;
> +    enum membank_type type;
> +};
> +
> +struct meminfo {
> +    unsigned int nr_banks;
> +    struct membank bank[NR_MEM_BANKS];
> +};
> +
> +/*
> + * The domU flag is set for kernels and ramdisks of "xen,domain" nodes.
> + * The purpose of the domU flag is to avoid getting confused in
> + * kernel_probe, where we try to guess which is the dom0 kernel and
> + * initrd to be compatible with all versions of the multiboot spec.
> + */
> +#define BOOTMOD_MAX_CMDLINE 1024

Why does this live here, rather than ...

> +struct bootmodule {
> +    bootmodule_kind kind;
> +    bool domU;
> +    paddr_t start;
> +    paddr_t size;
> +};
> +
> +/* DT_MAX_NAME is the node name max length according the DT spec */
> +#define DT_MAX_NAME 41

... next to this?

> +struct bootcmdline {
> +    bootmodule_kind kind;
> +    bool domU;
> +    paddr_t start;
> +    char dt_name[DT_MAX_NAME];
> +    char cmdline[BOOTMOD_MAX_CMDLINE];
> +};
> +
> +struct bootmodules {
> +    int nr_mods;
> +    struct bootmodule module[MAX_MODULES];
> +};
> +
> +struct bootcmdlines {
> +    unsigned int nr_mods;
> +    struct bootcmdline cmdline[MAX_MODULES];
> +};
> +
> +struct bootinfo {
> +    struct meminfo mem;
> +    struct meminfo reserved_mem;
> +    struct bootmodules modules;
> +    struct bootcmdlines cmdlines;
> +    bool static_heap;

Unused field?

> +};
> +
> +extern struct bootinfo bootinfo;
> +
> +/*
> + * setup.c
> + */
> +
> +bool check_reserved_regions_overlap(paddr_t region_start, paddr_t 
> region_size);
> +struct bootmodule *add_boot_module(bootmodule_kind kind,
> +                                   paddr_t start, paddr_t size, bool domU);
> +void add_boot_cmdline(const char *name, const char *cmdline,
> +                      bootmodule_kind kind, paddr_t start, bool domU);
> +const char *boot_module_kind_as_string(bootmodule_kind kind);
> +struct bootcmdline * __init boot_cmdline_find_by_kind(bootmodule_kind kind);

No __init please on declarations; section placement attributes make sense only
on definitions (with pretty narrow exceptions).

> --- a/xen/arch/ppc/setup.c
> +++ b/xen/arch/ppc/setup.c
> @@ -1,16 +1,296 @@
>  /* SPDX-License-Identifier: GPL-2.0-or-later */
>  #include <xen/init.h>
>  #include <xen/lib.h>
> +#include <xen/libfdt/libfdt.h>
>  #include <xen/mm.h>
>  #include <public/version.h>
>  #include <asm/boot.h>
>  #include <asm/early_printk.h>
>  #include <asm/mm.h>
>  #include <asm/processor.h>
> +#include <asm/setup.h>
>  
>  /* Xen stack for bringing up the first CPU. */
>  unsigned char __initdata cpu0_boot_stack[STACK_SIZE] __aligned(STACK_SIZE);
>  
> +struct bootinfo __initdata bootinfo;
> +
> +void __init add_boot_cmdline(const char *name, const char *cmdline,
> +                             bootmodule_kind kind, paddr_t start, bool domU)
> +{
> +    struct bootcmdlines *cmds = &bootinfo.cmdlines;
> +    struct bootcmdline *cmd;
> +
> +    if ( cmds->nr_mods == MAX_MODULES )
> +    {
> +        printk("Ignoring %s cmdline (too many)\n", name);
> +        return;
> +    }
> +
> +    cmd = &cmds->cmdline[cmds->nr_mods++];
> +    cmd->kind = kind;
> +    cmd->domU = domU;
> +    cmd->start = start;
> +
> +    ASSERT(strlen(name) <= DT_MAX_NAME);
> +    safe_strcpy(cmd->dt_name, name);
> +
> +    if ( strlen(cmdline) > BOOTMOD_MAX_CMDLINE )
> +        panic("module %s command line too long\n", name);
> +    safe_strcpy(cmd->cmdline, cmdline);
> +}
> +
> +struct bootmodule __init *add_boot_module(bootmodule_kind kind,
> +                                          paddr_t start, paddr_t size,
> +                                          bool domU)
> +{
> +    struct bootmodules *mods = &bootinfo.modules;
> +    struct bootmodule *mod;
> +    unsigned int i;
> +
> +    if ( mods->nr_mods == MAX_MODULES )
> +    {
> +        printk("Ignoring %s boot module at %"PRIpaddr"-%"PRIpaddr" (too 
> many)\n",
> +               boot_module_kind_as_string(kind), start, start + size);
> +        return NULL;
> +    }
> +
> +    if ( check_reserved_regions_overlap(start, size) )
> +        return NULL;
> +
> +    for ( i = 0 ; i < mods->nr_mods ; i++ )
> +    {
> +        mod = &mods->module[i];
> +        if ( mod->kind == kind && mod->start == start )
> +        {
> +            if ( !domU )
> +                mod->domU = false;

What's the intention behind this (negative) accumulation of "domU"?

> +            return mod;
> +        }
> +    }

And what's the intention behind checking existing modules here (but not
existing command lines higher up in add_boot_cmdline())?

> +    mod = &mods->module[mods->nr_mods++];
> +    mod->kind = kind;
> +    mod->start = start;
> +    mod->size = size;
> +    mod->domU = domU;
> +
> +    return mod;
> +}
> +
> +const char * __init boot_module_kind_as_string(bootmodule_kind kind)
> +{
> +    switch ( kind )
> +    {
> +    case BOOTMOD_XEN:     return "Xen";
> +    case BOOTMOD_FDT:     return "Device Tree";
> +    case BOOTMOD_KERNEL:  return "Kernel";
> +    default: BUG();
> +    }
> +}
> +
> +/*
> + * TODO: '*_end' could be 0 if the module/region is at the end of the 
> physical
> + * address space. This is for now not handled as it requires more rework.
> + */
> +static bool __init bootmodules_overlap_check(struct bootmodules *bootmodules,
> +                                             paddr_t region_start,
> +                                             paddr_t region_size)
> +{
> +    paddr_t mod_start = INVALID_PADDR, mod_end = 0;

Pointless initializers? (The variables might benefit from moving into
the more narrow scope anyway.)

> +    paddr_t region_end = region_start + region_size;
> +    unsigned int i, mod_num = bootmodules->nr_mods;
> +
> +    for ( i = 0; i < mod_num; i++ )
> +    {
> +        mod_start = bootmodules->module[i].start;
> +        mod_end = mod_start + bootmodules->module[i].size;
> +
> +        if ( region_end <= mod_start || region_start >= mod_end )
> +            continue;
> +        else

I for one consider such an "else" misleading.

> +        {
> +            printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with"
> +                   " mod[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", region_start,
> +                   region_end, i, mod_start, mod_end);
> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
> +
> +/*
> + * TODO: '*_end' could be 0 if the bank/region is at the end of the physical
> + * address space. This is for now not handled as it requires more rework.
> + */
> +static bool __init meminfo_overlap_check(struct meminfo *meminfo,
> +                                         paddr_t region_start,
> +                                         paddr_t region_size)
> +{
> +    paddr_t bank_start = INVALID_PADDR, bank_end = 0;

As above: Pointless initializers? (The variables might benefit from moving
into the more narrow scope anyway.)

> +    paddr_t region_end = region_start + region_size;
> +    unsigned int i, bank_num = meminfo->nr_banks;
> +
> +    for ( i = 0; i < bank_num; i++ )
> +    {
> +        bank_start = meminfo->bank[i].start;
> +        bank_end = bank_start + meminfo->bank[i].size;
> +
> +        if ( region_end <= bank_start || region_start >= bank_end )
> +            continue;
> +        else
> +        {
> +            printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with"
> +                   " bank[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", region_start,
> +                   region_end, i, bank_start, bank_end);

I think this is confusing to read: When the format string already needs
wrapping, I don't think the next argument should be put on the same line.
I was almost going to complain that arguments don't fit the format string.

> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
> +
> +/*
> + * Given an input physical address range, check if this range is overlapping
> + * with the existing reserved memory regions defined in bootinfo.
> + * Return true if the input physical address range is overlapping with any
> + * existing reserved memory regions, otherwise false.
> + */
> +bool __init check_reserved_regions_overlap(paddr_t region_start,
> +                                           paddr_t region_size)
> +{
> +    /* Check if input region is overlapping with bootinfo.reserved_mem banks 
> */
> +    if ( meminfo_overlap_check(&bootinfo.reserved_mem,
> +                               region_start, region_size) )
> +        return true;
> +
> +    /* Check if input region is overlapping with bootmodules */
> +    if ( bootmodules_overlap_check(&bootinfo.modules,
> +                                   region_start, region_size) )
> +        return true;
> +
> +    return false;
> +}
> +
> +/*
> + * Return the end of the non-module region starting at s. In other
> + * words return s the start of the next modules after s.

Stray " s" after "return"?

> + * On input *end is the end of the region which should be considered
> + * and it is updated to reflect the end of the module, clipped to the
> + * end of the region if it would run over.
> + */
> +static paddr_t __init next_module(paddr_t s, paddr_t *end)
> +{
> +    struct bootmodules *mi = &bootinfo.modules;

const?

> +    paddr_t lowest = ~(paddr_t)0;
> +    int i;

unsigned int?

> +    for ( i = 0; i < mi->nr_mods; i++ )
> +    {
> +        paddr_t mod_s = mi->module[i].start;
> +        paddr_t mod_e = mod_s + mi->module[i].size;
> +
> +        if ( !mi->module[i].size )
> +            continue;
> +
> +        if ( mod_s < s )
> +            continue;
> +        if ( mod_s > lowest )
> +            continue;
> +        if ( mod_s > *end )
> +            continue;
> +        lowest = mod_s;
> +        *end = min(*end, mod_e);
> +    }
> +    return lowest;
> +}
> +
> +static void __init dt_unreserved_regions(paddr_t s, paddr_t e,
> +                                         void (*cb)(paddr_t ps, paddr_t pe),
> +                                         unsigned int first)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0 ; i < bootinfo.reserved_mem.nr_banks; i++ )
> +    {
> +        paddr_t r_s = bootinfo.reserved_mem.bank[i].start;
> +        paddr_t r_e = r_s + bootinfo.reserved_mem.bank[i].size;
> +
> +        if ( s < r_e && r_s < e )
> +        {
> +            dt_unreserved_regions(r_e, e, cb, i + 1);
> +            dt_unreserved_regions(s, r_s, cb, i + 1);

What's the reason for this recursion? Can there be overlapping regions?
If so, is there a guaranteed depth limit (seeing in particular that
you're not using the last function parameter)?

> +            return;
> +        }
> +    }
> +
> +    cb(s, e);
> +}
> +
> +/*
> + * boot_cmdline_find_by_kind can only be used to return Xen modules (e.g
> + * XSM, DTB) or Dom0 modules. This is not suitable for looking up guest
> + * modules.
> + */
> +struct bootcmdline * __init boot_cmdline_find_by_kind(bootmodule_kind kind)

This function looks to be unused?

> +{
> +    struct bootcmdlines *cmds = &bootinfo.cmdlines;
> +    struct bootcmdline *cmd;
> +    int i;

unsigned int?

> +    for ( i = 0 ; i < cmds->nr_mods ; i++ )
> +    {
> +        cmd = &cmds->cmdline[i];
> +        if ( cmd->kind == kind && !cmd->domU )
> +            return cmd;
> +    }
> +    return NULL;
> +}
> +
> +/*
> + * Populate the boot allocator. Based on arch/arm/setup.c's
> + * populate_boot_allocator.
> + * All RAM but the following regions will be added to the boot allocator:
> + *  - Modules (e.g., Xen, Kernel)
> + *  - Reserved regions
> + */
> +static void __init populate_boot_allocator(void)
> +{
> +    unsigned int i;
> +    const struct meminfo *banks = &bootinfo.mem;
> +    paddr_t s, e;
> +
> +    for ( i = 0; i < banks->nr_banks; i++ )
> +    {
> +        const struct membank *bank = &banks->bank[i];
> +        paddr_t bank_end = bank->start + bank->size;

Since you already make use of inner scope variables, and reason not
to put s here and ...

> +        s = bank->start;
> +        while ( s < bank_end )
> +        {
> +            paddr_t n = bank_end;

... e even here (each then gaining an initializer as well)?

> +            e = next_module(s, &n);
> +
> +            if ( e == ~(paddr_t)0 )
> +                e = n = bank_end;
> +
> +            /*
> +             * Module in a RAM bank other than the one which we are
> +             * not dealing with here.

Nit: "not"?

> +             */
> +            if ( e > bank_end )
> +                e = bank_end;
> +
> +            dt_unreserved_regions(s, e, init_boot_pages, 0);
> +
> +            s = n;
> +        }
> +    }
> +}
> +
>  void setup_exceptions(void)
>  {
>      unsigned long lpcr;
> @@ -24,6 +304,8 @@ void __init noreturn start_xen(unsigned long r3, unsigned 
> long r4,
>                                 unsigned long r5, unsigned long r6,
>                                 unsigned long r7)
>  {
> +    void *boot_fdt;

const?

> @@ -32,11 +314,16 @@ void __init noreturn start_xen(unsigned long r3, 
> unsigned long r4,
>      else
>      {
>          /* kexec boot protocol */
> -        boot_opal_init((void *)r3);
> +        boot_fdt = (void *)r3;

And then here as well, to avoid Misra complaints.

> +        boot_opal_init(boot_fdt);
>      }
>  
>      setup_exceptions();
>  
> +    boot_fdt_info(boot_fdt, r3);
> +
> +    populate_boot_allocator();
> +
>      setup_initial_pagetables();
>  
>      early_printk("Hello, ppc64le!\n");
> --- a/xen/common/device-tree/bootfdt.c
> +++ b/xen/common/device-tree/bootfdt.c
> @@ -543,12 +543,33 @@ size_t __init boot_fdt_info(const void *fdt, paddr_t 
> paddr)
>      if ( ret < 0 )
>          panic("No valid device tree\n");
>  
> -    add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false);
> -
>      ret = device_tree_for_each_node((void *)fdt, 0, early_scan_node, NULL);
>      if ( ret )
>          panic("Early FDT parsing failed (%d)\n", ret);
>  
> +    /*
> +     * Add module for the FDT itself after the device tree has been parsed. 
> This
> +     * is required on ppc64le where the device tree passed to Xen may have 
> been
> +     * allocated by skiboot, in which case it will exist within a reserved
> +     * region and this call will fail. This is fine, however, since either 
> way
> +     * the allocator will know not to step on the device tree.
> +     */
> +    add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false);
> +
> +    /*
> +     * Xen relocates itself at the ppc64 entrypoint, so we need to manually 
> mark
> +     * the kernel module.
> +     */
> +    if ( IS_ENABLED(CONFIG_PPC64) ) {

Nit (style): Brace placement.

> +        paddr_t xen_start, xen_end;
> +
> +        xen_start = __pa(_start);
> +        xen_end = PAGE_ALIGN(__pa(_end));
> +        if ( !add_boot_module(BOOTMOD_XEN, xen_start, xen_end, false) )
> +            panic("Xen overlaps reserved memory! %016lx - %016lx\n", 
> xen_start,
> +                xen_end);
> +    }

I'll need to leave commenting on this to the DT maintainers.

Jan



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.