[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 1/3] xen/riscv: introduce setup_mm()



On Wed, 2024-11-06 at 13:44 +0100, oleksii.kurochko@xxxxxxxxx wrote:
> On Tue, 2024-11-05 at 16:20 +0100, Jan Beulich wrote:
> > On 01.11.2024 14:16, Oleksii Kurochko wrote:> @@ -423,3 +429,138 @@
> > void * __init early_fdt_map(paddr_t fdt_paddr)
> > >  
> > >      return fdt_virt;
> > >  }
> > > +
> > > +vaddr_t __ro_after_init directmap_virt_start =
> > > DIRECTMAP_VIRT_START;
> > > +
> > > +#ifndef CONFIG_RISCV_32
> > > +
> > > +/* Map a frame table to cover physical addresses ps through pe
> > > */
> > > +static void __init setup_frametable_mappings(paddr_t ps, paddr_t
> > > pe)
> > > +{
> > > +    paddr_t aligned_ps = ROUNDUP(ps, PAGE_SIZE);
> > > +    paddr_t aligned_pe = ROUNDDOWN(pe, PAGE_SIZE);
> > > +    unsigned long nr_mfns = PFN_DOWN(aligned_pe - aligned_ps);
> > > +    unsigned long frametable_size = nr_mfns *
> > > sizeof(*frame_table);
> > > +    mfn_t base_mfn;
> > > +
> > > +    if ( frametable_size > FRAMETABLE_SIZE )
> > > +        panic("The frametable cannot cover [%#"PRIpaddr",
> > > %#"PRIpaddr")\n",
> > > +              ps, pe);
> > > +
> > > +    /*
> > > +     * align base_mfn and frametable_size to MB(2) to have
> > > superpage mapping
> > > +     * in map_pages_to_xen()
> > > +     */
> > > +    frametable_size = ROUNDUP(frametable_size, MB(2));
> > > +    base_mfn = alloc_boot_pages(frametable_size >> PAGE_SHIFT,
> > > PFN_DOWN(MB(2)));
> > > +
> > > +    if ( map_pages_to_xen(FRAMETABLE_VIRT_START, base_mfn,
> > > +                          PFN_DOWN(frametable_size),
> > > +                          PAGE_HYPERVISOR_RW) )
> > > +        panic("frametable mappings failed: %#lx -> %#lx\n",
> > > +              FRAMETABLE_VIRT_START, mfn_x(base_mfn));
> > > +
> > > +    memset(&frame_table[0], -1, frametable_size);
> > > +    memset(&frame_table[PFN_DOWN(aligned_ps)],
> > > +           0, nr_mfns * sizeof(*frame_table));
> > 
> > Interesting - now you write out a huge amount of -1s, just to then
> > overwrite
> > most of them with zeroes. I'm not going to insist that you change
> > this yet
> > another time, but the performance hit from this is going to bite
> > you/us as
> > soon as Xen is run on bigger-memory systems.
> I agree that validating or invalidating frames in a single pass would
> be preferable to nearly two passes. I’m considering whether there’s a
> way to ensure that frame_table is set to -1 at compile time. It seems
> the best approach (and only one?) might be to initialize frame_table
> in
> one pass as follows:
> 1) [0, aligned_ps) = -1
> 2) [aligned_ps, nr_mfns * sizeof(*frame_table)) = 0
> 3) [nr_mfns * sizeof(*frame_table), frame_table_size) = -1
> Does this approach seem optimal?
> 
> > 
> > Plus, unless I'm mistaken, the function continues to rely on ps ==
> > 0
> > as
> > input. Just that the dependency is now better hidden. Specifically
> > if
> > you
> > calculate nr_mfns from the difference, and then use that for
> > allocation,
> > then you need to offset the start of the mapping you create
> > accordingly.
> I'm not quite understanding why the method of calculating nr_mfns
> affects how the frame_table is mapped. Isn’t it only necessary to
> calculate the correct size of the frame_table that needs to be
> allocated?
> 
> I assume by the offset you mean something similar to what was done
> for
> directmap mapping?
Here is the small diff for simplicity:
   --- a/xen/arch/riscv/include/asm/mm.h
   +++ b/xen/arch/riscv/include/asm/mm.h
   @@ -132,11 +132,13 @@ struct page_info
        };
    };
    
   +extern struct page_info *frame_table_virt_start;
   +
    #define frame_table ((struct page_info *)FRAMETABLE_VIRT_START)
    
    /* Convert between machine frame numbers and page-info structures.
   */
   -#define mfn_to_page(mfn)    (frame_table + mfn_x(mfn))
   -#define page_to_mfn(pg)     _mfn((pg) - frame_table)
   +#define mfn_to_page(mfn)    (frame_table_virt_start + mfn_x(mfn))
   +#define page_to_mfn(pg)     _mfn((pg) - frame_table_virt_start)
    
    static inline void *page_to_virt(const struct page_info *pg)
    {
   diff --git a/xen/arch/riscv/mm.c b/xen/arch/riscv/mm.c
   index db75aa1d4f..9bd15597a9 100644
   --- a/xen/arch/riscv/mm.c
   +++ b/xen/arch/riscv/mm.c
   @@ -426,6 +426,8 @@ void * __init early_fdt_map(paddr_t fdt_paddr)
    
    vaddr_t __ro_after_init directmap_virt_start =
   DIRECTMAP_VIRT_START;
    
   +struct page_info *frame_table_virt_start;
   +
    #ifndef CONFIG_RISCV_32
    
    /* Map a frame table to cover physical addresses ps through pe */
   @@ -437,6 +439,11 @@ static void __init
   setup_frametable_mappings(paddr_t ps, paddr_t pe)
        unsigned long frametable_size = nr_mfns * sizeof(*frame_table);
        mfn_t base_mfn;
    
   +    if ( !frame_table_virt_start )
   +    {
   +        frame_table_virt_start = frame_table -
   paddr_to_pfn(aligned_ps);
   +    }
   +
        if ( frametable_size > FRAMETABLE_SIZE )
            panic("The frametable cannot cover [%#"PRIpaddr",
   %#"PRIpaddr")\n",
                  ps, pe);
   @@ -454,9 +461,12 @@ static void __init
   setup_frametable_mappings(paddr_t ps, paddr_t pe)
            panic("frametable mappings failed: %#lx -> %#lx\n",
                  FRAMETABLE_VIRT_START, mfn_x(base_mfn));
    
   -    memset(&frame_table[0], -1, frametable_size);
   -    memset(&frame_table[PFN_DOWN(aligned_ps)],
   -           0, nr_mfns * sizeof(*frame_table));
   +    memset(&frame_table[0], -1,
   +           PFN_DOWN(aligned_ps) * sizeof(*frame_table));
   +    memset(&frame_table[PFN_DOWN(aligned_ps)], 0,
   +           nr_mfns * sizeof(*frame_table));
   +    memset(&frame_table[PFN_DOWN(aligned_pe)], -1,
   +           frametable_size - (nr_mfns * sizeof(*frame_table)));
    }

~ Oleksii 

> 
> 
> >  At
> > which point you may need to apply extra care to cover the case
> > where
> > sizeof(*frame_table) is not a power of two, and hence e.g. the
> > first
> > valid
> > page might have a struct instance straddling a page boundary.
> The first valid page is aligned_ps ( which is aligned on a page
> boundary ) so assuming that sizeof(*frame_table) < PAGE_SIZE we can't
> straddle a page boundary, can we?
> 
> > 
> > > +    /*
> > > +     * The base address may not be aligned to the second level
> > > +     * size in case of Sv39 (e.g. 1GB when using 4KB pages).
> > > +     * This would prevent superpage mappings for all the regions
> > > +     * because the virtual address and machine address should
> > > +     * both be suitably aligned.
> > > +     *
> > > +     * Prevent that by offsetting the start of the directmap
> > > virtual
> > > +     * address.
> > > +     */
> > > +    if ( map_pages_to_xen(DIRECTMAP_VIRT_START + (base_addr &
> > > ~high_bits_mask),
> > 
> > I'm afraid this is correct only for the first invocation of the
> > function.
> > For any further invocation you'd likely (attempt to) replace
> > previously
> > established mappings. I think that here you need to use
> > directmap_virt_start
> > instead.
> Banks are sorted by bank start address ( common/device-
> tree/bootfdt.c:623 ):
>        /*
>         * On Arm64 setup_directmap_mappings() expects to be called
> with
>    the lowest
>         * bank in memory first. There is no requirement that the DT
> will
>    provide
>         * the banks sorted in ascending order. So sort them through.
>         */
>        sort(mem->bank, mem->nr_banks, sizeof(struct membank),
>             cmp_memory_node, swap_memory_node);
> ( btw, comment is needed to be updated ... )
> 
> Thereby no replacement should happen if I don't miss something.
> 
> I am thinking wouldn't it be more correct to use
> mfn_to_virt(base_mfn):
>        if ( map_pages_to_xen(__mfn_to_virt(base_mfn) + (base_addr &
>    ~high_bits_mask),
>                              _mfn(base_mfn), nr_mfns,
>                              PAGE_HYPERVISOR_RW) )
> 
> > 
> > > +/*
> > > + * Setup memory management
> > > + *
> > > + * RISC-V 64 has a large virtual address space (the minimum
> > > supported
> > > + * MMU mode is Sv39, which provides GBs of VA space).
> > > + *
> > > + * The directmap_virt_start is shifted lower in the VA space to
> > > + * (DIRECTMAP_VIRT_START - masked_low_bits_of_ram_start_address)
> > > to avoid
> > > + * wasting a large portion of the directmap space, this also
> > > allows for simple
> > > + * VA <-> PA translations. Also aligns DIRECTMAP_VIRT_START to a
> > > GB boundary
> > > + * (for Sv39; for other MMU mode boundaries will be bigger ) by
> > > masking the
> > > + * higher bits of the RAM start address to enable the use of
> > > superpages in
> > > + * map_pages_to_xen().
> > > + *
> > > + * The frametable is mapped starting from physical address 0,
> > > minimizing
> > > + * wasted VA space and simplifying page_to_mfn() and
> > > mfn_to_page()
> > > + * translations.
> > > + */
> > > +void __init setup_mm(void)
> > > +{
> > > +    const struct membanks *banks = bootinfo_get_mem();
> > > +    paddr_t ram_start = INVALID_PADDR;
> > > +    paddr_t ram_end = 0;
> > > +    paddr_t ram_size = 0;
> > > +    unsigned int i;
> > > +
> > > +    /*
> > > +     * We need some memory to allocate the page-tables used for
> > > the directmap
> > > +     * mappings. But some regions may contain memory already
> > > allocated
> > > +     * for other uses (e.g. modules, reserved-memory...).
> > > +     *
> > > +     * For simplicity, add all the free regions in the boot
> > > allocator.
> > > +     */
> > > +    populate_boot_allocator();
> > > +
> > > +    total_pages = 0;
> > 
> > Nit: Is this actually necessary?
> Agree, there is no need for total_pages. It should be dropped.
> 
> > 
> > > +    for ( i = 0; i < banks->nr_banks; i++ )
> > > +    {
> > > +        const struct membank *bank = &banks->bank[i];
> > > +        paddr_t bank_start = ROUNDUP(bank->start, PAGE_SIZE);
> > > +        paddr_t bank_end = ROUNDDOWN(bank->start + bank->size,
> > > PAGE_SIZE);
> > > +        unsigned long bank_size = bank_end - bank_start;
> > > +
> > > +        ram_size += bank_size;
> > > +        ram_start = min(ram_start, bank_start);
> > > +        ram_end = max(ram_end, bank_end);
> > > +
> > > +        setup_directmap_mappings(PFN_DOWN(bank_start),
> > > PFN_DOWN(bank_size));
> > > +    }
> > 
> > You maintain ram_start in the loop, just to then ...
> > 
> > > +    total_pages = PFN_DOWN(ram_size);
> > > +
> > > +    setup_frametable_mappings(0, ram_end);
> > > +    max_page = PFN_DOWN(ram_end);
> > > +}
> > 
> > ... not use it at all - why?
> ram_start was needed for the case when setup_frametable_mappings()
> used
> it as the first argument. Now it isn't true anymore so should be
> dropped.
> 
> Thanks.
> 
> ~ Oleksii




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.