|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RE: [XEN RFC PATCH 12/40] xen/x86: Move numa_initmem_init to common
Hi Julien,
> -----Original Message-----
> From: Julien Grall <julien@xxxxxxx>
> Sent: 2021年8月25日 18:22
> To: Wei Chen <Wei.Chen@xxxxxxx>; xen-devel@xxxxxxxxxxxxxxxxxxxx;
> sstabellini@xxxxxxxxxx; jbeulich@xxxxxxxx
> Cc: Bertrand Marquis <Bertrand.Marquis@xxxxxxx>
> Subject: Re: [XEN RFC PATCH 12/40] xen/x86: Move numa_initmem_init to
> common
>
> Hi Wei,
>
> On 11/08/2021 11:23, Wei Chen wrote:
> > This function can be reused by Arm device tree based
> > NUMA support. So we move it from x86 to common, as well
> > as its related variables and functions:
> > setup_node_bootmem, numa_init_array and numa_emulation.
> >
> > As numa_initmem_init has been moved to common, _memnodemap
> > is not used cross files. We can restore _memnodemap to
> > static.
>
> As we discussed on a previous patch, we should try to avoid this kind of
> dance. I can help to find a split that would achieve that.
>
Yes, thanks!
> >
> > Signed-off-by: Wei Chen <wei.chen@xxxxxxx>
> > ---
> > xen/arch/x86/numa.c | 118 ----------------------------------
> > xen/common/numa.c | 122 +++++++++++++++++++++++++++++++++++-
> > xen/include/asm-x86/numa.h | 5 --
> > xen/include/asm-x86/setup.h | 1 -
> > xen/include/xen/numa.h | 8 ++-
> > 5 files changed, 128 insertions(+), 126 deletions(-)
> >
> > diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
> > index f2626b3968..6908738305 100644
> > --- a/xen/arch/x86/numa.c
> > +++ b/xen/arch/x86/numa.c
> > @@ -38,7 +38,6 @@ nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {
> >
> > nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };
> >
> > -bool numa_off;
> > s8 acpi_numa = 0;
> >
> > int srat_disabled(void)
> > @@ -46,123 +45,6 @@ int srat_disabled(void)
> > return numa_off || acpi_numa < 0;
> > }
> >
> > -/* initialize NODE_DATA given nodeid and start/end */
> > -void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
> > -{
> > - unsigned long start_pfn, end_pfn;
> > -
> > - start_pfn = start >> PAGE_SHIFT;
> > - end_pfn = end >> PAGE_SHIFT;
> > -
> > - NODE_DATA(nodeid)->node_start_pfn = start_pfn;
> > - NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
> > -
> > - node_set_online(nodeid);
> > -}
> > -
> > -void __init numa_init_array(void)
> > -{
> > - int rr, i;
> > -
> > - /* There are unfortunately some poorly designed mainboards around
> > - that only connect memory to a single CPU. This breaks the 1:1
> cpu->node
> > - mapping. To avoid this fill in the mapping for all possible
> > - CPUs, as the number of CPUs is not known yet.
> > - We round robin the existing nodes. */
> > - rr = first_node(node_online_map);
> > - for ( i = 0; i < nr_cpu_ids; i++ )
> > - {
> > - if ( cpu_to_node[i] != NUMA_NO_NODE )
> > - continue;
> > - numa_set_node(i, rr);
> > - rr = cycle_node(rr, node_online_map);
> > - }
> > -}
> > -
> > -#ifdef CONFIG_NUMA_EMU
> > -static int numa_fake __initdata = 0;
> > -
> > -/* Numa emulation */
> > -static int __init numa_emulation(u64 start_pfn, u64 end_pfn)
> > -{
> > - int i;
> > - struct node nodes[MAX_NUMNODES];
> > - u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
> > -
> > - /* Kludge needed for the hash function */
> > - if ( hweight64(sz) > 1 )
> > - {
> > - u64 x = 1;
> > - while ( (x << 1) < sz )
> > - x <<= 1;
> > - if ( x < sz/2 )
> > - printk(KERN_ERR "Numa emulation unbalanced. Complain to
> maintainer\n");
> > - sz = x;
> > - }
> > -
> > - memset(&nodes,0,sizeof(nodes));
> > - for ( i = 0; i < numa_fake; i++ )
> > - {
> > - nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
> > - if ( i == numa_fake - 1 )
> > - sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
> > - nodes[i].end = nodes[i].start + sz;
> > - printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64"
> (%"PRIu64"MB)\n",
> > - i,
> > - nodes[i].start, nodes[i].end,
> > - (nodes[i].end - nodes[i].start) >> 20);
> > - node_set_online(i);
> > - }
> > - memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
> > - if ( memnode_shift < 0 )
> > - {
> > - memnode_shift = 0;
> > - printk(KERN_ERR "No NUMA hash function found. Emulation
> disabled.\n");
> > - return -1;
> > - }
> > - for_each_online_node ( i )
> > - setup_node_bootmem(i, nodes[i].start, nodes[i].end);
> > - numa_init_array();
> > -
> > - return 0;
> > -}
> > -#endif
> > -
> > -void __init numa_initmem_init(unsigned long start_pfn, unsigned long
> end_pfn)
> > -{
> > - int i;
> > -
> > -#ifdef CONFIG_NUMA_EMU
> > - if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
> > - return;
> > -#endif
> > -
> > -#ifdef CONFIG_ACPI_NUMA
> > - if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
> > - (u64)end_pfn << PAGE_SHIFT) )
> > - return;
> > -#endif
> > -
> > - printk(KERN_INFO "%s\n",
> > - numa_off ? "NUMA turned off" : "No NUMA configuration
> found");
> > -
> > - printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
> > - (u64)start_pfn << PAGE_SHIFT,
> > - (u64)end_pfn << PAGE_SHIFT);
> > - /* setup dummy node covering all memory */
> > - memnode_shift = BITS_PER_LONG - 1;
> > - memnodemap = _memnodemap;
> > - memnodemapsize = ARRAY_SIZE(_memnodemap);
> > -
> > - nodes_clear(node_online_map);
> > - node_set_online(0);
> > - for ( i = 0; i < nr_cpu_ids; i++ )
> > - numa_set_node(i, 0);
> > - cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
> > - setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT,
> > - (u64)end_pfn << PAGE_SHIFT);
> > -}
> > -
> > void numa_set_node(int cpu, nodeid_t node)
> > {
> > cpu_to_node[cpu] = node;
> > diff --git a/xen/common/numa.c b/xen/common/numa.c
> > index 1facc8fe2b..26c0006d04 100644
> > --- a/xen/common/numa.c
> > +++ b/xen/common/numa.c
> > @@ -14,12 +14,13 @@
> > #include <xen/smp.h>
> > #include <xen/pfn.h>
> > #include <xen/sched.h>
>
> NIT: We tend to add a newline betwen <xen/...> headers and <asm/...>
> headers.
>
got it
> > +#include <asm/acpi.h>
> >
> > struct node_data node_data[MAX_NUMNODES];
> >
> > /* Mapping from pdx to node id */
> > int memnode_shift;
> > -typeof(*memnodemap) _memnodemap[64];
> > +static typeof(*memnodemap) _memnodemap[64];
> > unsigned long memnodemapsize;
> > u8 *memnodemap;
> >
> > @@ -34,6 +35,8 @@ int num_node_memblks;
> > struct node node_memblk_range[NR_NODE_MEMBLKS];
> > nodeid_t memblk_nodeid[NR_NODE_MEMBLKS];
> >
> > +bool numa_off;
> > +
> > /*
> > * Given a shift value, try to populate memnodemap[]
> > * Returns :
> > @@ -191,3 +194,120 @@ void numa_add_cpu(int cpu)
> > {
> > cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
> > }
> > +
> > +/* initialize NODE_DATA given nodeid and start/end */
> > +void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
>
> From an abstract PoV, start and end should be paddr_t. This should be
> done on a separate patch though.
>
Ok.
> > +{
> > + unsigned long start_pfn, end_pfn;
> > +
> > + start_pfn = start >> PAGE_SHIFT;
> > + end_pfn = end >> PAGE_SHIFT;
> > +
> > + NODE_DATA(nodeid)->node_start_pfn = start_pfn;
> > + NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
> > +
> > + node_set_online(nodeid);
> > +}
> > +
> > +void __init numa_init_array(void)
> > +{
> > + int rr, i;
> > +
> > + /* There are unfortunately some poorly designed mainboards around
> > + that only connect memory to a single CPU. This breaks the 1:1
> cpu->node
> > + mapping. To avoid this fill in the mapping for all possible
> > + CPUs, as the number of CPUs is not known yet.
> > + We round robin the existing nodes. */
> > + rr = first_node(node_online_map);
> > + for ( i = 0; i < nr_cpu_ids; i++ )
> > + {
> > + if ( cpu_to_node[i] != NUMA_NO_NODE )
> > + continue;
> > + numa_set_node(i, rr);
> > + rr = cycle_node(rr, node_online_map);
> > + }
> > +}
> > +
> > +#ifdef CONFIG_NUMA_EMU
> > +int numa_fake __initdata = 0;
> > +
> > +/* Numa emulation */
> > +static int __init numa_emulation(u64 start_pfn, u64 end_pfn)
>
> Here, this should be either "unsigned long" or ideally "mfn_t".
> Although, if you use "unsigned long", you will need to...
>
Do we need a separate patch to do it?
> > +{
> > + int i;
> > + struct node nodes[MAX_NUMNODES];
> > + u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
>
> ... cast "(end_pfn - start_pfn)" to uin64_t or use pfn_to_paddr().
>
Ok
> > +
> > + /* Kludge needed for the hash function */
> > + if ( hweight64(sz) > 1 )
> > + {
> > + u64 x = 1;
> > + while ( (x << 1) < sz )
> > + x <<= 1;
> > + if ( x < sz/2 )
> > + printk(KERN_ERR "Numa emulation unbalanced. Complain to
> maintainer\n");
> > + sz = x;
> > + }
> > +
> > + memset(&nodes,0,sizeof(nodes));
> > + for ( i = 0; i < numa_fake; i++ )
> > + {
> > + nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
> > + if ( i == numa_fake - 1 )
> > + sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
> > + nodes[i].end = nodes[i].start + sz;
> > + printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64"
> (%"PRIu64"MB)\n",
> > + i,
> > + nodes[i].start, nodes[i].end,
> > + (nodes[i].end - nodes[i].start) >> 20);
> > + node_set_online(i);
> > + }
> > + memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
> > + if ( memnode_shift < 0 )
> > + {
> > + memnode_shift = 0;
> > + printk(KERN_ERR "No NUMA hash function found. Emulation
> disabled.\n");
> > + return -1;
> > + }
> > + for_each_online_node ( i )
> > + setup_node_bootmem(i, nodes[i].start, nodes[i].end);
> > + numa_init_array();
> > +
> > + return 0;
> > +}
> > +#endif
> > +
> > +void __init numa_initmem_init(unsigned long start_pfn, unsigned long
> end_pfn)
> > +{
> > + int i;
> > +
> > +#ifdef CONFIG_NUMA_EMU
> > + if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
> > + return;
> > +#endif
> > +
> > +#ifdef CONFIG_ACPI_NUMA
> > + if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
> > + (u64)end_pfn << PAGE_SHIFT) )
>
> (u64)v << PAGE_SHIFT should be switched to use pfn_to_paddr() or
> mfn_to_paddr() if you decide to make start_pfn and end_pfn typesafe.
>
Still need a separate patch to change it before move?
> > + return;
> > +#endif
> > +
> > + printk(KERN_INFO "%s\n",
> > + numa_off ? "NUMA turned off" : "No NUMA configuration
> found");
> > +
> > + printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
> > + (u64)start_pfn << PAGE_SHIFT,
> > + (u64)end_pfn << PAGE_SHIFT);
>
> Same remark here. PRIx64 would also have to be switched to PRIpaddr.
>
Hmm, It seems I'd better to use a separate patch to do PRIpaddr clean up
before move code.
> > + /* setup dummy node covering all memory */
> > + memnode_shift = BITS_PER_LONG - 1;
> > + memnodemap = _memnodemap;
> > + memnodemapsize = ARRAY_SIZE(_memnodemap);
> > +
> > + nodes_clear(node_online_map);
> > + node_set_online(0);
> > + for ( i = 0; i < nr_cpu_ids; i++ )
> > + numa_set_node(i, 0);
> > + cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
> > + setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT,
> > + (u64)end_pfn << PAGE_SHIFT);
> > +}
> > diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
> > index e8a92ad9df..f8e4e15586 100644
> > --- a/xen/include/asm-x86/numa.h
> > +++ b/xen/include/asm-x86/numa.h
> > @@ -21,16 +21,11 @@ extern nodeid_t pxm_to_node(unsigned int pxm);
> >
> > #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
> >
> > -extern void numa_init_array(void);
> > -extern bool numa_off;
> > -
> > -
> > extern int srat_disabled(void);
> > extern void numa_set_node(int cpu, nodeid_t node);
> > extern nodeid_t setup_node(unsigned int pxm);
> > extern void srat_detect_node(int cpu);
> >
> > -extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
> > extern nodeid_t apicid_to_node[];
> > extern void init_cpu_to_node(void);
> >
> > diff --git a/xen/include/asm-x86/setup.h b/xen/include/asm-x86/setup.h
> > index 24be46115d..63838ba2d1 100644
> > --- a/xen/include/asm-x86/setup.h
> > +++ b/xen/include/asm-x86/setup.h
> > @@ -17,7 +17,6 @@ void early_time_init(void);
> >
> > void set_nr_cpu_ids(unsigned int max_cpus);
> >
> > -void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
> > void arch_init_memory(void);
> > void subarch_init_memory(void);
> >
> > diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
> > index 67b79a73a3..258a5cb3db 100644
> > --- a/xen/include/xen/numa.h
> > +++ b/xen/include/xen/numa.h
> > @@ -26,7 +26,6 @@
> > extern int memnode_shift;
> > extern unsigned long memnodemapsize;
> > extern u8 *memnodemap;
> > -extern typeof(*memnodemap) _memnodemap[64];
> >
> > struct node_data {
> > unsigned long node_start_pfn;
> > @@ -69,6 +68,13 @@ extern int conflicting_memblks(u64 start, u64 end);
> > extern void cutoff_node(int i, u64 start, u64 end);
> > extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
> >
> > +extern void numa_init_array(void);
> > +extern void numa_initmem_init(unsigned long start_pfn, unsigned long
> end_pfn);
> > +extern bool numa_off;
> > +extern int numa_fake;
> > +
> > +extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
> > +
> > #endif /* CONFIG_NUMA */
> >
> > #endif /* _XEN_NUMA_H */
> >
>
> Cheers,
>
> --
> Julien Grall
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |