|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RE: [XEN RFC PATCH 08/40] xen/x86: Move NUMA memory node map functions to common
Hi Julien,
> -----Original Message-----
> From: Julien Grall <julien@xxxxxxx>
> Sent: 2021年8月24日 1:47
> To: Wei Chen <Wei.Chen@xxxxxxx>; xen-devel@xxxxxxxxxxxxxxxxxxxx;
> sstabellini@xxxxxxxxxx; jbeulich@xxxxxxxx
> Cc: Bertrand Marquis <Bertrand.Marquis@xxxxxxx>
> Subject: Re: [XEN RFC PATCH 08/40] xen/x86: Move NUMA memory node map
> functions to common
>
> Hi Wei,
>
> On 11/08/2021 11:23, Wei Chen wrote:
> > In the later patches we will add NUMA support to Arm. Arm
> > NUMA support will follow current memory node map management
> > as x86. So this part of code can be common, in this case,
> > we move this part of code from arch/x86 to common.
>
> I would add "No functional changes intended" to make clear this patch is
> only moving code.
Ok, I will do it.
>
> >
> > Signed-off-by: Wei Chen <wei.chen@xxxxxxx>
> > ---
> > xen/arch/x86/numa.c | 114 --------------------------------
> > xen/common/Makefile | 1 +
> > xen/common/numa.c | 131 +++++++++++++++++++++++++++++++++++++
> > xen/include/asm-x86/numa.h | 29 --------
> > xen/include/xen/numa.h | 35 ++++++++++
> > 5 files changed, 167 insertions(+), 143 deletions(-)
> > create mode 100644 xen/common/numa.c
> >
> > diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
> > index d23f4f7919..a6211be121 100644
> > --- a/xen/arch/x86/numa.c
> > +++ b/xen/arch/x86/numa.c
> > @@ -29,14 +29,6 @@ custom_param("numa", numa_setup);
> > /* from proto.h */
> > #define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
> >
> > -struct node_data node_data[MAX_NUMNODES];
> > -
> > -/* Mapping from pdx to node id */
> > -int memnode_shift;
> > -static typeof(*memnodemap) _memnodemap[64];
> > -unsigned long memnodemapsize;
> > -u8 *memnodemap;
> > -
> > nodeid_t cpu_to_node[NR_CPUS] __read_mostly = {
> > [0 ... NR_CPUS-1] = NUMA_NO_NODE
> > };
> > @@ -58,112 +50,6 @@ int srat_disabled(void)
> > return numa_off || acpi_numa < 0;
> > }
> >
> > -/*
> > - * Given a shift value, try to populate memnodemap[]
> > - * Returns :
> > - * 1 if OK
> > - * 0 if memnodmap[] too small (of shift too small)
> > - * -1 if node overlap or lost ram (shift too big)
> > - */
> > -static int __init populate_memnodemap(const struct node *nodes,
> > - int numnodes, int shift, nodeid_t
> *nodeids)
> > -{
> > - unsigned long spdx, epdx;
> > - int i, res = -1;
> > -
> > - memset(memnodemap, NUMA_NO_NODE, memnodemapsize *
> sizeof(*memnodemap));
> > - for ( i = 0; i < numnodes; i++ )
> > - {
> > - spdx = paddr_to_pdx(nodes[i].start);
> > - epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
> > - if ( spdx >= epdx )
> > - continue;
> > - if ( (epdx >> shift) >= memnodemapsize )
> > - return 0;
> > - do {
> > - if ( memnodemap[spdx >> shift] != NUMA_NO_NODE )
> > - return -1;
> > -
> > - if ( !nodeids )
> > - memnodemap[spdx >> shift] = i;
> > - else
> > - memnodemap[spdx >> shift] = nodeids[i];
> > -
> > - spdx += (1UL << shift);
> > - } while ( spdx < epdx );
> > - res = 1;
> > - }
> > -
> > - return res;
> > -}
> > -
> > -static int __init allocate_cachealigned_memnodemap(void)
> > -{
> > - unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap));
> > - unsigned long mfn = mfn_x(alloc_boot_pages(size, 1));
> > -
> > - memnodemap = mfn_to_virt(mfn);
> > - mfn <<= PAGE_SHIFT;
> > - size <<= PAGE_SHIFT;
> > - printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
> > - mfn, mfn + size);
> > - memnodemapsize = size / sizeof(*memnodemap);
> > -
> > - return 0;
> > -}
> > -
> > -/*
> > - * The LSB of all start and end addresses in the node map is the value
> of the
> > - * maximum possible shift.
> > - */
> > -static int __init extract_lsb_from_nodes(const struct node *nodes,
> > - int numnodes)
> > -{
> > - int i, nodes_used = 0;
> > - unsigned long spdx, epdx;
> > - unsigned long bitfield = 0, memtop = 0;
> > -
> > - for ( i = 0; i < numnodes; i++ )
> > - {
> > - spdx = paddr_to_pdx(nodes[i].start);
> > - epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
> > - if ( spdx >= epdx )
> > - continue;
> > - bitfield |= spdx;
> > - nodes_used++;
> > - if ( epdx > memtop )
> > - memtop = epdx;
> > - }
> > - if ( nodes_used <= 1 )
> > - i = BITS_PER_LONG - 1;
> > - else
> > - i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
> > - memnodemapsize = (memtop >> i) + 1;
> > - return i;
> > -}
> > -
> > -int __init compute_hash_shift(struct node *nodes, int numnodes,
> > - nodeid_t *nodeids)
> > -{
> > - int shift;
> > -
> > - shift = extract_lsb_from_nodes(nodes, numnodes);
> > - if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) )
> > - memnodemap = _memnodemap;
> > - else if ( allocate_cachealigned_memnodemap() )
> > - return -1;
> > - printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift);
> > -
> > - if ( populate_memnodemap(nodes, numnodes, shift, nodeids) != 1 )
> > - {
> > - printk(KERN_INFO "Your memory is not aligned you need to "
> > - "rebuild your hypervisor with a bigger NODEMAPSIZE "
> > - "shift=%d\n", shift);
> > - return -1;
> > - }
> > -
> > - return shift;
> > -}
> > /* initialize NODE_DATA given nodeid and start/end */
> > void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
> > {
> > diff --git a/xen/common/Makefile b/xen/common/Makefile
> > index 54de70d422..f8f667e90a 100644
> > --- a/xen/common/Makefile
> > +++ b/xen/common/Makefile
> > @@ -54,6 +54,7 @@ obj-y += wait.o
> > obj-bin-y += warning.init.o
> > obj-$(CONFIG_XENOPROF) += xenoprof.o
> > obj-y += xmalloc_tlsf.o
> > +obj-$(CONFIG_NUMA) += numa.o
>
> AFAICT, the Makefile is listing the file in alphabetical order. So
> please add numa.o in the correct position.
>
Thanks for the reminder, I will fix it.
> >
> > obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma
> lzo unlzo unlz4 unzstd earlycpio,$(n).init.o)
> >
> > diff --git a/xen/common/numa.c b/xen/common/numa.c
> > new file mode 100644
> > index 0000000000..e65b6a6676
> > --- /dev/null
> > +++ b/xen/common/numa.c
> > @@ -0,0 +1,131 @@
> > +/*
> > + * Generic VM initialization for x86-64 NUMA setups.
> > + * Copyright 2002,2003 Andi Kleen, SuSE Labs.
> > + * Adapted for Xen: Ryan Harper <ryanh@xxxxxxxxxx>
> > + */
> > +
> > +#include <xen/mm.h>
> > +#include <xen/string.h>
> > +#include <xen/init.h>
> > +#include <xen/ctype.h>
>
> You don't seem to use any helpers./types directly defined by at least
> this header...
>
> > +#include <xen/nodemask.h>
> > +#include <xen/numa.h>
> > +#include <xen/time.h>
>
> ... this one and ...
>
> > +#include <xen/smp.h>
>
> ... this one. Can you check the list of headers and introduce the
> minimum? If the dependency is required by another headers, then I think
> that dependency should be moved in the header requiring it.
>
I will check it in next version. If it isn't needed, I will remove it.
> > +#include <xen/pfn.h>
> > +#include <xen/sched.h>
>
> Please sort the includes in alphabetical order.
>
OK
> > +
> > +struct node_data node_data[MAX_NUMNODES];
> > +
> > +/* Mapping from pdx to node id */
> > +int memnode_shift;
> > +typeof(*memnodemap) _memnodemap[64];
> > +unsigned long memnodemapsize;
> > +u8 *memnodemap;
> > +
> > +/*
> > + * Given a shift value, try to populate memnodemap[]
> > + * Returns :
> > + * 1 if OK
> > + * 0 if memnodmap[] too small (of shift too small)
> > + * -1 if node overlap or lost ram (shift too big)
> > + */
> > +static int __init populate_memnodemap(const struct node *nodes,
> > + int numnodes, int shift, nodeid_t
> *nodeids)
> > +{
> > + unsigned long spdx, epdx;
> > + int i, res = -1;
> > +
> > + memset(memnodemap, NUMA_NO_NODE, memnodemapsize *
> sizeof(*memnodemap));
> > + for ( i = 0; i < numnodes; i++ )
> > + {
> > + spdx = paddr_to_pdx(nodes[i].start);
> > + epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
> > + if ( spdx >= epdx )
> > + continue;
> > + if ( (epdx >> shift) >= memnodemapsize )
> > + return 0;
> > + do {
> > + if ( memnodemap[spdx >> shift] != NUMA_NO_NODE )
> > + return -1;
> > +
> > + if ( !nodeids )
> > + memnodemap[spdx >> shift] = i;
> > + else
> > + memnodemap[spdx >> shift] = nodeids[i];
> > +
> > + spdx += (1UL << shift);
> > + } while ( spdx < epdx );
> > + res = 1;
> > + }
> > +
> > + return res;
> > +}
> > +
> > +static int __init allocate_cachealigned_memnodemap(void)
> > +{
> > + unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap));
> > + unsigned long mfn = mfn_x(alloc_boot_pages(size, 1));
> > +
> > + memnodemap = mfn_to_virt(mfn);
> > + mfn <<= PAGE_SHIFT;
> > + size <<= PAGE_SHIFT;
> > + printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
> > + mfn, mfn + size);
> > + memnodemapsize = size / sizeof(*memnodemap);
> > +
> > + return 0;
> > +}
> > +
> > +/*
> > + * The LSB of all start and end addresses in the node map is the value
> of the
> > + * maximum possible shift.
> > + */
> > +static int __init extract_lsb_from_nodes(const struct node *nodes,
> > + int numnodes)
> > +{
> > + int i, nodes_used = 0;
> > + unsigned long spdx, epdx;
> > + unsigned long bitfield = 0, memtop = 0;
> > +
> > + for ( i = 0; i < numnodes; i++ )
> > + {
> > + spdx = paddr_to_pdx(nodes[i].start);
> > + epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
> > + if ( spdx >= epdx )
> > + continue;
> > + bitfield |= spdx;
> > + nodes_used++;
> > + if ( epdx > memtop )
> > + memtop = epdx;
> > + }
> > + if ( nodes_used <= 1 )
> > + i = BITS_PER_LONG - 1;
> > + else
> > + i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
> > + memnodemapsize = (memtop >> i) + 1;
> > + return i;
> > +}
> > +
> > +int __init compute_hash_shift(struct node *nodes, int numnodes,
> > + nodeid_t *nodeids)
> > +{
> > + int shift;
> > +
> > + shift = extract_lsb_from_nodes(nodes, numnodes);
> > + if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) )
> > + memnodemap = _memnodemap;
> > + else if ( allocate_cachealigned_memnodemap() )
> > + return -1;
> > + printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift);
> > +
> > + if ( populate_memnodemap(nodes, numnodes, shift, nodeids) != 1 )
> > + {
> > + printk(KERN_INFO "Your memory is not aligned you need to "
> > + "rebuild your hypervisor with a bigger NODEMAPSIZE "
> > + "shift=%d\n", shift);
> > + return -1;
> > + }
> > +
> > + return shift;
> > +}
> > diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
> > index bada2c0bb9..abe5617d01 100644
> > --- a/xen/include/asm-x86/numa.h
> > +++ b/xen/include/asm-x86/numa.h
> > @@ -26,7 +26,6 @@ extern int compute_hash_shift(struct node *nodes, int
> numnodes,
> > extern nodeid_t pxm_to_node(unsigned int pxm);
> >
> > #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
> > -#define VIRTUAL_BUG_ON(x)
> >
> > extern void numa_add_cpu(int cpu);
> > extern void numa_init_array(void);
> > @@ -47,34 +46,6 @@ static inline void clear_node_cpumask(int cpu)
> > cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
> > }
> >
> > -/* Simple perfect hash to map pdx to node numbers */
> > -extern int memnode_shift;
> > -extern unsigned long memnodemapsize;
> > -extern u8 *memnodemap;
> > -
> > -struct node_data {
> > - unsigned long node_start_pfn;
> > - unsigned long node_spanned_pages;
> > -};
> > -
> > -extern struct node_data node_data[];
> > -
> > -static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
> > -{
> > - nodeid_t nid;
> > - VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >=
> memnodemapsize);
> > - nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
> > - VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
> > - return nid;
> > -}
> > -
> > -#define NODE_DATA(nid) (&(node_data[nid]))
> > -
> > -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
> > -#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
> > -#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
> > - NODE_DATA(nid)->node_spanned_pages)
> > -
> > extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
> >
> > void srat_parse_regions(u64 addr);
> > diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
> > index 7aef1a88dc..39e8a4e00a 100644
> > --- a/xen/include/xen/numa.h
> > +++ b/xen/include/xen/numa.h
> > @@ -18,4 +18,39 @@
> > (((d)->vcpu != NULL && (d)->vcpu[0] != NULL) \
> > ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE)
> >
> > +/* The following content can be used when NUMA feature is enabled */
> > +#if defined(CONFIG_NUMA)
>
> Please use #ifdef CONFIG_NUMA
>
> > +
> > +/* Simple perfect hash to map pdx to node numbers */
> > +extern int memnode_shift;
> > +extern unsigned long memnodemapsize;
> > +extern u8 *memnodemap;
> > +extern typeof(*memnodemap) _memnodemap[64];
>
> AFAICT, this will be turned static against in a follow-up patch. Can
> this be avoided?
>
I will try it in next version.
> > +
> > +struct node_data {
> > + unsigned long node_start_pfn;
> > + unsigned long node_spanned_pages;
> > +};
> > +
> > +extern struct node_data node_data[];
> > +#define VIRTUAL_BUG_ON(x)
> > +
> > +static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
> > +{
> > + nodeid_t nid;
> > + VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >=
> memnodemapsize);
> > + nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
> > + VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
> > + return nid;
> > +}
> > +
> > +#define NODE_DATA(nid) (&(node_data[nid]))
> > +
> > +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
> > +#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
> > +#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
> > + NODE_DATA(nid)->node_spanned_pages)
> > +
> > +#endif /* CONFIG_NUMA */
> > +
> > #endif /* _XEN_NUMA_H */
> >
>
> Cheers,
>
> --
> Julien Grall
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |