[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v3 2/6] xen/x86: move generically usable NUMA code from x86 to common
On 22.08.2022 04:58, Wei Chen wrote: > --- /dev/null > +++ b/xen/common/numa.c > @@ -0,0 +1,440 @@ > +/* > + * Generic VM initialization for NUMA setups. > + * Copyright 2002,2003 Andi Kleen, SuSE Labs. > + * Adapted for Xen: Ryan Harper <ryanh@xxxxxxxxxx> > + */ > + > +#include <xen/init.h> > +#include <xen/keyhandler.h> > +#include <xen/mm.h> > +#include <xen/nodemask.h> > +#include <xen/numa.h> > +#include <xen/param.h> > +#include <xen/sched.h> > +#include <xen/softirq.h> > + > +struct node_data __ro_after_init node_data[MAX_NUMNODES]; > + > +/* Mapping from pdx to node id */ > +unsigned int __ro_after_init memnode_shift; > +unsigned long __ro_after_init memnodemapsize; > +uint8_t *__ro_after_init memnodemap; > +static uint8_t __ro_after_init _memnodemap[64]; > + > +nodeid_t __ro_after_init cpu_to_node[NR_CPUS] = { I don't think this can be __ro_after_init, or you'll break CPU hotplug. > + [0 ... NR_CPUS-1] = NUMA_NO_NODE > +}; > + > +cpumask_t __ro_after_init node_to_cpumask[MAX_NUMNODES]; Same here. > +nodemask_t __read_mostly node_online_map = { { [0] = 1UL } }; > + > +bool __read_mostly numa_off; This, otoh, can be, or have I missed a place where it's written by a non-__init function? > +bool numa_disabled(void) > +{ > + return numa_off || arch_numa_disabled(false); > +} > + > +/* > + * Given a shift value, try to populate memnodemap[] > + * Returns : > + * 1 if OK > + * 0 if memnodmap[] too small (of shift too small) > + * -1 if node overlap or lost ram (shift too big) > + */ > +static int __init populate_memnodemap(const struct node *nodes, > + nodeid_t numnodes, unsigned int shift, I don't think you can use nodeid_t for a variable holding a node count. Think of what would happen if there were 256 nodes, the IDs of which all fit in nodeid_t. (Same again further down.) > + nodeid_t *nodeids) > +{ > + unsigned long spdx, epdx; > + nodeid_t i; This is likely inefficient for a loop counter variable. Note how you use "unsigned int" in e.g. extract_lsb_from_nodes(). > +unsigned int __init compute_hash_shift(const struct node *nodes, > + nodeid_t numnodes, nodeid_t *nodeids) > +{ > + unsigned int shift; > + > + shift = extract_lsb_from_nodes(nodes, numnodes); > + if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) ) > + memnodemap = _memnodemap; > + else if ( allocate_cachealigned_memnodemap() ) > + return -1; With this the function can't very well have "unsigned int" return type. > +void __init numa_init_array(void) > +{ > + int rr, i; "unsigned int" for i and perhaps nodeid_t for rr? > +static int __init numa_emulation(unsigned long start_pfn, > + unsigned long end_pfn) > +{ > + unsigned int i; > + struct node nodes[MAX_NUMNODES]; > + uint64_t sz = pfn_to_paddr(end_pfn - start_pfn) / numa_fake; > + > + /* Kludge needed for the hash function */ > + if ( hweight64(sz) > 1 ) > + { > + u64 x = 1; uint64_t and a blank line between declaration(s) and statement(s) please. > + while ( (x << 1) < sz ) > + x <<= 1; > + if ( x < sz / 2 ) > + printk(KERN_ERR "Numa emulation unbalanced. Complain to > maintainer\n"); > + sz = x; > + } > + > + memset(&nodes, 0, sizeof(nodes)); > + for ( i = 0; i < numa_fake; i++ ) > + { > + nodes[i].start = pfn_to_paddr(start_pfn) + i * sz; > + if ( i == numa_fake - 1 ) > + sz = pfn_to_paddr(end_pfn) - nodes[i].start; > + nodes[i].end = nodes[i].start + sz; > + printk(KERN_INFO "Faking node %u at %"PRIx64"-%"PRIx64" > (%"PRIu64"MB)\n", > + i, nodes[i].start, nodes[i].end, > + (nodes[i].end - nodes[i].start) >> 20); > + node_set_online(i); > + } > + memnode_shift = compute_hash_shift(nodes, numa_fake, NULL); > + if ( memnode_shift < 0 ) Does the compiler not warn here, comparing an unsigned value for being negative? > --- a/xen/include/xen/numa.h > +++ b/xen/include/xen/numa.h > @@ -18,4 +18,70 @@ > (((d)->vcpu != NULL && (d)->vcpu[0] != NULL) \ > ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE) > > +/* The following content can be used when NUMA feature is enabled */ > +#ifdef CONFIG_NUMA > + > +extern nodeid_t cpu_to_node[NR_CPUS]; > +extern cpumask_t node_to_cpumask[]; > + > +#define cpu_to_node(cpu) (cpu_to_node[cpu]) > +#define parent_node(node) (node) > +#define node_to_first_cpu(node) (__ffs(node_to_cpumask[node])) > +#define node_to_cpumask(node) (node_to_cpumask[node]) Please could you take the opportunity and drop unnecessary parentheses from here? Afaict only parent_node() need them to be kept. > +struct node { > + paddr_t start, end; > +}; > + > +extern unsigned int compute_hash_shift(const struct node *nodes, > + nodeid_t numnodes, nodeid_t *nodeids); > + > +#define VIRTUAL_BUG_ON(x) > + > +extern bool numa_off; > +extern void numa_add_cpu(unsigned int cpu); Please can you have variable and function declarations visually separated, by adding a blank line between them? > +extern void numa_init_array(void); > +extern void numa_set_node(unsigned int cpu, nodeid_t node); > +extern void numa_initmem_init(unsigned long start_pfn, unsigned long > end_pfn); > +extern int numa_scan_nodes(paddr_t start, paddr_t end); > + > +extern int arch_numa_setup(const char *opt); > +extern bool arch_numa_disabled(bool init_as_disable); > +extern void setup_node_bootmem(nodeid_t nodeid, paddr_t start, paddr_t end); > + > +static inline void clear_node_cpumask(unsigned int cpu) > +{ > + cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]); > +} > + > +/* Simple perfect hash to map pdx to node numbers */ > +extern unsigned int memnode_shift; > +extern unsigned long memnodemapsize; > +extern uint8_t *memnodemap; > + > +struct node_data { > + unsigned long node_start_pfn; > + unsigned long node_spanned_pages; > +}; > + > +extern struct node_data node_data[]; > + > +static inline __attribute_pure__ nodeid_t phys_to_nid(paddr_t addr) Nit: The conventional place for attributes is between return type and function (or object) name. > +{ > + nodeid_t nid; > + VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >= memnodemapsize); > + nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift]; > + VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); > + return nid; > +} > + > +#define NODE_DATA(nid) (&(node_data[nid])) Again please take the opportunity and drop the unnecessary inner parentheses. > +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) > +#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) > +#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ > + NODE_DATA(nid)->node_spanned_pages) Pleae correct indentation here - it was correct originally (except for the fact that it was using hard tabs). Jan
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |