[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 10/11] [PVOPS] Use enlightenment to setup nodes
It looks as if the comments I provided in <20100216174900.GB21067@xxxxxxxxxxxxxxxxxxx> were ignored. http://article.gmane.org/gmane.comp.emulators.xen.devel/78118 Here I am repeating some of them and adding some new ones. Please address/answer them. .. snip.. > - acpi_numa_init(); > + if (acpi_numa > 0) > + acpi_numa_init(); The style guide is to use tabs, not spaces. It looks out of place? Why not just do acpi_numa_init() by itself? > #endif > > initmem_init(0, max_pfn); > diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c > index 459913b..3a856dc 100644 > --- a/arch/x86/mm/numa_64.c > +++ b/arch/x86/mm/numa_64.c > @@ -11,7 +11,11 @@ > #include <linux/ctype.h> > #include <linux/module.h> > #include <linux/nodemask.h> > +#include <linux/cpumask.h> > #include <linux/sched.h> > +#include <linux/bitops.h> > +#include <xen/interface/xen.h> > +#include <xen/interface/memory.h> > > #include <asm/e820.h> > #include <asm/proto.h> > @@ -19,6 +23,8 @@ > #include <asm/numa.h> > #include <asm/acpi.h> > #include <asm/k8.h> > +#include <asm/xen/hypervisor.h> > +#include <asm/xen/hypercall.h> > > struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; > EXPORT_SYMBOL(node_data); > @@ -428,7 +434,6 @@ static int __init numa_emulation(unsigned long start_pfn, > unsigned long last_pfn > */ > if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { > long n = simple_strtol(cmdline, NULL, 0); > - Uhh, why? > num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n); > if (num_nodes < 0) > return num_nodes; > @@ -522,6 +527,246 @@ out: > numa_init_array(); > return 0; > } > + > +/************************************************************************/ > +#ifdef CONFIG_XEN_NUMA_GUEST Yikes. Can you move all of this code into its own file so that there is no need to sprinkle this with #ifdefs? 
> +/* XEN PV GUEST NUMA */ > +struct xen_domain_numa_layout HYPERVISOR_pv_numa_layout; > + > +static inline void __init > +bitmap_byte_to_long(unsigned long *lp, const uint8_t *bp, int nbits) > +{ > + /* We may need to pad the final longword with zeroes. */ > + if (nbits & (BITS_PER_LONG-1)) > + lp[BITS_TO_LONGS(nbits)-1] = 0; > + memcpy(lp, bp, (nbits+7)/8); > +} > + > +static void __init > +xenctl_cpumask_to_cpumask(cpumask_t *cpumask, struct xenctl_cpumask > *xcpumask) > +{ > + unsigned int nr_cpus; > + uint8_t *bytemap; > + > + bytemap = xcpumask->bits; > + > + nr_cpus = > + min_t(unsigned int, XENCTL_NR_CPUS, NR_CPUS); > + > + cpumask_clear(cpumask); > + bitmap_byte_to_long(cpus_addr(*cpumask), bytemap, nr_cpus); > +} > + > +static void __init > +xen_dump_numa_layout(struct xen_domain_numa_layout *layout) > +{ > + unsigned int i, j; > + char vcpumaskstr[128]; > + printk("NUMA-LAYOUT : vcpus(%u), vnodes(%u), ", > + layout->max_vcpus, layout->max_vnodes); No good. You need printk(KERN_DEBUG .. > + switch (layout->type) > + { > + case XEN_DOM_NUMA_CONFINED: > + printk("type(CONFINED)\n"); ditto. > + break; > + case XEN_DOM_NUMA_SPLIT: > + printk("type(SPLIT)\n"); ditto. > + break; > + case XEN_DOM_NUMA_STRIPED: > + printk("type(STRIPED)\n"); ditto. > + break; > + default: > + printk("type(UNDEFINED)\n"); ditto. 
> + } > + > + for (i = 0; i < layout->max_vnodes; i++) > + { > + cpumask_t vcpu_mask; > + struct xenctl_cpumask *xvcpumask; > + struct xen_vnode_data *vnode_data = &layout->vnode_data[i]; > + xvcpumask = &vnode_data->vcpu_mask; > + xenctl_cpumask_to_cpumask(&vcpu_mask, xvcpumask); > + cpumask_scnprintf(vcpumaskstr, sizeof(vcpumaskstr), &vcpu_mask); > + printk("vnode[%u]:mnode(%u), node_nr_pages(%lx), vcpu_mask(%s)\n", ditto > + vnode_data->vnode_id, vnode_data->mnode_id, > + (unsigned long)vnode_data->nr_pages, vcpumaskstr); > + } > + > + printk("vnode distances :\n"); ditto > + for (i = 0; i < layout->max_vnodes; i++) > + printk("\tvnode[%u]", i); ditto > + for (i = 0; i < layout->max_vnodes; i++) > + { > + printk("\nvnode[%u]", i); ditto > + for (j = 0; j < layout->max_vnodes; j++) > + printk("\t%u", layout->vnode_distance[i*layout->max_vnodes + j]); > + printk("\n"); ditto > + } > + return; > +} > + > +static void __init xen_init_slit_table(struct xen_domain_numa_layout *layout) > +{ > + /* Construct a slit table (using layout->vnode_distance). > + * Copy it to acpi_slit. 
*/ > + return; > +} > + > +/* Distribute the vcpus over the vnodes according to their affinity */ > +static void __init xen_init_numa_array(struct xen_domain_numa_layout *layout) > +{ > + int vcpu, vnode; > + > + printk(KERN_INFO "xen_numa_init_array - cpu_to_node initialization\n"); > + > + > + for (vnode = 0; vnode < layout->max_vnodes; vnode++) > + { > + struct xen_vnode_data *vnode_data = &layout->vnode_data[vnode]; > + struct xenctl_cpumask *xvcpu_mask = &vnode_data->vcpu_mask; > + cpumask_t vcpu_mask; > + > + xenctl_cpumask_to_cpumask(&vcpu_mask, xvcpu_mask); > + > + for (vcpu = 0; vcpu < layout->max_vcpus; vcpu++) > + { > + if (cpu_isset(vcpu, vcpu_mask)) > + { > + if (early_cpu_to_node(vcpu) != NUMA_NO_NODE) > + { > + printk(KERN_INFO "EARLY vcpu[%d] on vnode[%d]\n", > + vcpu, early_cpu_to_node(vcpu)); > + continue; > + } > + printk(KERN_INFO "vcpu[%d] on vnode[%d]\n", vcpu, vnode); > + numa_set_node(vcpu, vnode); > + } > + } > + } > + return; > +} > + > +static int __init xen_numa_emulation(struct xen_domain_numa_layout *layout, > + unsigned long start_pfn, unsigned long last_pfn) > +{ > + int num_vnodes, i; > + u64 node_start_addr, node_end_addr, max_addr; > + > + printk(KERN_INFO "xen_numa_emulation : max_vnodes(%d), max_vcpus(%d)", > + layout->max_vnodes, > layout->max_vcpus); > + > + if (layout->type != XEN_DOM_NUMA_SPLIT) > + { > + printk(KERN_INFO "xen_numa_emulation : Invalid layout type"); > + return -1; > + } > + > + memset(&nodes, 0, sizeof(nodes)); > + > + num_vnodes = layout->max_vnodes; > + BUG_ON(num_vnodes > MAX_NUMNODES); > + > + max_addr = last_pfn << PAGE_SHIFT; > + > + node_start_addr = start_pfn << PAGE_SHIFT; > + for (i = 0; i < num_vnodes; i++) > + { > + struct xen_vnode_data *vnode_data = &layout->vnode_data[i]; > + u64 node_size = vnode_data->nr_pages << PAGE_SHIFT; > + > + node_size &= FAKE_NODE_MIN_HASH_MASK; /* 64MB aligned */ > + > + if (i == (num_vnodes-1)) > + node_end_addr = max_addr; > + else > + node_end_addr = 
node_start_addr + node_size; > + /* node_start_addr updated inside the function */ > + if (setup_node_range(i, nodes, &node_start_addr, > + (node_end_addr-node_start_addr), max_addr+1)) > + goto failed; > + } > + > + printk(KERN_INFO "XEN domain numa emulation - setup nodes\n"); > + > + memnode_shift = compute_hash_shift(nodes, num_vnodes, NULL); > + if (memnode_shift < 0) { > + printk(KERN_ERR "No NUMA hash function found.\n"); > + goto failed; > + } > + /* XXX: Shouldn't be needed because we disabled acpi_numa very early ! */ > + /* > + * We need to vacate all active ranges that may have been registered by > + * SRAT and set acpi_numa to -1 so that srat_disabled() always returns > + * true. NUMA emulation has succeeded so we will not scan ACPI nodes. > + */ > + remove_all_active_ranges(); > + > + BUG_ON(acpi_numa >= 0); > + for_each_node_mask(i, node_possible_map) { > + e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, > + nodes[i].end >> PAGE_SHIFT); > + setup_node_bootmem(i, nodes[i].start, nodes[i].end); > + } > + xen_init_slit_table(layout); > + xen_init_numa_array(layout); > + return 0; > +failed: > + return -1; > +} > + > +static int __init > +xen_get_domain_numa_layout(struct xen_domain_numa_layout *pv_layout) > +{ > + int rc; > + struct xenmem_numa_op memop; > + memop.cmd = XENMEM_get_domain_numa_layout; > + memop.u.dinfo.domid = DOMID_SELF; > + memop.u.dinfo.version = XEN_DOM_NUMA_INTERFACE_VERSION; > + memop.u.dinfo.bufsize = sizeof(*pv_layout); > + set_xen_guest_handle(memop.u.dinfo.buf, pv_layout); > + > + if ((rc = HYPERVISOR_memory_op(XENMEM_numa_op, &memop))) > + { > + printk(KERN_INFO "XEN NUMA GUEST:xen_get_domain_numa_layout > failed\n"); > + xen_start_info->flags &= ~SIF_NUMA_DOMAIN; > + goto done; > + } > + > + if (pv_layout->version != XEN_DOM_NUMA_INTERFACE_VERSION) > + { > + printk(KERN_INFO "XEN NUMA GUEST: version mismatch (disabling)\n"); > + xen_start_info->flags &= ~SIF_NUMA_DOMAIN; > + rc = -1; > + } > + 
xen_dump_numa_layout(pv_layout); > +done: > + return rc; > +} > + > +static int __init > +xen_pv_numa(unsigned long start_pfn, unsigned long last_pfn) > +{ > + int rc = 0; > + if (!xen_pv_domain() || !(xen_start_info->flags & SIF_NUMA_DOMAIN)) > + { > + rc = -1; > + printk(KERN_INFO "xen numa emulation disabled\n"); > + goto done; The tabs/spaces don't look right. Can you run this patch through checkpatch.pl? > + } > + if ((rc = xen_get_domain_numa_layout(&HYPERVISOR_pv_numa_layout))) > + goto done; > + rc = xen_numa_emulation(&HYPERVISOR_pv_numa_layout, start_pfn, last_pfn); > +done: > + return rc; > +} > +#else > +static inline int __init > +xen_pv_numa(unsigned long start_pfn, unsigned long last_pfn) > +{ > + return -1; > +} > +#endif /* CONFIG_XEN_NUMA_GUEST */ > +/************************************************************************/ > #endif /* CONFIG_NUMA_EMU */ > > void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) > @@ -532,6 +777,9 @@ void __init initmem_init(unsigned long start_pfn, > unsigned long last_pfn) > nodes_clear(node_online_map); > > #ifdef CONFIG_NUMA_EMU > + if (!xen_pv_numa(start_pfn, last_pfn)) > + return; > + > if (cmdline && !numa_emulation(start_pfn, last_pfn)) > return; > nodes_clear(node_possible_map); > diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig > index 7675f9b..7cf6a4f 100644 > --- a/arch/x86/xen/Kconfig > +++ b/arch/x86/xen/Kconfig > @@ -73,3 +73,12 @@ config XEN_PCI_PASSTHROUGH > help > Enable support for passing PCI devices through to > unprivileged domains. (COMPLETELY UNTESTED) > + > +config XEN_NUMA_GUEST > + bool "Enable support NUMA aware Xen domains" > + depends on XEN && X86_64 && NUMA && NUMA_EMU > + help > + Enable support for NUMA aware Xen domains. With this > + option, if the memory for the domain is allocated > + from different memory nodes, the domain is made aware > + of this through a Virtual NUMA enlightenment. 
> diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c > index df3e84c..2ee9f0b 100644 > --- a/arch/x86/xen/setup.c > +++ b/arch/x86/xen/setup.c > @@ -285,6 +285,8 @@ void __init xen_arch_setup(void) > printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); > disable_acpi(); > } > + acpi_numa = -1; > + numa_off = 1; > #endif > > /* > diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h > index 32ab005..7f5b85e 100644 > --- a/include/xen/interface/memory.h > +++ b/include/xen/interface/memory.h > @@ -240,6 +240,102 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map); > */ > #define XENMEM_machine_memory_map 10 > > +/* xen guest numa operations */ > +#define XENMEM_numa_op 15 > + > +#define XEN_DOM_NUMA_INTERFACE_VERSION 0x00000001 > +#define XENCTL_NR_CPUS 64 > +#define XENCTL_BITS_PER_BYTE 8 > +#define XENCTL_BITS_TO_BYTES(bits) \ > + (((bits)+XENCTL_BITS_PER_BYTE-1)/XENCTL_BITS_PER_BYTE) > + > +#define XENCTL_DECLARE_BITMAP(name,bits) \ > + uint8_t name[XENCTL_BITS_TO_BYTES(bits)] > +struct xenctl_cpumask{ XENCTL_DECLARE_BITMAP(bits, XENCTL_NR_CPUS); }; > + > +/* Not used in guest */ > +#define XENMEM_machine_numa_layout 0x01 > +struct xenmem_node_data { > + uint32_t node_id; > + uint64_t node_memsize; > + uint64_t node_memfree; > + struct xenctl_cpumask cpu_mask; /* node_to_cpumask */ > +}; > + > +/* NUMA layout for the machine. > + * Structure has to fit within a page. 
*/ > +struct xenmem_machine_numa_layout { > + uint32_t max_nodes; > + /* Only (max_nodes*max_nodes) entries are filled */ > + GUEST_HANDLE(uint32_t) node_distance; > + /* max_vnodes entries of xenmem_node_data type */ > + GUEST_HANDLE(void) node_data; > +}; > +DEFINE_GUEST_HANDLE_STRUCT(xenmem_machine_numa_layout); > + > + > +#define XENMEM_machine_nodemap 0x02 > +struct xenmem_machine_nodemap { > + /* On call the size of the available buffer */ > + uint32_t bufsize; > + > + /* memnode map parameters */ > + int32_t shift; > + uint32_t mapsize; > + GUEST_HANDLE(void) map; > +}; > +DEFINE_GUEST_HANDLE_STRUCT(xenmem_machine_nodemap); > + > +/* NUMA layout for the domain at the time of startup. > + * Structure has to fit within a page. */ > +#define XENMEM_set_domain_numa_layout 0x03 > +#define XENMEM_get_domain_numa_layout 0x04 > + > +/* NUMA layout for the domain at the time of startup. > + * Structure has to fit within a page. */ > +#define XEN_MAX_VNODES 8 > + > +struct xen_vnode_data { > + uint32_t vnode_id; > + uint32_t mnode_id; > + uint64_t nr_pages; > + struct xenctl_cpumask vcpu_mask; /* vnode_to_vcpumask */ > +}; > + > +#define XEN_DOM_NUMA_CONFINED 0x01 > +#define XEN_DOM_NUMA_SPLIT 0x02 > +#define XEN_DOM_NUMA_STRIPED 0x03 > +struct xen_domain_numa_layout { > + uint32_t version; > + uint32_t type; > + > + uint32_t max_vcpus; > + uint32_t max_vnodes; > + > + /* Only (max_vnodes*max_vnodes) entries are filled */ > + uint32_t vnode_distance[XEN_MAX_VNODES * XEN_MAX_VNODES]; > + /* Only (max_vnodes) entries are filled */ > + struct xen_vnode_data vnode_data[XEN_MAX_VNODES]; > +}; > + > +struct xenmem_domain_numa_layout { > + domid_t domid; > + uint32_t version; > + > + uint32_t bufsize; > + GUEST_HANDLE(void) buf; > +}; > +DEFINE_GUEST_HANDLE_STRUCT(xenmem_domain_numa_layout); > + > +struct xenmem_numa_op { > + uint32_t cmd; > + union { > + struct xenmem_machine_numa_layout minfo; > + struct xenmem_machine_nodemap mnodemap; > + struct xenmem_domain_numa_layout 
dinfo; > + } u; > +}; > +DEFINE_GUEST_HANDLE_STRUCT(xenmem_numa_op); > > /* > * Prevent the balloon driver from changing the memory reservation > diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h > index 9ffaee0..17cb17d 100644 > --- a/include/xen/interface/xen.h > +++ b/include/xen/interface/xen.h > @@ -494,6 +494,8 @@ struct dom0_vga_console_info { > /* These flags are passed in the 'flags' field of start_info_t. */ > #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ > #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ > +#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ > +#define SIF_NUMA_DOMAIN (1<<3) /* Is the domain NUMA aware ? */ > #define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ > > typedef uint64_t cpumap_t; > @@ -504,6 +506,7 @@ typedef uint8_t xen_domain_handle_t[16]; > #define __mk_unsigned_long(x) x ## UL > #define mk_unsigned_long(x) __mk_unsigned_long(x) > > +DEFINE_GUEST_HANDLE(uint32_t); > DEFINE_GUEST_HANDLE(uint64_t); > > #else /* __ASSEMBLY__ */ > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxxxxxxxx > http://lists.xensource.com/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |