[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH RFC v2 1/2] linux/vnuma: vNUMA for PV domu guest
On Tue, Sep 17, 2013 at 10:21 AM, Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx> wrote: > On 09/17/2013 04:34 AM, Elena Ufimtseva wrote: >> >> Requests NUMA topology info from Xen by issuing subop >> hypercall. Initializes NUMA nodes, sets number of CPUs, >> distance table and NUMA nodes memory ranges during boot. >> vNUMA topology defined by user in VM config file. Memory >> ranges are represented by structure vnuma_topology_info >> where start and end of memory area are defined in guests >> pfns numbers, constructed and aligned according to >> e820 domain map. >> In case the received structure has errors, will fall back to >> dummy numa init. >> Requires XEN with applied patches from vnuma patchset; >> >> Changes since v1: >> - moved the test for xen_pv_domain() into xen_numa_init; >> - replaced memory block search/allocation by single memblock_alloc; >> - moved xen_numa_init to vnuma.c from enlighten.c; >> - moved memblock structure to public interface memory.h; >> - specified signedness of vnuma topology structure members; >> - removed excessive debug output; >> >> TODO: >> - consider common interface for Dom0, HVM and PV guests to provide >> vNUMA topology; >> - dynamic numa balancing at the time of this patch (kernel 3.11 >> 6e4664525b1db28f8c4e1130957f70a94c19213e with boot parameter >> numa_balancing=true that is such by default) crashes numa-enabled >> guest. Investigate further. 
>> >> Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx> >> --- >> arch/x86/include/asm/xen/vnuma.h | 12 +++++ >> arch/x86/mm/numa.c | 5 +++ >> arch/x86/xen/Makefile | 2 +- >> arch/x86/xen/vnuma.c | 92 >> ++++++++++++++++++++++++++++++++++++++ >> include/xen/interface/memory.h | 27 +++++++++++ >> 5 files changed, 137 insertions(+), 1 deletion(-) >> create mode 100644 arch/x86/include/asm/xen/vnuma.h >> create mode 100644 arch/x86/xen/vnuma.c >> >> diff --git a/arch/x86/include/asm/xen/vnuma.h >> b/arch/x86/include/asm/xen/vnuma.h >> new file mode 100644 >> index 0000000..1bf4cae >> --- /dev/null >> +++ b/arch/x86/include/asm/xen/vnuma.h >> @@ -0,0 +1,12 @@ >> +#ifndef _ASM_X86_VNUMA_H >> +#define _ASM_X86_VNUMA_H >> + >> +#ifdef CONFIG_XEN >> +int xen_vnuma_support(void); >> +int xen_numa_init(void); >> +#else >> +int xen_vnuma_support(void) { return 0; }; >> +int xen_numa_init(void) {}; > > > This should return -EINVAL. Or perhaps you can add > > #ifdef CONFIG_XEN > #include "asm/xen/vnuma.h" > #endif > > in numa.c and not bother with ifdef here. 
> > >> +#endif >> + >> +#endif /* _ASM_X86_VNUMA_H */ >> diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c >> index 8bf93ba..a95fadf 100644 >> --- a/arch/x86/mm/numa.c >> +++ b/arch/x86/mm/numa.c >> @@ -19,6 +19,7 @@ >> #include <asm/amd_nb.h> >> #include "numa_internal.h" >> +#include "asm/xen/vnuma.h" >> int __initdata numa_off; >> nodemask_t numa_nodes_parsed __initdata; >> @@ -621,6 +622,10 @@ static int __init dummy_numa_init(void) >> void __init x86_numa_init(void) >> { >> if (!numa_off) { >> +#ifdef CONFIG_XEN >> + if (!numa_init(xen_numa_init)) >> + return; >> +#endif >> #ifdef CONFIG_X86_NUMAQ >> if (!numa_init(numaq_numa_init)) >> return; >> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile >> index 96ab2c0..de9deab 100644 >> --- a/arch/x86/xen/Makefile >> +++ b/arch/x86/xen/Makefile >> @@ -13,7 +13,7 @@ CFLAGS_mmu.o := $(nostackp) >> obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ >> time.o xen-asm.o xen-asm_$(BITS).o \ >> grant-table.o suspend.o platform-pci-unplug.o \ >> - p2m.o >> + p2m.o vnuma.o >> obj-$(CONFIG_EVENT_TRACING) += trace.o >> diff --git a/arch/x86/xen/vnuma.c b/arch/x86/xen/vnuma.c >> new file mode 100644 >> index 0000000..3c6c73f >> --- /dev/null >> +++ b/arch/x86/xen/vnuma.c >> @@ -0,0 +1,92 @@ >> +#include <linux/err.h> >> +#include <linux/memblock.h> >> +#include <xen/interface/xen.h> >> +#include <xen/interface/memory.h> >> +#include <asm/xen/interface.h> >> +#include <asm/xen/hypercall.h> >> +#include <asm/xen/vnuma.h> >> +#ifdef CONFIG_NUMA >> +/* Xen PV NUMA topology initialization */ >> +static unsigned int xen_vnuma_init = 0; >> +int xen_vnuma_support() >> +{ >> + return xen_vnuma_init; >> +} >> +int __init xen_numa_init(void) >> +{ >> + int rc; >> + unsigned int i, j, cpu, idx, pcpus; >> + u64 phys, physd, physc; >> + unsigned int *vdistance, *cpu_to_node; > > > cpu_to_node may not be a particularly good name as there is a macro with > the same name in topology.h Sure, will fix. 
> > >> + unsigned long mem_size, dist_size, cpu_to_node_size; >> + struct vnuma_memarea *varea; >> + >> + struct vnuma_topology_info numa_topo = { >> + .domid = DOMID_SELF >> + }; >> + rc = -EINVAL; >> + if (!xen_pv_domain()) >> + return rc; > > > No need to set rc here, just return -EINVAL; > > And please add spaces between lines to separate logical blocks a little. Ok. > > >> + pcpus = num_possible_cpus(); >> + mem_size = pcpus * sizeof(struct vnuma_memarea); >> + dist_size = pcpus * pcpus * sizeof(*numa_topo.vdistance); >> + cpu_to_node_size = pcpus * sizeof(*numa_topo.cpu_to_node); >> + phys = memblock_alloc(mem_size, PAGE_SIZE); >> + physd = memblock_alloc(dist_size, PAGE_SIZE); >> + physc = memblock_alloc(cpu_to_node_size, PAGE_SIZE); >> + if (!phys || !physc || !physd) >> + goto vnumaout; >> + varea = __va(phys); >> + vdistance = __va(physd); >> + cpu_to_node = __va(physc); >> + set_xen_guest_handle(numa_topo.vmemarea, varea); >> + set_xen_guest_handle(numa_topo.vdistance, vdistance); >> + set_xen_guest_handle(numa_topo.cpu_to_node, cpu_to_node); >> + rc = HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo); >> + if (rc < 0) >> + goto vnumaout; >> + rc = -EINVAL; >> + if (numa_topo.nr_nodes == 0) { >> + /* will pass to dummy_numa_init */ >> + goto vnumaout; >> + } >> + if (numa_topo.nr_nodes > num_possible_cpus()) { >> + pr_debug("vNUMA: Node without cpu is not supported in this >> version.\n"); >> + goto vnumaout; >> + } >> + /* >> + * NUMA nodes memory ranges are in pfns, constructed and >> + * aligned based on e820 ram domain map >> + */ >> + for (i = 0; i < numa_topo.nr_nodes; i++) { >> + if (numa_add_memblk(i, varea[i].start, varea[i].end)) >> + /* pass to numa_dummy_init */ >> + goto vnumaout; >> + node_set(i, numa_nodes_parsed); >> + } >> + setup_nr_node_ids(); >> + /* Setting the cpu, apicid to node */ >> + for_each_cpu(cpu, cpu_possible_mask) { >> + set_apicid_to_node(cpu, cpu_to_node[cpu]); >> + numa_set_node(cpu, cpu_to_node[cpu]); >> + 
__apicid_to_node[cpu] = cpu_to_node[cpu]; > > > Isn't this what set_apicid_to_node() above will do? Yes, exactly the same ) will fix. > > -boris > > >> + cpumask_set_cpu(cpu, >> node_to_cpumask_map[cpu_to_node[cpu]]); >> + } >> + for (i = 0; i < numa_topo.nr_nodes; i++) { >> + for (j = 0; j < numa_topo.nr_nodes; j++) { >> + idx = (j * numa_topo.nr_nodes) + i; >> + numa_set_distance(i, j, *(vdistance + idx)); >> + } >> + } >> + rc = 0; >> + xen_vnuma_init = 1; >> +vnumaout: >> + if (phys) >> + memblock_free(__pa(phys), mem_size); >> + if (physd) >> + memblock_free(__pa(physd), dist_size); >> + if (physc) >> + memblock_free(__pa(physc), cpu_to_node_size); >> + return rc; >> +} >> +#endif >> diff --git a/include/xen/interface/memory.h >> b/include/xen/interface/memory.h >> index 2ecfe4f..4237f51 100644 >> --- a/include/xen/interface/memory.h >> +++ b/include/xen/interface/memory.h >> @@ -263,4 +263,31 @@ struct xen_remove_from_physmap { >> }; >> DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); >> +/* vNUMA structures */ >> +struct vnuma_memarea { >> + uint64_t start, end; >> +}; >> +DEFINE_GUEST_HANDLE_STRUCT(vnuma_memarea); >> + >> +struct vnuma_topology_info { >> + /* OUT */ >> + domid_t domid; >> + /* IN */ >> + uint16_t nr_nodes; /* number of virtual numa nodes */ >> + uint32_t _pad; >> + /* distance table */ >> + GUEST_HANDLE(uint) vdistance; >> + /* cpu mapping to vnodes */ >> + GUEST_HANDLE(uint) cpu_to_node; >> + /* >> + * array of numa memory areas constructed by Xen >> + * where start and end are pfn numbers of the area >> + * Xen takes into account domains e820 map >> + */ >> + GUEST_HANDLE(vnuma_memarea) vmemarea; >> +}; >> +DEFINE_GUEST_HANDLE_STRUCT(vnuma_topology_info); >> + >> +#define XENMEM_get_vnuma_info 25 >> + >> #endif /* __XEN_PUBLIC_MEMORY_H__ */ > > -- Elena _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |