Xen project Mailing List

Re: [Xen-devel] [PATCH RFC v2 1/2] linux/vnuma: vNUMA for PV domu guest

On 17/09/13 09:34, Elena Ufimtseva wrote: > Requests NUMA topology info from Xen by issuing subop > hypercall. Initializes NUMA nodes, sets number of CPUs, > distance table and NUMA nodes memory ranges during boot. > vNUMA topology defined by user in VM config file. Memory > ranges are represented by structure vnuma_topology_info > where start and end of memory area are defined in guests > pfns numbers, constructed and aligned accordingly to > e820 domain map. > In case the received structure has errors, will fail to > dummy numa init. > Requires XEN with applied patches from vnuma patchset; > > Changes since v1: > - moved the test for xen_pv_domain() into xen_numa_init; > - replaced memory block search/allocation by single memblock_alloc; > - moved xen_numa_init to vnuma.c from enlighten.c; > - moved memblock structure to public interface memory.h; > - specified signedness of vnuma topology structure members; > - removed excessive debug output; > > TODO: > - consider common interface for Dom0, HVM and PV guests to provide > vNUMA topology; > - dynamic numa balancing at the time of this patch (kernel 3.11 > 6e4664525b1db28f8c4e1130957f70a94c19213e with boot parameter > numa_balancing=true that is such by default) crashes numa-enabled > guest. Investigate further. > --- a/arch/x86/mm/numa.c > +++ b/arch/x86/mm/numa.c > @@ -19,6 +19,7 @@ > #include <asm/amd_nb.h> #include <asm/xen/vnuma.h> here... > #include "numa_internal.h" > +#include "asm/xen/vnuma.h" ... not here. > --- /dev/null > +++ b/arch/x86/xen/vnuma.c > @@ -0,0 +1,92 @@ > +#include <linux/err.h> > +#include <linux/memblock.h> > +#include <xen/interface/xen.h> > +#include <xen/interface/memory.h> > +#include <asm/xen/interface.h> > +#include <asm/xen/hypercall.h> > +#include <asm/xen/vnuma.h> > +#ifdef CONFIG_NUMA > +/* Xen PV NUMA topology initialization */ > +static unsigned int xen_vnuma_init = 0; > +int xen_vnuma_support() > +{ > + return xen_vnuma_init; > +} I'm not sure how this and the usage in the next patch actually work. xen_vnuma_init is only set after the test of numa_off prior to calling xen_numa_init() which will set xen_vnuma_init. > +int __init xen_numa_init(void) > +{ > + int rc; > + unsigned int i, j, cpu, idx, pcpus; > + u64 phys, physd, physc; > + unsigned int *vdistance, *cpu_to_node; > + unsigned long mem_size, dist_size, cpu_to_node_size; > + struct vnuma_memarea *varea; > + > + struct vnuma_topology_info numa_topo = { > + .domid = DOMID_SELF > + }; > + rc = -EINVAL; > + if (!xen_pv_domain()) > + return rc; > + pcpus = num_possible_cpus(); > + mem_size = pcpus * sizeof(struct vnuma_memarea); > + dist_size = pcpus * pcpus * sizeof(*numa_topo.vdistance); > + cpu_to_node_size = pcpus * sizeof(*numa_topo.cpu_to_node); > + phys = memblock_alloc(mem_size, PAGE_SIZE); > + physd = memblock_alloc(dist_size, PAGE_SIZE); > + physc = memblock_alloc(cpu_to_node_size, PAGE_SIZE); > + if (!phys || !physc || !physd) > + goto vnumaout; > + varea = __va(phys); > + vdistance = __va(physd); > + cpu_to_node = __va(physc); > + set_xen_guest_handle(numa_topo.vmemarea, varea); > + set_xen_guest_handle(numa_topo.vdistance, vdistance); > + set_xen_guest_handle(numa_topo.cpu_to_node, cpu_to_node); > + rc = HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo); > + if (rc < 0) > + goto vnumaout; > + rc = -EINVAL; > + if (numa_topo.nr_nodes == 0) { > + /* will pass to dummy_numa_init */ > + goto vnumaout; > + } > + if (numa_topo.nr_nodes > num_possible_cpus()) { > + pr_debug("vNUMA: Node without cpu is not supported in this > version.\n"); > + goto vnumaout; > + } > + /* > + * NUMA nodes memory ranges are in pfns, constructed and > + * aligned based on e820 ram domain map > + */ > + for (i = 0; i < numa_topo.nr_nodes; i++) { > + if (numa_add_memblk(i, varea[i].start, varea[i].end)) > + /* pass to numa_dummy_init */ > + goto vnumaout; If there's a failure here, numa may be partially setup. Do you need to undo any of the bits that have already setup? > + node_set(i, numa_nodes_parsed); > + } > + setup_nr_node_ids(); > + /* Setting the cpu, apicid to node */ > + for_each_cpu(cpu, cpu_possible_mask) { > + set_apicid_to_node(cpu, cpu_to_node[cpu]); > + numa_set_node(cpu, cpu_to_node[cpu]); > + __apicid_to_node[cpu] = cpu_to_node[cpu]; > + cpumask_set_cpu(cpu, node_to_cpumask_map[cpu_to_node[cpu]]); > + } > + for (i = 0; i < numa_topo.nr_nodes; i++) { > + for (j = 0; j < numa_topo.nr_nodes; j++) { > + idx = (j * numa_topo.nr_nodes) + i; > + numa_set_distance(i, j, *(vdistance + idx)); > + } > + } > + rc = 0; > + xen_vnuma_init = 1; > +vnumaout: Call this label "out". > + if (phys) > + memblock_free(__pa(phys), mem_size); > + if (physd) > + memblock_free(__pa(physd), dist_size); > + if (physc) > + memblock_free(__pa(physc), cpu_to_node_size); > + return rc; If you return an error, x86_numa_init() will try to call setup for other NUMA system. Consider calling numa_dummy_init() directly instead and then returning success. > +} Please use blank lines to space the logical bits of this function out some more. > +#endif > diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h > index 2ecfe4f..4237f51 100644 > --- a/include/xen/interface/memory.h > +++ b/include/xen/interface/memory.h > @@ -263,4 +263,31 @@ struct xen_remove_from_physmap { > }; > DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); > > +/* vNUMA structures */ > +struct vnuma_memarea { > + uint64_t start, end; > +}; > +DEFINE_GUEST_HANDLE_STRUCT(vnuma_memarea); > + > +struct vnuma_topology_info { > + /* OUT */ > + domid_t domid; > + /* IN */ > + uint16_t nr_nodes; /* number of virtual numa nodes */ > + uint32_t _pad; Is this _pad intended to make this structure uniform across 32-bit and 64-bit guests? The following GUEST_HANDLES() have different sizes on 32- and 64-bit guests. > + /* distance table */ > + GUEST_HANDLE(uint) vdistance; > + /* cpu mapping to vnodes */ > + GUEST_HANDLE(uint) cpu_to_node; > + /* > + * array of numa memory areas constructed by Xen > + * where start and end are pfn numbers of the area > + * Xen takes into account domains e820 map > + */ > + GUEST_HANDLE(vnuma_memarea) vmemarea; > +}; > +DEFINE_GUEST_HANDLE_STRUCT(vnuma_topology_info); > + > +#define XENMEM_get_vnuma_info 25 > + > #endif /* __XEN_PUBLIC_MEMORY_H__ */ David _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.