[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v3 1/2] xen: vnuma for pv guests
Issues the Xen hypercall subop XENMEM_get_vnumainfo and sets the NUMA topology; otherwise sets a dummy NUMA node and prevents numa_init from calling other NUMA initializers, as they don't work with PV guests. Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx> --- arch/x86/include/asm/xen/vnuma.h | 10 ++++ arch/x86/mm/numa.c | 3 + arch/x86/xen/Makefile | 1 + arch/x86/xen/setup.c | 6 +- arch/x86/xen/vnuma.c | 121 ++++++++++++++++++++++++++++++++++++++ include/xen/interface/memory.h | 50 ++++++++++++++++ 6 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/xen/vnuma.h create mode 100644 arch/x86/xen/vnuma.c diff --git a/arch/x86/include/asm/xen/vnuma.h b/arch/x86/include/asm/xen/vnuma.h new file mode 100644 index 0000000..8c8b098 --- /dev/null +++ b/arch/x86/include/asm/xen/vnuma.h @@ -0,0 +1,10 @@ +#ifndef _ASM_X86_VNUMA_H +#define _ASM_X86_VNUMA_H + +#ifdef CONFIG_XEN +int xen_numa_init(void); +#else +static inline int xen_numa_init(void) { return -1; }; +#endif + +#endif /* _ASM_X86_VNUMA_H */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 1d045f9..37a9c84 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -18,6 +18,7 @@ #include <asm/acpi.h> #include <asm/amd_nb.h> +#include "asm/xen/vnuma.h" #include "numa_internal.h" int __initdata numa_off; @@ -687,6 +688,8 @@ static int __init dummy_numa_init(void) void __init x86_numa_init(void) { if (!numa_off) { + if (!numa_init(xen_numa_init)) + return; #ifdef CONFIG_ACPI_NUMA if (!numa_init(x86_acpi_numa_init)) return; diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 96ab2c0..185ec9b 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o obj-$(CONFIG_XEN_DOM0) += apic.o vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o +obj-$(CONFIG_NUMA) += vnuma.o diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 0982233..0235f19 100644 --- 
a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -20,6 +20,7 @@ #include <asm/numa.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <asm/xen/vnuma.h> #include <xen/xen.h> #include <xen/page.h> @@ -622,6 +623,9 @@ void __init xen_arch_setup(void) WARN_ON(xen_set_default_idle()); fiddle_vdso(); #ifdef CONFIG_NUMA - numa_off = 1; + if (xen_initial_domain()) + numa_off = 1; + else + numa_off = 0; #endif } diff --git a/arch/x86/xen/vnuma.c b/arch/x86/xen/vnuma.c new file mode 100644 index 0000000..a02f9c6 --- /dev/null +++ b/arch/x86/xen/vnuma.c @@ -0,0 +1,121 @@ +#include <linux/err.h> +#include <linux/memblock.h> +#include <xen/interface/xen.h> +#include <xen/interface/memory.h> +#include <asm/xen/interface.h> +#include <asm/xen/hypercall.h> +#include <asm/xen/vnuma.h> + +/* + * Called from numa_init if numa_off = 0; + */ +int __init xen_numa_init(void) +{ + unsigned int i, j, idx; + unsigned int cpu, pcpus, nr_nodes, nr_cpus; + unsigned int *vdistance, *cpu_to_node; + unsigned long mem_size, dist_size, cpu_to_node_size; + struct vmemrange *vmem; + u64 physm, physd, physc; + int rc; + + struct vnuma_topology_info numa_topo = { + .domid = DOMID_SELF + }; + + rc = -EINVAL; + physm = physd = physc = 0; + + /* For now only PV guests are supported */ + if (!xen_pv_domain()) + return rc; + + /* get the number of nodes for allocation of memblocks */ + pcpus = num_possible_cpus(); + nr_cpus = setup_max_cpus < pcpus ? setup_max_cpus : pcpus; + + /* support for nodes with at least one cpu */ + nr_nodes = nr_cpus; + + /* + * Allocate arrays for nr_cpus/nr_nodes sizes and let + * hypervisor know that these are the boundaries. Partial + * copy is not allowed and hypercall will fail. 
+ */ + + mem_size = nr_nodes * sizeof(struct vmemrange); + dist_size = nr_nodes * nr_nodes * sizeof(*numa_topo.distance.h); + cpu_to_node_size = nr_cpus * sizeof(*numa_topo.cpu_to_node.h); + + physm = memblock_alloc(mem_size, PAGE_SIZE); + physd = memblock_alloc(dist_size, PAGE_SIZE); + physc = memblock_alloc(cpu_to_node_size, PAGE_SIZE); + + if (!physm || !physd || !physc) + goto out; + + vmem = __va(physm); + vdistance = __va(physd); + cpu_to_node = __va(physc); + + numa_topo.nr_nodes = nr_nodes; + numa_topo.nr_cpus = nr_cpus; + + set_xen_guest_handle(numa_topo.memrange.h, vmem); + set_xen_guest_handle(numa_topo.distance.h, vdistance); + set_xen_guest_handle(numa_topo.cpu_to_node.h, cpu_to_node); + + if (HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo) < 0) + goto out; + + /* + * NUMA nodes memory ranges are in pfns, constructed and + * aligned based on e820 ram domain map. + */ + for (i = 0; i < nr_nodes; i++) { + if (numa_add_memblk(i, vmem[i].start, vmem[i].end)) + goto out; + node_set(i, numa_nodes_parsed); + } + + setup_nr_node_ids(); + /* Setting the cpu, apicid to node */ + for_each_cpu(cpu, cpu_possible_mask) { + set_apicid_to_node(cpu, cpu_to_node[cpu]); + numa_set_node(cpu, cpu_to_node[cpu]); + cpumask_set_cpu(cpu, node_to_cpumask_map[cpu_to_node[cpu]]); + } + + for (i = 0; i < nr_nodes; i++) { + for (j = 0; j < nr_nodes; j++) { + idx = (i * nr_nodes) + j; + numa_set_distance(i, j, *(vdistance + idx)); + } + } + + rc = 0; +out: + if (physm) + memblock_free(__pa(physm), mem_size); + if (physd) + memblock_free(__pa(physd), dist_size); + if (physc) + memblock_free(__pa(physc), cpu_to_node_size); + /* + * Set a dummy node and return success. This prevents calling any + * hardware-specific initializers which do not work in a PV guest. + * Taken from dummy_numa_init code. 
+ */ + if (rc != 0) { + for (i = 0; i < MAX_LOCAL_APIC; i++) + set_apicid_to_node(i, NUMA_NO_NODE); + nodes_clear(numa_nodes_parsed); + nodes_clear(node_possible_map); + nodes_clear(node_online_map); + node_set(0, numa_nodes_parsed); + /* cpus up to max_cpus will be assigned to one node */ + numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); + setup_nr_node_ids(); + } + return 0; +} diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index 2ecfe4f..96d6387 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -263,4 +263,54 @@ struct xen_remove_from_physmap { }; DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); +/* vNUMA structures */ +struct vmemrange { + uint64_t start, end; +}; +DEFINE_GUEST_HANDLE_STRUCT(vmemrange); + +struct vnuma_topology_info { + /* OUT */ + domid_t domid; + /* + * nr_nodes and nr_cpus are used for retreival of sizes + * of will be allocated arrays for vnuma topology. + * We need to know vcpus numberfor domain as NR_CPUS + * is less then domain max_vcpus, number of possible + * cpus will equal to NR_CPUS and we have no way of + * learning domain vcpus number. + */ + /* number of virtual numa nodes */ + unsigned int nr_nodes; + unsigned int nr_cpus; + /* distance table */ + union { + GUEST_HANDLE(uint) h; + uint64_t _pad; + } distance; + /* cpu mapping to vnodes */ + union { + GUEST_HANDLE(uint) h; + uint64_t _pad; + } cpu_to_node; + /* + * memory areas constructed by Xen, start and end + * of the ranges are specific to domain e820 map. + * Xen toolstack constructs these ranges for domain + * when building it. + */ + union { + GUEST_HANDLE(vmemrange) h; + uint64_t _pad; + } memrange; +}; +DEFINE_GUEST_HANDLE_STRUCT(vnuma_topology_info); + +/* + * Used to retreive vnuma topology info. + * Use XENMEM_get_vnuma_nodes to obtain number of + * nodes before allocating memory for topology. 
+ */ +#define XENMEM_get_vnuma_info 26 + #endif /* __XEN_PUBLIC_MEMORY_H__ */ -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |