
Re: [Xen-devel] [PATCH RESEND v7 6/9] libxl: build numa nodes memory blocks



On Thu, Aug 21, 2014 at 01:10:26AM -0400, Elena Ufimtseva wrote:
> Create the vmemrange structure based on the
> PV guest's E820 map. Values are in megabytes.
> Also export the E820 filter code e820_sanitize
> out to be available internally.
> 
> Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
> ---
>  tools/libxl/libxl_internal.h |    9 ++
>  tools/libxl/libxl_numa.c     |  193 ++++++++++++++++++++++++++++++++++++++++++
>  tools/libxl/libxl_x86.c      |    3 +-
>  3 files changed, 204 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index beb052e..63ccb5e 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -3088,6 +3088,15 @@ void libxl__numa_candidate_put_nodemap(libxl__gc *gc,
>      libxl_bitmap_copy(CTX, &cndt->nodemap, nodemap);
>  }
>  
> +bool libxl__vnodemap_is_usable(libxl__gc *gc, libxl_domain_build_info *info);
> +
> +int e820_sanitize(libxl_ctx *ctx, struct e820entry src[], uint32_t *nr_entries,
> +                  unsigned long map_limitkb, unsigned long balloon_kb);
> +
> +int libxl__vnuma_align_mem(libxl__gc *gc, uint32_t domid,
> +                           struct libxl_domain_build_info *b_info,
> +                           vmemrange_t *memblks);
> +
>  _hidden int libxl__ms_vm_genid_set(libxl__gc *gc, uint32_t domid,
>                                     const libxl_ms_vm_genid *id);
>  
> diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
> index 94ca4fe..4ae547e 100644
> --- a/tools/libxl/libxl_numa.c
> +++ b/tools/libxl/libxl_numa.c
> @@ -19,6 +19,8 @@
>  
>  #include "libxl_internal.h"
>  
> +#include "libxl_vnuma.h"
> +
>  /*
>   * What follows are helpers for generating all the k-combinations
>   * without repetitions of a set S with n elements in it. Formally
> @@ -508,6 +510,197 @@ int libxl__get_numa_candidate(libxl__gc *gc,
>  }
>  
>  /*
> + * Check if we can fit vnuma nodes to numa pnodes
> + * from vnode_to_pnode array.
> + */
> +bool libxl__vnodemap_is_usable(libxl__gc *gc,
> +                            libxl_domain_build_info *info)
> +{
> +    unsigned int i;
> +    libxl_numainfo *ninfo = NULL;
> +    unsigned long long *claim;
> +    unsigned int node;
> +    uint64_t *sz_array;
> +    int nr_nodes = 0;
> +
> +    /* Cannot use specified mapping if not NUMA machine. */
> +    ninfo = libxl_get_numainfo(CTX, &nr_nodes);
> +    if (ninfo == NULL)
> +        return false;
> +
> +    sz_array = info->vnuma_mem;
> +    claim = libxl__calloc(gc, info->vnodes, sizeof(*claim));
> +    /* Get total memory required on each physical node. */
> +    for (i = 0; i < info->vnodes; i++)
> +    {
> +        node = info->vnuma_vnodemap[i];
> +
> +        if (node < nr_nodes)
> +            claim[node] += (sz_array[i] << 20);
> +        else
> +            goto vnodemapout;
> +   }
> +   for (i = 0; i < nr_nodes; i++) {
> +       if (claim[i] > ninfo[i].free)
> +          /* Cannot complete user request, falling to default. */
> +          goto vnodemapout;
> +   }
> +
> + vnodemapout:

Shouldn't we free 'claim'?

> +   return true;
> +}
> +
> +/*
> + * Returns number of absent pages within e820 map
> + * between start and end addresses passed. Needed
> + * to correctly set numa memory ranges for domain.
> + */
> +static unsigned long e820_memory_hole_size(unsigned long start,
> +                                            unsigned long end,
> +                                            struct e820entry e820[],
> +                                            unsigned int nr)
> +{
> +    unsigned int i;
> +    unsigned long absent, start_blk, end_blk;
> +
> +    /* init absent number of pages with all memmap size. */
> +    absent = end - start;
> +    for (i = 0; i < nr; i++) {
> +        /* if not E820_RAM region, skip it. */
> +        if (e820[i].type == E820_RAM) {

I would try to adhere to what the comment says, so make this:

        if (e820[i].type != E820_RAM)
                continue;

        .. and then continue with the rest of the code below.
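
That drops one level of indentation from the rest of the loop body.
Roughly (untested, same logic as posted, just restructured):

    for (i = 0; i < nr; i++) {
        /* if not an E820_RAM region, skip it. */
        if (e820[i].type != E820_RAM)
            continue;

        start_blk = e820[i].addr;
        end_blk = e820[i].addr + e820[i].size;
        /* beginning address is within this region? */
        if (start >= start_blk && start <= end_blk) {
            if (end > end_blk)
                absent -= end_blk - start;
            else
                /* fit the region? then no absent pages. */
                absent -= end - start;
            continue;
        }
        /* found the end of range in this region? */
        if (end <= end_blk && end >= start_blk) {
            absent -= end - start_blk;
            /* no need to look for more ranges. */
            break;
        }
    }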

> +            start_blk = e820[i].addr;
> +            end_blk = e820[i].addr + e820[i].size;
> +            /* beginning address is within this region? */
> +            if (start >= start_blk && start <= end_blk) {
> +                if (end > end_blk)
> +                    absent -= end_blk - start;
> +                else
> +                    /* fit the region? then no absent pages. */
> +                    absent -= end - start;
> +                continue;
> +            }
> +            /* found the end of range in this region? */
> +            if (end <= end_blk && end >= start_blk) {
> +                absent -= end - start_blk;
> +                /* no need to look for more ranges. */
> +                break;
> +            }
> +        }
> +    }
> +    return absent;
> +}
> +
> +/*
> + * For each node, build memory block start and end addresses.
> + * Subtract any memory hole from the range found in the e820 map.
> + * vnode memory sizes are passed here in megabytes; the result is
> + * in memory block addresses.
> + * The Linux kernel will adjust numa memory block sizes on its own,
> + * but we want to provide the kernel with numa block addresses that
> + * will be the same in the kernel and the hypervisor.
> + */
> +#define max(a,b) ((a > b) ? a : b)
> +int libxl__vnuma_align_mem(libxl__gc *gc,
> +                            uint32_t domid,
> +                            /* IN: mem sizes in megabytes */
> +                            libxl_domain_build_info *b_info,
> +                            /* OUT: linux NUMA blocks addresses */
> +                            vmemrange_t *memblks)
> +{
> +    unsigned int i;
> +    int j, rc;
> +    uint64_t next_start_blk, end_max = 0, size;
> +    uint32_t nr;
> +    struct e820entry map[E820MAX];
> +
> +    errno = ERROR_INVAL;
> +    if (b_info->vnodes == 0)
> +        return -EINVAL;
> +
> +    if (!memblks || !b_info->vnuma_mem)
> +        return -EINVAL;
> +
> +    libxl_ctx *ctx = libxl__gc_owner(gc);
> +
> +    /* Retrieve e820 map for this host. */
> +    rc = xc_get_machine_memory_map(ctx->xch, map, E820MAX);
> +
> +    if (rc < 0) {
> +        errno = rc;
> +        return -EINVAL;
> +    }
> +    nr = rc;
> +    rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb,
> +                       (b_info->max_memkb - b_info->target_memkb) +
> +                       b_info->u.pv.slack_memkb);
> +    if (rc)
> +    {
> +        errno = rc;
> +        return -EINVAL;
> +    }
> +
> +    /* find max memory address for this host. */
> +    for (j = 0; j < nr; j++)
> +        if (map[j].type == E820_RAM) {
> +            end_max = max(end_max, map[j].addr + map[j].size);
> +        }
> +

I think the compiler or smatch will complain about the missing
optional {}. Usually you do
        for (.. )
        {
                if (..)
                {
                }
        }
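
Applied to the loop above, with the braces on the for, that would be
something like (untested):

    for (j = 0; j < nr; j++) {
        if (map[j].type == E820_RAM)
            end_max = max(end_max, map[j].addr + map[j].size);
    }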

> +    memset(memblks, 0, sizeof(*memblks) * b_info->vnodes);
> +    next_start_blk = 0;
> +
> +    memblks[0].start = map[0].addr;
> +
> +    for (i = 0; i < b_info->vnodes; i++) {

So you are mixing two styles here. Earlier you had

if (rc)
{
}

But here you are doing
for (..) {
}

If you want the same style the for loop should have been:

for (..)
{
}

You need to look at the code in this file and adopt the style that
is used in the majority of cases.
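
For example, if the brace-on-the-same-line form is what this file
mostly uses, the error check above would become (untested):

    rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb,
                       (b_info->max_memkb - b_info->target_memkb) +
                       b_info->u.pv.slack_memkb);
    if (rc) {
        errno = rc;
        return -EINVAL;
    }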

> +
> +        memblks[i].start += next_start_blk;
> +        memblks[i].end = memblks[i].start + (b_info->vnuma_mem[i] << 20);
> +
> +        if (memblks[i].end > end_max) {
> +            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
> +                    "Shrunk vNUMA memory block %d address to max e820 address: \
> +                    %#010lx -> %#010lx\n", i, memblks[i].end, end_max);
> +            memblks[i].end = end_max;
> +            break;
> +        }
> +
> +        size = memblks[i].end - memblks[i].start;
> +        /*
> +         * For pv host with e820_host option turned on we need
> +         * to take into account memory holes. For pv host with
> +         * e820_host disabled or unset, the map is a contiguous
> +         * RAM region.
> +         */
> +        if (libxl_defbool_val(b_info->u.pv.e820_host)) {
> +            while((memblks[i].end - memblks[i].start -
> +                   e820_memory_hole_size(memblks[i].start,
> +                   memblks[i].end, map, nr)) < size )
> +            {
> +                memblks[i].end += MIN_VNODE_SIZE << 10;
> +                if (memblks[i].end > end_max) {
> +                    memblks[i].end = end_max;
> +                    LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
> +                            "Shrunk vNUMA memory block %d address to max e820 \
> +                            address: %#010lx -> %#010lx\n", i, memblks[i].end,
> +                            end_max);
> +                    break;
> +                }
> +            }
> +        }
> +        next_start_blk = memblks[i].end;
> +        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,"i %d, start  = %#010lx, \
> +                    end = %#010lx\n", i, memblks[i].start, memblks[i].end);
> +    }
> +
> +    /* Did not form memory addresses for every node? */
> +    if (i != b_info->vnodes)  {
> +        LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Not all nodes were populated with \
> +                block addresses, only %d out of %d", i, b_info->vnodes);
> +        return -EINVAL;
> +    }
> +    return 0;
> +}
> +
> +/*
>   * Local variables:
>   * mode: C
>   * c-basic-offset: 4
> diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c
> index 7589060..46e84e4 100644
> --- a/tools/libxl/libxl_x86.c
> +++ b/tools/libxl/libxl_x86.c
> @@ -1,5 +1,6 @@
>  #include "libxl_internal.h"
>  #include "libxl_arch.h"
> +#include "libxl_vnuma.h"
>  
>  static const char *e820_names(int type)
>  {
> @@ -14,7 +15,7 @@ static const char *e820_names(int type)
>      return "Unknown";
>  }
>  
> -static int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
> +int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
>                           uint32_t *nr_entries,
>                           unsigned long map_limitkb,
>                           unsigned long balloon_kb)
> -- 
> 1.7.10.4
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
