Re: [Xen-devel] [PATCH RESEND v7 6/9] libxl: build numa nodes memory blocks
On Thu, Aug 21, 2014 at 01:10:26AM -0400, Elena Ufimtseva wrote:
> Create the vmemrange structure based on the
> PV guest's E820 map. Values are in Megabytes.
> Also export the E820 filter code e820_sanitize
> out to be available internally.
>
> Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
> ---
> tools/libxl/libxl_internal.h | 9 ++
> tools/libxl/libxl_numa.c | 193 ++++++++++++++++++++++++++++++++++++++++++
> tools/libxl/libxl_x86.c | 3 +-
> 3 files changed, 204 insertions(+), 1 deletion(-)
>
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index beb052e..63ccb5e 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -3088,6 +3088,15 @@ void libxl__numa_candidate_put_nodemap(libxl__gc *gc,
> libxl_bitmap_copy(CTX, &cndt->nodemap, nodemap);
> }
>
> +bool libxl__vnodemap_is_usable(libxl__gc *gc, libxl_domain_build_info *info);
> +
> +int e820_sanitize(libxl_ctx *ctx, struct e820entry src[], uint32_t *nr_entries,
> + unsigned long map_limitkb, unsigned long balloon_kb);
> +
> +int libxl__vnuma_align_mem(libxl__gc *gc, uint32_t domid,
> + struct libxl_domain_build_info *b_info,
> + vmemrange_t *memblks);
> +
> _hidden int libxl__ms_vm_genid_set(libxl__gc *gc, uint32_t domid,
> const libxl_ms_vm_genid *id);
>
> diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
> index 94ca4fe..4ae547e 100644
> --- a/tools/libxl/libxl_numa.c
> +++ b/tools/libxl/libxl_numa.c
> @@ -19,6 +19,8 @@
>
> #include "libxl_internal.h"
>
> +#include "libxl_vnuma.h"
> +
> /*
> * What follows are helpers for generating all the k-combinations
> * without repetitions of a set S with n elements in it. Formally
> @@ -508,6 +510,197 @@ int libxl__get_numa_candidate(libxl__gc *gc,
> }
>
> /*
> + * Check if we can fit vnuma nodes to numa pnodes
> + * from vnode_to_pnode array.
> + */
> +bool libxl__vnodemap_is_usable(libxl__gc *gc,
> + libxl_domain_build_info *info)
> +{
> + unsigned int i;
> + libxl_numainfo *ninfo = NULL;
> + unsigned long long *claim;
> + unsigned int node;
> + uint64_t *sz_array;
> + int nr_nodes = 0;
> +
> + /* Cannot use specified mapping if not NUMA machine. */
> + ninfo = libxl_get_numainfo(CTX, &nr_nodes);
> + if (ninfo == NULL)
> + return false;
> +
> + sz_array = info->vnuma_mem;
> + claim = libxl__calloc(gc, info->vnodes, sizeof(*claim));
> + /* Get total memory required on each physical node. */
> + for (i = 0; i < info->vnodes; i++)
> + {
> + node = info->vnuma_vnodemap[i];
> +
> + if (node < nr_nodes)
> + claim[node] += (sz_array[i] << 20);
> + else
> + goto vnodemapout;
> + }
> + for (i = 0; i < nr_nodes; i++) {
> + if (claim[i] > ninfo[i].free)
> + /* Cannot complete user request, falling to default. */
> + goto vnodemapout;
> + }
> +
> + vnodemapout:
Shouldn't we free 'claim'?
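(For context, a minimal sketch of the allocation pattern in question, assuming
libxl__calloc() here is the usual gc-tracked libxl allocator; if so, the buffer
lives until the gc's owner tears the gc down rather than needing an explicit free:)

    /* Assumption: gc-tracked allocation, released when the gc's owner
     * runs GC_FREE; no explicit free(claim) in this helper. */
    claim = libxl__calloc(gc, info->vnodes, sizeof(*claim));
    /* ... accumulate the per-pnode claims as in the hunk above ... */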
> + return true;
> +}
> +
> +/*
> + * Returns number of absent pages within e820 map
> + * between start and end addresses passed. Needed
> + * to correctly set numa memory ranges for domain.
> + */
> +static unsigned long e820_memory_hole_size(unsigned long start,
> + unsigned long end,
> + struct e820entry e820[],
> + unsigned int nr)
> +{
> + unsigned int i;
> + unsigned long absent, start_blk, end_blk;
> +
> + /* init absent number of pages with all memmap size. */
> + absent = end - start;
> + for (i = 0; i < nr; i++) {
> + /* if not E820_RAM region, skip it. */
> + if (e820[i].type == E820_RAM) {
I would try to adhere to what the comment says, so make this:

    if (e820[i].type != E820_RAM)
        continue;

... and then continue with the rest of the code below.
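Something along these lines (only a sketch, same logic as the hunk below,
just restructured around the early continue):

    for (i = 0; i < nr; i++) {
        /* if not E820_RAM region, skip it. */
        if (e820[i].type != E820_RAM)
            continue;

        start_blk = e820[i].addr;
        end_blk = e820[i].addr + e820[i].size;
        /* beginning address is within this region? */
        if (start >= start_blk && start <= end_blk) {
            if (end > end_blk)
                absent -= end_blk - start;
            else
                /* fit the region? then no absent pages. */
                absent -= end - start;
            continue;
        }
        /* found the end of range in this region? */
        if (end <= end_blk && end >= start_blk) {
            absent -= end - start_blk;
            /* no need to look for more ranges. */
            break;
        }
    }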
> + start_blk = e820[i].addr;
> + end_blk = e820[i].addr + e820[i].size;
> + /* beginning address is within this region? */
> + if (start >= start_blk && start <= end_blk) {
> + if (end > end_blk)
> + absent -= end_blk - start;
> + else
> + /* fit the region? then no absent pages. */
> + absent -= end - start;
> + continue;
> + }
> + /* found the end of range in this region? */
> + if (end <= end_blk && end >= start_blk) {
> + absent -= end - start_blk;
> + /* no need to look for more ranges. */
> + break;
> + }
> + }
> + }
> + return absent;
> +}
> +
> +/*
> + * For each node, build memory block start and end addresses.
> + * Subtract any memory hole from the range found in e820 map.
> + * vnode memory sizes are passed here in megabytes, the result is
> + * in memory block addresses.
> + * Linux kernel will adjust numa memory block sizes on its own.
> + * But we want to provide to the kernel numa block addresses that
> + * will be the same in kernel and hypervisor.
> + */
> +#define max(a,b) ((a > b) ? a : b)
> +int libxl__vnuma_align_mem(libxl__gc *gc,
> + uint32_t domid,
> + /* IN: mem sizes in megabytes */
> + libxl_domain_build_info *b_info,
> + /* OUT: linux NUMA blocks addresses */
> + vmemrange_t *memblks)
> +{
> + unsigned int i;
> + int j, rc;
> + uint64_t next_start_blk, end_max = 0, size;
> + uint32_t nr;
> + struct e820entry map[E820MAX];
> +
> + errno = ERROR_INVAL;
> + if (b_info->vnodes == 0)
> + return -EINVAL;
> +
> + if (!memblks || !b_info->vnuma_mem)
> + return -EINVAL;
> +
> + libxl_ctx *ctx = libxl__gc_owner(gc);
> +
> + /* Retrieve e820 map for this host. */
> + rc = xc_get_machine_memory_map(ctx->xch, map, E820MAX);
> +
> + if (rc < 0) {
> + errno = rc;
> + return -EINVAL;
> + }
> + nr = rc;
> + rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb,
> + (b_info->max_memkb - b_info->target_memkb) +
> + b_info->u.pv.slack_memkb);
> + if (rc)
> + {
> + errno = rc;
> + return -EINVAL;
> + }
> +
> + /* find max memory address for this host. */
> + for (j = 0; j < nr; j++)
> + if (map[j].type == E820_RAM) {
> + end_max = max(end_max, map[j].addr + map[j].size);
> + }
> +
I think the compiler or smatch will complain about the missing
optional {}. Usually you do

    for (.. )
    {
        if (..)
        {
        }
    }
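E.g. something like this (just a sketch of that hunk with the braces made
explicit on the for loop):

    /* find max memory address for this host. */
    for (j = 0; j < nr; j++) {
        if (map[j].type == E820_RAM)
            end_max = max(end_max, map[j].addr + map[j].size);
    }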
> + memset(memblks, 0, sizeof(*memblks) * b_info->vnodes);
> + next_start_blk = 0;
> +
> + memblks[0].start = map[0].addr;
> +
> + for (i = 0; i < b_info->vnodes; i++) {
So you are mixing two styles here. Earlier you had

    if (rc)
    {
    }

But here you are doing

    for (..) {
    }

If you want the same style, the for loop should have been:

    for (..)
    {
    }

You need to look at the code in this file and adopt the style that
is used in the majority of cases.
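For example, with the brace-on-the-same-line style that seems to dominate in
this file, the earlier hunk would read:

    rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb,
                       (b_info->max_memkb - b_info->target_memkb) +
                       b_info->u.pv.slack_memkb);
    if (rc) {
        errno = rc;
        return -EINVAL;
    }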
> +
> + memblks[i].start += next_start_blk;
> + memblks[i].end = memblks[i].start + (b_info->vnuma_mem[i] << 20);
> +
> + if (memblks[i].end > end_max) {
> + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
> + "Shrunk vNUMA memory block %d address to max e820
> address: \
> + %#010lx -> %#010lx\n", i, memblks[i].end, end_max);
> + memblks[i].end = end_max;
> + break;
> + }
> +
> + size = memblks[i].end - memblks[i].start;
> + /*
> + * For pv host with e820_host option turned on we need
> + * to take into account memory holes. For pv host with
> + * e820_host disabled or unset, the map is a contiguous
> + * RAM region.
> + */
> + if (libxl_defbool_val(b_info->u.pv.e820_host)) {
> + while((memblks[i].end - memblks[i].start -
> + e820_memory_hole_size(memblks[i].start,
> + memblks[i].end, map, nr)) < size )
> + {
> + memblks[i].end += MIN_VNODE_SIZE << 10;
> + if (memblks[i].end > end_max) {
> + memblks[i].end = end_max;
> + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
> + "Shrunk vNUMA memory block %d address to max
> e820 \
> + address: %#010lx -> %#010lx\n", i,
> memblks[i].end,
> + end_max);
> + break;
> + }
> + }
> + }
> + next_start_blk = memblks[i].end;
> + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,"i %d, start = %#010lx, \
> + end = %#010lx\n", i, memblks[i].start, memblks[i].end);
> + }
> +
> + /* Did not form memory addresses for every node? */
> + if (i != b_info->vnodes) {
> + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Not all nodes were populated with \
> + block addresses, only %d out of %d", i, b_info->vnodes);
> + return -EINVAL;
> + }
> + return 0;
> +}
> +
> +/*
> * Local variables:
> * mode: C
> * c-basic-offset: 4
> diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c
> index 7589060..46e84e4 100644
> --- a/tools/libxl/libxl_x86.c
> +++ b/tools/libxl/libxl_x86.c
> @@ -1,5 +1,6 @@
> #include "libxl_internal.h"
> #include "libxl_arch.h"
> +#include "libxl_vnuma.h"
>
> static const char *e820_names(int type)
> {
> @@ -14,7 +15,7 @@ static const char *e820_names(int type)
> return "Unknown";
> }
>
> -static int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
> +int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
> uint32_t *nr_entries,
> unsigned long map_limitkb,
> unsigned long balloon_kb)
> --
> 1.7.10.4
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel