
Re: [Xen-devel] [PATCH RESEND v7 6/9] libxl: build numa nodes memory blocks



On Thu, Aug 21, 2014 at 01:10:26AM -0400, Elena Ufimtseva wrote:
> Create the vmemrange structure based on the
> PV guest's E820 map. Values are in megabytes.
> Also export the E820 filter code e820_sanitize
> out to be available internally.
> 
> Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
> ---
>  tools/libxl/libxl_internal.h |    9 ++
>  tools/libxl/libxl_numa.c     |  193 ++++++++++++++++++++++++++++++++++++++++++
>  tools/libxl/libxl_x86.c      |    3 +-
>  3 files changed, 204 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index beb052e..63ccb5e 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -3088,6 +3088,15 @@ void libxl__numa_candidate_put_nodemap(libxl__gc *gc,
>      libxl_bitmap_copy(CTX, &cndt->nodemap, nodemap);
>  }
>  
> +bool libxl__vnodemap_is_usable(libxl__gc *gc, libxl_domain_build_info *info);
> +
> +int e820_sanitize(libxl_ctx *ctx, struct e820entry src[], uint32_t *nr_entries,
> +                  unsigned long map_limitkb, unsigned long balloon_kb);
> +
> +int libxl__vnuma_align_mem(libxl__gc *gc, uint32_t domid,
> +                           struct libxl_domain_build_info *b_info,
> +                           vmemrange_t *memblks);
> +
>  _hidden int libxl__ms_vm_genid_set(libxl__gc *gc, uint32_t domid,
>                                     const libxl_ms_vm_genid *id);
>  
> diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
> index 94ca4fe..4ae547e 100644
> --- a/tools/libxl/libxl_numa.c
> +++ b/tools/libxl/libxl_numa.c
> @@ -19,6 +19,8 @@
>  
>  #include "libxl_internal.h"
>  
> +#include "libxl_vnuma.h"
> +
>  /*
>   * What follows are helpers for generating all the k-combinations
>   * without repetitions of a set S with n elements in it. Formally
> @@ -508,6 +510,197 @@ int libxl__get_numa_candidate(libxl__gc *gc,
>  }
>  
>  /*
> + * Check if we can fit vnuma nodes to numa pnodes
> + * from vnode_to_pnode array.
> + */
> +bool libxl__vnodemap_is_usable(libxl__gc *gc,
> +                            libxl_domain_build_info *info)
> +{
> +    unsigned int i;
> +    libxl_numainfo *ninfo = NULL;
> +    unsigned long long *claim;
> +    unsigned int node;
> +    uint64_t *sz_array;
> +    int nr_nodes = 0;
> +
> +    /* Cannot use specified mapping if not NUMA machine. */
> +    ninfo = libxl_get_numainfo(CTX, &nr_nodes);
> +    if (ninfo == NULL)
> +        return false;
> +
> +    sz_array = info->vnuma_mem;
> +    claim = libxl__calloc(gc, info->vnodes, sizeof(*claim));
> +    /* Get total memory required on each physical node. */
> +    for (i = 0; i < info->vnodes; i++)
> +    {
> +        node = info->vnuma_vnodemap[i];
> +
> +        if (node < nr_nodes)
> +            claim[node] += (sz_array[i] << 20);
> +        else
> +            goto vnodemapout;
> +   }
> +   for (i = 0; i < nr_nodes; i++) {
> +       if (claim[i] > ninfo[i].free)
> +          /* Cannot complete user request, falling to default. */
> +          goto vnodemapout;
> +   }
> +
> + vnodemapout:

Shouldn't we free 'claim'?

> +   return true;
> +}
> +
> +/*
> + * Returns number of absent pages within e820 map
> + * between start and end addresses passed. Needed
> + * to correctly set numa memory ranges for domain.
> + */
> +static unsigned long e820_memory_hole_size(unsigned long start,
> +                                            unsigned long end,
> +                                            struct e820entry e820[],
> +                                            unsigned int nr)
> +{
> +    unsigned int i;
> +    unsigned long absent, start_blk, end_blk;
> +
> +    /* init absent number of pages with all memmap size. */
> +    absent = end - start;
> +    for (i = 0; i < nr; i++) {
> +        /* if not E820_RAM region, skip it. */
> +        if (e820[i].type == E820_RAM) {

I would try to adhere to what the comment says, so make this:

        if (e820[i].type != E820_RAM)
                continue;

        .. and then continue with the rest of the code below.
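
That drops one level of indentation from the rest of the loop body.
Roughly (untested, same logic as posted, just restructured):

    for (i = 0; i < nr; i++) {
        /* if not an E820_RAM region, skip it. */
        if (e820[i].type != E820_RAM)
            continue;

        start_blk = e820[i].addr;
        end_blk = e820[i].addr + e820[i].size;
        /* beginning address is within this region? */
        if (start >= start_blk && start <= end_blk) {
            if (end > end_blk)
                absent -= end_blk - start;
            else
                /* fit the region? then no absent pages. */
                absent -= end - start;
            continue;
        }
        /* found the end of range in this region? */
        if (end <= end_blk && end >= start_blk) {
            absent -= end - start_blk;
            /* no need to look for more ranges. */
            break;
        }
    }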

> +            start_blk = e820[i].addr;
> +            end_blk = e820[i].addr + e820[i].size;
> +            /* beginning address is within this region? */
> +            if (start >= start_blk && start <= end_blk) {
> +                if (end > end_blk)
> +                    absent -= end_blk - start;
> +                else
> +                    /* fit the region? then no absent pages. */
> +                    absent -= end - start;
> +                continue;
> +            }
> +            /* found the end of range in this region? */
> +            if (end <= end_blk && end >= start_blk) {
> +                absent -= end - start_blk;
> +                /* no need to look for more ranges. */
> +                break;
> +            }
> +        }
> +    }
> +    return absent;
> +}
> +
> +/*
> + * For each node, build memory block start and end addresses.
> + * Subtract any memory hole from the range found in the e820 map.
> + * vnode memory sizes are passed here in megabytes; the result is
> + * in memory block addresses.
> + * The Linux kernel will adjust numa memory block sizes on its own,
> + * but we want to provide the kernel with numa block addresses that
> + * will be the same in the kernel and the hypervisor.
> + */
> +#define max(a,b) ((a > b) ? a : b)
> +int libxl__vnuma_align_mem(libxl__gc *gc,
> +                            uint32_t domid,
> +                            /* IN: mem sizes in megabytes */
> +                            libxl_domain_build_info *b_info,
> +                            /* OUT: linux NUMA blocks addresses */
> +                            vmemrange_t *memblks)
> +{
> +    unsigned int i;
> +    int j, rc;
> +    uint64_t next_start_blk, end_max = 0, size;
> +    uint32_t nr;
> +    struct e820entry map[E820MAX];
> +
> +    errno = ERROR_INVAL;
> +    if (b_info->vnodes == 0)
> +        return -EINVAL;
> +
> +    if (!memblks || !b_info->vnuma_mem)
> +        return -EINVAL;
> +
> +    libxl_ctx *ctx = libxl__gc_owner(gc);
> +
> +    /* Retrieve e820 map for this host. */
> +    rc = xc_get_machine_memory_map(ctx->xch, map, E820MAX);
> +
> +    if (rc < 0) {
> +        errno = rc;
> +        return -EINVAL;
> +    }
> +    nr = rc;
> +    rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb,
> +                       (b_info->max_memkb - b_info->target_memkb) +
> +                       b_info->u.pv.slack_memkb);
> +    if (rc)
> +    {
> +        errno = rc;
> +        return -EINVAL;
> +    }
> +
> +    /* find max memory address for this host. */
> +    for (j = 0; j < nr; j++)
> +        if (map[j].type == E820_RAM) {
> +            end_max = max(end_max, map[j].addr + map[j].size);
> +        }
> +

I think the compiler or smatch will complain about the missing
optional {}. Usually you do
        for (.. )
        {
                if (..)
                {
                }
        }
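
Applied to the loop above, with the braces on the for, that would be
something like (untested):

    for (j = 0; j < nr; j++) {
        if (map[j].type == E820_RAM)
            end_max = max(end_max, map[j].addr + map[j].size);
    }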

> +    memset(memblks, 0, sizeof(*memblks) * b_info->vnodes);
> +    next_start_blk = 0;
> +
> +    memblks[0].start = map[0].addr;
> +
> +    for (i = 0; i < b_info->vnodes; i++) {

So you are mixing two styles here. Earlier you had

if (rc)
{
}

But here you are doing
for (..) {
}

If you want the same style the for loop should have been:

for (..)
{
}

You need to look at the code in this file and adopt the style that
is used in the majority of cases.
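
For example, if the brace-on-the-same-line form is what this file
mostly uses, the error check above would become (untested):

    rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb,
                       (b_info->max_memkb - b_info->target_memkb) +
                       b_info->u.pv.slack_memkb);
    if (rc) {
        errno = rc;
        return -EINVAL;
    }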

> +
> +        memblks[i].start += next_start_blk;
> +        memblks[i].end = memblks[i].start + (b_info->vnuma_mem[i] << 20);
> +
> +        if (memblks[i].end > end_max) {
> +            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
> +                    "Shrunk vNUMA memory block %d address to max e820 address: \
> +                    %#010lx -> %#010lx\n", i, memblks[i].end, end_max);
> +            memblks[i].end = end_max;
> +            break;
> +        }
> +
> +        size = memblks[i].end - memblks[i].start;
> +        /*
> +         * For pv host with e820_host option turned on we need
> +         * to take into account memory holes. For pv host with
> +         * e820_host disabled or unset, the map is a contiguous
> +         * RAM region.
> +         */
> +        if (libxl_defbool_val(b_info->u.pv.e820_host)) {
> +            while((memblks[i].end - memblks[i].start -
> +                   e820_memory_hole_size(memblks[i].start,
> +                   memblks[i].end, map, nr)) < size )
> +            {
> +                memblks[i].end += MIN_VNODE_SIZE << 10;
> +                if (memblks[i].end > end_max) {
> +                    memblks[i].end = end_max;
> +                    LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
> +                            "Shrunk vNUMA memory block %d address to max e820 \
> +                            address: %#010lx -> %#010lx\n", i, memblks[i].end,
> +                            end_max);
> +                    break;
> +                }
> +            }
> +        }
> +        next_start_blk = memblks[i].end;
> +        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,"i %d, start  = %#010lx, \
> +                    end = %#010lx\n", i, memblks[i].start, memblks[i].end);
> +    }
> +
> +    /* Did not form memory addresses for every node? */
> +    if (i != b_info->vnodes)  {
> +        LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Not all nodes were populated with \
> +                block addresses, only %d out of %d", i, b_info->vnodes);
> +        return -EINVAL;
> +    }
> +    return 0;
> +}
> +
> +/*
>   * Local variables:
>   * mode: C
>   * c-basic-offset: 4
> diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c
> index 7589060..46e84e4 100644
> --- a/tools/libxl/libxl_x86.c
> +++ b/tools/libxl/libxl_x86.c
> @@ -1,5 +1,6 @@
>  #include "libxl_internal.h"
>  #include "libxl_arch.h"
> +#include "libxl_vnuma.h"
>  
>  static const char *e820_names(int type)
>  {
> @@ -14,7 +15,7 @@ static const char *e820_names(int type)
>      return "Unknown";
>  }
>  
> -static int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
> +int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
>                           uint32_t *nr_entries,
>                           unsigned long map_limitkb,
>                           unsigned long balloon_kb)
> -- 
> 1.7.10.4
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
