[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 09 of 10 v2] libxl: have NUMA placement deal with cpupools



On Fri, 2012-06-15 at 18:04 +0100, Dario Faggioli wrote:
> In such a way that only the cpus belonging to the cpupool of the
> domain being placed are considered for the placement itself.
> 
> This happens by filtering out of the placement candidates all the nodes
> in which the cpupool does not have any cpu. After that -- as cpu pooling
> does not necessarily happen at NUMA node boundaries -- we also make sure
> only the actual cpus that are part of the pool are considered when
> counting how many processors a placement candidate is able to provide.
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
> 
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -198,15 +198,27 @@ static void comb_get_nodemap(comb_iter_t
>          libxl_bitmap_set(nodemap, it[i]);
>  }
>  
> +/* Retrieve how many nodes a nodemap spans. */
> +static int nodemap_to_nr_nodes(const libxl_bitmap *nodemap)
> +{
> +    int i, nr_nodes = 0;
> +
> +    libxl_for_each_set_bit(i, *nodemap)
> +        nr_nodes++;
> +    return nr_nodes;
> +}
> +
>  /* Retrieve the number of cpus that the nodes that are part of the nodemap
> - * span. */
> + * span and that are also set in suitable_cpumap. */
>  static int nodemap_to_nodes_cpus(libxl_cputopology *tinfo, int nr_cpus,
> +                                 const libxl_bitmap *suitable_cpumap,
>                                   const libxl_bitmap *nodemap)
>  {
>      int i, nodes_cpus = 0;
>  
>      for (i = 0; i < nr_cpus; i++) {
> -        if (libxl_bitmap_test(nodemap, tinfo[i].node))
> +        if (libxl_bitmap_test(suitable_cpumap, i) &&
> +            libxl_bitmap_test(nodemap, tinfo[i].node))
>              nodes_cpus++;
>      }
>      return nodes_cpus;
> @@ -311,12 +323,13 @@ static int cpus_per_node_count(libxl_cpu
>  int libxl__get_numa_candidates(libxl__gc *gc,
>                                 uint32_t min_free_memkb, int min_cpus,
>                                 int min_nodes, int max_nodes,
> +                               const libxl_bitmap *suitable_cpumap,
>                                 libxl__numa_candidate *cndts[], int *nr_cndts)
>  {
>      libxl__numa_candidate *new_cndts = NULL;
>      libxl_cputopology *tinfo = NULL;
>      libxl_numainfo *ninfo = NULL;
> -    libxl_bitmap nodemap;
> +    libxl_bitmap suitable_nodemap, nodemap;
>      int nr_nodes, nr_cpus;
>      int array_size, rc;
>  
> @@ -340,6 +353,15 @@ int libxl__get_numa_candidates(libxl__gc
>      if (rc)
>          goto out;
>  
> +    /* Allocate and prepare the map of the node that can be utilized for
> +     * placement, basing on the map of suitable cpus. */
> +    rc = libxl_node_bitmap_alloc(CTX, &suitable_nodemap);
> +    if (rc)
> +        goto out;
> +    rc = libxl_cpumap_to_nodemap(CTX, suitable_cpumap, &suitable_nodemap);
> +    if (rc)
> +        goto out;
> +
>      /*
>       * Round up and down some of the constraints. For instance, the minimum
>       * number of cpus a candidate should have must at least be non-negative.
> @@ -391,9 +413,14 @@ int libxl__get_numa_candidates(libxl__gc
>          for (comb_ok = comb_init(gc, &comb_iter, nr_nodes, min_nodes); comb_ok;
>               comb_ok = comb_next(comb_iter, nr_nodes, min_nodes)) {
>              uint32_t nodes_free_memkb;
> -            int nodes_cpus;
> +            int i, nodes_cpus;
>  
> +            /* Get the nodemap for the combination and filter unwnted nodes */

Typo in the comment above: "unwnted" should be "unwanted".

>              comb_get_nodemap(comb_iter, &nodemap, min_nodes);
> +            libxl_for_each_set_bit(i, nodemap) {
> +                if (!libxl_bitmap_test(&suitable_nodemap, i))
> +                    libxl_bitmap_reset(&nodemap, i);
> +            }
>  
>              /* If there is not enough memoy in this combination, skip it
>               * and go generating the next one... */
> @@ -402,7 +429,8 @@ int libxl__get_numa_candidates(libxl__gc
>                  continue;
>  
>              /* And the same applies if this combination is short in cpus */
> -            nodes_cpus = nodemap_to_nodes_cpus(tinfo, nr_cpus, &nodemap);
> +            nodes_cpus = nodemap_to_nodes_cpus(tinfo, nr_cpus, suitable_cpumap,
> +                                               &nodemap);
>              if (min_cpus > 0 && nodes_cpus < min_cpus)
>                  continue;
>  
> @@ -427,12 +455,13 @@ int libxl__get_numa_candidates(libxl__gc
>              new_cndts[*nr_cndts].nr_domains =
>                                      nodemap_to_nr_domains(gc, tinfo, &nodemap);
>              new_cndts[*nr_cndts].free_memkb = nodes_free_memkb;
> -            new_cndts[*nr_cndts].nr_nodes = min_nodes;
> +            new_cndts[*nr_cndts].nr_nodes = nodemap_to_nr_nodes(&nodemap);
>              new_cndts[*nr_cndts].nr_cpus = nodes_cpus;
>  
>              LOG(DEBUG, "NUMA placement candidate #%d found: nr_nodes=%d, "
>                         "nr_cpus=%d, free_memkb=%"PRIu32"", *nr_cndts,
> -                       min_nodes, new_cndts[*nr_cndts].nr_cpus,
> +                       new_cndts[*nr_cndts].nr_nodes,
> +                       new_cndts[*nr_cndts].nr_cpus,
>                         new_cndts[*nr_cndts].free_memkb / 1024);
>  
>              (*nr_cndts)++;
> @@ -442,6 +471,7 @@ int libxl__get_numa_candidates(libxl__gc
>  
>      *cndts = new_cndts;
>   out:
> +    libxl_bitmap_dispose(&suitable_nodemap);
>      libxl_bitmap_dispose(&nodemap);
>      libxl_cputopology_list_free(tinfo, nr_cpus);
>      libxl_numainfo_list_free(ninfo, nr_nodes);
> @@ -485,23 +515,27 @@ static int numa_cmpf(const void *v1, con
>  }
>  
>  /* The actual automatic NUMA placement routine */
> -static int numa_place_domain(libxl__gc *gc, libxl_domain_build_info *info)
> +static int numa_place_domain(libxl__gc *gc, uint32_t domid,
> +                             libxl_domain_build_info *info)
>  {
>      int nr_candidates = 0;
>      libxl__numa_candidate *candidates = NULL;
>      libxl_bitmap candidate_nodemap;
> -    libxl_cpupoolinfo *pinfo;
> -    int nr_pools, rc = 0;
> +    libxl_cpupoolinfo cpupool_info;
> +    int i, cpupool, rc = 0;
>      uint32_t memkb;
>  
> -    /* First of all, if cpupools are in use, better not to mess with them */
> -    pinfo = libxl_list_cpupool(CTX, &nr_pools);
> -    if (!pinfo)
> -        return ERROR_FAIL;
> -    if (nr_pools > 1) {
> -        LOG(NOTICE, "skipping NUMA placement as cpupools are in use");
> -        goto out;
> -    }
> +    /*
> +     * Extract the cpumap from the cpupool the domain belong to. In fact,
> +     * it only makes sense to consider the cpus/nodes that are in there
> +     * for placement.
> +     */
> +    rc = cpupool = libxl__domain_cpupool(gc, domid);
> +    if (rc < 0)
> +        return rc;
> +    rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
> +    if (rc)
> +        return rc;
>  
>      rc = libxl_domain_need_memory(CTX, info, &memkb);
>      if (rc)
> @@ -513,7 +547,8 @@ static int numa_place_domain(libxl__gc *
>  
>      /* Find all the candidates with enough free memory and at least
>       * as much pcpus as the domain has vcpus.  */
> -    rc = libxl__get_numa_candidates(gc, memkb, info->max_vcpus, 0, 0,
> +    rc = libxl__get_numa_candidates(gc, memkb, info->max_vcpus,
> +                                    0, 0, &cpupool_info.cpumap,
>                                      &candidates, &nr_candidates);
>      if (rc)
>          goto out;
> @@ -538,13 +573,20 @@ static int numa_place_domain(libxl__gc *
>      if (rc)
>          goto out;
>  
> +    /* Avoid trying to set the affinity to cpus that might be in the
> +     * nodemap but not in our cpupool. */
> +    libxl_for_each_set_bit(i, info->cpumap) {
> +        if (!libxl_bitmap_test(&cpupool_info.cpumap, i))
> +            libxl_bitmap_reset(&info->cpumap, i);
> +    }
> +
>      LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
>                  "%"PRIu32" KB free selected", candidates[0].nr_nodes,
>                  candidates[0].nr_cpus, candidates[0].free_memkb / 1024);
>  
>   out:
>      libxl_bitmap_dispose(&candidate_nodemap);
> -    libxl_cpupoolinfo_list_free(pinfo, nr_pools);
> +    libxl_cpupoolinfo_dispose(&cpupool_info);
>      return rc;
>  }
>  
> @@ -567,7 +609,7 @@ int libxl__build_pre(libxl__gc *gc, uint
>       * whatever that turns out to be.
>       */
>      if (libxl_bitmap_is_full(&info->cpumap)) {
> -        int rc = numa_place_domain(gc, info);
> +        int rc = numa_place_domain(gc, domid, info);
>          if (rc)
>              return rc;
>      }
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -2094,14 +2094,17 @@ typedef struct {
>   * least that amount of free memory and that number of cpus, respectively. If
>   * min_free_memkb and/or min_cpus are 0, the candidates' free memory and number
>   * of cpus won't be checked at all, which means a candidate will always be
> - * considered suitable wrt the specific constraint.  cndts is where the list of
> - * exactly nr_cndts candidates is returned. Note that, in case no candidates
> - * are found at all, the function returns successfully, but with nr_cndts equal
> - * to zero.
> + * considered suitable wrt the specific constraint. suitable_cpumap is useful
> + * for specifyin we want only the cpus in that mask to be considered while

Typo in the line above: "specifyin" should be "specifying".

Apart from those two spelling errors:
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

> + * generating placement candidates (for example because of cpupools). cndts is
> + * where the list of exactly nr_cndts candidates is returned. Note that, in
> + * case no candidates are found at all, the function returns successfully, but
> + * with nr_cndts equal to zero.
>   */
>  _hidden int libxl__get_numa_candidates(libxl__gc *gc,
>                                  uint32_t min_free_memkb, int min_cpus,
>                                  int min_nodes, int max_nodes,
> +                                const libxl_bitmap *suitable_cpumap,
>                                  libxl__numa_candidate *cndts[], int *nr_cndts);
>  
>  /* allocation and deallocation for placement candidates */



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.