[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Xen-devel] [PATCH 09 of 10 v2] libxl: have NUMA placement deal with cpupools
On Fri, 2012-06-15 at 18:04 +0100, Dario Faggioli wrote: > In such a way that only the cpus belonging to the cpupool of the > domain being placed are considered for the placement itself. > > This happens by filtering out all the nodes in which the cpupool has > not any cpu from the placement candidates. After that -- as a cpu pooling > not necessarily happens at NUMA nodes boundaries -- we also make sure > only the actual cpus that are part of the pool are considered when > counting how much processors a placement candidate is able to provide. > > Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx> > > diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c > --- a/tools/libxl/libxl_dom.c > +++ b/tools/libxl/libxl_dom.c > @@ -198,15 +198,27 @@ static void comb_get_nodemap(comb_iter_t > libxl_bitmap_set(nodemap, it[i]); > } > > +/* Retrieve how many nodes a nodemap spans. */ > +static int nodemap_to_nr_nodes(const libxl_bitmap *nodemap) > +{ > + int i, nr_nodes = 0; > + > + libxl_for_each_set_bit(i, *nodemap) > + nr_nodes++; > + return nr_nodes; > +} > + > /* Retrieve the number of cpus that the nodes that are part of the nodemap > - * span. */ > + * span and that are also set in suitable_cpumap. 
*/ > static int nodemap_to_nodes_cpus(libxl_cputopology *tinfo, int nr_cpus, > + const libxl_bitmap *suitable_cpumap, > const libxl_bitmap *nodemap) > { > int i, nodes_cpus = 0; > > for (i = 0; i < nr_cpus; i++) { > - if (libxl_bitmap_test(nodemap, tinfo[i].node)) > + if (libxl_bitmap_test(suitable_cpumap, i) && > + libxl_bitmap_test(nodemap, tinfo[i].node)) > nodes_cpus++; > } > return nodes_cpus; > @@ -311,12 +323,13 @@ static int cpus_per_node_count(libxl_cpu > int libxl__get_numa_candidates(libxl__gc *gc, > uint32_t min_free_memkb, int min_cpus, > int min_nodes, int max_nodes, > + const libxl_bitmap *suitable_cpumap, > libxl__numa_candidate *cndts[], int *nr_cndts) > { > libxl__numa_candidate *new_cndts = NULL; > libxl_cputopology *tinfo = NULL; > libxl_numainfo *ninfo = NULL; > - libxl_bitmap nodemap; > + libxl_bitmap suitable_nodemap, nodemap; > int nr_nodes, nr_cpus; > int array_size, rc; > > @@ -340,6 +353,15 @@ int libxl__get_numa_candidates(libxl__gc > if (rc) > goto out; > > + /* Allocate and prepare the map of the node that can be utilized for > + * placement, basing on the map of suitable cpus. */ > + rc = libxl_node_bitmap_alloc(CTX, &suitable_nodemap); > + if (rc) > + goto out; > + rc = libxl_cpumap_to_nodemap(CTX, suitable_cpumap, &suitable_nodemap); > + if (rc) > + goto out; > + > /* > * Round up and down some of the constraints. For instance, the minimum > * number of cpus a candidate should have must at least be non-negative. 
> @@ -391,9 +413,14 @@ int libxl__get_numa_candidates(libxl__gc > for (comb_ok = comb_init(gc, &comb_iter, nr_nodes, min_nodes); > comb_ok; > comb_ok = comb_next(comb_iter, nr_nodes, min_nodes)) { > uint32_t nodes_free_memkb; > - int nodes_cpus; > + int i, nodes_cpus; > > + /* Get the nodemap for the combination and filter unwnted nodes > */
unwanted
> comb_get_nodemap(comb_iter, &nodemap, min_nodes); > + libxl_for_each_set_bit(i, nodemap) { > + if (!libxl_bitmap_test(&suitable_nodemap, i)) > + libxl_bitmap_reset(&nodemap, i); > + } > > /* If there is not enough memoy in this combination, skip it > * and go generating the next one... */ > @@ -402,7 +429,8 @@ int libxl__get_numa_candidates(libxl__gc > continue; > > /* And the same applies if this combination is short in cpus */ > - nodes_cpus = nodemap_to_nodes_cpus(tinfo, nr_cpus, &nodemap); > + nodes_cpus = nodemap_to_nodes_cpus(tinfo, nr_cpus, > suitable_cpumap, > + &nodemap); > if (min_cpus > 0 && nodes_cpus < min_cpus) > continue; > > @@ -427,12 +455,13 @@ int libxl__get_numa_candidates(libxl__gc > new_cndts[*nr_cndts].nr_domains = > nodemap_to_nr_domains(gc, tinfo, > &nodemap); > new_cndts[*nr_cndts].free_memkb = nodes_free_memkb; > - new_cndts[*nr_cndts].nr_nodes = min_nodes; > + new_cndts[*nr_cndts].nr_nodes = nodemap_to_nr_nodes(&nodemap); > new_cndts[*nr_cndts].nr_cpus = nodes_cpus; > > LOG(DEBUG, "NUMA placement candidate #%d found: nr_nodes=%d, " > "nr_cpus=%d, free_memkb=%"PRIu32"", *nr_cndts, > - min_nodes, new_cndts[*nr_cndts].nr_cpus, > + new_cndts[*nr_cndts].nr_nodes, > + new_cndts[*nr_cndts].nr_cpus, > new_cndts[*nr_cndts].free_memkb / 1024); > > (*nr_cndts)++; > @@ -442,6 +471,7 @@ int libxl__get_numa_candidates(libxl__gc > > *cndts = new_cndts; > out: > + libxl_bitmap_dispose(&suitable_nodemap); > libxl_bitmap_dispose(&nodemap); > libxl_cputopology_list_free(tinfo, nr_cpus); > libxl_numainfo_list_free(ninfo, nr_nodes); > @@ -485,23 +515,27 @@ static int numa_cmpf(const void *v1, con > } > > /*
The actual automatic NUMA placement routine */ > -static int numa_place_domain(libxl__gc *gc, libxl_domain_build_info *info) > +static int numa_place_domain(libxl__gc *gc, uint32_t domid, > + libxl_domain_build_info *info) > { > int nr_candidates = 0; > libxl__numa_candidate *candidates = NULL; > libxl_bitmap candidate_nodemap; > - libxl_cpupoolinfo *pinfo; > - int nr_pools, rc = 0; > + libxl_cpupoolinfo cpupool_info; > + int i, cpupool, rc = 0; > uint32_t memkb; > > - /* First of all, if cpupools are in use, better not to mess with them */ > - pinfo = libxl_list_cpupool(CTX, &nr_pools); > - if (!pinfo) > - return ERROR_FAIL; > - if (nr_pools > 1) { > - LOG(NOTICE, "skipping NUMA placement as cpupools are in use"); > - goto out; > - } > + /* > + * Extract the cpumap from the cpupool the domain belong to. In fact, > + * it only makes sense to consider the cpus/nodes that are in there > + * for placement. > + */ > + rc = cpupool = libxl__domain_cpupool(gc, domid); > + if (rc < 0) > + return rc; > + rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool); > + if (rc) > + return rc; > > rc = libxl_domain_need_memory(CTX, info, &memkb); > if (rc) > @@ -513,7 +547,8 @@ static int numa_place_domain(libxl__gc * > > /* Find all the candidates with enough free memory and at least > * as much pcpus as the domain has vcpus. */ > - rc = libxl__get_numa_candidates(gc, memkb, info->max_vcpus, 0, 0, > + rc = libxl__get_numa_candidates(gc, memkb, info->max_vcpus, > + 0, 0, &cpupool_info.cpumap, > &candidates, &nr_candidates); > if (rc) > goto out; > @@ -538,13 +573,20 @@ static int numa_place_domain(libxl__gc * > if (rc) > goto out; > > + /* Avoid trying to set the affinity to cpus that might be in the > + * nodemap but not in our cpupool. 
*/ > + libxl_for_each_set_bit(i, info->cpumap) { > + if (!libxl_bitmap_test(&cpupool_info.cpumap, i)) > + libxl_bitmap_reset(&info->cpumap, i); > + } > + > LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and " > "%"PRIu32" KB free selected", candidates[0].nr_nodes, > candidates[0].nr_cpus, candidates[0].free_memkb / 1024); > > out: > libxl_bitmap_dispose(&candidate_nodemap); > - libxl_cpupoolinfo_list_free(pinfo, nr_pools); > + libxl_cpupoolinfo_dispose(&cpupool_info); > return rc; > } > > @@ -567,7 +609,7 @@ int libxl__build_pre(libxl__gc *gc, uint > * whatever that turns out to be. > */ > if (libxl_bitmap_is_full(&info->cpumap)) { > - int rc = numa_place_domain(gc, info); > + int rc = numa_place_domain(gc, domid, info); > if (rc) > return rc; > } > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > --- a/tools/libxl/libxl_internal.h > +++ b/tools/libxl/libxl_internal.h > @@ -2094,14 +2094,17 @@ typedef struct { > * least that amount of free memory and that number of cpus, respectively. If > * min_free_memkb and/or min_cpus are 0, the candidates' free memory and > number > * of cpus won't be checked at all, which means a candidate will always be > - * considered suitable wrt the specific constraint. cndts is where the list > of > - * exactly nr_cndts candidates is returned. Note that, in case no candidates > - * are found at all, the function returns successfully, but with nr_cndts > equal > - * to zero. > + * considered suitable wrt the specific constraint. suitable_cpumap is useful > + * for specifyin we want only the cpus in that mask to be considered while
specifying
Apart from those two spelling errors: Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> + * generating placement candidates (for example because of cpupools). cndts > is > + * where the list of exactly nr_cndts candidates is returned.
Note that, in > + * case no candidates are found at all, the function returns successfully, > but > + * with nr_cndts equal to zero. > */ > _hidden int libxl__get_numa_candidates(libxl__gc *gc, > uint32_t min_free_memkb, int min_cpus, > int min_nodes, int max_nodes, > + const libxl_bitmap *suitable_cpumap, > libxl__numa_candidate *cndts[], int > *nr_cndts); > > /* allocation and deallocation for placement candidates */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |