[Xen-devel] [PATCH 1 of 3 v2] libxl: take node distances into account during NUMA placement
In fact, among placement candidates with the same number of nodes, the
closer the various nodes are to each other, the better the performance
for a domain placed there.

Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -105,6 +105,9 @@ out:
  * - the number of vcpus runnable on the candidates is considered, and
  *   candidates with fewer of them are preferred. If two candidates have
  *   the same number of runnable vcpus,
+ * - the sum of the node distances in the candidates is considered, and
+ *   candidates with a smaller total distance are preferred. If the total
+ *   distance is the same for the two candidates,
  * - the amount of free memory in the candidates is considered, and the
  *   candidate with the greater amount of it is preferred.
  *
@@ -114,6 +117,10 @@ out:
  * overloading large (from a memory POV) nodes. That's precisely the effect
  * that counting the vcpus able to run on the nodes tries to prevent.
  *
+ * The relative distance between the nodes of a candidate is considered
+ * too: the closer the nodes, the better for a domain ending up on that
+ * candidate.
+ *
  * Note that this completely ignores the number of nodes each candidate
  * spans, as the fact that fewer nodes is better is already accounted for
  * in the algorithm.
@@ -124,6 +131,9 @@ static int numa_cmpf(const libxl__numa_c
     if (c1->nr_vcpus != c2->nr_vcpus)
         return c1->nr_vcpus - c2->nr_vcpus;
 
+    if (c1->dists_sum != c2->dists_sum)
+        return c1->dists_sum - c2->dists_sum;
+
     return c2->free_memkb - c1->free_memkb;
 }
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2735,6 +2735,7 @@ static inline void libxl__ctx_unlock(lib
 typedef struct {
     int nr_cpus, nr_nodes;
     int nr_vcpus;
+    int dists_sum;
     uint32_t free_memkb;
     libxl_bitmap nodemap;
 } libxl__numa_candidate;
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -218,6 +218,40 @@ static int nodemap_to_nr_vcpus(libxl__gc
     return nr_vcpus;
 }
 
+/* Sum the relative distances of the nodes in the nodemap, to help find
+ * out which candidate is the "tightest" one. */
+static int nodemap_to_dists_sum(libxl_numainfo *ninfo, libxl_bitmap *nodemap)
+{
+    int tot_dist = 0;
+    int i, j, a = 0, b;
+
+    for (i = 0; i < libxl_bitmap_count_set(nodemap); i++) {
+        while (!libxl_bitmap_test(nodemap, a))
+            a++;
+
+        /* As it is usually non-zero, we do take the latency of
+         * a node to itself into account. */
+        b = a;
+        for (j = 0; j < libxl_bitmap_count_set(nodemap) - i; j++) {
+            while (!libxl_bitmap_test(nodemap, b))
+                b++;
+
+            /*
+             * On most architectures, going from node A to node B costs
+             * exactly as much as going from B to A. However, let's not
+             * rely on that and account for both contributions, just to
+             * be ready for whatever the future might have in store.
+             */
+            tot_dist += ninfo[a].dists[b];
+            tot_dist += ninfo[b].dists[a];
+            b++;
+        }
+        a++;
+    }
+
+    return tot_dist;
+}
+
 /*
  * This function tries to figure out if the host has a consistent number
  * of cpus along all its NUMA nodes. In fact, if that is the case, we can
@@ -415,6 +449,7 @@ int libxl__get_numa_candidate(libxl__gc
          */
         libxl__numa_candidate_put_nodemap(gc, &new_cndt, &nodemap);
         new_cndt.nr_vcpus = nodemap_to_nr_vcpus(gc, tinfo, &nodemap);
+        new_cndt.dists_sum = nodemap_to_dists_sum(ninfo, &nodemap);
         new_cndt.free_memkb = nodes_free_memkb;
         new_cndt.nr_nodes = libxl_bitmap_count_set(&nodemap);
         new_cndt.nr_cpus = nodes_cpus;
@@ -430,12 +465,14 @@ int libxl__get_numa_candidate(libxl__gc
 
             LOG(DEBUG, "New best NUMA placement candidate found: "
                        "nr_nodes=%d, nr_cpus=%d, nr_vcpus=%d, "
-                       "free_memkb=%"PRIu32"", new_cndt.nr_nodes,
-                       new_cndt.nr_cpus, new_cndt.nr_vcpus,
+                       "dists_sum=%d, free_memkb=%"PRIu32"",
+                       new_cndt.nr_nodes, new_cndt.nr_cpus,
+                       new_cndt.nr_vcpus, new_cndt.dists_sum,
                        new_cndt.free_memkb / 1024);
 
             libxl__numa_candidate_put_nodemap(gc, cndt_out, &nodemap);
             cndt_out->nr_vcpus = new_cndt.nr_vcpus;
+            cndt_out->dists_sum = new_cndt.dists_sum;
             cndt_out->free_memkb = new_cndt.free_memkb;
             cndt_out->nr_nodes = new_cndt.nr_nodes;
             cndt_out->nr_cpus = new_cndt.nr_cpus;
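For reference, here is a small standalone sketch (not libxl code; the
distance matrix, node sets and helper below are made up for the example)
of the same accumulation scheme nodemap_to_dists_sum() uses: every pair
of nodes in the candidate, including a node paired with itself, contributes
both d[a][b] and d[b][a]. On a real host the values come from the SLIT-style
distance table that libxl reports in ninfo[].dists[].

/* Standalone sketch, not libxl code: shows how summing SLIT-style node
 * distances ranks a "tight" candidate above a "spread out" one of the
 * same size.  The matrix and node sets are invented for this example. */
#include <stdio.h>

#define NR_NODES 4

/* ACPI SLIT convention: 10 on the diagonal (a node to itself), larger
 * values for more distant nodes.  Numbers are illustrative only. */
static const int dists[NR_NODES][NR_NODES] = {
    { 10, 20, 40, 40 },
    { 20, 10, 40, 40 },
    { 40, 40, 10, 20 },
    { 40, 40, 20, 10 },
};

/* Same scheme as the patch: every unordered pair of nodes in the set
 * (including a node with itself) contributes both d[a][b] and d[b][a],
 * so an asymmetric distance table is handled too. */
static int dists_sum(const int *nodes, int nr_nodes)
{
    int tot = 0, i, j;

    for (i = 0; i < nr_nodes; i++)
        for (j = i; j < nr_nodes; j++)
            tot += dists[nodes[i]][nodes[j]] + dists[nodes[j]][nodes[i]];

    return tot;
}

int main(void)
{
    int tight[]  = { 0, 1 };   /* two adjacent nodes  */
    int spread[] = { 0, 2 };   /* two far-apart nodes */

    /* The smaller sum wins the tie-break, so {0,1} is preferred. */
    printf("tight  {0,1}: dists_sum = %d\n", dists_sum(tight, 2));  /* 80  */
    printf("spread {0,2}: dists_sum = %d\n", dists_sum(spread, 2)); /* 120 */
    return 0;
}

Since ACPI SLIT tables normalize the local (diagonal) distance to 10, among
candidates spanning the same number of nodes the diagonal terms add an
essentially constant offset, and it is the cross-node terms that actually
drive the ordering.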
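Similarly, a hypothetical sketch of where the new criterion slots into the
tie-break: with equal vcpu load the candidate with the smaller distance sum
wins, and free memory only decides when both of those are tied. The struct
and numbers are illustrative, not libxl's actual types.

/* Standalone sketch, not libxl code: mirrors the ordering the patch gives
 * numa_cmpf() -- fewer runnable vcpus first, then smaller distance sum,
 * then more free memory. */
#include <stdio.h>
#include <stdint.h>

struct candidate {
    const char *name;
    int nr_vcpus;          /* vcpus already runnable on these nodes   */
    int dists_sum;         /* sum of node distances, as in the patch  */
    uint32_t free_memkb;   /* free memory on the candidate's nodes    */
};

/* Negative return means c1 is the better candidate. */
static int cmp_candidates(const struct candidate *c1,
                          const struct candidate *c2)
{
    if (c1->nr_vcpus != c2->nr_vcpus)
        return c1->nr_vcpus - c2->nr_vcpus;

    if (c1->dists_sum != c2->dists_sum)
        return c1->dists_sum - c2->dists_sum;

    /* Note the inversion: more free memory is better.  (The patch does
     * this with a plain subtraction; an explicit compare is used here
     * to keep the sketch obviously well-defined.) */
    if (c1->free_memkb != c2->free_memkb)
        return c1->free_memkb > c2->free_memkb ? -1 : 1;
    return 0;
}

int main(void)
{
    /* Same vcpu load, so the distance sum decides, despite b having
     * more free memory. */
    struct candidate a = { "nodes {0,1}", 4,  80, 2048 * 1024 };
    struct candidate b = { "nodes {0,2}", 4, 120, 4096 * 1024 };

    printf("preferred: %s\n", cmp_candidates(&a, &b) < 0 ? a.name : b.name);
    return 0;
}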