[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 10 of 11] libxl, xl: heuristics for reordering NUMA placement candidates



Once we know which of all the possible combinations
represent valid placement candidates for a domain, use some
heuristics for deciding which one to pick (instead of just
taking the first one).

First of all, the smaller candidates are better both from the
domain's point of view (less memory spreading among nodes) and
from the system point of view (less memory fragmentation). In
case of candidates of equal sizes, the one that has the greater
amount of free memory by at least 10% wins, as this is (again)
good for keeping the fragmentation small. Finally, the number of
domains running on the nodes involved in the combinations is
checked, and the "least populated" candidate is the one that
is considered.

This makes the whole automatic NUMA placement mechanism very
similar to what xm/xend does, although no memory considerations
are present there.

Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>

diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -633,6 +633,8 @@ libxl_numa_candidate *libxl_domain_numa_
 int libxl_numa_candidate_add_cpus(libxl_ctx *ctx,
                                   int min_cpus, int max_nodes,
                                   libxl_numa_candidate *candidate);
+int libxl_numa_candidate_count_domains(libxl_ctx *ctx,
+                                       libxl_numa_candidate *candidate);
 void libxl_numa_candidates_list_free(libxl_numa_candidate *list, int nr);
 
 /*
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -441,6 +441,7 @@ libxl_cputopology = Struct("cputopology"
 
 libxl_numa_candidate = Struct("numa_candidate", [
     ("nr_nodes", integer),
+    ("nr_domains", integer),
     ("free_memkb", uint32),
     ("nodemap", libxl_nodemap),
     ], dir=DIR_OUT)
diff --git a/tools/libxl/libxl_utils.c b/tools/libxl/libxl_utils.c
--- a/tools/libxl/libxl_utils.c
+++ b/tools/libxl/libxl_utils.c
@@ -849,6 +849,70 @@ int libxl_numa_candidate_add_cpus(libxl_
     return rc;
 }
 
+/*
+ * Set candidate->nr_domains to the number of domains with a vcpu whose cpumap
+ * includes a cpu of one of the candidate's nodes. Returns 0 or ERROR_NOMEM.
+ */
+int libxl_numa_candidate_count_domains(libxl_ctx *ctx,
+                                       libxl_numa_candidate *candidate)
+{
+    libxl_nodemap dom_nodemap;
+    libxl_cputopology *tinfo;
+    libxl_dominfo *dinfo;
+    int nr_doms, nr_cpus, i, j, k, rc = ERROR_NOMEM; /* rc: cleared on success */
+
+    dinfo = libxl_list_domain(ctx, &nr_doms);
+    if (dinfo == NULL) {
+        LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl_list_domain failed\n");
+        goto out;
+    }
+
+    if (libxl_nodemap_alloc(ctx, &dom_nodemap) < 0) {
+        LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl_nodemap_alloc failed\n");
+        goto out_dominfo;
+    }
+
+    tinfo = libxl_get_cpu_topology(ctx, &nr_cpus);
+    if (tinfo == NULL) {
+        LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl_get_cpu_topology failed\n");
+        goto out_nodemap;
+    }
+
+    candidate->nr_domains = 0;
+    for (i = 0; i < nr_doms; i++) {
+        libxl_vcpuinfo *vinfo;
+        int nr_vcpus, nr_pcpus; /* nr_pcpus: avoids shadowing nr_cpus */
+
+        vinfo = libxl_list_vcpu(ctx, dinfo[i].domid, &nr_vcpus, &nr_pcpus);
+        if (vinfo == NULL)
+            continue; /* domain is left out of the count */
+
+        libxl_nodemap_set_none(&dom_nodemap); /* restart for this domain */
+        for (j = 0; j < nr_vcpus; j++) {
+            libxl_for_each_set_cpu(k, vinfo[j].cpumap)
+                libxl_nodemap_set(&dom_nodemap, tinfo[k].node);
+        }
+
+        libxl_for_each_set_node(j, dom_nodemap) {
+            if (libxl_nodemap_test(&candidate->nodemap, j)) {
+                candidate->nr_domains++;
+                goto found; /* count each domain only once */
+            }
+        }
+ found:
+        libxl_vcpuinfo_list_free(vinfo, nr_vcpus);
+    }
+    rc = 0;
+
+    libxl_cputopology_list_free(tinfo, nr_cpus);
+ out_nodemap:
+    libxl_nodemap_dispose(&dom_nodemap);
+ out_dominfo:
+    libxl_dominfo_list_free(dinfo, nr_doms);
+ out:
+    return rc;
+}
+
 void libxl_numa_candidates_list_free(libxl_numa_candidate *list, int nr)
 {
     int i;
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -522,6 +522,34 @@ static int cpus_per_node(libxl_cputopolo
     return cpus_nodes;
 }
 
+/*
+ * qsort() comparator implementing the NUMA placement heuristics:
+ *  - candidates spanning fewer nodes come first;
+ *  - for equal node counts, if the two amounts of free memory differ
+ *    by less than 10% of their average, the candidate with fewer
+ *    domains on it comes first;
+ *  - otherwise, the candidate with more free memory comes first.
+ * free_memkb is unsigned: it is widened to double before subtracting,
+ * as an unsigned difference would wrap instead of going negative.
+ */
+static int candidates_cmpf(const void *v1, const void *v2)
+{
+    const libxl_numa_candidate *c1 = v1;
+    const libxl_numa_candidate *c2 = v2;
+    double mem1 = c1->free_memkb, mem2 = c2->free_memkb;
+    double mem_diff = mem1 > mem2 ? mem1 - mem2 : mem2 - mem1;
+    double mem_avg = (mem1 + mem2) / 2.0;
+
+    if (c1->nr_nodes != c2->nr_nodes)
+        return c1->nr_nodes - c2->nr_nodes;
+
+    if (mem_diff * 100.0 < 10.0 * mem_avg && /* no division: avg may be 0 */
+        c1->nr_domains != c2->nr_domains)
+        return c1->nr_domains - c2->nr_domains;
+
+    return (mem2 > mem1) - (mem2 < mem1);
+}
+
 /* Try to achieve "optimal" NUMA placement */
 static int place_domain(libxl_domain_build_info *b_info)
 {
@@ -575,6 +603,18 @@ static int place_domain(libxl_domain_bui
         goto out_topologyinfo;
     }
 
+    /* Account for the number of domains insisting on a candidate placement */
+    for (i = 0; i < nr_candidates; i++) {
+        if (libxl_numa_candidate_count_domains(ctx, &candidates[i])) {
+            fprintf(stderr, "libxl_numa_candidate_count_domains failed\n");
+            err = ENOMEM;
+            goto out_cndtslist;
+        }
+    }
+
+    /* Reorder candidates (see @candidates_cmpf for the heuristics) */
+    qsort(candidates, nr_candidates, sizeof(candidates[0]), candidates_cmpf);
+
     /* Pick a candidate and ensure it gives us enough PCPUs */
     dom_max_nodes = -1; err = ERROR_FAIL;
     for (candidate = 0; err && candidate < nr_candidates; candidate++) {
@@ -596,6 +636,7 @@ static int place_domain(libxl_domain_bui
         }
     }
 
+out_cndtslist:
     libxl_numa_candidates_list_free(candidates, nr_candidates);
 out_topologyinfo:
     libxl_cputopology_list_free(tinfo, nr_cpus);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.