[Xen-devel] [PATCH v5 4/8] vnuma topology parsing routines
Parses the vnuma topology: the number of nodes and the per-node
memory ranges. If vnuma is not defined in the config, initializes
it with a single node and the default topology.
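For illustration, a hypothetical config fragment exercising the
options parsed below (the key names match what the parser reads;
the values are made up):

    memory = 2048
    vcpus = 4
    vnodes = 2                          # number of vNUMA nodes
    vnumamem = ["1024", "1024"]         # per-vnode memory in MB, must sum to 'memory'
    vdistance = ["10", "20"]            # same-node and other-node distances
    numa_cpumask = ["0", "0", "1", "1"] # vcpu -> vnode mapping
    vnuma_vnodemap = ["0", "1"]         # vnode -> physical node mapping
    vnuma_autoplacement = 0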
Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
---
tools/libxl/libxl_vnuma.h | 11 ++
tools/libxl/xl_cmdimpl.c | 406 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 417 insertions(+)
create mode 100644 tools/libxl/libxl_vnuma.h
diff --git a/tools/libxl/libxl_vnuma.h b/tools/libxl/libxl_vnuma.h
new file mode 100644
index 0000000..f1568ae
--- /dev/null
+++ b/tools/libxl/libxl_vnuma.h
@@ -0,0 +1,11 @@
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#define VNUMA_NO_NODE (~(unsigned int)0)
+
+/*
+ * Min vNUMA node size in MB. Linux currently allows 32MB, so 64MB
+ * leaves some slack. Will be modified to match Linux.
+ */
+#define MIN_VNODE_SIZE 64U
+
+#define MAX_VNUMA_NODES ((unsigned int)1 << 10)
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 5195914..59855ed 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -40,6 +40,7 @@
#include "libxl_json.h"
#include "libxlutil.h"
#include "xl.h"
+#include "libxl_vnuma.h"
/* For calls which return an errno on failure */
#define CHK_ERRNOVAL( call ) ({ \
@@ -725,6 +726,403 @@ static void parse_top_level_sdl_options(XLU_Config *config,
xlu_cfg_replace_string (config, "xauthority", &sdl->xauthority, 0);
}
+
+/* Returns a parsed value in [0, UINT16_MAX), or -EINVAL on error */
+static int get_list_item_uint(XLU_ConfigList *list, unsigned int i)
+{
+ const char *buf;
+ char *ep;
+ unsigned long ul;
+ int rc = -EINVAL;
+ buf = xlu_cfg_get_listitem(list, i);
+ if (!buf)
+ return rc;
+ ul = strtoul(buf, &ep, 10);
+ if (ep == buf)
+ return rc;
+ if (ul >= UINT16_MAX)
+ return rc;
+ return (int)ul;
+}
+
+static void vdistance_set(unsigned int *vdistance,
+ unsigned int nr_vnodes,
+ unsigned int samenode,
+ unsigned int othernode)
+{
+ unsigned int idx, slot;
+ for (idx = 0; idx < nr_vnodes; idx++)
+ for (slot = 0; slot < nr_vnodes; slot++)
+ *(vdistance + slot * nr_vnodes + idx) =
+ idx == slot ? samenode : othernode;
+}
+
+static void vcputovnode_default(unsigned int *cpu_to_node,
+ unsigned int nr_vnodes,
+ unsigned int max_vcpus)
+{
+ unsigned int cpu;
+ for (cpu = 0; cpu < max_vcpus; cpu++)
+ cpu_to_node[cpu] = cpu % nr_vnodes;
+}
+
+/* Split domain memory between vNUMA nodes equally */
+static int split_vnumamem(libxl_domain_build_info *b_info)
+{
+ unsigned long long vnodemem = 0;
+ unsigned long n;
+ unsigned int i;
+
+ /* In MBytes */
+ if (b_info->nr_nodes == 0)
+ return -1;
+ vnodemem = (b_info->max_memkb >> 10) / b_info->nr_nodes;
+ if (vnodemem < MIN_VNODE_SIZE)
+ return -1;
+ /* remainder in MBytes */
+ n = (b_info->max_memkb >> 10) % b_info->nr_nodes;
+ /* get final sizes in MBytes */
+ for (i = 0; i < (b_info->nr_nodes - 1); i++)
+ b_info->numa_memszs[i] = vnodemem;
+ /* add the remainder to the last node, e.g. 1025MB over 2 nodes -> 512 + 513 */
+ b_info->numa_memszs[i] = vnodemem + n;
+ return 0;
+}
+
+static void vnode_to_pnode_default(unsigned int *vnode_to_pnode,
+ unsigned int nr_vnodes)
+{
+ unsigned int i;
+ for (i = 0; i < nr_vnodes; i++)
+ vnode_to_pnode[i] = VNUMA_NO_NODE;
+}
+
+/*
+ * Init vNUMA to a "zero config": one node, with all other
+ * topology parameters set to defaults.
+ */
+static int vnuma_zero_config(libxl_domain_build_info *b_info)
+{
+ b_info->nr_nodes = 1;
+ /* all memory goes to this one vnode */
+ if (!(b_info->numa_memszs = (uint64_t *)calloc(b_info->nr_nodes,
+ sizeof(*b_info->numa_memszs))))
+ goto bad_vnumazerocfg;
+
+ if (!(b_info->cpu_to_node = (unsigned int *)calloc(b_info->max_vcpus,
+ sizeof(*b_info->cpu_to_node))))
+ goto bad_vnumazerocfg;
+
+ if (!(b_info->distance = (unsigned int *)calloc(b_info->nr_nodes *
+ b_info->nr_nodes, sizeof(*b_info->distance))))
+ goto bad_vnumazerocfg;
+
+ if (!(b_info->vnode_to_pnode = (unsigned int *)calloc(b_info->nr_nodes,
+ sizeof(*b_info->vnode_to_pnode))))
+ goto bad_vnumazerocfg;
+
+ b_info->numa_memszs[0] = b_info->max_memkb >> 10;
+
+ /* all vcpus assigned to this vnode */
+ vcputovnode_default(b_info->cpu_to_node, b_info->nr_nodes,
+ b_info->max_vcpus);
+
+ /* default vdistance is 10 */
+ vdistance_set(b_info->distance, b_info->nr_nodes, 10, 10);
+
+ /* VNUMA_NO_NODE for vnode_to_pnode */
+ vnode_to_pnode_default(b_info->vnode_to_pnode, b_info->nr_nodes);
+
+ /*
+ * The domain will be placed on physical nodes chosen by automatic
+ * NUMA placement; VNUMA_NO_NODE means no exact node is requested.
+ */
+ libxl_defbool_set(&b_info->vnuma_autoplacement, true);
+ return 0;
+
+ bad_vnumazerocfg:
+ return -1;
+}
+
+/* Frees vNUMA state; the caller is expected to exit afterwards. */
+static void free_vnuma_info(libxl_domain_build_info *b_info)
+{
+ free(b_info->numa_memszs);
+ free(b_info->distance);
+ free(b_info->cpu_to_node);
+ free(b_info->vnode_to_pnode);
+ b_info->nr_nodes = 0;
+}
+
+/*
+static int vdistance_parse(char *vdistcfg, unsigned int *vdistance,
+ unsigned int nr_vnodes)
+{
+ char *endptr, *toka, *tokb, *saveptra = NULL, *saveptrb = NULL;
+ unsigned int *vdist_tmp = NULL;
+ int rc = 0;
+ unsigned int i, j, parsed = 0;
+ long dist;
+
+ rc = -EINVAL;
+ if (vdistance == NULL) {
+ return rc;
+ }
+ vdist_tmp = (unsigned int *)malloc(nr_vnodes * nr_vnodes * sizeof(*vdistance));
+ if (vdist_tmp == NULL)
+ return rc;
+
+ i = j = 0;
+ for (toka = strtok_r(vdistcfg, ",", &saveptra); toka;
+ toka = strtok_r(NULL, ",", &saveptra)) {
+ if ( i >= nr_vnodes )
+ goto vdist_parse_err;
+ for (tokb = strtok_r(toka, " ", &saveptrb); tokb;
+ tokb = strtok_r(NULL, " ", &saveptrb)) {
+ if (j >= nr_vnodes)
+ goto vdist_parse_err;
+ dist = strtol(tokb, &endptr, 10);
+ if (dist > UINT16_MAX || dist < 0)
+ goto vdist_parse_err;
+ if (tokb == endptr)
+ goto vdist_parse_err;
+ *(vdist_tmp + j*nr_vnodes + i) = dist;
+ parsed++;
+ j++;
+ }
+ i++;
+ j = 0;
+ }
+ rc = parsed;
+ memcpy(vdistance, vdist_tmp, nr_vnodes * nr_vnodes * sizeof(*vdistance));
+
+ vdist_parse_err:
+ free(vdist_tmp);
+ return rc;
+}
+*/
+
+static void parse_vnuma_config(XLU_Config *config, libxl_domain_build_info *b_info)
+{
+ XLU_ConfigList *vnumamemcfg;
+ XLU_ConfigList *vdistancecfg, *vnodemap, *vcpumap;
+ int nr_vnuma_regions;
+ int nr_vdist, nr_vnodemap, nr_vcpumap, i;
+ unsigned long long vnuma_memparsed = 0;
+ long l;
+ unsigned long ul;
+ const char *buf;
+
+ if (!xlu_cfg_get_long (config, "vnodes", &l, 0)) {
+ if (l < 0 || l > MAX_VNUMA_NODES) {
+ fprintf(stderr, "Invalid number of vnuma nodes; max %u is allowed.\n", MAX_VNUMA_NODES);
+ goto bad_vnuma_config;
+ }
+ b_info->nr_nodes = l;
+
+ xlu_cfg_get_defbool(config, "vnuma_autoplacement", &b_info->vnuma_autoplacement, 0);
+
+ /* Only construct nodes with at least one vcpu for now */
+ if (b_info->nr_nodes != 0 && b_info->max_vcpus >= b_info->nr_nodes) {
+ if (!xlu_cfg_get_list(config, "vnumamem",
+ &vnumamemcfg, &nr_vnuma_regions, 0)) {
+
+ if (nr_vnuma_regions != b_info->nr_nodes) {
+ fprintf(stderr, "Number of numa regions (vnumamem = %d) is
incorrect (should be %d).\n",
+ nr_vnuma_regions, b_info->nr_nodes);
+ goto bad_vnuma_config;
+ }
+
+ b_info->numa_memszs = calloc(b_info->nr_nodes,
+ sizeof(*b_info->numa_memszs));
+ if (b_info->numa_memszs == NULL) {
+ fprintf(stderr, "Unable to allocate memory for vnuma
ranges.\n");
+ goto bad_vnuma_config;
+ }
+
+ char *ep;
+ /*
+ * Parse only nr_nodes entries, even if more or fewer regions
+ * were given; handle a shortfall later and discard extra regions.
+ */
+ for (i = 0; i < b_info->nr_nodes; i++) {
+ buf = xlu_cfg_get_listitem(vnumamemcfg, i);
+ if (!buf) {
+ fprintf(stderr,
+ "xl: Unable to get element %d in vnuma memory list.\n", i);
+ break;
+ }
+ ul = strtoul(buf, &ep, 10);
+ if (ep == buf) {
+ fprintf(stderr,
+ "xl: Invalid argument parsing vnumamem: %s.\n", buf);
+ break;
+ }
+
+ /* MIN_VNODE_SIZE (64MB) leaves slack over the 32MB Linux minimum */
+ if (ul >= UINT32_MAX || ul < MIN_VNODE_SIZE) {
+ fprintf(stderr, "xl: vnuma memory %lu is not within %u - %u range.\n",
+ ul, MIN_VNODE_SIZE, UINT32_MAX);
+ break;
+ }
+
+ /* memory in MBytes */
+ b_info->numa_memszs[i] = ul;
+ }
+
+ /* Total memory for vNUMA parsed to verify */
+ for (i = 0; i < nr_vnuma_regions; i++)
+ vnuma_memparsed = vnuma_memparsed +
(b_info->numa_memszs[i]);
+
+ /* Does the vnode memory sum match the total domain memory? */
+ if ((vnuma_memparsed << 10) != (b_info->max_memkb)) {
+ fprintf(stderr, "xl: vnuma memory is not the same as domain memory size.\n");
+ goto bad_vnuma_config;
+ }
+ } else {
+ b_info->numa_memszs = calloc(b_info->nr_nodes,
+ sizeof(*b_info->numa_memszs));
+ if (b_info->numa_memszs == NULL) {
+ fprintf(stderr, "Unable to allocate memory for vnuma
ranges.\n");
+ goto bad_vnuma_config;
+ }
+
+ fprintf(stderr, "WARNING: vNUMA memory ranges were not
specified.\n");
+ fprintf(stderr, "Using default equal vnode memory size %lu
Kbytes to cover %lu Kbytes.\n",
+ b_info->max_memkb / b_info->nr_nodes,
b_info->max_memkb);
+
+ if (split_vnumamem(b_info) < 0) {
+ fprintf(stderr, "Could not split vnuma memory into equal
chunks.\n");
+ goto bad_vnuma_config;
+ }
+ }
+
+ b_info->distance = calloc(b_info->nr_nodes * b_info->nr_nodes,
+ sizeof(*b_info->distance));
+ if (b_info->distance == NULL)
+ goto bad_vnuma_config;
+
+ if (!xlu_cfg_get_list(config, "vdistance", &vdistancecfg,
&nr_vdist, 0)) {
+ int d1, d2;
+ /*
+ * The first value is the same-node distance, the second is used
+ * for all other distances. This is required for now to avoid a
+ * non-symmetrical distance table, which may break recent kernels.
+ * TODO: a better way to analyze an extended distance table,
+ * possibly OS specific.
+ */
+ d1 = get_list_item_uint(vdistancecfg, 0);
+ d2 = get_list_item_uint(vdistancecfg, 1);
+
+ if (d1 >= 0 && d2 >= 0 && d1 < d2) {
+ vdistance_set(b_info->distance, b_info->nr_nodes, d1, d2);
+ } else {
+ fprintf(stderr, "WARNING: vnuma distance values are
incorrect.\n");
+ goto bad_vnuma_config;
+ }
+
+ } else {
+ fprintf(stderr, "Could not parse vnuma distances.\n");
+ vdistance_set(b_info->distance, b_info->nr_nodes, 10, 20);
+ }
+
+ b_info->cpu_to_node = (unsigned int *)calloc(b_info->max_vcpus,
+ sizeof(*b_info->cpu_to_node));
+ if (b_info->cpu_to_node == NULL)
+ goto bad_vnuma_config;
+
+ if (!xlu_cfg_get_list(config, "numa_cpumask",
+ &vcpumap, &nr_vcpumap, 0)) {
+ if (nr_vcpumap == b_info->max_vcpus) {
+ int vnode;
+ unsigned int vcpumask = 0, vmask;
+ vmask = ~(~0U << nr_vcpumap);
+ for (i = 0; i < nr_vcpumap; i++) {
+ vnode = get_list_item_uint(vcpumap, i);
+ /* vnode is -EINVAL if the entry could not be parsed */
+ if (vnode >= 0 && vnode < b_info->nr_nodes) {
+ vcpumask |= (1U << i);
+ b_info->cpu_to_node[i] = vnode;
+ }
+ }
+
+ /* Were all vcpus mapped to a valid vnode? */
+ if ( !(((vmask & vcpumask) + 1) == (1U << nr_vcpumap)) ) {
+ fprintf(stderr, "WARNING: Not all vcpus were covered in numa_cpumask.\n");
+ goto bad_vnuma_config;
+ }
+ } else {
+ fprintf(stderr, "WARNING: Bad vnuma_vcpumap.\n");
+ goto bad_vnuma_config;
+ }
+ }
+ else
+ vcputovnode_default(b_info->cpu_to_node,
+ b_info->nr_nodes,
+ b_info->max_vcpus);
+
+ /* Is there a mapping to physical NUMA nodes? */
+ b_info->vnode_to_pnode = (unsigned int *)calloc(b_info->nr_nodes,
+ sizeof(*b_info->vnode_to_pnode));
+ if (b_info->vnode_to_pnode == NULL)
+ goto bad_vnuma_config;
+ if (!xlu_cfg_get_list(config, "vnuma_vnodemap",&vnodemap,
+ &nr_vnodemap, 0)) {
+ /*
+ * If not specified or incorrect, the mapping will be defined
+ * later based on the machine architecture, configuration and
+ * memory available when creating the domain.
+ */
+ if (nr_vnodemap == b_info->nr_nodes) {
+ unsigned int vnodemask = 0, smask;
+ int pnode;
+ smask = ~(~0U << b_info->nr_nodes);
+ for (i = 0; i < b_info->nr_nodes; i++) {
+ pnode = get_list_item_uint(vnodemap, i);
+ /* pnode is -EINVAL if the entry could not be parsed */
+ if (pnode >= 0) {
+ vnodemask |= (1U << i);
+ b_info->vnode_to_pnode[i] = pnode;
+ }
+ }
+
+ /* Did the map cover all vnodes? */
+ if ( !(((vnodemask & smask) + 1) == (1U << nr_vnodemap)) ) {
+ fprintf(stderr, "WARNING: Not all vnodes were covered in vnuma_vnodemap.\n");
+
+ if (libxl_defbool_val(b_info->vnuma_autoplacement)) {
+ fprintf(stderr, "Automatic placement will be used
for vnodes.\n");
+ vnode_to_pnode_default(b_info->vnode_to_pnode,
b_info->nr_nodes);
+ } else
+ goto bad_vnuma_config;
+ }
+ } else {
+ fprintf(stderr, "WARNING: Incorrect vnuma_vnodemap.\n");
+
+ if (libxl_defbool_val(b_info->vnuma_autoplacement)) {
+ fprintf(stderr, "Automatic placement will be used for
vnodes.\n");
+ vnode_to_pnode_default(b_info->vnode_to_pnode,
b_info->nr_nodes);
+ } else
+ goto bad_vnuma_config;
+ }
+ } else {
+ fprintf(stderr, "WARNING: Missing vnuma_vnodemap.\n");
+
+ if (libxl_defbool_val(b_info->vnuma_autoplacement)) {
+ fprintf(stderr, "Automatic placement will be used for
vnodes.\n");
+ vnode_to_pnode_default(b_info->vnode_to_pnode,
b_info->nr_nodes);
+ } else
+ goto bad_vnuma_config;
+ }
+ }
+ else if (vnuma_zero_config(b_info))
+ goto bad_vnuma_config;
+ }
+ /* If the vnuma topology is not defined for the domain, init one node */
+ else if (vnuma_zero_config(b_info))
+ goto bad_vnuma_config;
+ return;
+
+ bad_vnuma_config:
+ free_vnuma_info(b_info);
+ exit(1);
+}
+
static void parse_config_data(const char *config_source,
const char *config_data,
int config_len,
@@ -1081,6 +1479,14 @@ static void parse_config_data(const char *config_source,
exit(1);
}
+ libxl_defbool_set(&b_info->vnuma_autoplacement, false);
+
+ /*
+ * If there is no vnuma in the config, a "zero" vnuma config
+ * will be initialized with one node and other defaults.
+ */
+ parse_vnuma_config(config, b_info);
+
xlu_cfg_replace_string (config, "bootloader",
&b_info->u.pv.bootloader, 0);
switch (xlu_cfg_get_list_as_string_list(config, "bootloader_args",
&b_info->u.pv.bootloader_args, 1))
--
1.7.10.4