[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 5/7] libxl: vNUMA configuration parser



Parses VM vNUMA related config, verifies and
sets default values for erroneous parameters.
config options are:

vnodes = 2
vnumamem = [2048, 2048]
vdistance = [10, 40, 40, 10]
vnuma_vcpumap ="1, 0, 1, 0"
vnuma_vnodemap = [1, 0]

TODO:
- change to python list vnuma_vcpumap;
- add an option to specify vdistance as half matrix;

Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>

---
Changes since RFC v2:
- vnuma memory areas are specified in MBytes now;
- added parsing for vnode to pnode map;
- added support for python lists parsing;
- added simplified form for vdistance config - [10, 20];
the first one is same node distance, the other is for the rest;
- added memory release for unexpected exit;
- remove overriding config parameters;
- added default vNUMA memory sizes construction in
case the option is omitted or incorrect;
---
 tools/libxl/xl_cmdimpl.c |  294 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 293 insertions(+), 1 deletion(-)

diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index a8261be..1a03367 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -572,6 +572,89 @@ vcpp_out:
     return rc;
 }
 
+static void vnuma_info_release(libxl_domain_build_info *info)
+{
+    info->nr_vnodes = 0;
+    if ( info->vnuma_memszs ) free(info->vnuma_memszs);
+    if ( info->vdistance ) free(info->vdistance);
+    if ( info->vcpu_to_vnode ) free(info->vcpu_to_vnode);
+    if ( info->vnode_to_pnode ) free(info->vnode_to_pnode);
+}
+static int get_list_item_uint(XLU_ConfigList *list, unsigned int i)
+{
+    const char *buf;
+    char *ep;
+    unsigned long ul;
+    int rc = -EINVAL;
+    buf = xlu_cfg_get_listitem(list, i);
+    if (!buf) 
+        return rc;
+    ul = strtoul(buf, &ep, 10);
+    if (ep == buf) 
+        return rc;
+    if (ul >= UINT16_MAX) 
+        return rc; 
+    return (int)ul;
+}
+
/*
 * Fill the nr_vnodes x nr_vnodes distance matrix with defaults:
 * @samenode on the diagonal (a node's distance to itself) and
 * @othernode for every other pair.
 */
static void vdistance_default(unsigned int *vdistance,
                              unsigned int nr_vnodes,
                              unsigned int samenode,
                              unsigned int othernode)
{
    unsigned int i, j;

    /* unsigned counters: avoids signed/unsigned comparison with the bound */
    for (i = 0; i < nr_vnodes; i++)
        for (j = 0; j < nr_vnodes; j++)
            vdistance[j * nr_vnodes + i] = (i == j) ? samenode : othernode;
}
+
+static void vcputovnode_default(unsigned int *vcpu_to_vnode,
+                                unsigned int nr_vnodes,
+                                unsigned int max_vcpus)
+{
+    int i;
+    if (vcpu_to_vnode == NULL)
+        return;
+    for(i = 0; i < max_vcpus; i++)
+        vcpu_to_vnode[i] = i % nr_vnodes;
+}
+
/*
 * Parse a space-separated vcpu -> vnode map from @cfg, e.g. "1 0 1 0"
 * assigns vcpu0 to vnode1, vcpu1 to vnode0, and so on (tokens such as
 * "1," also parse, strtoul stops at the comma).
 *
 * On success the parsed map is copied into @vmap and the number of
 * entries parsed is returned.  Returns -EINVAL when an argument is
 * invalid, a token is not a number, there are more tokens than
 * @nr_vcpus, or not every vnode received at least one vcpu; the caller
 * then falls back to the default map.  Note @vmap may still have been
 * overwritten when coverage (not parsing) is what failed, matching the
 * previous behaviour.
 */
static int vcputovnode_parse(char *cfg, unsigned int *vmap,
                             unsigned int nr_vnodes,
                             unsigned int nr_vcpus)
{
    char *tok, *endptr, *saveptr = NULL;
    unsigned int *vmap_tmp;
    unsigned int nodemap = 0, smask;
    unsigned int i = 0;
    int rc = -EINVAL;

    if (vmap == NULL || nr_vnodes == 0)
        return rc;
    /*
     * Mask of node bits that must all be set for full coverage.
     * Computed without shifting by >= the width of nodemap (that
     * would be undefined behaviour) and with an unsigned operand.
     */
    smask = nr_vnodes >= CHAR_BIT * sizeof(nodemap) ?
            ~0u : (1u << nr_vnodes) - 1;
    vmap_tmp = calloc(nr_vcpus, sizeof(*vmap_tmp));
    if (vmap_tmp == NULL)
        return rc;

    for (tok = strtok_r(cfg, " ", &saveptr); tok;
         tok = strtok_r(NULL, " ", &saveptr)) {
        unsigned long val;

        /* More tokens than vcpus is a configuration error. */
        if (i >= nr_vcpus)
            goto out;
        val = strtoul(tok, &endptr, 10);
        /* Validate the token BEFORE using the value anywhere. */
        if (endptr == tok)
            goto out;
        vmap_tmp[i] = val;
        /*
         * Track which vnodes are used.  Only in-range values may
         * feed the shift; out-of-range nodes were masked out by
         * smask anyway, and shifting by them would be UB.
         */
        if (val < nr_vnodes)
            nodemap |= 1u << val;
        i++;
    }
    memcpy(vmap, vmap_tmp, sizeof(*vmap) * nr_vcpus);
    /* Success only if every vnode got at least one vcpu. */
    rc = ((nodemap & smask) == smask) ? (int)i : -EINVAL;
out:
    free(vmap_tmp);
    return rc;
}
+
 static void parse_config_data(const char *config_source,
                               const char *config_data,
                               int config_len,
@@ -906,7 +989,12 @@ static void parse_config_data(const char *config_source,
     {
         char *cmdline = NULL;
         const char *root = NULL, *extra = "";
-
+        XLU_ConfigList *vnumamemcfg, *vdistancecfg, *vnodemap;
+        int nr_vnuma_regions, nr_vdist, nr_vnodemap;
+        unsigned long long vnuma_memparsed = 0;
+        unsigned int dist;
+        unsigned long ul;
+        
         xlu_cfg_replace_string (config, "kernel", &b_info->u.pv.kernel, 0);
 
         xlu_cfg_get_string (config, "root", &root, 0);
@@ -924,6 +1012,210 @@ static void parse_config_data(const char *config_source,
             exit(1);
         }
 
+        if (!xlu_cfg_get_long (config, "vnodes", &l, 0)) {
+            b_info->nr_vnodes = l;
+            if (b_info->nr_vnodes != 0 && b_info->max_vcpus >= 
b_info->nr_vnodes) {
+                if (!xlu_cfg_get_list(config, "vnumamem", &vnumamemcfg, 
&nr_vnuma_regions, 0)) {
+                    /* 
+                     * If number of regions parsed != number of nodes, check
+                     * the memory configuration anyways and if its ok we 
adjust total 
+                     * number of nodes. The memory parsed is in MBytes. 
+                     */
+                    b_info->vnuma_memszs = calloc(b_info->nr_vnodes,
+                                                sizeof(*b_info->vnuma_memszs));
+                    if (b_info->vnuma_memszs == NULL) {
+                        fprintf(stderr, "unable to allocate memory for vnuma 
ranges\n");
+                        exit(1);
+                    }
+                    char *ep;
+                    for (i = 0; i < nr_vnuma_regions; i++) {
+                        buf = xlu_cfg_get_listitem(vnumamemcfg, i);
+                        if (!buf) {
+                            fprintf(stderr,
+                                    "xl: Unable to get element %d in vnuma 
memroy list\n", i);
+                            break;
+                        }
+                        ul = strtoul(buf, &ep, 10);
+                        if (ep == buf) {
+                            fprintf(stderr,
+                                    "xl: Invalid argument parsing vnumamem: 
%s\n", buf);
+                            break;
+                        }
+                        if (ul >= UINT32_MAX) {
+                            fprintf(stderr, "xl: vnuma memory range %lx is too 
big\n", ul);
+                            break;
+                        }
+                        /* memory in MBytes */
+                        b_info->vnuma_memszs[i] = ul;
+                    }
+                    for(i = 0; i < nr_vnuma_regions; i++)
+                        vnuma_memparsed = vnuma_memparsed + 
(b_info->vnuma_memszs[i]);
+                    /* Now we have all inputs. Check for misconfigurations and 
adjust if needed */
+                    /* Amount of memory for vnodes same as total? */
+                    if((vnuma_memparsed << 10) == (b_info->max_memkb)) {
+                        if(b_info->nr_vnodes != nr_vnuma_regions)
+                        {
+                            fprintf(stderr, "xl: vnuma nr_vnodes looks 
incorrect, adjusting to %d \n", nr_vnuma_regions);
+                            b_info->nr_vnodes = nr_vnuma_regions;
+                        }
+                    }
+                    else
+                    {
+                        fprintf(stderr, "WARNING: vNUMA memory %llu Kbytes is 
%s than total memory.\
+                                Each vnode will get equal memory size of %lu 
Kbytes\n",
+                                vnuma_memparsed << 10,
+                                (vnuma_memparsed << 10)> b_info->max_memkb ? 
"more" : "less",
+                                b_info->max_memkb / b_info->nr_vnodes);
+                        vnuma_memparsed = (b_info->max_memkb >> 10) / 
b_info->nr_vnodes;
+                        unsigned long n;
+                        /* reminder in MBytes */ 
+                        n = (b_info->max_memkb >> 10) % b_info->nr_vnodes;
+                        /* get final sizes in MBytes */
+                        for(i = 0; i < (b_info->nr_vnodes - 1); i++)
+                            b_info->vnuma_memszs[i] = vnuma_memparsed;
+                        /* add the reminder to the last node */
+                        b_info->vnuma_memszs[i] = vnuma_memparsed + n;
+                    }
+                }
+                else
+                {   
+                    b_info->vnuma_memszs = calloc(b_info->nr_vnodes,
+                                                sizeof(*b_info->vnuma_memszs));
+                    if (b_info->vnuma_memszs == NULL) {
+                        fprintf(stderr, "unable to allocate memory for vnuma 
ranges\n");
+                        exit(1);
+                    }
+                    fprintf(stderr, "WARNING: vNUMA memory ranges were not 
specified,\
+                            using default equal vnode memory size %lu Kbytes 
to cover %lu Kbytes.\n", 
+                            b_info->max_memkb / b_info->nr_vnodes,
+                            b_info->max_memkb);
+                    vnuma_memparsed = b_info->max_memkb / b_info->nr_vnodes;
+                    unsigned long n;
+                    vnuma_memparsed = (b_info->max_memkb >> 10) / 
b_info->nr_vnodes;
+                    /* reminder in MBytes */ 
+                    n = (b_info->max_memkb >> 10) % b_info->nr_vnodes;
+                    /* get final sizes in MBytes */
+                    for(i = 0; i < (b_info->nr_vnodes - 1); i++)
+                        b_info->vnuma_memszs[i] = vnuma_memparsed;
+                    b_info->vnuma_memszs[i] = vnuma_memparsed + n;
+                }
+                if(!xlu_cfg_get_list(config, "vdistance", &vdistancecfg, 
&nr_vdist, 0)) {
+                   b_info->vdistance = calloc(b_info->nr_vnodes * 
b_info->nr_vnodes, 
+                                        sizeof(*b_info->vdistance));
+                   if (b_info->vdistance == NULL) {
+                          vnuma_info_release(b_info); 
+                          exit(1);
+                   }
+                   vdistance_default(b_info->vdistance, b_info->nr_vnodes, 10, 
20);
+                   /*
+                    * If only two elements are in the list, consider first as 
value for
+                    * same node distance, the second as the rest of distances.
+                    */
+                   if (nr_vdist == 2) {
+                        int d1, d2;
+                        d1 = get_list_item_uint(vdistancecfg, 0);
+                        d2 = get_list_item_uint(vdistancecfg, 1); 
+                        if (d1 >= 0 && d2 >= 0)
+                            vdistance_default(b_info->vdistance, 
b_info->nr_vnodes, d1, d2); 
+                        else
+                            vdistance_default(b_info->vdistance, 
b_info->nr_vnodes, 10, 20);
+                   }
+                   else {
+                       if(nr_vdist < (b_info->nr_vnodes * b_info->nr_vnodes)) {
+                           /* 
+                            * not all distances were specified, use default 
values and set the ones, 
+                            * user did specify
+                            */
+                           vdistance_default(b_info->vdistance, 
b_info->nr_vnodes, 10, 20);
+                           for (i = 0; i < nr_vdist; i++) {
+                                dist = get_list_item_uint(vdistancecfg, i);
+                                if (dist >= 0)
+                                    b_info->vdistance[i] = dist;
+                                else {
+                                    fprintf(stderr, "WARNING: vNUMA distance 
was not correctly specified\n");
+                                    vnuma_info_release(b_info); 
+                                    exit(1);
+                                }
+                           }
+                       } 
+                       else {   
+                           for (i = 0; i < b_info->nr_vnodes * 
b_info->nr_vnodes; i++) {
+                                dist = get_list_item_uint(vdistancecfg, i);
+                                if (dist > 0)
+                                    b_info->vdistance[i] = dist;
+                                else {
+                                    fprintf(stderr, "WARNING: vNUMA distance 
was not correctly specified\n");
+                                    vnuma_info_release(b_info);
+                                    exit(1);
+                                }
+                            }
+                       } 
+                    }
+                }
+                else {
+                    b_info->vdistance = (unsigned int 
*)calloc(b_info->nr_vnodes * b_info->nr_vnodes, 
+                        sizeof(*b_info->vdistance));
+                    if (b_info->vdistance == NULL) {
+                        vnuma_info_release(b_info);
+                        exit(1);
+                    }
+                    vdistance_default(b_info->vdistance, b_info->nr_vnodes, 
10, 20);
+                }
+                    
+                b_info->vcpu_to_vnode = (unsigned int 
*)calloc(b_info->max_vcpus,
+                                                                    
sizeof(*b_info->vcpu_to_vnode));
+                if (b_info->vcpu_to_vnode == NULL) 
+                    exit(1);
+                if (!xlu_cfg_get_string(config, "vnuma_vcpumap", &buf, 0)) {
+                    char *buf2 = strdup(buf);
+                    if (vcputovnode_parse(buf2, b_info->vcpu_to_vnode,
+                                            b_info->nr_vnodes, 
b_info->max_vcpus) < 0) {
+                        vcputovnode_default(b_info->vcpu_to_vnode,
+                                            b_info->nr_vnodes,
+                                            b_info->max_vcpus);
+                    }
+                    if(buf2) free(buf2);
+                }
+                else
+                    vcputovnode_default(b_info->vcpu_to_vnode, 
b_info->nr_vnodes, b_info->max_vcpus);
+               
+                if (!xlu_cfg_get_list(config, "vnuma_vnodemap", &vnodemap, 
&nr_vnodemap, 0)) {
+                   /* 
+                   * In case vnode to pnode is not correct, the map will 
defined later
+                   * based on the machine architecture, configuration and 
memory availble
+                   */
+                   if (nr_vnodemap == b_info->nr_vnodes) {
+                       b_info->vnode_to_pnode = (unsigned int 
*)calloc(b_info->nr_vnodes, 
+                                                    
sizeof(*b_info->vnode_to_pnode));
+                       if (b_info->vnode_to_pnode == NULL) {
+                            vnuma_info_release(b_info);
+                            exit(1);
+                       }
+                       
+                       unsigned int vnodemask = 0, pnode, smask;
+                       smask = ~(~0 << nr_vnodemap);
+                       for (i = 0; i < nr_vnodemap; i++) {
+                           pnode = get_list_item_uint(vnodemap, i); 
+                           if (pnode >= 0) {
+                               vnodemask |= (1 << i);
+                               b_info->vnode_to_pnode[i] = pnode;
+                           }
+                       }
+                       /* Did it covered all vnodes in the mask? */
+                       if ( !((vnodemask & smask) + 1) == (1 << nr_vnodemap) ) 
{
+                           fprintf(stderr, "WARNING: Not all vnodes were 
covered in vnodemap\n");
+                           free(b_info->vnode_to_pnode);
+                           b_info->vnode_to_pnode = NULL;
+                       }
+                   }
+                }
+            }
+            else 
+                b_info->nr_vnodes=0;
+        }
+        else
+            b_info->nr_vnodes = 0;
+        
         xlu_cfg_replace_string (config, "bootloader", 
&b_info->u.pv.bootloader, 0);
         switch (xlu_cfg_get_list_as_string_list(config, "bootloader_args",
                                       &b_info->u.pv.bootloader_args, 1))
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.