[Xen-devel] [PATCH v4 4/7] xl: vnuma distance, vcpu and pnode masks parser
Parses the vm config options vdistance, vcpu_to_vnode mask and
vnode_to_pnode mask. If not configured, uses default settings.
Includes documentation about the vNUMA topology config parameters
in xl.cfg.

Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
---
 docs/man/xl.cfg.pod.5    |   60 +++++++++++++++++++
 tools/libxl/xl_cmdimpl.c |  146 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+)

diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index 3b227b7..ccc25de 100644
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -216,6 +216,66 @@ if the values of B<memory=> and B<maxmem=> differ.
 A "pre-ballooned" HVM guest needs a balloon driver, without a balloon
 driver it will crash.
 
+=item B<vnuma_nodes=N>
+
+Number of vNUMA nodes the guest will be initialized with on boot.
+
+=item B<vnuma_mem=[vmem1, vmem2, ...]>
+
+The vnode memory sizes, defined in MBytes. Domain creation will fail if
+the sum of all vnode memory sizes does not match the domain memory, or
+if not all of the nodes are defined here. If not specified, memory will
+be split equally between vnodes. The current minimum vnode size is 64MB.
+
+Example: vnuma_mem=[1024, 1024, 2048, 2048]
+
+=item B<vdistance=[d1, d2]>
+
+Defines the distance table for vNUMA nodes. NUMA distance is usually
+represented by a two-dimensional array, and all distances may be
+specified here in one line, by rows. The distance can be specified as
+two numbers [d1, d2], where d1 is the same-node distance and d2 is the
+value for all other distances. If not specified, the default distance
+[10, 20] will be used.
+
+Example:
+vnuma_nodes = 3
+vdistance = [10, 20]
+will expand to this distance table (this is the default setting as well):
+[10, 20, 20]
+[20, 10, 20]
+[20, 20, 10]
+
+=item B<vnuma_vcpumap=[vcpu1, vcpu2, ...]>
+
+Defines the vcpu to vnode mapping as a string of integers, representing
+node numbers. If not defined, the vcpus are interleaved over the virtual
+nodes. Current limitation: every vNUMA node has to have at least one
+vcpu, otherwise the default vcpu_to_vnode mapping will be used.
+
+Example:
+to map 4 vcpus to 2 nodes - vcpus 0,1 -> vnode 0, vcpus 2,3 -> vnode 1:
+vnuma_vcpumap = [0, 0, 1, 1]
+
+=item B<vnuma_vnodemap=[p1, p2, ..., pn]>
+
+vnode to pnode mapping. Can be configured if manual vnode allocation is
+required. It will only take effect on real NUMA machines, and only if
+memory or other constraints do not prevent it. If the mapping is valid,
+automatic NUMA placement will be disabled. If the mapping is incorrect
+and vnuma_autoplacement is true, automatic NUMA placement will be used;
+otherwise domain creation fails.
+
+Example:
+assume a two-node NUMA machine:
+vnuma_vnodemap=[1, 0]
+the first vnode will be placed on node 1, the second on node 0.
+
+=item B<vnuma_autoplacement=[0|1]>
+
+If enabled, the best physical node candidate will be found automatically
+for each vnode whenever vnuma_vnodemap is incorrect or memory
+requirements prevent using it. Set to '0' by default.
+
 =back
 
 =head3 Event Actions
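
For reference, the options documented above combine into a guest config
fragment like the following (not part of the patch; the values are
illustrative only, for a 2-vnode, 4-vcpu guest):

    vnuma_nodes = 2
    vnuma_mem = [1024, 1024]
    vdistance = [10, 20]
    vnuma_vcpumap = [0, 0, 1, 1]
    vnuma_vnodemap = [1, 0]
    vnuma_autoplacement = 1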
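
The vdistance_default() helper called by the parser below is only
visible as context in the next hunk, not in full; the following is a
minimal sketch of the expansion it presumably performs, matching the
documented [d1, d2] semantics (the fourth parameter name is an
assumption):

    /* Sketch, not part of the patch: fill an nr_vnodes x nr_vnodes
     * table, row by row, with samenode on the diagonal and othernode
     * everywhere else, e.g. [10, 20] expands to the table shown in
     * the xl.cfg example above. */
    static void vdistance_default(unsigned int *vdistance,
                                  unsigned int nr_vnodes,
                                  unsigned int samenode,
                                  unsigned int othernode)
    {
        unsigned int i, j;

        for (i = 0; i < nr_vnodes; i++)
            for (j = 0; j < nr_vnodes; j++)
                vdistance[i * nr_vnodes + j] = (i == j) ? samenode
                                                        : othernode;
    }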
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index c79e73e..f6a7774 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -633,6 +633,23 @@ static void vnuma_info_release(libxl_domain_build_info *info)
     info->nr_vnodes = 0;
 }
 
+static int get_list_item_uint(XLU_ConfigList *list, unsigned int i)
+{
+    const char *buf;
+    char *ep;
+    unsigned long ul;
+    int rc = -EINVAL;
+    buf = xlu_cfg_get_listitem(list, i);
+    if (!buf)
+        return rc;
+    ul = strtoul(buf, &ep, 10);
+    if (ep == buf)
+        return rc;
+    if (ul >= UINT16_MAX)
+        return rc;
+    return (int)ul;
+}
+
 static void vdistance_default(unsigned int *vdistance,
                               unsigned int nr_vnodes,
                               unsigned int samenode,
@@ -1090,7 +1107,9 @@ static void parse_config_data(const char *config_source,
     const char *root = NULL, *extra = "";
 
     XLU_ConfigList *vnumamemcfg;
+    XLU_ConfigList *vdistancecfg, *vnodemap, *vcpumap;
     int nr_vnuma_regions;
+    int nr_vdist, nr_vnodemap;
     unsigned long long vnuma_memparsed = 0;
     unsigned long ul;
 
@@ -1198,6 +1217,133 @@ static void parse_config_data(const char *config_source,
                 exit(1);
             }
         }
+
+        b_info->vdistance = calloc(b_info->nr_vnodes * b_info->nr_vnodes,
+                                   sizeof(*b_info->vdistance));
+        if (b_info->vdistance == NULL) {
+            vnuma_info_release(b_info);
+            exit(1);
+        }
+
+        if (!xlu_cfg_get_list(config, "vdistance", &vdistancecfg, &nr_vdist, 0) &&
+            nr_vdist == 2) {
+            /*
+             * The first value is the same-node distance, the second is
+             * used for all other distances. This is required right now
+             * to avoid a non-symmetrical distance table, as that may
+             * break the latest kernels.
+             * TODO: a better way to analyze an extended distance table,
+             * possibly OS specific.
+             */
+            int d1, d2;
+            d1 = get_list_item_uint(vdistancecfg, 0);
+            d2 = get_list_item_uint(vdistancecfg, 1);
+
+            if (d1 >= 0 && d2 >= 0 && d1 < d2) {
+                vdistance_default(b_info->vdistance, b_info->nr_vnodes, d1, d2);
+            } else {
+                fprintf(stderr, "WARNING: Distances are not correct.\n");
+                vnuma_info_release(b_info);
+                exit(1);
+            }
+        } else
+            vdistance_default(b_info->vdistance, b_info->nr_vnodes, 10, 20);
+
+        b_info->vcpu_to_vnode = calloc(b_info->max_vcpus,
+                                       sizeof(*b_info->vcpu_to_vnode));
+        if (b_info->vcpu_to_vnode == NULL) {
+            vnuma_info_release(b_info);
+            exit(1);
+        }
+        nr_vnodemap = 0;
+        if (!xlu_cfg_get_list(config, "vnuma_vcpumap",
+                              &vcpumap, &nr_vnodemap, 0)) {
+            if (nr_vnodemap == b_info->max_vcpus) {
+                unsigned int vnodemask = 0, smask;
+                int vnode;
+                smask = ~(~0 << b_info->nr_vnodes);
+                for (i = 0; i < nr_vnodemap; i++) {
+                    vnode = get_list_item_uint(vcpumap, i);
+                    if (vnode >= 0 && vnode < b_info->nr_vnodes) {
+                        vnodemask |= (1 << vnode);
+                        b_info->vcpu_to_vnode[i] = vnode;
+                    }
+                }
+                /* Did the vcpu mask cover all vnodes? */
+                if (((smask & vnodemask) + 1) != (1 << b_info->nr_vnodes)) {
+                    fprintf(stderr, "WARNING: Not all vnodes were covered in vnuma_vcpumap.\n");
+                    vnuma_info_release(b_info);
+                    exit(1);
+                }
+            } else {
+                fprintf(stderr, "WARNING: Bad vnuma_vcpumap.\n");
+                vnuma_info_release(b_info);
+                exit(1);
+            }
+        } else
+            vcputovnode_default(b_info->vcpu_to_vnode,
+                                b_info->nr_vnodes,
+                                b_info->max_vcpus);
+
+        /* Is there a mapping to physical NUMA nodes? */
+        b_info->vnode_to_pnode = calloc(b_info->nr_vnodes,
+                                        sizeof(*b_info->vnode_to_pnode));
+        if (b_info->vnode_to_pnode == NULL) {
+            vnuma_info_release(b_info);
+            exit(1);
+        }
+        nr_vnodemap = 0;
+        if (!xlu_cfg_get_list(config, "vnuma_vnodemap", &vnodemap,
+                              &nr_vnodemap, 0)) {
+            /*
+             * If not specified or incorrect, this will be defined
+             * later based on the machine architecture, configuration
+             * and memory available when creating the domain.
+             */
+            if (nr_vnodemap == b_info->nr_vnodes) {
+                unsigned int vnodemask = 0, smask;
+                int pnode;
+                smask = ~(~0 << b_info->nr_vnodes);
+                for (i = 0; i < b_info->nr_vnodes; i++) {
+                    pnode = get_list_item_uint(vnodemap, i);
+                    if (pnode >= 0) {
+                        vnodemask |= (1 << i);
+                        b_info->vnode_to_pnode[i] = pnode;
+                    }
+                }
+                /* Did the mask cover all vnodes? */
+                if (((vnodemask & smask) + 1) != (1 << nr_vnodemap)) {
+                    fprintf(stderr, "WARNING: Not all vnodes were covered in vnuma_vnodemap.\n");
+
+                    if (libxl_defbool_val(b_info->vnuma_placement)) {
+                        fprintf(stderr, "Automatic placement will be used for vnodes.\n");
+                        vnode_to_pnode_default(b_info->vnode_to_pnode, b_info->nr_vnodes);
+                    } else {
+                        vnuma_info_release(b_info);
+                        exit(1);
+                    }
+                }
+            } else {
+                fprintf(stderr, "WARNING: Incorrect vnuma_vnodemap.\n");
+                if (libxl_defbool_val(b_info->vnuma_placement)) {
+                    fprintf(stderr, "Automatic placement will be used for vnodes.\n");
+                    vnode_to_pnode_default(b_info->vnode_to_pnode, b_info->nr_vnodes);
+                } else {
+                    vnuma_info_release(b_info);
+                    exit(1);
+                }
+            }
+        } else {
+            fprintf(stderr, "WARNING: Missing vnuma_vnodemap.\n");
+
+            if (libxl_defbool_val(b_info->vnuma_placement)) {
+                fprintf(stderr, "Automatic placement will be used for vnodes.\n");
+                vnode_to_pnode_default(b_info->vnode_to_pnode, b_info->nr_vnodes);
+            } else {
+                vnuma_info_release(b_info);
+                exit(1);
+            }
+        }
     } else if (vnuma_zero_config(b_info)) {
--
1.7.10.4
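
The coverage checks in the parser above rely on a bitmask identity: with
smask = ~(~0 << nr_vnodes) (the low nr_vnodes bits set), the expression
(smask & vnodemask) + 1 equals 1 << nr_vnodes exactly when every vnode
index 0..nr_vnodes-1 appears in the map, because adding 1 to an all-ones
low field carries into the next bit. A standalone illustration (not part
of the patch):

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int nr_vnodes = 2;
        unsigned int smask = ~(~0u << nr_vnodes);     /* 0b11 */
        unsigned int covered = (1u << 0) | (1u << 1); /* map [0, 0, 1, 1] */
        unsigned int partial = (1u << 0);             /* map [0, 0, 0, 0] */

        /* All low bits set: adding 1 carries into bit nr_vnodes. */
        assert(((smask & covered) + 1) == (1u << nr_vnodes));
        /* Bit for vnode 1 missing: no carry, the check fails. */
        assert(((smask & partial) + 1) != (1u << nr_vnodes));
        printf("coverage check behaves as expected\n");
        return 0;
    }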
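
Similarly, vcputovnode_default() is called but not shown in this hunk.
Since the documentation says vcpus are interleaved over the virtual
nodes when vnuma_vcpumap is absent, a plausible minimal sketch (an
assumption, not the actual implementation) is a round-robin fill:

    /* Sketch, not part of the patch: interleave vcpus over vnodes,
     * so with 4 vcpus and 2 vnodes the map becomes [0, 1, 0, 1]. */
    static void vcputovnode_default(unsigned int *vcpu_to_vnode,
                                    unsigned int nr_vnodes,
                                    unsigned int max_vcpus)
    {
        unsigned int i;

        for (i = 0; i < max_vcpus; i++)
            vcpu_to_vnode[i] = i % nr_vnodes;
    }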