[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v4 3/7] xl: vnuma memory parsing and supplement functions
On Wed, Dec 04, 2013 at 12:47:11AM -0500, Elena Ufimtseva wrote: > Parses vnuma topoplogy number of nodes and memory > ranges. If not defined, initializes vnuma with > only one node and default topology. > > Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx> > --- > Changes since v3: > - added subop hypercall to retrive number of vnodes > and vcpus for domain to make correct allocations before > requesting vnuma topology. > --- > tools/libxl/libxl_types.idl | 6 +- > tools/libxl/libxl_vnuma.h | 11 ++ > tools/libxl/xl_cmdimpl.c | 234 > +++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 250 insertions(+), 1 deletion(-) > create mode 100644 tools/libxl/libxl_vnuma.h > > diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl > index cba8eff..ba46f58 100644 > --- a/tools/libxl/libxl_types.idl > +++ b/tools/libxl/libxl_types.idl > @@ -311,7 +311,11 @@ libxl_domain_build_info = Struct("domain_build_info",[ > ("disable_migrate", libxl_defbool), > ("cpuid", libxl_cpuid_policy_list), > ("blkdev_start", string), > - > + ("vnuma_memszs", Array(uint64, "nr_vnodes")), > + ("vcpu_to_vnode", Array(uint32, "nr_vnodemap")), > + ("vdistance", Array(uint32, "nr_vdist")), > + ("vnode_to_pnode", Array(uint32, "nr_vnode_to_pnode")), > + ("vnuma_placement", libxl_defbool), Are those 'v' prefixes needed? > ("device_model_version", libxl_device_model_version), > ("device_model_stubdomain", libxl_defbool), > # if you set device_model you must set device_model_version too > diff --git a/tools/libxl/libxl_vnuma.h b/tools/libxl/libxl_vnuma.h > new file mode 100644 > index 0000000..f1568ae > --- /dev/null > +++ b/tools/libxl/libxl_vnuma.h > @@ -0,0 +1,11 @@ > +#include "libxl_osdeps.h" /* must come before any other headers */ > + > +#define VNUMA_NO_NODE ~((unsigned int)0) > + > +/* > + * Max vNUMA node size in Mb is taken 64Mb even now Linux lets > + * 32Mb, thus letting some slack. Will be modified to match Linux. -EPARSE :-) > + */ > +#define MIN_VNODE_SIZE 64U > + > +#define MAX_VNUMA_NODES (unsigned int)1 << 10 > diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c > index 341863e..c79e73e 100644 > --- a/tools/libxl/xl_cmdimpl.c > +++ b/tools/libxl/xl_cmdimpl.c > @@ -40,6 +40,7 @@ > #include "libxl_json.h" > #include "libxlutil.h" > #include "xl.h" > +#include "libxl_vnuma.h" > > #define CHK_ERRNO( call ) ({ \ > int chk_errno = (call); \ > @@ -622,6 +623,134 @@ vcpp_out: > return rc; > } > > +/* Should exit after calling this */ /* Caller should exit after calling this. */ > +static void vnuma_info_release(libxl_domain_build_info *info) > +{ > + free(info->vnuma_memszs); > + free(info->vdistance); > + free(info->vcpu_to_vnode); > + free(info->vnode_to_pnode); > + info->nr_vnodes = 0; > +} > + > +static void vdistance_default(unsigned int *vdistance, > + unsigned int nr_vnodes, > + unsigned int samenode, > + unsigned int othernode) > +{ > + int i, j; > + for (i = 0; i < nr_vnodes; i++) For clarity I would replace 'i' with 'idx' and 'j' with 'slot'. Or leave 'j' alone, but just change 'i' to 'idx' > + for (j = 0; j < nr_vnodes; j++) > + *(vdistance + j * nr_vnodes + i) = i == j ? samenode : othernode; > +} > + > +static void vcputovnode_default(unsigned int *vcpu_to_vnode, > + unsigned int nr_vnodes, > + unsigned int max_vcpus) > +{ > + int i; > + for(i = 0; i < max_vcpus; i++) You editor ate a space :-) > + vcpu_to_vnode[i] = i % nr_vnodes; > +} > + > +/* Split domain memory between vNUMA nodes equally */ > +static int split_vnumamem(libxl_domain_build_info *b_info) > +{ > + unsigned long long vnodemem = 0; > + unsigned long n; > + unsigned int i; > + > + /* In MBytes */ > + vnodemem = (b_info->max_memkb >> 10) / b_info->nr_vnodes; If b_info->nr_vnodes is zero you are going to have a problem. > + if (vnodemem < MIN_VNODE_SIZE) > + return -1; > + /* reminder in MBytes */ > + n = (b_info->max_memkb >> 10) % b_info->nr_vnodes; > + /* get final sizes in MBytes */ > + for(i = 0; i < (b_info->nr_vnodes - 1); i++) Your editor is really hungry. Another space! > + b_info->vnuma_memszs[i] = vnodemem; > + /* add the reminder to the last node */ > + b_info->vnuma_memszs[i] = vnodemem + n; > + return 0; > +} > + > +static void vnode_to_pnode_default(unsigned int *vnode_to_pnode, > + unsigned int nr_vnodes) > +{ > + unsigned int i; > + for (i = 0; i < nr_vnodes; i++) > + vnode_to_pnode[i] = VNUMA_NO_NODE; > +} > + > +/* > + * vNUMA zero config initialization for every pv domain that has > + * no vnuma defined in config file. > + */ > +static int vnuma_zero_config(libxl_domain_build_info *b_info) > +{ > + b_info->nr_vnodes = 1; > + /* dont leak memory with realloc */ ? Right? > + unsigned int *vdist, *vntop, *vcputov; > + uint64_t *memsz; > + > + /* all memory goes to this one vnode */ > + memsz = b_info->vnuma_memszs; > + b_info->vnuma_memszs = (uint64_t *)realloc(b_info->vnuma_memszs, > + b_info->nr_vnodes * > + sizeof(*b_info->vnuma_memszs)); > + if (b_info->vnuma_memszs == NULL) { > + b_info->vnuma_memszs = memsz; > + goto bad_vnumazerocfg; > + } > + b_info->vnuma_memszs[0] = b_info->max_memkb >> 10; > + > + /* all vcpus assigned to this vnode */ > + vcputov = b_info->vcpu_to_vnode; Perhaps call it 'old_vnode' ? > + b_info->vcpu_to_vnode = (unsigned int *)realloc( > + b_info->vcpu_to_vnode, > + b_info->max_vcpus * > + sizeof(*b_info->vcpu_to_vnode)); > + if (b_info->vcpu_to_vnode == NULL) { > + b_info->vcpu_to_vnode = vcputov; > + goto bad_vnumazerocfg; > + } > + vcputovnode_default(b_info->vcpu_to_vnode, > + b_info->nr_vnodes, > + b_info->max_vcpus); > + > + /* default vdistance 10 */ > + vdist = b_info->vdistance; And 'old_dist' ? > + b_info->vdistance = (unsigned int *)realloc(b_info->vdistance, > + b_info->nr_vnodes * b_info->nr_vnodes * > + sizeof(*b_info->vdistance)); > + if (b_info->vdistance == NULL) { > + b_info->vdistance = vdist; > + goto bad_vnumazerocfg; > + } > + vdistance_default(b_info->vdistance, b_info->nr_vnodes, 10, 10); > + > + /* VNUMA_NO_NODE for vnode_to_pnode */ > + vntop = b_info->vnode_to_pnode; 'vntop'? old_pnode? > + b_info->vnode_to_pnode = (unsigned int *)realloc(b_info->vnode_to_pnode, > + b_info->nr_vnodes * > + sizeof(*b_info->vnode_to_pnode)); > + if (b_info->vnode_to_pnode == NULL) { > + b_info->vnode_to_pnode = vntop; > + goto bad_vnumazerocfg; > + } > + vnode_to_pnode_default(b_info->vnode_to_pnode, b_info->nr_vnodes); > + > + /* > + * will be placed to some physical nodes defined by automatic > + * numa placement or VNUMA_NO_NODE will not request exact node > + */ > + libxl_defbool_set(&b_info->vnuma_placement, true); > + return 0; > + > +bad_vnumazerocfg: Actually your code could make this a bit simpler. p = realloc(...) if (p) b_info->vcpu_to_vnode = p; else goto err_out; p = realloc(..) if (p) b_info->vcpu_to_vnode = p; That way you don't have those 'old_vnode' or 'vntop' and only assign the new value if it worked - instead of doing the rewind. bad_vnumazercfg: > + return -1; > +} > + > static void parse_config_data(const char *config_source, > const char *config_data, > int config_len, > @@ -960,6 +1089,11 @@ static void parse_config_data(const char *config_source, > char *cmdline = NULL; > const char *root = NULL, *extra = ""; > > + XLU_ConfigList *vnumamemcfg; > + int nr_vnuma_regions; > + unsigned long long vnuma_memparsed = 0; > + unsigned long ul; > + > xlu_cfg_replace_string (config, "kernel", &b_info->u.pv.kernel, 0); > > xlu_cfg_get_string (config, "root", &root, 0); > @@ -977,6 +1111,106 @@ static void parse_config_data(const char > *config_source, > exit(1); > } > > + libxl_defbool_set(&b_info->vnuma_placement, false); > + > + if (!xlu_cfg_get_long (config, "vnodes", &l, 0)) { > + if (l > MAX_VNUMA_NODES) { > + fprintf(stderr, "Too many vnuma nodes, max %d is > allowed.\n", MAX_VNUMA_NODES); > + exit(1); > + } > + > + b_info->nr_vnodes = l; > + > + xlu_cfg_get_defbool(config, "vnuma_autoplacement", > &b_info->vnuma_placement, 0); > + > + if (b_info->nr_vnodes != 0 && b_info->max_vcpus >= > b_info->nr_vnodes) { > + if (!xlu_cfg_get_list(config, "vnumamem", > + &vnumamemcfg, &nr_vnuma_regions, 0)) { > + > + if (nr_vnuma_regions != b_info->nr_vnodes) { > + fprintf(stderr, "Number of numa regions is > incorrect.\n"); You could make this be: "Number of numa regions (vnumamem=%d) is incorrect (should be %d)" > + exit(1); > + } > + > + b_info->vnuma_memszs = calloc(b_info->nr_vnodes, > + > sizeof(*b_info->vnuma_memszs)); > + if (b_info->vnuma_memszs == NULL) { > + fprintf(stderr, "unable to allocate memory for vnuma > ranges.\n"); Unable > + exit(1); > + } > + > + char *ep; > + /* > + * Will parse only nr_vnodes times, even if we have > more/less regions. > + * Take care of it later if less or discard if too many > regions. > + */ > + for (i = 0; i < b_info->nr_vnodes; i++) { > + buf = xlu_cfg_get_listitem(vnumamemcfg, i); > + if (!buf) { > + fprintf(stderr, > + "xl: Unable to get element %d in vnuma > memroy list.\n", i); s/memroy/memory/ > + break; > + } > + ul = strtoul(buf, &ep, 10); > + if (ep == buf) { > + fprintf(stderr, > + "xl: Invalid argument parsing vnumamem: > %s.\n", buf); > + break; > + } > + > + /* 32Mb is a min size for a node, taken from Linux */ > + if (ul >= UINT32_MAX || ul < MIN_VNODE_SIZE) { > + fprintf(stderr, "xl: vnuma memory %lu is not > withing %u - %u range.\n", s/withing/within/ > + ul, MIN_VNODE_SIZE, UINT32_MAX); > + break; > + } > + > + /* memory in MBytes */ > + b_info->vnuma_memszs[i] = ul; > + } > + > + /* Total memory for vNUMA parsed to verify */ > + for(i = 0; i < nr_vnuma_regions; i++) Editor hungry :-) > + vnuma_memparsed = vnuma_memparsed + > (b_info->vnuma_memszs[i]); > + > + /* Amount of memory for vnodes same as total? */ > + if((vnuma_memparsed << 10) != (b_info->max_memkb)) { Ditto. > + fprintf(stderr, "xl: vnuma memory is not the same as > initial domain memory size.\n"); s/initial domain/domain/ > + vnuma_info_release(b_info); > + exit(1); > + } > + } else { > + b_info->vnuma_memszs = calloc(b_info->nr_vnodes, > + > sizeof(*b_info->vnuma_memszs)); > + if (b_info->vnuma_memszs == NULL) { > + vnuma_info_release(b_info); > + fprintf(stderr, "unable to allocate memory for vnuma > ranges.\n"); > + exit(1); > + } > + > + fprintf(stderr, "WARNING: vNUMA memory ranges were not > specified.\n"); > + fprintf(stderr, "Using default equal vnode memory size > %lu Kbytes to cover %lu Kbytes.\n", > + b_info->max_memkb / b_info->nr_vnodes, > b_info->max_memkb); > + > + if (split_vnumamem(b_info) < 0) { > + fprintf(stderr, "Could not split vnuma memory into > equal chunks.\n"); > + vnuma_info_release(b_info); > + exit(1); > + } > + } > + } > + else > + if (vnuma_zero_config(b_info)) { > + vnuma_info_release(b_info); > + exit(1); > + } > + } > + else > + if (vnuma_zero_config(b_info)) { > + vnuma_info_release(b_info); > + exit(1); > + } > + > xlu_cfg_replace_string (config, "bootloader", > &b_info->u.pv.bootloader, 0); > switch (xlu_cfg_get_list_as_string_list(config, "bootloader_args", > &b_info->u.pv.bootloader_args, 1)) > -- > 1.7.10.4 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxx > http://lists.xen.org/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |