Re: [Xen-devel] [PATCH v6 05/10] libxl: vnuma topology configuration parser and doc
On Fri, Jul 18, 2014 at 6:53 AM, Wei Liu <wei.liu2@xxxxxxxxxx> wrote:
> On Fri, Jul 18, 2014 at 01:50:04AM -0400, Elena Ufimtseva wrote:
>> Parses the vNUMA topology: the number of nodes and the
>> memory ranges. If not defined, initializes vNUMA with
>> only one node and a default topology. This single node
>> covers all domain memory and has all vcpus assigned to it.
>>
>> Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
>> ---
>> docs/man/xl.cfg.pod.5 | 77 ++++++++
>> tools/libxl/libxl_types.idl | 6 +-
>> tools/libxl/libxl_vnuma.h | 8 +
>> tools/libxl/xl_cmdimpl.c | 425 +++++++++++++++++++++++++++++++++++++++++++
>> 4 files changed, 515 insertions(+), 1 deletion(-)
>> create mode 100644 tools/libxl/libxl_vnuma.h
>>
>> diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
>> index ff9ea77..0c7fbf8 100644
>> --- a/docs/man/xl.cfg.pod.5
>> +++ b/docs/man/xl.cfg.pod.5
>> @@ -242,6 +242,83 @@ if the values of B<memory=> and B<maxmem=> differ.
>> A "pre-ballooned" HVM guest needs a balloon driver, without a balloon driver
>> it will crash.
>>
>> +=item B<vnuma_nodes=N>
>> +
>> +Number of vNUMA nodes the guest will be initialized with on boot.
>> +A PV guest will have one vNUMA node by default.
>> +
>
> In the future, all these config options will be used for HVM / PVH
> guests as well. But I'm fine with leaving it as is for the moment.
Sure!
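For context, a guest config fragment exercising these options could look
roughly like the sketch below; vnuma_nodes comes from the doc hunk above and
vnuma_mem is the list parsed later in this patch, while the exact list syntax
and values are only illustrative:

    # sketch of an xl guest config fragment with two vnodes
    memory = 4096
    vcpus = 4
    vnuma_nodes = 2
    # per-node memory in MB, one entry per vnode
    vnuma_mem = [ "2048", "2048" ]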
>
> [...]
>>
>> =head3 Event Actions
>> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
>> index de25f42..5876822 100644
>> --- a/tools/libxl/libxl_types.idl
>> +++ b/tools/libxl/libxl_types.idl
>> @@ -318,7 +318,11 @@ libxl_domain_build_info = Struct("domain_build_info",[
>> ("disable_migrate", libxl_defbool),
>> ("cpuid", libxl_cpuid_policy_list),
>> ("blkdev_start", string),
>> -
>> + ("vnuma_mem", Array(uint64, "nr_nodes")),
>> + ("vnuma_vcpumap", Array(uint32, "nr_nodemap")),
>> + ("vdistance", Array(uint32, "nr_dist")),
>> + ("vnuma_vnodemap", Array(uint32, "nr_node_to_pnode")),
>
> The main problem here is that we need to name counter variables
> num_VARs. See idl.txt, idl.Array section.
Ok, I will rename these.
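A possible shape for the rename, following the num_VAR counter convention Wei
points to in idl.txt (the exact counter names below are only a sketch):

    ("vnuma_mem",      Array(uint64, "num_vnuma_mem")),
    ("vnuma_vcpumap",  Array(uint32, "num_vnuma_vcpumap")),
    ("vdistance",      Array(uint32, "num_vdistance")),
    ("vnuma_vnodemap", Array(uint32, "num_vnuma_vnodemap")),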
>
>> + ("vnuma_autoplacement", libxl_defbool),
>> ("device_model_version", libxl_device_model_version),
>> ("device_model_stubdomain", libxl_defbool),
>> # if you set device_model you must set device_model_version too
>> diff --git a/tools/libxl/libxl_vnuma.h b/tools/libxl/libxl_vnuma.h
>> new file mode 100644
>> index 0000000..4ff4c57
>> --- /dev/null
>> +++ b/tools/libxl/libxl_vnuma.h
>> @@ -0,0 +1,8 @@
>> +#include "libxl_osdeps.h" /* must come before any other headers */
>> +
>> +#define VNUMA_NO_NODE ~((unsigned int)0)
>> +
>> +/* Max vNUMA node size from Linux. */
>
> Should be "Min" I guess.
>
>> +#define MIN_VNODE_SIZE 32U
>> +
>> +#define MAX_VNUMA_NODES (unsigned int)1 << 10
>
> Does this also come from Linux? Or is it just some arbitrary number?
> Worth a comment here.
Yes, this comes from the maximum NODE_SHIFT in Linux's arch/x86/Kconfig.
I will add a comment on this in the code.
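A possible form of the commented definition (the wording is only a sketch;
adding parentheses around the expression would also be worthwhile while
touching the line):

    /*
     * Maximum number of vnodes. Derived from the largest NODE_SHIFT
     * allowed by Linux's arch/x86/Kconfig, i.e. 1 << 10 nodes.
     */
    #define MAX_VNUMA_NODES ((unsigned int)1 << 10)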
>
>> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
>> index 68df548..5d91c2c 100644
>> --- a/tools/libxl/xl_cmdimpl.c
>> +++ b/tools/libxl/xl_cmdimpl.c
>> @@ -40,6 +40,7 @@
>> #include "libxl_json.h"
>> #include "libxlutil.h"
>> #include "xl.h"
>> +#include "libxl_vnuma.h"
>>
>> /* For calls which return an errno on failure */
>> #define CHK_ERRNOVAL( call ) ({ \
>> @@ -690,6 +691,423 @@ static void parse_top_level_sdl_options(XLU_Config *config,
>> xlu_cfg_replace_string (config, "xauthority", &sdl->xauthority, 0);
>> }
>>
>> +
>> +static unsigned int get_list_item_uint(XLU_ConfigList *list, unsigned int i)
>> +{
>> + const char *buf;
>> + char *ep;
>> + unsigned long ul;
>> + int rc = -EINVAL;
>> +
>> + buf = xlu_cfg_get_listitem(list, i);
>> + if (!buf)
>> + return rc;
>> + ul = strtoul(buf, &ep, 10);
>> + if (ep == buf)
>> + return rc;
>> + if (ul >= UINT16_MAX)
>> + return rc;
>> + return (unsigned int)ul;
>> +}
>> +
>> +static void vdistance_set(unsigned int *vdistance,
>> + unsigned int nr_vnodes,
>> + unsigned int samenode,
>> + unsigned int othernode)
>> +{
>> + unsigned int idx, slot;
>> + for (idx = 0; idx < nr_vnodes; idx++)
>> + for (slot = 0; slot < nr_vnodes; slot++)
>> + *(vdistance + slot * nr_vnodes + idx) =
>> + idx == slot ? samenode : othernode;
>> +}
>> +
>> +static void vcputovnode_default(unsigned int *cpu_to_node,
>> + unsigned int nr_vnodes,
>> + unsigned int max_vcpus)
>> +{
>> + unsigned int cpu;
>> + for (cpu = 0; cpu < max_vcpus; cpu++)
>> + cpu_to_node[cpu] = cpu % nr_vnodes;
>> +}
>> +
>> +/* Split domain memory between vNUMA nodes equally. */
>> +static int split_vnumamem(libxl_domain_build_info *b_info)
>> +{
>> + unsigned long long vnodemem = 0;
>> + unsigned long n;
>> + unsigned int i;
>> +
>> + if (b_info->nr_nodes == 0)
>> + return -1;
>> +
>> + vnodemem = (b_info->max_memkb >> 10) / b_info->nr_nodes;
>> + if (vnodemem < MIN_VNODE_SIZE)
>> + return -1;
>> +    /* remainder in MBytes. */
>> + n = (b_info->max_memkb >> 10) % b_info->nr_nodes;
>> + /* get final sizes in MBytes. */
>> + for (i = 0; i < (b_info->nr_nodes - 1); i++)
>> + b_info->vnuma_mem[i] = vnodemem;
>> +    /* add the remainder to the last node. */
>> + b_info->vnuma_mem[i] = vnodemem + n;
>> + return 0;
>> +}
>> +
>> +static void vnuma_vnodemap_default(unsigned int *vnuma_vnodemap,
>> + unsigned int nr_vnodes)
>> +{
>> + unsigned int i;
>> + for (i = 0; i < nr_vnodes; i++)
>> + vnuma_vnodemap[i] = VNUMA_NO_NODE;
>> +}
>> +
>> +/*
>> + * init vNUMA to "zero config" with one node and all other
>> + * topology parameters set to default.
>> + */
>> +static int vnuma_zero_config(libxl_domain_build_info *b_info)
>> +{
>
> I haven't looked into the details of this function, but from the comment it
> sounds like it should be renamed to vnuma_default_config.
>
>> + b_info->nr_nodes = 1;
>> + /* all memory goes to this one vnode, as well as vcpus. */
>> + if (!(b_info->vnuma_mem = (uint64_t *)calloc(b_info->nr_nodes,
>> + sizeof(*b_info->vnuma_mem))))
>> + goto bad_vnumazerocfg;
>> +
>> + if (!(b_info->vnuma_vcpumap = (unsigned int *)calloc(b_info->max_vcpus,
>> + sizeof(*b_info->vnuma_vcpumap))))
>> + goto bad_vnumazerocfg;
>> +
>> + if (!(b_info->vdistance = (unsigned int *)calloc(b_info->nr_nodes *
>> +                                  b_info->nr_nodes, sizeof(*b_info->vdistance))))
>> + goto bad_vnumazerocfg;
>> +
>> + if (!(b_info->vnuma_vnodemap = (unsigned int *)calloc(b_info->nr_nodes,
>> + sizeof(*b_info->vnuma_vnodemap))))
>> + goto bad_vnumazerocfg;
>> +
>> + b_info->vnuma_mem[0] = b_info->max_memkb >> 10;
>> +
>> + /* all vcpus assigned to this vnode. */
>> + vcputovnode_default(b_info->vnuma_vcpumap, b_info->nr_nodes,
>> + b_info->max_vcpus);
>> +
>> + /* default vdistance is 10. */
>> + vdistance_set(b_info->vdistance, b_info->nr_nodes, 10, 10);
>> +
>> + /* VNUMA_NO_NODE for vnode_to_pnode. */
>> + vnuma_vnodemap_default(b_info->vnuma_vnodemap, b_info->nr_nodes);
>> +
>> + /*
>> +     * vnodes will be placed on physical nodes chosen by automatic
>> +     * NUMA placement; VNUMA_NO_NODE means no exact node is requested.
>> + */
>> + libxl_defbool_set(&b_info->vnuma_autoplacement, true);
>> + return 0;
>> +
>> + bad_vnumazerocfg:
>> + return -1;
>> +}
>> +
>> +static void free_vnuma_info(libxl_domain_build_info *b_info)
>> +{
>> + free(b_info->vnuma_mem);
>> + free(b_info->vdistance);
>> + free(b_info->vnuma_vcpumap);
>> + free(b_info->vnuma_vnodemap);
>> + b_info->nr_nodes = 0;
>> +}
>> +
>> +static int parse_vnuma_mem(XLU_Config *config,
>> + libxl_domain_build_info **b_info)
>> +{
>> + libxl_domain_build_info *dst;
>> + XLU_ConfigList *vnumamemcfg;
>> + int nr_vnuma_regions, i;
>> + unsigned long long vnuma_memparsed = 0;
>> + unsigned long ul;
>> + const char *buf;
>> +
>> + dst = *b_info;
>> + if (!xlu_cfg_get_list(config, "vnuma_mem",
>> + &vnumamemcfg, &nr_vnuma_regions, 0)) {
>> +
>> + if (nr_vnuma_regions != dst->nr_nodes) {
>> + fprintf(stderr, "Number of numa regions (vnumamem = %d) is \
>> + incorrect (should be %d).\n", nr_vnuma_regions,
>> + dst->nr_nodes);
>> + goto bad_vnuma_mem;
>> + }
>> +
>> + dst->vnuma_mem = calloc(dst->nr_nodes,
>> + sizeof(*dst->vnuma_mem));
>> + if (dst->vnuma_mem == NULL) {
>> +            fprintf(stderr, "Unable to allocate memory for vnuma ranges.\n");
>> + goto bad_vnuma_mem;
>> + }
>> +
>> + char *ep;
>> + /*
>> +      * Will parse only nr_vnodes times, even if we have more/less regions.
>> + * Take care of it later if less or discard if too many regions.
>> + */
>> + for (i = 0; i < dst->nr_nodes; i++) {
>> + buf = xlu_cfg_get_listitem(vnumamemcfg, i);
>> + if (!buf) {
>> + fprintf(stderr,
>> +                    "xl: Unable to get element %d in vnuma memory list.\n", i);
>> + if (vnuma_zero_config(dst))
>> + goto bad_vnuma_mem;
>> +
>> + }
>
> I think we should fail here instead of creating a "default" config. See
> the reasoning I gave on the hypervisor side.
Yes, I see your point. I replied there as well that we can just have
both sides drop to the default/zero node config.
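If failing hard wins out, the loop body could simply bail out to the existing
error label instead of falling back to the one-node config; a rough sketch:

    buf = xlu_cfg_get_listitem(vnumamemcfg, i);
    if (!buf) {
        fprintf(stderr,
                "xl: Unable to get element %d in vnuma memory list.\n", i);
        goto bad_vnuma_mem;  /* fail instead of falling back to defaults */
    }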
>
>> + ul = strtoul(buf, &ep, 10);
>> + if (ep == buf) {
>> +            fprintf(stderr, "xl: Invalid argument parsing vnumamem: %s.\n", buf);
>> + if (vnuma_zero_config(dst))
>> + goto bad_vnuma_mem;
>> + }
>> +
>
> Ditto.
>
>> +            /* 32MB is the minimum size for a node, taken from Linux. */
>> + if (ul >= UINT32_MAX || ul < MIN_VNODE_SIZE) {
>> +                fprintf(stderr, "xl: vnuma memory %lu is not within %u - %u range.\n",
>> + ul, MIN_VNODE_SIZE, UINT32_MAX);
>> + if (vnuma_zero_config(dst))
>> + goto bad_vnuma_mem;
>> + }
>> +
>
> Ditto.
>
> Wei.
--
Elena
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel