commit ac8c06d07721053ebcd4115ad30a6fa06cfa4086
Author: Wei Liu
Date:   Wed Jun 11 10:52:20 2014 +0200

libxl/xl: push VCPU affinity pinning down to libxl

This patch introduces an array of libxl_bitmap called "vcpu_hard_affinity"
in the libxl IDL to preserve the VCPU to PCPU mapping. This is necessary
for libxl to preserve all the information needed to construct a domain.

The array accommodates at most max_vcpus elements, each containing the
affinity of the respective VCPU. If fewer than max_vcpus bitmaps are
present, the VCPUs corresponding to the missing elements just keep their
default affinity (they are free to execute on every PCPU).

If both this new field and the already existing cpumap field are used, the
content of the array overrides whatever is set in cpumap. (In xl, we make
sure this never happens by using only one of the two at any given time.)

The macro marking the API change (LIBXL_HAVE_BUILDINFO_VCPU_AFFINITY_ARRAYS)
is added but left commented out. It will be uncommented by the patch in this
series that completes the process by adding the "vcpu_soft_affinity" array.
The two fields are being added more or less together, and are very similar
in both meaning and usage, so it makes sense for them to share the same
marker.

This patch was originally part of Wei's series about pushing as much domain
configuration information as possible down into libxl, rather than keeping
it in xl. See here for more details:
http://lists.xen.org/archives/html/xen-devel/2014-06/msg01026.html
http://lists.xen.org/archives/html/xen-devel/2014-06/msg01031.html

Signed-off-by: Wei Liu
Signed-off-by: Dario Faggioli
---
Changes from v9:
 * update the LIBXL_HAVE_xxx macro so that it accounts for both the
   vcpu_hard_affinity array, introduced in this patch, and the
   vcpu_soft_affinity array, introduced in another patch of the series,
   as suggested during review;
 * update the LIBXL_HAVE_xxx macro comment accordingly. Also, reword it
   as requested during review;
 * in xl code, avoid preallocating the whole vcpu_hard_affinity array.
   That is not necessary: vcpus not present in the array will stay with
   their default affinity, so there is no need to pass any empty, or full,
   or whatever cpumap for them;
 * still in xl, while dealing with a list ("cpus = [2, 3]"), it is not
   necessary to mess with cpumap, so kill any reference to it.

Changes from v8:
 * typo in the changelog.

Changes from v7:
 * this patch is new in the series. It comes from Wei's series about
   pushing domain config information down to libxl, and is being
   incorporated here to reduce as much as possible the inter-dependencies
   between the two patch series, i.e., for ease of development and review.
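[Note, not part of the patch: a minimal sketch of how a libxl caller might
use the new field under the semantics described above. The pcpu numbers and
the helper name set_hard_affinity() are made up for illustration; it assumes
a valid ctx and an already initialised b_info, and error handling is minimal.]

    #include <stdio.h>
    #include <stdlib.h>
    #include <libxl.h>
    #include <libxl_utils.h>

    /* Pin vcpu 0 to pcpu 2 and vcpu 1 to pcpu 3 via vcpu_hard_affinity. */
    static int set_hard_affinity(libxl_ctx *ctx,
                                 libxl_domain_build_info *b_info)
    {
        static const int pcpus[] = { 2, 3 };  /* pcpus[j]: pcpu for vcpu j */
        const int nr = sizeof(pcpus) / sizeof(pcpus[0]);
        int j;

        b_info->vcpu_hard_affinity = malloc(nr * sizeof(libxl_bitmap));
        if (!b_info->vcpu_hard_affinity)
            return -1;

        for (j = 0; j < nr; j++) {
            libxl_bitmap_init(&b_info->vcpu_hard_affinity[j]);
            /* one bitmap per vcpu, sized to the number of host pcpus */
            if (libxl_cpu_bitmap_alloc(ctx,
                                       &b_info->vcpu_hard_affinity[j], 0)) {
                fprintf(stderr, "cannot allocate cpumap for vcpu %d\n", j);
                return -1;
            }
            libxl_bitmap_set_none(&b_info->vcpu_hard_affinity[j]);
            libxl_bitmap_set(&b_info->vcpu_hard_affinity[j], pcpus[j]);
        }
        b_info->num_vcpu_hard_affinity = nr;

        /*
         * Vcpus >= nr are not mentioned in the array and keep their default
         * affinity; cpumap, if set, is ignored because the array takes
         * precedence.
         */
        return 0;
    }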
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index a8477c9..d63cd11 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -340,6 +340,31 @@ typedef struct libxl__ctx libxl_ctx;
 #endif
 
 /*
+ * LIBXL_HAVE_BUILDINFO_VCPU_AFFINITY_ARRAYS
+ *
+ * If this is defined, then the libxl_domain_build_info structure will
+ * contain two arrays of libxl_bitmap-s, with all the necessary information
+ * to set the hard affinity (vcpu_hard_affinity) and the soft affinity
+ * (vcpu_soft_affinity) of the VCPUs.
+ *
+ * Note that, if the vcpu_hard_affinity array is used, libxl will ignore
+ * the content of the cpumap field of libxl_domain_build_info. That is to
+ * say, if the array is allocated and used by the caller, it and only it
+ * determines the hard affinity of the domain's VCPUs.
+ *
+ * The number of libxl_bitmap-s in the arrays should be equal to the
+ * maximum number of VCPUs of the domain. If there are only N elements in
+ * an array, with N smaller than the maximum number of VCPUs, the hard or
+ * soft affinity (depending on which array we are talking about) will be
+ * set only for the first N VCPUs. The other VCPUs will just have affinity,
+ * both hard and soft, with all the host PCPUs.
+ * Each bitmap should be big enough to accommodate the maximum number of
+ * PCPUs of the host.
+ */
+/* to be uncommented when the soft affinity array is added */
+/* #define LIBXL_HAVE_BUILDINFO_VCPU_AFFINITY_ARRAYS 1 */
+
+/*
  * LIBXL_HAVE_BUILDINFO_USBDEVICE_LIST
  *
  * If this is defined, then the libxl_domain_build_info structure will
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 5d8a0be..7a60ee2 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -249,7 +249,7 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
      * libxl_domain_set_nodeaffinity() that enacts the actual placement.
      */
     if (libxl_defbool_val(info->numa_placement)) {
-        if (info->cpumap.size) {
+        if (info->cpumap.size || info->num_vcpu_hard_affinity) {
             LOG(ERROR, "Can run NUMA placement only if no vcpu "
                        "affinity is specified explicitly");
             return ERROR_INVAL;
@@ -271,10 +271,23 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
     }
     if (info->nodemap.size)
         libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);
-    if (info->cpumap.size)
+    /* As mentioned in libxl.h, vcpu_hard_affinity takes precedence */
+    if (info->num_vcpu_hard_affinity) {
+        int i;
+
+        for (i = 0; i < info->num_vcpu_hard_affinity; i++) {
+            if (libxl_set_vcpuaffinity(ctx, domid, i,
+                                       &info->vcpu_hard_affinity[i],
+                                       NULL)) {
+                LOG(ERROR, "setting affinity failed on vcpu `%d'", i);
+                return ERROR_FAIL;
+            }
+        }
+    } else if (info->cpumap.size)
         libxl_set_vcpuaffinity_all(ctx, domid, info->max_vcpus,
                                    &info->cpumap, NULL);
+
     if (xc_domain_setmaxmem(ctx->xch, domid, info->target_memkb +
         LIBXL_MAXMEM_CONSTANT) < 0) {
         LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't set max memory");
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 37df854..5607ea7 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -303,6 +303,7 @@ libxl_domain_build_info = Struct("domain_build_info",[
     ("avail_vcpus",     libxl_bitmap),
     ("cpumap",          libxl_bitmap),
     ("nodemap",         libxl_bitmap),
+    ("vcpu_hard_affinity", Array(libxl_bitmap, "num_vcpu_hard_affinity")),
     ("numa_placement",  libxl_defbool),
     ("tsc_mode",        libxl_tsc_mode),
     ("max_memkb",       MemKB),
"cpus", &cpus, 0, 1)) { - int n_cpus = 0; + if (!xlu_cfg_get_list (config, "cpus", &cpus, &num_cpus, 1)) { + int j = 0; - if (libxl_cpu_bitmap_alloc(ctx, &b_info->cpumap, 0)) { - fprintf(stderr, "Unable to allocate cpumap\n"); - exit(1); - } + /* Silently ignore values corresponding to non existing vcpus */ + if (num_cpus > b_info->max_vcpus) + num_cpus = b_info->max_vcpus; - /* Prepare the array for single vcpu to pcpu mappings */ - vcpu_to_pcpu = xmalloc(sizeof(int) * b_info->max_vcpus); - memset(vcpu_to_pcpu, -1, sizeof(int) * b_info->max_vcpus); + b_info->vcpu_hard_affinity = xmalloc(num_cpus * sizeof(libxl_bitmap)); - /* - * Idea here is to let libxl think all the domain's vcpus - * have cpu affinity with all the pcpus on the list. - * It is then us, here in xl, that matches each single vcpu - * to its pcpu (and that's why we need to stash such info in - * the vcpu_to_pcpu array now) after the domain has been created. - * Doing it like this saves the burden of passing to libxl - * some big array hosting the single mappings. Also, using - * the cpumap derived from the list ensures memory is being - * allocated on the proper nodes anyway. - */ - libxl_bitmap_set_none(&b_info->cpumap); - while ((buf = xlu_cfg_get_listitem(cpus, n_cpus)) != NULL) { + while ((buf = xlu_cfg_get_listitem(cpus, j)) != NULL && j < num_cpus) { i = atoi(buf); - if (!libxl_bitmap_cpu_valid(&b_info->cpumap, i)) { - fprintf(stderr, "cpu %d illegal\n", i); + + libxl_bitmap_init(&b_info->vcpu_hard_affinity[j]); + if (libxl_cpu_bitmap_alloc(ctx, + &b_info->vcpu_hard_affinity[j], 0)) { + fprintf(stderr, "Unable to allocate cpumap for vcpu %d\n", j); exit(1); } - libxl_bitmap_set(&b_info->cpumap, i); - if (n_cpus < b_info->max_vcpus) - vcpu_to_pcpu[n_cpus] = i; - n_cpus++; + libxl_bitmap_set_none(&b_info->vcpu_hard_affinity[j]); + libxl_bitmap_set(&b_info->vcpu_hard_affinity[j], i); + + j++; } + b_info->num_vcpu_hard_affinity = num_cpus; - /* We have a cpumap, disable automatic placement */ + /* We have a list of cpumaps, disable automatic placement */ libxl_defbool_set(&b_info->numa_placement, false); } else if (!xlu_cfg_get_string (config, "cpus", &buf, 0)) { @@ -2183,33 +2169,6 @@ start: if ( ret ) goto error_out; - /* If single vcpu to pcpu mapping was requested, honour it */ - if (vcpu_to_pcpu) { - libxl_bitmap vcpu_cpumap; - - ret = libxl_cpu_bitmap_alloc(ctx, &vcpu_cpumap, 0); - if (ret) - goto error_out; - for (i = 0; i < d_config.b_info.max_vcpus; i++) { - - if (vcpu_to_pcpu[i] != -1) { - libxl_bitmap_set_none(&vcpu_cpumap); - libxl_bitmap_set(&vcpu_cpumap, vcpu_to_pcpu[i]); - } else { - libxl_bitmap_set_any(&vcpu_cpumap); - } - if (libxl_set_vcpuaffinity(ctx, domid, i, &vcpu_cpumap, NULL)) { - fprintf(stderr, "setting affinity failed on vcpu `%d'.\n", i); - libxl_bitmap_dispose(&vcpu_cpumap); - free(vcpu_to_pcpu); - ret = ERROR_FAIL; - goto error_out; - } - } - libxl_bitmap_dispose(&vcpu_cpumap); - free(vcpu_to_pcpu); vcpu_to_pcpu = NULL; - } - ret = libxl_userdata_store(ctx, domid, "xl", config_data, config_len); if (ret) {