Re: [Xen-devel] [PATCH 2 of 2 RFC] xl: allow for moving the domain's memory when changing vcpu affinity
On 06/07/12 10:54, Dario Faggioli wrote:
> By introducing a new '-M' option to the `xl vcpu-pin' command. The
> actual memory "movement" is achieved by suspending the domain to a
> temporary file and resuming it with the new vcpu-affinity.

Hmm... this will work and be reliable, but it seems a bit clunky.

Long term we want to be able to do node migration in the background
without shutting down a VM, right? If that can be done in the 4.3
timeframe, then it seems unnecessary to implement something like this.

 -George

> Signed-off-by: dario.faggioli <dario.faggioli@xxxxxxxxxx>
>
> diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
> --- a/docs/man/xl.pod.1
> +++ b/docs/man/xl.pod.1
> @@ -606,7 +606,7 @@ after B<vcpu-set>, go to B<SEE ALSO> sec
>  Lists VCPU information for a specific domain. If no domain is
>  specified, VCPU information for all domains will be provided.
>
> -=item B<vcpu-pin> I<domain-id> I<vcpu> I<cpus>
> +=item B<vcpu-pin> [I<OPTIONS>] I<domain-id> I<vcpu> I<cpus>
>
>  Pins the VCPU to only run on the specific CPUs. The keyword B<all>
>  can be used to apply the I<cpus> list to all VCPUs in the
> @@ -617,6 +617,22 @@ different run state is appropriate. Pin
>  this, by ensuring certain VCPUs can only run on certain physical
>  CPUs.
>
> +B<OPTIONS>
> +
> +=over 4
> +
> +=item I<-M>
> +
> +If (and only if) used together with I<vcpu> equal to B<all>, not only are
> +the VCPUs pinned to I<cpus>: xl also tries to make sure all the memory of
> +the domain comes from the NUMA nodes to which I<cpus> belong. This involves
> +suspending and resuming the domain, so some (hopefully small) downtime
> +should be expected, as well as a change of the domain id.
> +
> +=back
> +
> +=back
> +
>  =item B<vm-list>
>
>  Prints information about guests. This list excludes information about
> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
> --- a/tools/libxl/xl_cmdimpl.c
> +++ b/tools/libxl/xl_cmdimpl.c
> @@ -4061,7 +4061,52 @@ int main_vcpulist(int argc, char **argv)
>      return 0;
>  }
>
> -static void vcpupin(const char *d, const char *vcpu, char *cpu)
> +/*
> + * The easiest way of having all the memory allocated in such a way that it
> + * reflects the vcpu affinity is to save and re-create the domain (as it
> + * happens when migrating and suspending/resuming).
> + */
> +static int vcpupin_move(const char *d, char *cpu)
> +{
> +    struct domain_create dom_info;
> +    char savefile[] = "/tmp/vcpupin-autosave.XXXXXX";
> +    char extra_config[1024];
> +    uint8_t *config_data;
> +    int config_len;
> +    int save_fd;
> +
> +    /* Set up a temporary core file for the domain */
> +    save_fd = mkostemp(savefile, O_WRONLY|O_CREAT|O_TRUNC);
> +    if (save_fd < 0)
> +        return -1;
> +
> +    /* Save and destroy the current incarnation of the domain */
> +    MUST(libxl_userdata_retrieve(ctx, domid, "xl", &config_data, &config_len));
> +    save_domain_core_writeconfig(save_fd, savefile, config_data, config_len);
> +
> +    MUST(libxl_domain_suspend(ctx, domid, save_fd, LIBXL_SUSPEND_LIVE, NULL));
> +    MUST(libxl_domain_destroy(ctx, domid));
> +    close(save_fd);
> +    domid = INVALID_DOMID;
> +
> +    /* Default parameters plus some extra_config for domain re-creation */
> +    memset(&dom_info, 0, sizeof(dom_info));
> +    dom_info.quiet = 1;
> +    dom_info.migrate_fd = -1;
> +    dom_info.monitor = dom_info.daemonize = 1;
> +    dom_info.incr_generationid = 1;
> +    dom_info.restore_file = savefile;
> +    sprintf(extra_config, "cpus=\"%s\"\n", cpu);
> +    dom_info.extra_config = extra_config;
> +
> +    /* Finally, re-create the domain */
> +    MUST(create_domain(&dom_info));
> +    unlink(savefile);
> +
> +    return 0;
> +}
> +
> +static void vcpupin(const char *d, const char *vcpu, char *cpu, int move)
>  {
>      libxl_vcpuinfo *vcpuinfo;
>      libxl_cpumap cpumap;
> @@ -4078,6 +4123,10 @@ static void vcpupin(const char *d, const
>          }
>          vcpuid = -1;
>      }
> +    if (vcpuid != -1 && move) {
> +        fprintf(stderr, "Error: can only move when VCPU is \"all\"\n");
> +        return;
> +    }
>
>      find_domain(d);
>
> @@ -4094,18 +4143,22 @@ static void vcpupin(const char *d, const
>          }
>      }
>      else {
> -        if (!(vcpuinfo = libxl_list_vcpu(ctx, domid, &nb_vcpu, &i))) {
> -            fprintf(stderr, "libxl_list_vcpu failed.\n");
> -            goto vcpupin_out1;
> -        }
> -        for (i = 0; i < nb_vcpu; i++) {
> -            if (libxl_set_vcpuaffinity(ctx, domid, vcpuinfo[i].vcpuid,
> -                                       &cpumap) == -1) {
> -                fprintf(stderr, "libxl_set_vcpuaffinity failed"
> -                        " on vcpu `%u'.\n", vcpuinfo[i].vcpuid);
> +        if (move && vcpupin_move(d, cpu))
> +            fprintf(stderr, "Could not move the domain\n");
> +        else {
> +            if (!(vcpuinfo = libxl_list_vcpu(ctx, domid, &nb_vcpu, &i))) {
> +                fprintf(stderr, "libxl_list_vcpu failed.\n");
> +                goto vcpupin_out1;
>          }
> -        }
> -        libxl_vcpuinfo_list_free(vcpuinfo, nb_vcpu);
> +            for (i = 0; i < nb_vcpu; i++) {
> +                if (libxl_set_vcpuaffinity(ctx, domid, vcpuinfo[i].vcpuid,
> +                                           &cpumap) == -1) {
> +                    fprintf(stderr, "libxl_set_vcpuaffinity failed"
> +                            " on vcpu `%u'.\n", vcpuinfo[i].vcpuid);
> +                }
> +            }
> +            libxl_vcpuinfo_list_free(vcpuinfo, nb_vcpu);
> +        }
>      }
>   vcpupin_out1:
>      libxl_cpumap_dispose(&cpumap);
> @@ -4115,12 +4168,20 @@ static void vcpupin(const char *d, const
>  }
>
>  int main_vcpupin(int argc, char **argv)
>  {
> +    int move_memory = 0;
>      int opt;
>
> -    if ((opt = def_getopt(argc, argv, "", "vcpu-pin", 3)) != -1)
> -        return opt;
> -
> -    vcpupin(argv[optind], argv[optind+1] , argv[optind+2]);
> +    if ((opt = def_getopt(argc, argv, "M", "vcpu-pin", 3)) != -1) {
> +        switch(opt) {
> +        case 0: case 2:
> +            return opt;
> +        case 'M':
> +            move_memory = 1;
> +            break;
> +        }
> +    }
> +
> +    vcpupin(argv[optind], argv[optind+1] , argv[optind+2], move_memory);
>      return 0;
>  }
> diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c
> --- a/tools/libxl/xl_cmdtable.c
> +++ b/tools/libxl/xl_cmdtable.c
> @@ -207,7 +207,10 @@ struct cmd_spec cmd_table[] = {
>      { "vcpu-pin",
>        &main_vcpupin, 0, 1,
>        "Set which CPUs a VCPU can use",
> -      "<Domain> <VCPU|all> <CPUs|all>",
> +      "[options] <Domain> <VCPU|all> <CPUs|all>",
> +      "-M  Move the memory to the nodes corresponding to CPUs\n"
> +      "    (involves suspending/resuming the domain, so some\n"
> +      "    downtime is to be expected)"
>      },
>      { "vcpu-set",
>        &main_vcpuset, 0, 1,
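Usage sketch (not part of the patch above): assuming the series were applied
as posted, the new option and a roughly equivalent manual procedure built on
the existing `xl save' and `xl restore' commands would look like the
following. The domain name "guest1", the CPU list and the file paths are
made up for illustration.

    # Proposed usage: pin all the VCPUs of guest1 to pCPUs 0-3 and move its
    # memory to the NUMA node(s) those pCPUs belong to. The domain is
    # suspended to a temporary file and re-created, so a short downtime and
    # a new domain id are to be expected.
    xl vcpu-pin -M guest1 all 0-3

    # Roughly equivalent manual sequence with existing commands, assuming
    # /etc/xen/guest1.cfg has been edited by hand to contain cpus="0-3"
    # before the domain is restored from the checkpoint file.
    xl save guest1 /tmp/guest1.save
    xl restore /etc/xen/guest1.cfg /tmp/guest1.save

This is essentially what vcpupin_move() in the patch automates: save the
domain, destroy it, and re-create it with a cpus= line appended to its
configuration.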