|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v6 COLO 02/15] secondary vm suspend/resume/checkpoint code
On Mon, Jun 08, 2015 at 11:45:46AM +0800, Yang Hongyang wrote:
> From: Wen Congyang <wency@xxxxxxxxxxxxxx>
>
> Secondary vm is running in colo mode. So we will do
> the following things again and again:
> 1. Resume secondary vm
> a. Send LIBXL_COLO_SVM_READY to master.
> b. If it is not the first resume, call
> libxl__checkpoint_devices_preresume().
> c. If it is the first resume(resume right after live migration),
> - call libxl__xc_domain_restore_done() to build the secondary vm.
> - enable secondary vm's logdirty.
> - call libxl__domain_resume() to resume secondary vm.
> - call libxl__checkpoint_devices_setup() to setup checkpoint devices.
> d. Send LIBXL_COLO_SVM_RESUMED to master.
> 2. Wait a new checkpoint
> a. Call libxl__checkpoint_devices_commit().
> b. Read LIBXL_COLO_NEW_CHECKPOINT from master.
> 3. Suspend secondary vm
> a. Suspend secondary vm.
> b. Call libxl__checkpoint_devices_postsuspend().
> c. Get secondary vm's dirty page information.
> d. Send LIBXL_COLO_SVM_SUSPENDED to master.
> e. Send secondary vm's dirty page information to master(count + pfn list).
>
> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
> Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
> ---
[...]
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU Lesser General Public License for more details.
> + */
> +
> +#ifndef LIBXL_COLO_H
> +#define LIBXL_COLO_H
> +
> +/*
> + * values to control suspend/resume primary vm and secondary vm
> + * at the same time
> + */
> +enum {
> + LIBXL_COLO_NEW_CHECKPOINT = 1,
> + LIBXL_COLO_SVM_SUSPENDED,
> + LIBXL_COLO_SVM_READY,
> + LIBXL_COLO_SVM_RESUMED,
> +};
> +
Any reason to not have this in IDL?
> +extern void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void,
> + int ret, int retval, int errnoval);
> +extern void libxl__colo_restore_setup(libxl__egc *egc,
> + libxl__colo_restore_state *crs);
> +extern void libxl__colo_restore_teardown(libxl__egc *egc,
> + libxl__colo_restore_state *crs,
> + int rc);
> +
> +#endif
> diff --git a/tools/libxl/libxl_colo_restore.c
> b/tools/libxl/libxl_colo_restore.c
> new file mode 100644
> index 0000000..6c39758
> --- /dev/null
> +++ b/tools/libxl/libxl_colo_restore.c
> @@ -0,0 +1,1158 @@
> +/*
> + * Copyright (C) 2014 FUJITSU LIMITED
> + * Author: Wen Congyang <wency@xxxxxxxxxxxxxx>
> + * Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU Lesser General Public License as published
> + * by the Free Software Foundation; version 2.1 only. with the special
> + * exception on linking described in file LICENSE.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU Lesser General Public License for more details.
> + */
> +
> +#include "libxl_osdeps.h" /* must come before any other headers */
> +
> +#include "libxl_internal.h"
> +#include "libxl_colo.h"
> +#include "xc_bitops.h"
> +
> +#define XC_PAGE_SHIFT 12
> +#define PAGE_SHIFT XC_PAGE_SHIFT
I don't think you need these.
> +#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) &
> ~((1UL<<(_w))-1))
> +#define NRPAGES(x) (ROUNDUP(x, PAGE_SHIFT) >> PAGE_SHIFT)
And you can use XC_PAGE_SHIFT directly in above macro.
> +
> +enum {
> + LIBXL_COLO_SETUPED,
> + LIBXL_COLO_SUSPENDED,
> + LIBXL_COLO_RESUMED,
> +};
> +
Move it to IDL as well?
> +typedef struct libxl__colo_restore_checkpoint_state
> libxl__colo_restore_checkpoint_state;
> +struct libxl__colo_restore_checkpoint_state {
> + xc_hypercall_buffer_t _dirty_bitmap;
> + xc_hypercall_buffer_t *dirty_bitmap;
This one looks like layer violation to me. I don't have other good
suggestion on how to do this though. Maybe Ian and Ian have better idea.
> + unsigned long p2m_size;
> + libxl__domain_suspend_state dsps;
> + libxl__datacopier_state dc;
> + uint8_t section;
This could use a better name like "stage" / "state"?
> + libxl__logdirty_switch lds;
> + libxl__colo_restore_state *crs;
> + int status;
> + bool preresume;
> + /* used for teardown */
> + int teardown_devices;
> + int saved_rc;
> +
> + void (*callback)(libxl__egc *,
> + libxl__colo_restore_checkpoint_state *,
> + int);
> +
> + /*
> + * 0: secondary vm's dirty bitmap for domain @domid
> + * 1: secondary vm is ready(domain @domid)
> + * 2: secondary vm is resumed(domain @domid)
> + * 3. new checkpoint is triggered(domain @domid)
> + */
> + const char *copywhat[4];
> +};
> +
> +
> +static void libxl__colo_restore_domain_resume_callback(void *data);
> +static void libxl__colo_restore_domain_checkpoint_callback(void *data);
> +static void libxl__colo_restore_domain_suspend_callback(void *data);
> +
> +static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
> + NULL,
> +};
> +
[...]
> + crcs->status = LIBXL_COLO_RESUMED;
> +
> + /* avoid calling libxl__xc_domain_restore_done() more than once */
> + if (crs->saved_cb) {
> + dcs->callback = crs->saved_cb;
> + crs->saved_cb = NULL;
> +
I have a feeling that this trick should be avoided. But I'm not an
expert on this so I will defer judgement to Ian J.
> + lds->callback = colo_enable_logdirty_done;
> + colo_enable_logdirty(crs, egc);
> + return;
> + }
> +
> + colo_write_svm_resumed(egc, crcs);
> + return;
> +
> +out:
> + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
> +}
> +
[...]
>
> +_hidden void logdirty_init(libxl__logdirty_switch *lds);
> +
This function should be in libxl__ namespace.
Other than these cosmetic issues I don't really have the expertise to
comment further.
Wei.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |