[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v6 COLO 02/15] secondary vm suspend/resume/checkpoint code



On Mon, Jun 08, 2015 at 11:45:46AM +0800, Yang Hongyang wrote:
> From: Wen Congyang <wency@xxxxxxxxxxxxxx>
> 
> Secondary vm is running in colo mode. So we will do
> the following things again and again:
> 1. Resume secondary vm
>    a. Send LIBXL_COLO_SVM_READY to master.
>    b. If it is not the first resume, call 
> libxl__checkpoint_devices_preresume().
>    c. If it is the first resume(resume right after live migration),
>       - call libxl__xc_domain_restore_done() to build the secondary vm.
>       - enable secondary vm's logdirty.
>       - call libxl__domain_resume() to resume secondary vm.
>       - call libxl__checkpoint_devices_setup() to setup checkpoint devices.
>    d. Send LIBXL_COLO_SVM_RESUMED to master.
> 2. Wait a new checkpoint
>    a. Call libxl__checkpoint_devices_commit().
>    b. Read LIBXL_COLO_NEW_CHECKPOINT from master.
> 3. Suspend secondary vm
>    a. Suspend secondary vm.
>    b. Call libxl__checkpoint_devices_postsuspend().
>    c. Get secondary vm's dirty page information.
>    d. Send LIBXL_COLO_SVM_SUSPENDED to master.
>    e. Send secondary vm's dirty page information to master(count + pfn list).
> 
> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
> Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
> ---
[...]
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU Lesser General Public License for more details.
> + */
> +
> +#ifndef LIBXL_COLO_H
> +#define LIBXL_COLO_H
> +
> +/*
> + * values to control suspend/resume primary vm and secondary vm
> + * at the same time
> + */
> +enum {
> +    LIBXL_COLO_NEW_CHECKPOINT = 1,
> +    LIBXL_COLO_SVM_SUSPENDED,
> +    LIBXL_COLO_SVM_READY,
> +    LIBXL_COLO_SVM_RESUMED,
> +};
> +

Any reason to not have this in IDL?

> +extern void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void,
> +                                     int ret, int retval, int errnoval);
> +extern void libxl__colo_restore_setup(libxl__egc *egc,
> +                                      libxl__colo_restore_state *crs);
> +extern void libxl__colo_restore_teardown(libxl__egc *egc,
> +                                         libxl__colo_restore_state *crs,
> +                                         int rc);
> +
> +#endif
> diff --git a/tools/libxl/libxl_colo_restore.c 
> b/tools/libxl/libxl_colo_restore.c
> new file mode 100644
> index 0000000..6c39758
> --- /dev/null
> +++ b/tools/libxl/libxl_colo_restore.c
> @@ -0,0 +1,1158 @@
> +/*
> + * Copyright (C) 2014 FUJITSU LIMITED
> + * Author: Wen Congyang <wency@xxxxxxxxxxxxxx>
> + *         Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU Lesser General Public License as published
> + * by the Free Software Foundation; version 2.1 only. with the special
> + * exception on linking described in file LICENSE.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU Lesser General Public License for more details.
> + */
> +
> +#include "libxl_osdeps.h" /* must come before any other headers */
> +
> +#include "libxl_internal.h"
> +#include "libxl_colo.h"
> +#include "xc_bitops.h"
> +
> +#define XC_PAGE_SHIFT           12
> +#define PAGE_SHIFT              XC_PAGE_SHIFT

I don't think you need these.

> +#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & 
> ~((1UL<<(_w))-1))
> +#define NRPAGES(x) (ROUNDUP(x, PAGE_SHIFT) >> PAGE_SHIFT)

And you can use XC_PAGE_SHIFT directly in above macro.

> +
> +enum {
> +    LIBXL_COLO_SETUPED,
> +    LIBXL_COLO_SUSPENDED,
> +    LIBXL_COLO_RESUMED,
> +};
> +

Move it to IDL as well?

> +typedef struct libxl__colo_restore_checkpoint_state 
> libxl__colo_restore_checkpoint_state;
> +struct libxl__colo_restore_checkpoint_state {
> +    xc_hypercall_buffer_t _dirty_bitmap;
> +    xc_hypercall_buffer_t *dirty_bitmap;

This one looks like layer violation to me. I don't have other good
suggestion on how to do this though. Maybe Ian and Ian have better idea.

> +    unsigned long p2m_size;
> +    libxl__domain_suspend_state dsps;
> +    libxl__datacopier_state dc;
> +    uint8_t section;

This could use a better name like "stage" / "state"?

> +    libxl__logdirty_switch lds;
> +    libxl__colo_restore_state *crs;
> +    int status;
> +    bool preresume;
> +    /* used for teardown */
> +    int teardown_devices;
> +    int saved_rc;
> +
> +    void (*callback)(libxl__egc *,
> +                     libxl__colo_restore_checkpoint_state *,
> +                     int);
> +
> +    /*
> +     * 0: secondary vm's dirty bitmap for domain @domid
> +     * 1: secondary vm is ready(domain @domid)
> +     * 2: secondary vm is resumed(domain @domid)
> +     * 3. new checkpoint is triggered(domain @domid)
> +     */
> +    const char *copywhat[4];
> +};
> +
> +
> +static void libxl__colo_restore_domain_resume_callback(void *data);
> +static void libxl__colo_restore_domain_checkpoint_callback(void *data);
> +static void libxl__colo_restore_domain_suspend_callback(void *data);
> +
> +static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
> +    NULL,
> +};
> +
[...]
> +    crcs->status = LIBXL_COLO_RESUMED;
> +
> +    /* avoid calling libxl__xc_domain_restore_done() more than once */
> +    if (crs->saved_cb) {
> +        dcs->callback = crs->saved_cb;
> +        crs->saved_cb = NULL;
> +

I have a feeling that this trick should be avoided. But I'm not an
expert on this so I will defer judgement to Ian J.

> +        lds->callback = colo_enable_logdirty_done;
> +        colo_enable_logdirty(crs, egc);
> +        return;
> +    }
> +
> +    colo_write_svm_resumed(egc, crcs);
> +    return;
> +
> +out:
> +    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
> +}
> +
[...]
>  
> +_hidden void logdirty_init(libxl__logdirty_switch *lds);
> +

This function should be in libxl__ namespace.

Other than these cosmetic issues I don't really have the expertise to
comment further.

Wei.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.