[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v10 16/31] secondary vm suspend/resume/checkpoint code



On 02/25/2016 11:56 PM, Wei Liu wrote:
> On Mon, Feb 22, 2016 at 10:52:20AM +0800, Wen Congyang wrote:
>> Secondary vm is running in colo mode. So we will do
>> the following things again and again:
>> 1. Resume secondary vm
>>    a. Send CHECKPOINT_SVM_READY to master.
>>    b. If it is not the first resume, call 
>> libxl__checkpoint_devices_preresume().
>>    c. If it is the first resume(resume right after live migration),
>>       - call libxl__xc_domain_restore_done() to build the secondary vm.
>>       - enable secondary vm's logdirty.
>>       - call libxl__domain_resume() to resume secondary vm.
>>       - call libxl__checkpoint_devices_setup() to setup checkpoint devices.
>>    d. Send CHECKPOINT_SVM_RESUMED to master.
>> 2. Wait a new checkpoint
>>    a. Call libxl__checkpoint_devices_commit().
>>    b. Read CHECKPOINT_NEW from master.
>> 3. Suspend secondary vm
>>    a. Suspend secondary vm.
>>    b. Call libxl__checkpoint_devices_postsuspend().
>>    c. Send CHECKPOINT_SVM_SUSPENDED to master.
>>
>> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
>> Signed-off-by: Yang Hongyang <hongyang.yang@xxxxxxxxxxxx>
>> ---
>>  tools/libxc/xc_sr_common.h       |    2 +
>>  tools/libxc/xc_sr_save.c         |    3 +-
>>  tools/libxl/Makefile             |    1 +
>>  tools/libxl/libxl_colo.h         |   24 +
>>  tools/libxl/libxl_colo_restore.c | 1038 
>> ++++++++++++++++++++++++++++++++++++++
>>  tools/libxl/libxl_create.c       |   37 ++
>>  tools/libxl/libxl_internal.h     |   19 +
>>  tools/libxl/libxl_save_callout.c |    7 +-
>>  tools/libxl/libxl_stream_read.c  |   12 +
>>  tools/libxl/libxl_types.idl      |    1 +
> 
> There is a bunch of TODOs in libxl_colo.c but I don't think you're in a
> better position to judge whether they should be blocker or not.
> 
>>  10 files changed, 1142 insertions(+), 2 deletions(-)
>>  create mode 100644 tools/libxl/libxl_colo.h
>>  create mode 100644 tools/libxl/libxl_colo_restore.c
>>
>> diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
>> index 5d9f497..2bfed64 100644
>> --- a/tools/libxc/xc_sr_common.h
>> +++ b/tools/libxc/xc_sr_common.h
>> @@ -184,10 +184,12 @@ struct xc_sr_context
>>       * migration stream
>>       * 0: Plain VM
>>       * 1: Remus
>> +     * 2: COLO
>>       */
>>      enum {
>>          MIG_STREAM_NONE, /* plain stream */
>>          MIG_STREAM_REMUS,
>> +        MIG_STREAM_COLO,
>>      } migration_stream;
>>  
>>      union /* Common save or restore data. */
>> diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
>> index fe210cc..7393355 100644
>> --- a/tools/libxc/xc_sr_save.c
>> +++ b/tools/libxc/xc_sr_save.c
>> @@ -846,7 +846,8 @@ int xc_domain_save(xc_interface *xch, int io_fd, 
>> uint32_t dom,
>>  
>>      /* If altering migration_stream update this assert too. */
>>      assert(checkpointed_stream == MIG_STREAM_NONE ||
>> -           checkpointed_stream == MIG_STREAM_REMUS);
>> +           checkpointed_stream == MIG_STREAM_REMUS ||
>> +           checkpointed_stream == MIG_STREAM_COLO);
>>  
>>      /*
>>       * TODO: Find some time to better tweak the live migration algorithm.
> 
> [...]
> 
>> +
>> +#include "libxl_osdeps.h" /* must come before any other headers */
>> +
>> +#include "libxl_internal.h"
>> +#include "libxl_colo.h"
>> +#include "libxl_sr_stream_format.h"
>> +
>> +enum {
>> +    LIBXL_COLO_SETUPED,
>> +    LIBXL_COLO_SUSPENDED,
>> +    LIBXL_COLO_RESUMED,
>> +};
>> +
>> +typedef struct libxl__colo_restore_checkpoint_state 
>> libxl__colo_restore_checkpoint_state;
>> +struct libxl__colo_restore_checkpoint_state {
>> +    libxl__domain_suspend_state dsps;
>> +    libxl__logdirty_switch lds;
>> +    libxl__colo_restore_state *crs;
>> +    libxl__stream_write_state sws;
>> +    int status;
>> +    bool preresume;
>> +    /* used for teardown */
>> +    int teardown_devices;
>> +    int saved_rc;
>> +    char *state_file;
>> +
>> +    void (*callback)(libxl__egc *,
>> +                     libxl__colo_restore_checkpoint_state *,
>> +                     int);
>> +};
>> +
> 
> Shouldn't the enum and struct belong to libxl_colo.h ?

It only be used by restore side. I think it is OK to move them to libxl_colo.h.

> 
>> +
>> +static void libxl__colo_restore_domain_resume_callback(void *data);
>> +static void libxl__colo_restore_domain_checkpoint_callback(void *data);
>> +static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data);
>> +static void libxl__colo_restore_domain_suspend_callback(void *data);
>> +
>> +static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
>> +    NULL,
>> +};
>> +
> 
> It would be helpful to list the callbacks at the beginning of the time
> in the order they are supposed to occur.
> 
> See libxl_create.c for example. Search for "Event callbacks, in this
> order".

OK, will fix it in the next version.

> 
> I've tried to map the algorithm you described in commit message to all
> the callbacks, but without some references it is just too time consuming
> from my end.
> 
> I think what I'm going to do is to make sure the normal path that
> doesn't use COLO is not broken and leave the internal to you and Ian (if
> he fancies to dig into details).
> 
> [...]
>> +
>> +void libxl__colo_restore_setup(libxl__egc *egc,
>> +                               libxl__colo_restore_state *crs)
>> +{
>> +    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
>> +    libxl__colo_restore_checkpoint_state *crcs;
>> +    int rc = ERROR_FAIL;
>> +
>> +    /* Convenience aliases */
>> +    libxl__srm_restore_autogen_callbacks *const callbacks =
>> +        &dcs->srs.shs.callbacks.restore.a;
>> +    const int domid = crs->domid;
>> +
>> +    STATE_AO_GC(crs->ao);
>> +
>> +    GCNEW(crcs);
>> +    crs->crcs = crcs;
>> +    crcs->crs = crs;
>> +
>> +    /* setup dsps */
>> +    crcs->dsps.ao = ao;
>> +    crcs->dsps.domid = domid;
>> +    if (init_dsps(&crcs->dsps))
>> +        goto err;
>> +
>> +    callbacks->suspend = libxl__colo_restore_domain_suspend_callback;
>> +    callbacks->postcopy = libxl__colo_restore_domain_resume_callback;
>> +    callbacks->checkpoint = libxl__colo_restore_domain_checkpoint_callback;
>> +    callbacks->wait_checkpoint = 
>> libxl__colo_restore_domain_wait_checkpoint_callback;
>> +
>> +    /*
>> +     * Secondary vm is running in colo mode, so we need to call
>> +     * libxl__xc_domain_restore_done() to create secondary vm.
>> +     * But we will exit in domain_create_cb(). So replace the
>> +     * callback here.
>> +     */
>> +    crs->saved_cb = dcs->callback;
>> +    dcs->callback = libxl__colo_domain_create_cb;
>> +    crcs->state_file = GCSPRINTF(LIBXL_DEVICE_MODEL_RESTORE_FILE".%d", 
>> domid);
> 
> Can you use a different name space from the normal one?

We write the qemu state into the restore file in write_emulator_blob().

> 
> For example, you can put
> 
>  #define LIBXL_COLO_DEVICE_MODEL_RESTORE_FILE    XXXX

So if we use a different name space, we should do
#define LIBXL_COLO_DEVICE_MODEL_RESTORE_FILE       
LIBXL_DEVICE_MODEL_RESTORE_FILE

In colo codes, IIRC there is no other code use it.

> 
> in libxl_colo.h and use it in all COLO code.
> 
> 
>> +    crcs->status = LIBXL_COLO_SETUPED;
>> +
>> +    libxl__logdirty_init(&crcs->lds);
>> +    crcs->lds.ao = ao;
>> +
>> +    crcs->sws.fd = crs->send_back_fd;
>> +    crcs->sws.ao = ao;
>> +    crcs->sws.back_channel = true;
>> +
>> +    dcs->cds.concrete_data = crs;
>> +
>> +    libxl__stream_write_start(egc, &crcs->sws);
>> +
>> +    rc = 0;
>> +
>> +out:
>> +    crs->callback(egc, crs, rc);
>> +    return;
>> +
>> +err:
>> +    goto out;
>> +}
>> +
>> +static void libxl__colo_domain_create_cb(libxl__egc *egc,
>> +                                         libxl__domain_create_state *dcs,
>> +                                         int rc, uint32_t domid)
>> +{
>> +    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
>> +
>> +    crcs->callback(egc, crcs, rc);
>> +}
>> +
>> +
> [...]
>> +
>> +static void colo_disable_logdirty_done(libxl__egc *egc,
>> +                                       libxl__logdirty_switch *lds,
>> +                                       int rc)
>> +{
>> +    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, 
>> lds);
>> +
>> +    EGC_GC;
>> +
>> +    if (rc)
>> +        LOG(WARN, "cannot disable logdirty");
>> +
>> +    if (crcs->status == LIBXL_COLO_SUSPENDED) {
>> +        /*
>> +         * failover when reading state from master, so no need to
>> +         * call libxl__domain_restore().
> 
> You need to update this comment to the right function name.

OK, will fix it in the next version.

> 
>> +         */
>> +        colo_resume_vm(egc, crcs, 0);
>> +        return;
>> +    }
>> +
>> +    /* If we cannot disable logdirty, we still can do failover */
>> +    crcs->callback(egc, crcs, 0);
>> +}
>> +
> [...]
>>  
>> +/* colo related structure */
>> +typedef struct libxl__colo_restore_state libxl__colo_restore_state;
>> +typedef void libxl__colo_callback(libxl__egc *,
>> +                                  libxl__colo_restore_state *, int rc);
>> +struct libxl__colo_restore_state {
>> +    /* must set by caller of libxl__colo_(setup|teardown) */
>> +    libxl__ao *ao;
>> +    uint32_t domid;
>> +    int send_back_fd;
>> +    int recv_fd;
>> +    int hvm;
>> +    libxl__colo_callback *callback;
>> +
>> +    /* private, colo restore checkpoint state */
>> +    libxl__domain_create_cb *saved_cb;
>> +    void *crcs;
>> +};
>>  
> 
> And this should go to libxl_colo.h, too? And libxl_internal.h includes
> libxl_colo.h?
> 
> I just don't want to colo structures and functions scatter in
> different places.

OK, will fix it in the next version.

> 
>>  struct libxl__domain_create_state {
>>      /* filled in by user */
>> @@ -3486,6 +3503,8 @@ struct libxl__domain_create_state {
>>      /* private to domain_create */
>>      int guest_domid;
>>      libxl__domain_build_state build_state;
>> +    libxl__colo_restore_state crs;
>> +    libxl__checkpoint_devices_state cds;
>>      libxl__bootloader_state bl;
>>      libxl__stub_dm_spawn_state dmss;
>>          /* If we're not doing stubdom, we use only dmss.dm,
>> diff --git a/tools/libxl/libxl_save_callout.c 
>> b/tools/libxl/libxl_save_callout.c
>> index 0d6949a..b1810b2 100644
>> --- a/tools/libxl/libxl_save_callout.c
>> +++ b/tools/libxl/libxl_save_callout.c
>> @@ -15,6 +15,7 @@
>>  #include "libxl_osdeps.h"
>>  
>>  #include "libxl_internal.h"
>> +#include "libxl_colo.h"
>>  
>>  /* stream_fd is as from the caller (eventually, the application).
>>   * It may be 0, 1 or 2, in which case we need to dup it elsewhere.
>> @@ -68,7 +69,11 @@ void libxl__xc_domain_restore(libxl__egc *egc, 
>> libxl__domain_create_state *dcs,
>>      shs->ao = ao;
>>      shs->domid = domid;
>>      shs->recv_callback = libxl__srm_callout_received_restore;
>> -    shs->completion_callback = libxl__xc_domain_restore_done;
>> +    if (dcs->restore_params.checkpointed_stream ==
>> +                                                
>> LIBXL_CHECKPOINTED_STREAM_COLO)
> 
> This is very strange line wrap.

Yes, will fix it in the next version.

Thanks
Wen Congyang

> 
>> +        shs->completion_callback = libxl__colo_restore_teardown;
>> +    else
>> +        shs->completion_callback = libxl__xc_domain_restore_done;
>>      shs->caller_state = dcs;
>>      shs->need_results = 1;
>>  
>> diff --git a/tools/libxl/libxl_stream_read.c 
>> b/tools/libxl/libxl_stream_read.c
>> index 5d980d9..d6bd2fe 100644
>> --- a/tools/libxl/libxl_stream_read.c
>> +++ b/tools/libxl/libxl_stream_read.c
>> @@ -846,6 +846,18 @@ void libxl__xc_domain_restore_done(libxl__egc *egc, 
>> void *dcs_void,
>>       */
>>      if (libxl__stream_read_inuse(stream)) {
>>          switch (checkpointed_stream) {
>> +        case LIBXL_CHECKPOINTED_STREAM_COLO:
>> +            if (stream->completion_callback) {
>> +                /*
>> +                 * restore, just build the secondary vm, don't close
>> +                 * the stream
>> +                 */
>> +                stream->completion_callback(egc, stream, 0);
>> +            } else {
>> +                /* failover, just close the stream */
>> +                stream_complete(egc, stream, 0);
>> +            }
>> +            break;
>>          case LIBXL_CHECKPOINTED_STREAM_REMUS:
>>              /*
>>               * Failover from primary. Domain state is currently at a
>> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
>> index 632c009..33f4a90 100644
>> --- a/tools/libxl/libxl_types.idl
>> +++ b/tools/libxl/libxl_types.idl
>> @@ -232,6 +232,7 @@ libxl_hdtype = Enumeration("hdtype", [
>>  libxl_checkpointed_stream = Enumeration("checkpointed_stream", [
>>      (0, "NONE"),
>>      (1, "REMUS"),
>> +    (2, "COLO"),
>>      ])
>>  
>>  #
>> -- 
>> 2.5.0
>>
>>
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@xxxxxxxxxxxxx
>> http://lists.xen.org/xen-devel
> 
> 
> .
> 




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.