[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v16 2/7] remus: introduce remus device
Introduce the remus device layer, an abstraction over the devices Remus has to handle (nic, disk, etc.). It provides the following APIs for libxl: >libxl__remus_devices_setup sets up remus devices, e.g. attaches the qdisc, enables disk buffering, etc. >libxl__remus_devices_teardown tears down the devices >libxl__remus_devices_postsuspend >libxl__remus_devices_preresume >libxl__remus_devices_commit The last three are used for checkpointing. With the remus device layer, the remus execution flow looks like this: xl remus -> remus device setup |-> remus checkpoint(postsuspend, preresume, commit) ... |-> remus device teardown, failover or abort The remus device layer provides an interface, libxl__remus_device_subkind_ops, which a remus device must implement. The whole remus structure: |remus| | |remus device| | |nic| |drbd disks| |qemu disks| ... A device (nic, drbd disks, qemu disks, etc.) must implement libxl__remus_device_subkind_ops to support remus. Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx> Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx> --- tools/libxl/Makefile | 2 + tools/libxl/libxl.c | 52 +++++- tools/libxl/libxl.h | 6 + tools/libxl/libxl_dom.c | 155 ++++++++++++++++-- tools/libxl/libxl_internal.h | 171 ++++++++++++++++++++ tools/libxl/libxl_remus_device.c | 339 +++++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_types.idl | 2 + 7 files changed, 706 insertions(+), 21 deletions(-) create mode 100644 tools/libxl/libxl_remus_device.c diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index eb63510..202f1bb 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -56,6 +56,8 @@ else LIBXL_OBJS-y += libxl_nonetbuffer.o endif +LIBXL_OBJS-y += libxl_remus_device.o + LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index a9205d1..95d9953 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -781,9 +781,17 @@ out: return 
ptr; } +static void libxl__remus_setup_done(libxl__egc *egc, + libxl__remus_device_state *rds, int rc); +static void libxl__remus_setup_failed(libxl__egc *egc, + libxl__remus_device_state *rds, int rc); static void remus_failover_cb(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc); +static const libxl__remus_device_subkind_ops *remus_ops[] = { /* NULL-terminated list of device subkind ops; no subkind is registered here yet */ + NULL, +}; + /* TODO: Explicit Checkpoint acknowledgements via recv_fd. */ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, uint32_t domid, int send_fd, int recv_fd, @@ -812,16 +820,52 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, assert(info); - /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */ + /* Fill in the remus device state embedded in dss before starting the device setup */ + libxl__remus_device_state *const rds = &dss->rds; + rds->ao = ao; + rds->egc = egc; + rds->domid = domid; + rds->callback = libxl__remus_setup_done; + rds->ops = remus_ops; /* Point of no return */ - libxl__domain_suspend(egc, dss); + libxl__remus_devices_setup(egc, rds); return AO_INPROGRESS; out: return AO_ABORT(rc); } +static void libxl__remus_setup_done(libxl__egc *egc, + libxl__remus_device_state *rds, int rc) +{ /* Callback of libxl__remus_devices_setup: on success (rc == 0) start the domain suspend, otherwise log the error and tear the devices down again. */ + libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds); + STATE_AO_GC(dss->ao); + + if (!rc) { + libxl__domain_suspend(egc, dss); + return; + } + + LOG(ERROR, "Remus: failed to setup device for guest with domid %u, rc %d", + dss->domid, rc); + rds->callback = libxl__remus_setup_failed; + libxl__remus_devices_teardown(egc, rds); +} + +static void libxl__remus_setup_failed(libxl__egc *egc, + libxl__remus_device_state *rds, int rc) +{ /* Callback of the teardown started by libxl__remus_setup_done. NOTE(review): rc is the result of that teardown, so the setup error that led here is only logged above and is not what dss->callback receives -- confirm this is intended. */ + libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds); + STATE_AO_GC(dss->ao); + + if (rc) + LOG(ERROR, "Remus: failed to teardown device after setup failed" + " for guest with domid %u, rc %d", dss->domid, rc); + + dss->callback(egc, dss, rc); +} + static void remus_failover_cb(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc) { @@ -831,10 
+875,6 @@ static void remus_failover_cb(libxl__egc *egc, * backup died or some network error occurred preventing us * from sending checkpoints. */ - - /* TBD: Remus cleanup - i.e. detach qdisc, release other - * resources. - */ libxl__ao_complete(egc, ao, rc); } diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h index 5ae6532..81905b3 100644 --- a/tools/libxl/libxl.h +++ b/tools/libxl/libxl.h @@ -579,6 +579,12 @@ typedef struct libxl__ctx libxl_ctx; */ #define LIBXL_HAVE_CPUPOOL_NAME 1 +/* + * LIBXL_HAVE_REMUS + * If this is defined, then libxl supports remus. + */ +#define LIBXL_HAVE_REMUS 1 + typedef uint8_t libxl_mac[6]; #define LIBXL_MAC_FMT "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx" #define LIBXL_MAC_FMTLEN ((2*6)+5) /* 6 hex bytes plus 5 colons */ diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c index 83eb29a..91f0bf1 100644 --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -798,8 +798,6 @@ static void domain_suspend_done(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc); static void domain_suspend_callback_common_done(libxl__egc *egc, libxl__domain_suspend_state *dss, int ok); -static void remus_domain_suspend_callback_common_done(libxl__egc *egc, - libxl__domain_suspend_state *dss, int ok); /*----- complicated callback, called by xc_domain_save -----*/ @@ -1461,6 +1459,14 @@ static void domain_suspend_callback_common_done(libxl__egc *egc, } /*----- remus callbacks -----*/ +static void remus_domain_suspend_callback_common_done(libxl__egc *egc, + libxl__domain_suspend_state *dss, int ok); +static void remus_device_postsuspend_cb(libxl__egc *egc, + libxl__remus_device_state *rds, + int rc); +static void remus_device_preresume_cb(libxl__egc *egc, + libxl__remus_device_state *rds, + int rc); static void libxl__remus_domain_suspend_callback(void *data) { @@ -1475,32 +1481,67 @@ static void libxl__remus_domain_suspend_callback(void *data) static void remus_domain_suspend_callback_common_done(libxl__egc *egc, 
libxl__domain_suspend_state *dss, int ok) { - /* REMUS TODO: Issue disk and network checkpoint reqs. */ + if (!ok) + goto out; /* the suspend failed: skip the device postsuspend hooks and report the failure */ + + libxl__remus_device_state *const rds = &dss->rds; + rds->callback = remus_device_postsuspend_cb; + libxl__remus_devices_postsuspend(egc, rds); + return; + +out: libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok); } -static void libxl__remus_domain_resume_callback(void *data) +static void remus_device_postsuspend_cb(libxl__egc *egc, + libxl__remus_device_state *rds, + int rc) { int ok = 0; + libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds); + + if (!rc) + ok = 1; /* rc == 0 from the device layer means success; the save helper is then told ok = 1 */ + libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok); +} + +static void libxl__remus_domain_resume_callback(void *data) +{ libxl__save_helper_state *shs = data; libxl__egc *egc = shs->egc; libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs); STATE_AO_GC(dss->ao); - /* Resumes the domain and the device model */ - if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) - goto out; + libxl__remus_device_state *const rds = &dss->rds; + rds->callback = remus_device_preresume_cb; + libxl__remus_devices_preresume(egc, rds); /* the domain itself is resumed later, from remus_device_preresume_cb */ +} - /* REMUS TODO: Deal with disk. 
Start a new network output buffer */ - ok = 1; -out: - libxl__xc_domain_saverestore_async_callback_done(egc, shs, ok); +static void remus_device_preresume_cb(libxl__egc *egc, + libxl__remus_device_state *rds, + int rc) +{ + int ok = 0; + libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds); + STATE_AO_GC(dss->ao); + + if (!rc) { + /* Resume the domain and the device model; only done when the device preresume hooks reported no error */ + if (!libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) + ok = 1; + } + libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok); } /*----- remus asynchronous checkpoint callback -----*/ static void remus_checkpoint_dm_saved(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc); +static void remus_device_commit_cb(libxl__egc *egc, + libxl__remus_device_state *rds, + int rc); +static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev, + const struct timeval *requested_abs); static void libxl__remus_domain_checkpoint_callback(void *data) { @@ -1520,10 +1561,63 @@ static void libxl__remus_domain_checkpoint_callback(void *data) static void remus_checkpoint_dm_saved(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc) { - /* REMUS TODO: Wait for disk and memory ack, release network buffer */ - /* REMUS TODO: make this asynchronous */ - assert(!rc); /* REMUS TODO handle this error properly */ - usleep(dss->interval * 1000); + /* Convenience aliases */ + libxl__remus_device_state *const rds = &dss->rds; + + STATE_AO_GC(dss->ao); + + if (rc) { + LOG(ERROR, "Failed to save device model. 
Terminating Remus.."); + goto out; + } + + rds->callback = remus_device_commit_cb; + libxl__remus_devices_commit(egc, rds); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0); +} + +static void remus_device_commit_cb(libxl__egc *egc, + libxl__remus_device_state *rds, + int rc) +{ + libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds); + + STATE_AO_GC(dss->ao); + + if (rc) { + LOG(ERROR, "Failed to do device commit op." + " Terminating Remus.."); + goto out; + } else { + /* Arm the checkpoint interval timer (dss->interval); remus_next_checkpoint is the handler registered for it */ + rc = libxl__ev_time_register_rel(gc, &dss->checkpoint_timeout, + remus_next_checkpoint, + dss->interval); + if (rc) { + LOG(ERROR, "unable to register timeout for next epoch." + " Terminating Remus.."); + goto out; + } + } + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0); +} + +static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev, + const struct timeval *requested_abs) +{ + libxl__domain_suspend_state *dss = + CONTAINER_OF(ev, *dss, checkpoint_timeout); + + STATE_AO_GC(dss->ao); + + libxl__ev_time_deregister(gc, &dss->checkpoint_timeout); /* timer handler: drop the registration, then report this checkpoint to the save helper as done (ok = 1) */ libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 1); } @@ -1738,6 +1832,9 @@ static void save_device_model_datacopier_done(libxl__egc *egc, dss->save_dm_callback(egc, dss, our_rc); } +static void libxl__remus_teardown_done(libxl__egc *egc, + libxl__remus_device_state *rds, + int rc); static void domain_suspend_done(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc) { @@ -1752,6 +1849,34 @@ static void domain_suspend_done(libxl__egc *egc, xc_suspend_evtchn_release(CTX->xch, CTX->xce, domid, dss->guest_evtchn.port, &dss->guest_evtchn_lockfd); + if (dss->remus) { + /* + * With Remus, if we reach this point, it means either + * backup died or some network error occurred preventing us + * from sending checkpoints. Teardown the network buffers and + * release netlink resources. This is an async op. 
+ */ + LOGE(WARN, "Remus: Domain suspend terminated with rc %d," + " teardown Remus devices...", rc); + dss->rds.callback = libxl__remus_teardown_done; + libxl__remus_devices_teardown(egc, &dss->rds); + return; + } + + dss->callback(egc, dss, rc); +} + +static void libxl__remus_teardown_done(libxl__egc *egc, + libxl__remus_device_state *rds, + int rc) +{ + libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds); + STATE_AO_GC(dss->ao); + + if (rc) + LOG(ERROR, "Remus: failed to teardown device for guest with domid %u," + " rc %d", dss->domid, rc); + dss->callback(egc, dss, rc); } diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index db10efb..1eba7af 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2470,6 +2470,175 @@ typedef struct libxl__save_helper_state { * marshalling and xc callback functions */ } libxl__save_helper_state; +/*----- remus device related state structure -----*/ +/* remus device is an abstract layer of remus devices(nic, disk, + * etc).It provides the following APIs for libxl: + * >libxl__remus_devices_setup + * setup remus devices, like attach qdisc, enable disk buffering, etc + * >libxl__remus_devices_teardown + * teardown devices + * >libxl__remus_devices_postsuspend + * >libxl__remus_devices_preresume + * >libxl__remus_devices_commit + * above three are for checkpoint. + * through remus device layer, the remus execution flow will be like + * this: + * xl remus -> remus device setup + * |-> remus checkpoint(postsuspend, preresume, commit) + * ... + * |-> remus device teardown, failover or abort + * the remus device layer provides an interface + * libxl__remus_device_subkind_ops + * which a remus device must implement. the whole remus structure: + * |remus| + * | + * |remus device| + * | + * |nic| |drbd disks| |qemu disks| ... + * a device(nic, drbd disks, qemu disks, etc) must implement + * libxl__remus_device_subkind_ops to support remus. 
+ */ + +typedef enum libxl__remus_device_kind { + LIBXL__REMUS_DEVICE_NIC, + LIBXL__REMUS_DEVICE_DISK, +} libxl__remus_device_kind; + +typedef struct libxl__remus_device libxl__remus_device; +typedef struct libxl__remus_device_state libxl__remus_device_state; +typedef struct libxl__remus_device_subkind_ops libxl__remus_device_subkind_ops; + +struct libxl__remus_device_subkind_ops { + /* the device kind these ops belong to */ + libxl__remus_device_kind kind; + + /* + * init() and destroy() are provided by a device subkind and + * called by the main remus code; a device subkind must implement + * both of them. + * They set up and release the subkind's private data, which is stored + * in CTX. + */ + int (*init)(libxl__remus_device_state *rds); + void (*destroy)(libxl__remus_device_state *rds); + + /* + * checkpoint callbacks. These are async ops and call dev->callback + * when done. These function pointers may be NULL, which means the op is + * not implemented and nothing is done for this device at checkpoint. + * The callers of these APIs must check the function pointer first. + * These callbacks may also be implemented synchronously, calling + * dev->callback directly as their last step. + */ + void (*postsuspend)(libxl__remus_device *dev); + void (*preresume)(libxl__remus_device *dev); + void (*commit)(libxl__remus_device *dev); + + /* + * This API determines whether the subkind matches the specific device. In + * the implementation, we first init all device subkinds, for example NIC, + * DRBD disk... Then we find the libxl devices and match each + * device with a subkind: if the device is a drbd disk, then it will be + * matched with the DRBD subkind, and the further ops (such as checkpoint) + * of this device will use the DRBD subkind ops. This API is mainly for + * disks, because we must use an external script to determine whether a + * libxl_disk is a DRBD disk. + * This function pointer may be NULL. 
That means the ops of this *kind* + * always match a device of the same *kind*. + * It's an async op and must be implemented asynchronously; + * it calls dev->callback when done. + */ + void (*match)(libxl__remus_device *dev); + + /* + * setup() and teardown() refer to the actual remus device; + * a device subkind must implement these two APIs. They are async + * ops and call dev->callback when done. + * These callbacks may also be implemented synchronously, calling + * dev->callback directly as their last step. + */ + void (*setup)(libxl__remus_device *dev); + void (*teardown)(libxl__remus_device *dev); +}; + +typedef void libxl__remus_callback(libxl__egc *, + libxl__remus_device_state *, int rc); + +/* + * This structure is for the remus device layer; it records the remus + * devices that have been set up. + */ +struct libxl__remus_device_state { + /* must be set by the caller of libxl__remus_devices_(setup|teardown) */ + libxl__ao *ao; + libxl__egc *egc; + uint32_t domid; + libxl__remus_callback *callback; + /* NULL-terminated array: the last ops must be NULL */ + const libxl__remus_device_subkind_ops **ops; + + /* private */ + /* saved_rc: error (if any) recorded while an operation is in progress; dev: devices that have been set up */ + int saved_rc; + libxl__remus_device **dev; + + int num_nics; + int num_disks; + + /* for counting devices that have been handled */ + int num_devices; + /* for counting devices that have been set up */ + int num_set_up; +}; + +typedef void libxl__remus_device_callback(libxl__egc *, + libxl__remus_device *, + int rc); +/* + * This structure is initialized and set up by the remus device abstract + * layer, and passed to the remus device ops + */ +struct libxl__remus_device { + /*----- shared between abstract and concrete layers -----*/ + /* set by the remus device abstract layer */ + /* the libxl__device_* which this remus device is related to */ + const void *backend_dev; + libxl__remus_device_kind kind; + + /*----- private for abstract layer only -----*/ + /* + * we must go through all device ops until we find a matching ops + * for the device. 
+ */ + int ops_index; + const libxl__remus_device_subkind_ops *ops; + libxl__remus_device_callback *callback; + libxl__remus_device_state *rds; + + /*----- private for concrete (device-specific) layer -----*/ + /* private data of this *kind* of device */ + void *data; + /* for calling scripts, e.g. setup|teardown|match scripts */ + libxl__async_exec_state aes; + /* + * for async func calls: in the implementation of device ops, we + * may use fork to do async ops. This is owned by the device-specific + * ops methods + */ + libxl__ev_child child; +}; + +/* the following 5 APIs are async ops; they call rds->callback when done */ +_hidden void libxl__remus_devices_setup(libxl__egc *egc, + libxl__remus_device_state *rds); +_hidden void libxl__remus_devices_teardown(libxl__egc *egc, + libxl__remus_device_state *rds); +_hidden void libxl__remus_devices_postsuspend(libxl__egc *egc, + libxl__remus_device_state *rds); +_hidden void libxl__remus_devices_preresume(libxl__egc *egc, + libxl__remus_device_state *rds); +_hidden void libxl__remus_devices_commit(libxl__egc *egc, + libxl__remus_device_state *rds); _hidden int libxl__netbuffer_enabled(libxl__gc *gc); /*----- Domain suspend (save) state structure -----*/ @@ -2510,6 +2679,8 @@ struct libxl__domain_suspend_state { libxl__ev_xswatch guest_watch; libxl__ev_time guest_timeout; const char *dm_savefile; + libxl__remus_device_state rds; + libxl__ev_time checkpoint_timeout; /* timer for the Remus checkpoint interval */ int interval; /* checkpoint interval (for Remus) */ libxl__save_helper_state shs; libxl__logdirty_switch logdirty; diff --git a/tools/libxl/libxl_remus_device.c b/tools/libxl/libxl_remus_device.c new file mode 100644 index 0000000..14cca79 --- /dev/null +++ b/tools/libxl/libxl_remus_device.c @@ -0,0 +1,339 @@ +/* + * Copyright (C) 2014 + * Author: Yang Hongyang <yanghy@xxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by 
the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include "libxl_osdeps.h" /* must come before any other headers */ + +#include "libxl_internal.h" + +/*----- helper functions -----*/ + +static int init_device_subkind(libxl__remus_device_state *rds) +{ + int rc; + const libxl__remus_device_subkind_ops **ops; + + for (ops = rds->ops; *ops; ops++) { + rc = (*ops)->init(rds); + if (rc) { + goto out; + } + } + + rc = 0; +out: + return rc; + +} + +static void destroy_device_subkind(libxl__remus_device_state *rds) +{ + const libxl__remus_device_subkind_ops **ops; + + for (ops = rds->ops; *ops; ops++) + (*ops)->destroy(rds); +} + +static bool all_devices_handled(libxl__remus_device_state *rds) +{ + return rds->num_devices == (rds->num_nics + rds->num_disks); +} + +/*----- setup() and teardown() -----*/ + +/* callbacks */ + +static void device_match_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc); +static void device_setup_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc); +static void device_teardown_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc); + +/* remus device setup and teardown */ + +static __attribute__((unused)) void libxl__remus_device_init(libxl__egc *egc, + libxl__remus_device_state *rds, + libxl__remus_device_kind kind, + void *libxl_dev); +void libxl__remus_devices_setup(libxl__egc *egc, libxl__remus_device_state *rds) +{ + STATE_AO_GC(rds->ao); + + if (!rds->ops[0]) + goto out; + + rds->saved_rc = init_device_subkind(rds); + if (rds->saved_rc) + goto out; + + rds->num_devices = 0; + rds->num_nics = 0; + rds->num_disks = 0; + + /* TBD: Remus setup - i.e. 
attach qdisc, enable disk buffering, etc */ + + if (rds->num_nics == 0 && rds->num_disks == 0) + goto out; + + GCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks); + + /* TBD: CALL libxl__remus_device_init to init remus devices */ + + return; + +out: + rds->callback(egc, rds, rds->saved_rc); + return; +} + +static __attribute__((unused)) void libxl__remus_device_init(libxl__egc *egc, + libxl__remus_device_state *rds, + libxl__remus_device_kind kind, + void *libxl_dev) +{ + libxl__remus_device *dev = NULL; + + STATE_AO_GC(rds->ao); + GCNEW(dev); + dev->backend_dev = libxl_dev; + dev->kind = kind; + dev->rds = rds; + + libxl__async_exec_init(&dev->aes); + libxl__ev_child_init(&dev->child); + + /* match the ops begin */ + dev->ops_index = 0; + dev->ops = rds->ops[dev->ops_index]; + for (; dev->ops; dev->ops = rds->ops[++dev->ops_index]) { + if (dev->ops->kind == dev->kind) { + if (dev->ops->match) { + dev->callback = device_match_cb; + dev->ops->match(dev); + } else { + /* + * This devops do not have match() implementation. + * That means this *kind* of device's ops is always + * matched with the *kind* of device. 
+ */ + dev->callback = device_setup_cb; + dev->ops->setup(dev); + } + break; + } + } + + if (!dev->ops) { + rds->num_devices++; + rds->saved_rc = ERROR_REMUS_DEVICE_NOT_SUPPORTED; + if (all_devices_handled(rds)) + rds->callback(egc, rds, rds->saved_rc); + } +} + +static void device_match_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc) +{ + libxl__remus_device_state *const rds = dev->rds; + + STATE_AO_GC(rds->ao); + + if (rds->saved_rc) { + /* there's already an error happened, we do not need to continue */ + rds->num_devices++; + if (all_devices_handled(rds)) + rds->callback(egc, rds, rds->saved_rc); + return; + } + + if (rc) { + /* the ops does not match, try next ops */ + dev->ops = rds->ops[++dev->ops_index]; + if (!dev->ops || rc != ERROR_REMUS_DEVOPS_NOT_MATCH) { + /* the device can not be matched */ + rds->num_devices++; + rds->saved_rc = ERROR_REMUS_DEVICE_NOT_SUPPORTED; + if (all_devices_handled(rds)) + rds->callback(egc, rds, rds->saved_rc); + return; + } + for ( ; dev->ops; dev->ops = rds->ops[++dev->ops_index]) { + if (dev->ops->kind == dev->kind) { + /* + * we have entered match process, that means this *kind* of + * device's ops must have a match() implementation. + */ + assert(dev->ops->match); + dev->ops->match(dev); + break; + } + } + } else { + /* the ops matched, setup the device */ + dev->callback = device_setup_cb; + dev->ops->setup(dev); + } +} + +static void device_setup_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc) +{ + /* Convenience aliases */ + libxl__remus_device_state *const rds = dev->rds; + + STATE_AO_GC(rds->ao); + + rds->num_devices++; + /* + * the netbuf script was designed as below: + * 1. when setup failed, the script won't teardown the device itself. + * 2. the teardown op is ok to be executed many times. + * + * we add devices that have been set up to the array no matter + * the setup process succeed or failed because we need to ensure + * the device been teardown while setup failed. 
If any of the + * device setup failed, we will quit remus, but before we exit, + * we will teardown the devices that have been added to **dev + */ + rds->dev[rds->num_set_up++] = dev; + /* we preserve the first error that happened */ + if (rc && !rds->saved_rc) + rds->saved_rc = rc; + + if (all_devices_handled(rds)) + rds->callback(egc, rds, rds->saved_rc); +} + +void libxl__remus_devices_teardown(libxl__egc *egc, libxl__remus_device_state *rds) +{ + int i, num_set_up; + libxl__remus_device *dev; + + STATE_AO_GC(rds->ao); + + rds->saved_rc = 0; + + if (rds->num_set_up == 0) { + destroy_device_subkind(rds); + goto out; + } + + /* we will decrease rds->num_set_up in the teardown callback */ + num_set_up = rds->num_set_up; + for (i = 0; i < num_set_up; i++) { + dev = rds->dev[i]; + dev->callback = device_teardown_cb; + dev->ops->teardown(dev); + } + + return; + +out: + rds->callback(egc, rds, rds->saved_rc); + return; +} + +static void device_teardown_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc) +{ + libxl__remus_device_state *const rds = dev->rds; + + STATE_AO_GC(rds->ao); + + /* we preserve the first error that happened */ + if (rc && !rds->saved_rc) + rds->saved_rc = rc; + + /* ignore teardown errors to teardown as many devs as possible*/ + rds->num_set_up--; + + if (rds->num_set_up == 0) { + destroy_device_subkind(rds); + rds->callback(egc, rds, rds->saved_rc); + } +} + +/*----- checkpointing APIs -----*/ + +/* callbacks */ + +static void device_checkpoint_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc); + +/* API implementations */ + +#define define_remus_device_checkpoint_api(api) \ +void libxl__remus_devices_##api(libxl__egc *egc, \ + libxl__remus_device_state *rds) \ +{ \ + int i; \ + libxl__remus_device *dev; \ + \ + STATE_AO_GC(rds->ao); \ + \ + rds->num_devices = 0; \ + rds->saved_rc = 0; \ + \ + if (rds->num_set_up == 0) \ + goto out; \ + \ + for (i = 0; i < rds->num_set_up; i++) { \ + dev = rds->dev[i]; \ + dev->callback = 
device_checkpoint_cb; \ + if (dev->ops->api) { \ + dev->ops->api(dev); \ + } else { \ + rds->num_devices++; \ + if (rds->num_devices == rds->num_set_up) \ + rds->callback(egc, rds, rds->saved_rc); \ + } \ + } \ + \ + return; \ + \ +out: \ + rds->callback(egc, rds, rds->saved_rc); \ +} + +define_remus_device_checkpoint_api(postsuspend); + +define_remus_device_checkpoint_api(preresume); + +define_remus_device_checkpoint_api(commit); + +static void device_checkpoint_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc) +{ + /* Convenience aliases */ + libxl__remus_device_state *const rds = dev->rds; + + STATE_AO_GC(rds->ao); + + rds->num_devices++; + + if (rc) + rds->saved_rc = ERROR_FAIL; + + if (rds->num_devices == rds->num_set_up) + rds->callback(egc, rds, rds->saved_rc); +} diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index a412f9c..61d31c1 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -58,6 +58,8 @@ libxl_error = Enumeration("error", [ (-12, "OSEVENT_REG_FAIL"), (-13, "BUFFERFULL"), (-14, "UNKNOWN_CHILD"), + (-15, "REMUS_DEVOPS_NOT_MATCH"), + (-16, "REMUS_DEVICE_NOT_SUPPORTED"), ], value_namespace = "") libxl_domain_type = Enumeration("domain_type", [ -- 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |