|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC Patch v3 19/22] libxl/colo: setup and control disk replication for blktap2 backends
This patch adds the machinery required for protecting a guest's
disk state, when the guest disk uses a blktap2 disk backend.
1. COLO blktap2 disk device: Implements the interfaces required by the
checkpoint abstract device layer. A note about the implementation:
a) setup() is called for each disk attached to the guest.
During setup():
i) perform the sanity check: backend type should be LIBXL_DISK_BACKEND_TAP
and format should be LIBXL_DISK_FORMAT_COLO.
ii) connect to the control socket /var/run/tap/colo_xxx, where xxx is
"host:port" (the characters ':' and '/' are replaced with '_').
b) The postsuspend() callback writes "flush" to this socket.
c) The commit() callback waits for and reads "done" from this socket.
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
docs/man/xl.pod.1 | 3 +-
tools/libxl/Makefile | 2 +-
tools/libxl/libxl_colo_save.c | 36 ++++-
tools/libxl/libxl_colo_save_disk_blktap2.c | 214 +++++++++++++++++++++++++++++
tools/libxl/libxl_create.c | 7 +
tools/libxl/libxl_internal.h | 2 +
tools/libxl/libxl_noblktap2.c | 29 ++++
7 files changed, 289 insertions(+), 4 deletions(-)
create mode 100644 tools/libxl/libxl_colo_save_disk_blktap2.c
diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index 297cd04..d528e7a 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -434,7 +434,8 @@ N.B: Remus support in xl is still in experimental
(proof-of-concept) phase.
Disk replication support is limited to DRBD disks.
COLO support in xl is still in experimental (proof-of-concept) phase.
- There is no support for network or disk at the moment.
+ There is no support for network at the moment.
+ Disk replication support is limited to blktap2 disks.
B<OPTIONS>
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 1c32ae2..b4755c8 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -45,7 +45,7 @@ LIBXLU_LIBS =
LIBXL_OBJS-y = osdeps.o libxl_paths.o libxl_bootloader.o flexarray.o
ifeq ($(LIBXL_BLKTAP),y)
-LIBXL_OBJS-y += libxl_blktap2.o
+LIBXL_OBJS-y += libxl_blktap2.o libxl_colo_save_disk_blktap2.o
else
LIBXL_OBJS-y += libxl_noblktap2.o
endif
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
index 7b76d3f..7d6f269 100644
--- a/tools/libxl/libxl_colo_save.c
+++ b/tools/libxl/libxl_colo_save.c
@@ -18,10 +18,36 @@
#include "libxl_internal.h"
#include "libxl_colo.h"
+/*
+ * Device instance ops for COLO-protected blktap2 disks; the object itself is
+ * defined in libxl_colo_save_disk_blktap2.c, or in libxl_noblktap2.c when
+ * libxl is built without blktap2.
+ */
+extern const libxl__checkpoint_device_instance_ops
colo_save_device_blktap2_disk;
+
static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+ &colo_save_device_blktap2_disk,
NULL,
};
+/* ================= helper functions ================= */
+/*
+ * Initialise the subkind-specific state of every checkpoint device subkind
+ * used on the COLO save side (currently only blktap2 disks).
+ *
+ * Returns 0 on success, otherwise the error code of the subkind init that
+ * failed.
+ */
+static int init_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+    int rc;
+    STATE_AO_GC(cds->ao);
+
+    /*
+     * COLO replicates blktap2 disks and cleanup_device_subkind() tears down
+     * the blktap2 subkind, so that is the subkind which has to be set up
+     * here (not the DRBD one used by Remus).
+     */
+    rc = init_subkind_blktap2_disk(cds);
+    if (rc) goto out;
+
+    rc = 0;
+out:
+    return rc;
+}
+
+/*
+ * Release the subkind-specific state of the checkpoint device subkinds used
+ * on the COLO save side; currently this only calls the blktap2 disk cleanup.
+ */
+static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+ /* cleanup device subkind-specific state in the libxl ctx */
+ STATE_AO_GC(cds->ao);
+
+ cleanup_subkind_blktap2_disk(cds);
+}
+
/* ================= colo: setup save environment ================= */
static void colo_save_setup_done(libxl__egc *egc,
libxl__checkpoint_devices_state *cds,
@@ -48,13 +74,16 @@ void libxl__colo_save_setup(libxl__egc *egc,
libxl__colo_save_state *css)
css->recv_fd = dss->recv_fd;
css->svm_running = false;
- /* TODO: disk/nic support */
- cds->device_kind_flags = 0;
+ /* TODO: nic support */
+ cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_CHECKPOINT_DISK);
cds->ops = colo_ops;
cds->callback = colo_save_setup_done;
cds->ao = ao;
cds->domid = dss->domid;
+ if (init_device_subkind(cds))
+ goto out;
+
libxl__checkpoint_devices_setup(egc, &css->cds);
return;
@@ -92,6 +121,7 @@ static void colo_save_setup_failed(libxl__egc *egc,
LOG(ERROR, "COLO: failed to teardown device after setup failed"
" for guest with domid %u, rc %d", cds->domid, rc);
+ cleanup_device_subkind(cds);
libxl__ao_complete(egc, ao, rc);
}
@@ -122,6 +152,8 @@ static void colo_teardown_done(libxl__egc *egc,
{
libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+
+ cleanup_device_subkind(cds);
dss->callback(egc, dss, rc);
}
diff --git a/tools/libxl/libxl_colo_save_disk_blktap2.c
b/tools/libxl/libxl_colo_save_disk_blktap2.c
new file mode 100644
index 0000000..19ba6d8
--- /dev/null
+++ b/tools/libxl/libxl_colo_save_disk_blktap2.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+#include <string.h>
+#include <sys/un.h>
+
+#define BLKTAP2_REQUEST "flush"
+#define BLKTAP2_RESPONSE "done"
+#define BLKTAP_CTRL_DIR "/var/run/tap"
+
+/* Per-disk state of a COLO-protected blktap2 disk (dev->concrete_data). */
+typedef struct libxl__colo_blktap2_disk {
+ /* part of the disk's pdev_path that precedes the first '|' */
+ char *name;
+ /* BLKTAP_CTRL_DIR "/colo_<name>", with ':' and '/' in <name> set to '_' */
+ char *ctl_socket_path;
+ /* connected control socket, or -1 while there is no connection */
+ int fd;
+ /* fd event used by commit to wait for the socket to become readable */
+ libxl__ev_fd ev;
+ /* back pointer to the checkpoint device owning this state */
+ libxl__checkpoint_device *dev;
+}libxl__colo_blktap2_disk;
+
+/* ========== init() and cleanup() ========== */
+/*
+ * Subkind-wide initialisation for COLO blktap2 disks.  Nothing is set up
+ * here; the function always returns 0.
+ */
+int init_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds)
+{
+ return 0;
+}
+
+/*
+ * Subkind-wide cleanup for COLO blktap2 disks; counterpart of
+ * init_subkind_blktap2_disk().  Currently a no-op.
+ */
+void cleanup_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds)
+{
+}
+
+/* ========== setup() and teardown() ========== */
+/*
+ * Connect to the control socket of a COLO blktap2 disk.
+ *
+ * Opens an AF_UNIX stream socket and connects it to
+ * blktap2_disk->ctl_socket_path.  On success the connected descriptor is
+ * stored in blktap2_disk->fd and 0 is returned.  On failure ERROR_FAIL is
+ * returned, no descriptor is left open and blktap2_disk->fd is not modified.
+ */
+static int blktap2_control_connect(libxl__gc *gc,
+                                   libxl__colo_blktap2_disk *blktap2_disk)
+{
+    struct sockaddr_un saddr;
+    const char *path = blktap2_disk->ctl_socket_path;
+    int fd, err;
+
+    /*
+     * The path is derived from the disk configuration, so make sure it
+     * (including the terminating NUL) fits into sun_path before copying it.
+     */
+    if (strlen(path) >= sizeof(saddr.sun_path)) {
+        LOG(ERROR, "control socket path %s is too long", path);
+        return ERROR_FAIL;
+    }
+
+    fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (fd < 0) {
+        LOG(ERROR, "cannot creating socket fd");
+        return ERROR_FAIL;
+    }
+
+    memset(&saddr, 0, sizeof(saddr));
+    saddr.sun_family = AF_UNIX;
+    strcpy(saddr.sun_path, path); /* length checked above */
+
+    err = connect(fd, (const struct sockaddr *)&saddr, sizeof(saddr));
+    if (err) {
+        LOG(ERROR, "cannot connecte to %s", path);
+        close(fd);
+        return ERROR_FAIL;
+    }
+
+    blktap2_disk->fd = fd;
+    return 0;
+}
+
+/*
+ * Checkpoint device setup hook for COLO blktap2 disks.
+ *
+ * Disks whose backend is not LIBXL_DISK_BACKEND_TAP or whose format is not
+ * LIBXL_DISK_FORMAT_COLO are rejected with
+ * ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH.  For a matching disk the per-disk
+ * state is allocated, the control socket path is derived from the part of
+ * pdev_path preceding the first '|', and the control socket is connected.
+ *
+ * The result is reported through dev->aodev.rc and dev->aodev.callback.
+ */
+static void blktap2_colo_setup(libxl__checkpoint_device *dev)
+{
+    const libxl_device_disk *disk = dev->backend_dev;
+    libxl__colo_blktap2_disk *blktap2_disk;
+    int rc;
+    char *type;
+    size_t i, l;
+
+    STATE_AO_GC(dev->cds->ao);
+
+    if (disk->backend != LIBXL_DISK_BACKEND_TAP ||
+        disk->format != LIBXL_DISK_FORMAT_COLO) {
+        rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    dev->matched = 1;
+    GCNEW(blktap2_disk);
+    dev->concrete_data = blktap2_disk;
+    blktap2_disk->fd = -1;
+    blktap2_disk->dev = dev;
+    /* Initialise the event before any failure path can be taken. */
+    libxl__ev_fd_init(&blktap2_disk->ev);
+
+    /* pdev_path is expected to look like "<name>|<rest>". */
+    type = disk->pdev_path ? strchr(disk->pdev_path, '|') : NULL;
+    if (!type) {
+        LOG(ERROR, "unexpected pdev_path: %s",
+            disk->pdev_path ? disk->pdev_path : "(null)");
+        rc = ERROR_FAIL;
+        goto out;
+    }
+    blktap2_disk->name = libxl__strndup(gc, disk->pdev_path,
+                                        type - disk->pdev_path);
+    blktap2_disk->ctl_socket_path = libxl__sprintf(gc, "%s/colo_%s",
+                                                   BLKTAP_CTRL_DIR,
+                                                   blktap2_disk->name);
+    /*
+     * scrub socket pathname: everything after "BLKTAP_CTRL_DIR/" must be a
+     * single path component, so ':' and '/' are replaced with '_'.
+     */
+    l = strlen(blktap2_disk->ctl_socket_path);
+    for (i = strlen(BLKTAP_CTRL_DIR) + 1; i < l; i++) {
+        if (strchr(":/", blktap2_disk->ctl_socket_path[i]))
+            blktap2_disk->ctl_socket_path[i] = '_';
+    }
+
+    rc = blktap2_control_connect(gc, blktap2_disk);
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/*
+ * Checkpoint device teardown hook for COLO blktap2 disks.
+ *
+ * Closes the control socket if one is open and completes the operation with
+ * rc 0 through dev->aodev.callback.
+ */
+static void blktap2_colo_teardown(libxl__checkpoint_device *dev)
+{
+    libxl__colo_blktap2_disk *blktap2_disk = dev->concrete_data;
+
+    /* -1 is the "not connected" marker; 0 is a valid descriptor. */
+    if (blktap2_disk->fd >= 0) {
+        close(blktap2_disk->fd);
+        blktap2_disk->fd = -1;
+    }
+
+    dev->aodev.rc = 0;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/* ========== checkpointing APIs ========== */
+static void blktap2_control_readable(libxl__egc *egc, libxl__ev_fd *ev,
+ int fd, short events, short revents);
+
+/*
+ * Checkpoint postsuspend hook for COLO blktap2 disks.
+ *
+ * Writes BLKTAP2_REQUEST to the control socket.  A failed or short write is
+ * reported as ERROR_FAIL, otherwise rc is 0; either way the result is
+ * delivered through dev->aodev.callback.
+ */
+static void blktap2_colo_postsuspend(libxl__checkpoint_device *dev)
+{
+    libxl__colo_blktap2_disk *blktap2_disk = dev->concrete_data;
+    const size_t len = strlen(BLKTAP2_REQUEST);
+    ssize_t ret;
+    int rc = 0;
+
+    /* unix socket fd, so the write is not expected to block */
+    ret = write(blktap2_disk->fd, BLKTAP2_REQUEST, len);
+    /*
+     * Test for -1 before comparing against the unsigned length: otherwise
+     * the error value is converted to a huge unsigned number and the
+     * failure goes unnoticed.
+     */
+    if (ret < 0 || (size_t)ret < len)
+        rc = ERROR_FAIL;
+
+    dev->aodev.rc = rc;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/*
+ * Checkpoint commit hook for COLO blktap2 disks.
+ *
+ * Registers blktap2_control_readable() for POLLIN on the control socket.
+ * When registration succeeds the operation is completed later from that
+ * handler; when it fails the error is reported right away through
+ * dev->aodev.callback.
+ */
+static void blktap2_colo_commit(libxl__checkpoint_device *dev)
+{
+    libxl__colo_blktap2_disk *disk_state = dev->concrete_data;
+    int err;
+
+    STATE_AO_GC(dev->cds->ao);
+
+    err = libxl__ev_fd_register(gc, &disk_state->ev,
+                                blktap2_control_readable,
+                                disk_state->fd, POLLIN);
+    if (!err)
+        return;
+
+    dev->aodev.rc = err;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/*
+ * fd event handler registered by blktap2_colo_commit().
+ *
+ * Deregisters the event, then reads the reply from the control socket.  The
+ * operation succeeds (rc 0) only if the complete BLKTAP2_RESPONSE string was
+ * read; an unexpected poll event, a failed or short read, or a different
+ * reply yields ERROR_FAIL.  The result is delivered through
+ * dev->aodev.callback.
+ */
+static void blktap2_control_readable(libxl__egc *egc, libxl__ev_fd *ev,
+                                     int fd, short events, short revents)
+{
+    libxl__colo_blktap2_disk *blktap2_disk =
+        CONTAINER_OF(ev, *blktap2_disk, ev);
+    int rc = 0;
+    ssize_t ret;
+    /* room for the expected reply plus the terminating NUL */
+    char response[sizeof(BLKTAP2_RESPONSE)];
+    const size_t len = sizeof(response) - 1;
+
+    /* Convenience aliases */
+    libxl__checkpoint_device *const dev = blktap2_disk->dev;
+
+    EGC_GC;
+
+    libxl__ev_fd_deregister(gc, ev);
+
+    if (revents & ~POLLIN) {
+        LOG(ERROR, "unexpected poll event 0x%x (should be POLLIN)", revents);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    ret = read(blktap2_disk->fd, response, len);
+    /*
+     * Test for -1 before comparing against the unsigned length: otherwise
+     * a failed read passes the check and the uninitialised buffer is
+     * compared below.
+     */
+    if (ret < 0 || (size_t)ret < len) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    response[len] = '\0';
+    if (strcmp(response, BLKTAP2_RESPONSE))
+        rc = ERROR_FAIL;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/*
+ * Checkpoint device instance ops for COLO-protected blktap2 disks, wiring
+ * the disk kind to the setup/teardown/postsuspend/commit hooks defined in
+ * this file.
+ */
+const libxl__checkpoint_device_instance_ops colo_save_device_blktap2_disk = {
+ .kind = LIBXL__DEVICE_KIND_CHECKPOINT_DISK,
+ .setup = blktap2_colo_setup,
+ .teardown = blktap2_colo_teardown,
+ .postsuspend = blktap2_colo_postsuspend,
+ .commit = blktap2_colo_commit,
+};
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index ec5946d..b3a2f33 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -854,6 +854,13 @@ static void initiate_domain_create(libxl__egc *egc,
for (i = 0; i < d_config->num_disks; i++) {
ret = libxl__device_disk_setdefault(gc, &d_config->disks[i]);
if (ret) goto error_out;
+
+ /* TODO: cleanup it when destroying the domain */
+ if (d_config->disks[i].backend == LIBXL_DISK_BACKEND_TAP &&
+ (d_config->disks[i].format == LIBXL_DISK_FORMAT_REMUS ||
+ d_config->disks[i].format == LIBXL_DISK_FORMAT_COLO))
+ libxl__blktap_devpath(gc, d_config->disks[i].pdev_path,
+ d_config->disks[i].format);
}
dcs->bl.ao = ao;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 1cc90fb..120b389 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2606,6 +2606,8 @@ int init_subkind_nic(libxl__checkpoint_devices_state
*cds);
void cleanup_subkind_nic(libxl__checkpoint_devices_state *cds);
int init_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
+int init_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds);
+void cleanup_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds);
typedef void libxl__checkpoint_callback(libxl__egc *,
libxl__checkpoint_devices_state *,
diff --git a/tools/libxl/libxl_noblktap2.c b/tools/libxl/libxl_noblktap2.c
index 38696ec..b66ab95 100644
--- a/tools/libxl/libxl_noblktap2.c
+++ b/tools/libxl/libxl_noblktap2.c
@@ -39,6 +39,35 @@ libxl_disk_format libxl__blktap_get_real_format(const char
*disk,
return format;
}
+/*
+ * COLO blktap2 disk stubs for builds without blktap2 support.
+ *
+ * They provide the symbols that libxl_colo_save.c references
+ * (init_subkind_blktap2_disk(), cleanup_subkind_blktap2_disk() and
+ * colo_save_device_blktap2_disk), so that libxl still links when
+ * libxl_colo_save_disk_blktap2.c is not compiled in.
+ */
+
+/* Nothing to initialise without blktap2; always returns 0. */
+int init_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds)
+{
+    return 0;
+}
+
+/* Nothing to clean up without blktap2. */
+void cleanup_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds)
+{
+}
+
+/* Disk replication is unavailable: setting up a device always fails. */
+static void blktap2_colo_setup(libxl__checkpoint_device *dev)
+{
+    dev->aodev.rc = ERROR_FAIL;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/*
+ * Nothing to tear down, but the operation still has to be completed so that
+ * a caller waiting on the aodev is not left hanging.
+ */
+static void blktap2_colo_teardown(libxl__checkpoint_device *dev)
+{
+    dev->aodev.rc = 0;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+const libxl__checkpoint_device_instance_ops colo_save_device_blktap2_disk = {
+    .kind = LIBXL__DEVICE_KIND_CHECKPOINT_DISK,
+    .setup = blktap2_colo_setup,
+    .teardown = blktap2_colo_teardown,
+};
+
/*
* Local variables:
* mode: C
--
1.9.3
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |