[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v19 06/12] libxl/remus: setup and control disk replication for DRBD backends



This patch adds the machinery required for protecting a guest's
disk state, when the guest disk uses a DRBD disk backend.
This patch comprises of two parts:

1. Hotplug scripts: The block-drbd-probe script is responsible for
  performing sanity checks on the state of the DRBD disk before the
  checkpointing process begins. This script should be invoked by
  libxl for each of the guest's disk devices, when starting Remus.

2. Remus drbd disk device: Implements the interfaces required by the
   remus abstract device layer. A note about the implementation:

   a) setup() is called for each disk attached to the guest.
      During setup():
      i) The hotplug script is called to perform the sanity check.

      ii) Libxl obtains a handle to the DRBD device (/dev/drbd*) and
          and subsequently controls disk checkpoint replication using
          this handle in the checkpoint callbacks.

   c) The preresume() checkpoint callback is executed asynchronously
      using libxl__ev_child_fork(), as it may potentially block for more
      than few seconds in case of backup failure.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>

Edits to commit message:
Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
---
 tools/hotplug/Linux/Makefile         |   1 +
 tools/hotplug/Linux/block-drbd-probe |  85 ++++++++++++
 tools/libxl/Makefile                 |   2 +-
 tools/libxl/libxl.c                  |   1 +
 tools/libxl/libxl_internal.h         |   5 +
 tools/libxl/libxl_remus_device.c     |   7 +
 tools/libxl/libxl_remus_disk_drbd.c  | 257 +++++++++++++++++++++++++++++++++++
 7 files changed, 357 insertions(+), 1 deletion(-)
 create mode 100755 tools/hotplug/Linux/block-drbd-probe
 create mode 100644 tools/libxl/libxl_remus_disk_drbd.c

diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile
index 31e57f7..5317fef 100644
--- a/tools/hotplug/Linux/Makefile
+++ b/tools/hotplug/Linux/Makefile
@@ -24,6 +24,7 @@ XEN_SCRIPTS += xen-hotplug-cleanup
 XEN_SCRIPTS += external-device-migrate
 XEN_SCRIPTS += vscsi
 XEN_SCRIPTS += block-iscsi
+XEN_SCRIPTS += block-drbd-probe
 XEN_SCRIPTS += $(XEN_SCRIPTS-y)
 
 SUBDIRS-$(CONFIG_SYSTEMD) += systemd
diff --git a/tools/hotplug/Linux/block-drbd-probe 
b/tools/hotplug/Linux/block-drbd-probe
new file mode 100755
index 0000000..3a3d446
--- /dev/null
+++ b/tools/hotplug/Linux/block-drbd-probe
@@ -0,0 +1,85 @@
+#! /bin/bash
+#
+# Copyright (C) 2014 FUJITSU LIMITED
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Usage:
+#     block-drbd-probe devicename
+#
+# Return value:
+#     0: the device is drbd device
+#     1: the device is not drbd device
+#     2: unkown error
+#     3: the drbd device does not use protocol D
+#     4: the drbd device is not ready
+
+drbd_res=
+
+function get_res_name()
+{
+    local drbd_dev=$1
+    local drbd_dev_list=($(drbdadm sh-dev all))
+    local drbd_res_list=($(drbdadm sh-resource all))
+    local temp_drbd_dev temp_drbd_res
+    local found=0
+
+    for temp_drbd_dev in ${drbd_dev_list[@]}; do
+        if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
+            found=1
+            break
+        fi
+    done
+
+    if [[ $found -eq 0 ]]; then
+        return 1
+    fi
+
+    for temp_drbd_res in ${drbd_res_list[@]}; do
+        temp_drbd_dev=$(drbdadm sh-dev $temp_drbd_res)
+        if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
+            drbd_res="$temp_drbd_res"
+            return 0
+        fi
+    done
+
+    # OOPS
+    return 2
+}
+
+get_res_name $1
+rc=$?
+if [[ $rc -ne 0 ]]; then
+    exit $rc
+fi
+
+# check protocol
+drbdsetup $1 show | grep -q "protocol D;"
+if [[ $? -ne 0 ]]; then
+    exit 3
+fi
+
+# check connect status
+state=$(drbdadm cstate "$drbd_res")
+if [[ "$state" != "Connected" ]]; then
+    exit 4
+fi
+
+# check role
+role=$(drbdadm role "$drbd_res")
+if [[ "$role" != "Primary/Secondary" ]]; then
+    exit 4
+fi
+
+exit 0
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 202f1bb..ba10ab7 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -56,7 +56,7 @@ else
 LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
-LIBXL_OBJS-y += libxl_remus_device.o
+LIBXL_OBJS-y += libxl_remus_device.o libxl_remus_disk_drbd.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 3e23fe6..b075ca7 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -825,6 +825,7 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
         goto out;
     }
     rds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_REMUS_NIC);
+    rds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_REMUS_DISK);
 
     rds->ao = ao;
     rds->egc = egc;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index dbe8d5f..eefd519 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2561,6 +2561,8 @@ struct libxl__remus_device_instance_ops {
 
 int init_subkind_nic(libxl__remus_devices_state *rds);
 void cleanup_subkind_nic(libxl__remus_devices_state *rds);
+int init_subkind_drbd_disk(libxl__remus_devices_state *rds);
+void cleanup_subkind_drbd_disk(libxl__remus_devices_state *rds);
 
 typedef void libxl__remus_callback(libxl__egc *,
                                    libxl__remus_devices_state *, int rc);
@@ -2604,6 +2606,9 @@ struct libxl__remus_devices_state {
     char *netbufscript;
     struct nl_sock *nlsock;
     struct nl_cache *qdisc_cache;
+
+    /* private for drbd disk subkind ops */
+    char *drbd_probe_script;
 };
 
 /*
diff --git a/tools/libxl/libxl_remus_device.c b/tools/libxl/libxl_remus_device.c
index 9edcc94..23ad36b 100644
--- a/tools/libxl/libxl_remus_device.c
+++ b/tools/libxl/libxl_remus_device.c
@@ -18,8 +18,10 @@
 #include "libxl_internal.h"
 
 extern const libxl__remus_device_instance_ops remus_device_nic;
+extern const libxl__remus_device_instance_ops remus_device_drbd_disk;
 static const libxl__remus_device_instance_ops *remus_ops[] = {
     &remus_device_nic,
+    &remus_device_drbd_disk,
     NULL,
 };
 
@@ -36,6 +38,9 @@ static int init_device_subkind(libxl__remus_devices_state 
*rds)
         if (rc) goto out;
     }
 
+    rc = init_subkind_drbd_disk(rds);
+    if (rc) goto out;
+
     rc = 0;
 out:
     return rc;
@@ -48,6 +53,8 @@ static void cleanup_device_subkind(libxl__remus_devices_state 
*rds)
 
     if (libxl__netbuffer_enabled(gc))
         cleanup_subkind_nic(rds);
+
+    cleanup_subkind_drbd_disk(rds);
 }
 
 /*----- setup() and teardown() -----*/
diff --git a/tools/libxl/libxl_remus_disk_drbd.c 
b/tools/libxl/libxl_remus_disk_drbd.c
new file mode 100644
index 0000000..1be08bb
--- /dev/null
+++ b/tools/libxl/libxl_remus_disk_drbd.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+/*** drbd implementation ***/
+const int DRBD_SEND_CHECKPOINT = 20;
+const int DRBD_WAIT_CHECKPOINT_ACK = 30;
+
+typedef struct libxl__remus_drbd_disk {
+    int ctl_fd;
+    int ackwait;
+} libxl__remus_drbd_disk;
+
+int init_subkind_drbd_disk(libxl__remus_devices_state *rds)
+{
+    STATE_AO_GC(rds->ao);
+
+    rds->drbd_probe_script = GCSPRINTF("%s/block-drbd-probe",
+                                       libxl__xen_script_dir_path());
+
+    return 0;
+}
+
+void cleanup_subkind_drbd_disk(libxl__remus_devices_state *rds)
+{
+    return;
+}
+
+/*----- helper functions, for async calls -----*/
+static void drbd_async_call(libxl__remus_device *dev,
+                            void func(libxl__remus_device *),
+                            libxl__ev_child_callback callback)
+{
+    int pid = -1, rc;
+    libxl__ao_device *aodev = &dev->aodev;
+    STATE_AO_GC(dev->rds->ao);
+
+    /* Fork and call */
+    pid = libxl__ev_child_fork(gc, &aodev->child, callback);
+    if (pid == -1) {
+        LOG(ERROR, "unable to fork");
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    if (!pid) {
+        /* child */
+        func(dev);
+        /* notreached */
+        abort();
+    }
+
+    return;
+
+out:
+    aodev->rc = rc;
+    aodev->callback(dev->rds->egc, aodev);
+}
+
+/*----- match(), setup() and teardown() -----*/
+
+/* callbacks */
+static void match_async_exec_cb(libxl__egc *egc,
+                                libxl__async_exec_state *aes,
+                                int status);
+
+/* implementations */
+
+static void match_async_exec(libxl__egc *egc, libxl__remus_device *dev);
+
+static void drbd_setup(libxl__remus_device *dev)
+{
+    STATE_AO_GC(dev->rds->ao);
+
+    match_async_exec(dev->rds->egc, dev);
+}
+
+static void match_async_exec(libxl__egc *egc, libxl__remus_device *dev)
+{
+    int arraysize, nr = 0, rc;
+    const libxl_device_disk *disk = dev->backend_dev;
+    libxl__async_exec_state *aes = &dev->aodev.aes;
+    STATE_AO_GC(dev->rds->ao);
+
+    /* setup env & args */
+    arraysize = 1;
+    GCNEW_ARRAY(aes->env, arraysize);
+    aes->env[nr++] = NULL;
+    assert(nr <= arraysize);
+
+    arraysize = 3;
+    nr = 0;
+    GCNEW_ARRAY(aes->args, arraysize);
+    aes->args[nr++] = dev->rds->drbd_probe_script;
+    aes->args[nr++] = disk->pdev_path;
+    aes->args[nr++] = NULL;
+    assert(nr <= arraysize);
+
+    aes->ao = dev->rds->ao;
+    aes->what = GCSPRINTF("%s %s", aes->args[0], aes->args[1]);
+    aes->timeout_ms = LIBXL_HOTPLUG_TIMEOUT * 1000;
+    aes->callback = match_async_exec_cb;
+    aes->stdfds[0] = -1;
+    aes->stdfds[1] = -1;
+    aes->stdfds[2] = -1;
+
+    rc = libxl__async_exec_start(gc, aes);
+    if (rc)
+        goto out;
+
+    return;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+static void match_async_exec_cb(libxl__egc *egc,
+                                libxl__async_exec_state *aes,
+                                int status)
+{
+    int rc;
+    libxl__ao_device *aodev = CONTAINER_OF(aes, *aodev, aes);
+    libxl__remus_device *dev = CONTAINER_OF(aodev, *dev, aodev);
+    libxl__remus_drbd_disk *drbd_disk;
+    const libxl_device_disk *disk = dev->backend_dev;
+
+    STATE_AO_GC(aodev->ao);
+
+    if (status) {
+        rc = ERROR_REMUS_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    /* ops matched */
+    dev->matched = true;
+
+    GCNEW(drbd_disk);
+    dev->concrete_data = drbd_disk;
+    drbd_disk->ackwait = 0;
+    drbd_disk->ctl_fd = open(disk->pdev_path, O_RDONLY);
+    if (drbd_disk->ctl_fd < 0) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    rc = 0;
+
+out:
+    aodev->rc = rc;
+    aodev->callback(egc, aodev);
+}
+
+static void drbd_teardown(libxl__remus_device *dev)
+{
+    libxl__remus_drbd_disk *drbd_disk = dev->concrete_data;
+    STATE_AO_GC(dev->rds->ao);
+
+    close(drbd_disk->ctl_fd);
+    dev->aodev.rc = 0;
+    dev->aodev.callback(dev->rds->egc, &dev->aodev);
+}
+
+/*----- checkpointing APIs -----*/
+
+/* callbacks */
+static void checkpoint_async_call_done(libxl__egc *egc,
+                                       libxl__ev_child *child,
+                                       pid_t pid, int status);
+
+/* API implementations */
+
+/* this op will not wait and block, so implement as sync op */
+static void drbd_postsuspend(libxl__remus_device *dev)
+{
+    STATE_AO_GC(dev->rds->ao);
+
+    libxl__remus_drbd_disk *rdd = dev->concrete_data;
+
+    if (!rdd->ackwait) {
+        if (ioctl(rdd->ctl_fd, DRBD_SEND_CHECKPOINT, 0) <= 0)
+            rdd->ackwait = 1;
+    }
+
+    dev->aodev.rc = 0;
+    dev->aodev.callback(dev->rds->egc, &dev->aodev);
+}
+
+
+static void drbd_preresume_async(libxl__remus_device *dev);
+
+static void drbd_preresume(libxl__remus_device *dev)
+{
+    STATE_AO_GC(dev->rds->ao);
+
+    drbd_async_call(dev, drbd_preresume_async, checkpoint_async_call_done);
+}
+
+static void drbd_preresume_async(libxl__remus_device *dev)
+{
+    libxl__remus_drbd_disk *rdd = dev->concrete_data;
+    int ackwait = rdd->ackwait;
+
+    if (ackwait) {
+        ioctl(rdd->ctl_fd, DRBD_WAIT_CHECKPOINT_ACK, 0);
+        ackwait = 0;
+    }
+
+    _exit(ackwait);
+}
+
+static void checkpoint_async_call_done(libxl__egc *egc,
+                                       libxl__ev_child *child,
+                                       pid_t pid, int status)
+{
+    int rc;
+    libxl__ao_device *aodev = CONTAINER_OF(child, *aodev, child);
+    libxl__remus_device *dev = CONTAINER_OF(aodev, *dev, aodev);
+    libxl__remus_drbd_disk *rdd = dev->concrete_data;
+
+    STATE_AO_GC(aodev->ao);
+
+    if (!WIFEXITED(status)) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    rdd->ackwait = WEXITSTATUS(status);
+    rc = 0;
+
+out:
+    aodev->rc = rc;
+    aodev->callback(egc, aodev);
+}
+
+const libxl__remus_device_instance_ops remus_device_drbd_disk = {
+    .kind = LIBXL__DEVICE_KIND_REMUS_DISK,
+    .setup = drbd_setup,
+    .teardown = drbd_teardown,
+    .postsuspend = drbd_postsuspend,
+    .preresume = drbd_preresume,
+};
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.