[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v18 06/11] libxl/remus: setup and control disk replication for DRBD backends



This patch adds the machinery required for protecting a guest's
disk state, when the guest disk uses a DRBD disk backend.
This patch comprises two parts:

1. Hotplug scripts: The block-drbd-probe script is responsible for
  performing sanity checks on the state of the DRBD disk before the
  checkpointing process begins. This script should be invoked by
  libxl for each of the guest's disk devices, when starting Remus.

2. Remus drbd disk device: Implements the interfaces required by the
   remus abstract device layer. A note about the implementation:

   a) setup() is called for each disk attached to the guest.
      During setup():
      i) The hotplug script is called to perform the sanity check.

      ii) Libxl obtains a handle to the DRBD device (/dev/drbd*) and
          subsequently controls disk checkpoint replication using
          this handle in the checkpoint callbacks.

   b) The preresume() checkpoint callback is executed asynchronously
      using libxl__ev_child_fork(), as it may potentially block for more
      than a few seconds in case of backup failure.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>

Edits to commit message:
Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
---
 tools/hotplug/Linux/Makefile         |   1 +
 tools/hotplug/Linux/block-drbd-probe |  85 ++++++++++++
 tools/libxl/Makefile                 |   2 +-
 tools/libxl/libxl.c                  |   1 +
 tools/libxl/libxl_internal.h         |   3 +
 tools/libxl/libxl_remus_device.c     |   2 +
 tools/libxl/libxl_remus_disk_drbd.c  | 260 +++++++++++++++++++++++++++++++++++
 7 files changed, 353 insertions(+), 1 deletion(-)
 create mode 100755 tools/hotplug/Linux/block-drbd-probe
 create mode 100644 tools/libxl/libxl_remus_disk_drbd.c

diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile
index 721f8c0..15d1b37 100644
--- a/tools/hotplug/Linux/Makefile
+++ b/tools/hotplug/Linux/Makefile
@@ -24,6 +24,7 @@ XEN_SCRIPTS += xen-hotplug-cleanup
 XEN_SCRIPTS += external-device-migrate
 XEN_SCRIPTS += vscsi
 XEN_SCRIPTS += block-iscsi
+XEN_SCRIPTS += block-drbd-probe
 XEN_SCRIPTS += $(XEN_SCRIPTS-y)
 
 XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
diff --git a/tools/hotplug/Linux/block-drbd-probe 
b/tools/hotplug/Linux/block-drbd-probe
new file mode 100755
index 0000000..3a3d446
--- /dev/null
+++ b/tools/hotplug/Linux/block-drbd-probe
@@ -0,0 +1,85 @@
+#! /bin/bash
+#
+# Copyright (C) 2014 FUJITSU LIMITED
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Usage:
+#     block-drbd-probe devicename
+#
+# Return value:
+#     0: the device is a drbd device
+#     1: the device is not a drbd device
+#     2: unknown error
+#     3: the drbd device does not use protocol D
+#     4: the drbd device is not ready
+
+drbd_res=
+
+function get_res_name()
+{
+    # Sets drbd_res to the resource backing device $1.
+    # Returns 0 on success, 1 if $1 is not a drbd device, 2 otherwise.
+    local drbd_dev=$1
+    local drbd_dev_list=($(drbdadm sh-dev all))
+    local drbd_res_list=($(drbdadm sh-resource all))
+    local temp_drbd_dev temp_drbd_res
+    local found=0
+
+    for temp_drbd_dev in "${drbd_dev_list[@]}"; do
+        if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
+            found=1
+            break
+        fi
+    done
+    if [[ $found -eq 0 ]]; then
+        return 1
+    fi
+
+    for temp_drbd_res in "${drbd_res_list[@]}"; do
+        temp_drbd_dev=$(drbdadm sh-dev "$temp_drbd_res")
+        if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
+            drbd_res="$temp_drbd_res"
+            return 0
+        fi
+    done
+    # Device is listed but no resource maps to it.
+    return 2
+}
+
+get_res_name "$1"
+rc=$?
+if [[ $rc -ne 0 ]]; then
+    exit $rc
+fi
+
+# the device must use protocol D (exit code 3 otherwise)
+drbdsetup "$1" show | grep -q "protocol D;"
+if [[ $? -ne 0 ]]; then
+    exit 3
+fi
+
+# the connection state must be "Connected" (exit code 4 otherwise)
+state=$(drbdadm cstate "$drbd_res")
+if [[ "$state" != "Connected" ]]; then
+    exit 4
+fi
+
+# the role must be "Primary/Secondary" (exit code 4 otherwise)
+role=$(drbdadm role "$drbd_res")
+if [[ "$role" != "Primary/Secondary" ]]; then
+    exit 4
+fi
+
+exit 0
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 202f1bb..ba10ab7 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -56,7 +56,7 @@ else
 LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
-LIBXL_OBJS-y += libxl_remus_device.o
+LIBXL_OBJS-y += libxl_remus_device.o libxl_remus_disk_drbd.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 191469b..021d77c 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -824,6 +824,7 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
         goto out;
     }
     rds->device_kind_flags |= LIBXL__REMUS_DEVICE_NIC;
+    rds->device_kind_flags |= LIBXL__REMUS_DEVICE_DISK;
 
     rds->ao = ao;
     rds->egc = egc;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index c6f1411..e631eaf 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2613,6 +2613,9 @@ struct libxl__remus_devices_state {
     char *netbufscript;
     struct nl_sock *nlsock;
     struct nl_cache *qdisc_cache;
+
+    /* private for drbd disk subkind ops */
+    char *drbd_probe_script;
 };
 
 /*
diff --git a/tools/libxl/libxl_remus_device.c b/tools/libxl/libxl_remus_device.c
index e9b0e20..b19c372 100644
--- a/tools/libxl/libxl_remus_device.c
+++ b/tools/libxl/libxl_remus_device.c
@@ -18,8 +18,10 @@
 #include "libxl_internal.h"
 
 extern const libxl__remus_device_subkind_ops remus_device_nic;
+extern const libxl__remus_device_subkind_ops remus_device_drbd_disk;
 static const libxl__remus_device_subkind_ops *remus_ops[] = {
     &remus_device_nic,
+    &remus_device_drbd_disk,
     NULL,
 };
 
diff --git a/tools/libxl/libxl_remus_disk_drbd.c 
b/tools/libxl/libxl_remus_disk_drbd.c
new file mode 100644
index 0000000..59db54f
--- /dev/null
+++ b/tools/libxl/libxl_remus_disk_drbd.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+/*** drbd implementation ***/
+static const int DRBD_SEND_CHECKPOINT = 20;     /* ioctl request */
+static const int DRBD_WAIT_CHECKPOINT_ACK = 30; /* ioctl request */
+
+typedef struct libxl__remus_drbd_disk {
+    int ctl_fd;  /* fd opened on the disk's pdev_path; ioctl target */
+    int ackwait; /* nonzero: preresume issues DRBD_WAIT_CHECKPOINT_ACK */
+} libxl__remus_drbd_disk;
+
+/*----- helper functions, for async calls -----*/
+/*
+ * Fork a child that runs func(dev); callback is registered with
+ * libxl__ev_child_fork() on the device's child slot. func must end
+ * the child itself (abort() is the fallback if it returns). A failed
+ * fork completes the device op with ERROR_FAIL.
+ */
+static void drbd_async_call(libxl__remus_device *dev,
+                            void func(libxl__remus_device *),
+                            libxl__ev_child_callback callback)
+{
+    int child_pid = -1;
+    libxl__ao_device *aodev = &dev->aodev;
+    STATE_AO_GC(dev->rds->ao);
+
+    child_pid = libxl__ev_child_fork(gc, &aodev->child, callback);
+
+    if (child_pid == -1) {
+        LOG(ERROR, "unable to fork");
+        aodev->rc = ERROR_FAIL;
+        aodev->callback(dev->rds->egc, aodev);
+        return;
+    }
+
+    if (child_pid == 0) {
+        /* child: func() is not expected to come back */
+        func(dev);
+        abort();
+    }
+}
+
+/*----- init() and cleanup() -----*/
+/* init(): record the path of the block-drbd-probe script; returns 0. */
+static int drbd_init(libxl__remus_devices_state *rds)
+{
+    const char *script_dir;
+    STATE_AO_GC(rds->ao);
+    script_dir = libxl__xen_script_dir_path();
+    rds->drbd_probe_script = GCSPRINTF("%s/block-drbd-probe", script_dir);
+    return 0;
+}
+
+/* cleanup(): intentionally empty, there is nothing to release here. */
+static void drbd_cleanup(libxl__remus_devices_state *rds)
+{
+}
+
+/*----- match(), setup() and teardown() -----*/
+
+/* callbacks */
+static void match_async_exec_cb(libxl__egc *egc,
+                                libxl__async_exec_state *aes,
+                                int status);
+
+/* implementations */
+
+static void match_async_exec(libxl__egc *egc, libxl__remus_device *dev);
+
+/* setup(): start the probe script; the result arrives via dev->aodev. */
+static void drbd_setup(libxl__remus_device *dev)
+{
+    STATE_AO_GC(dev->rds->ao);
+    match_async_exec(dev->rds->egc, dev);
+}
+
+static void match_async_exec(libxl__egc *egc, libxl__remus_device *dev)
+{
+    int arraysize, nr = 0, rc;
+    const libxl_device_disk *disk = dev->backend_dev;
+    libxl__async_exec_state *aes = &dev->aodev.aes;
+    STATE_AO_GC(dev->rds->ao);
+
+    /* run "<drbd_probe_script> <pdev_path>" with an empty environment */
+    arraysize = 1;
+    GCNEW_ARRAY(aes->env, arraysize);
+    aes->env[nr++] = NULL;
+    assert(nr <= arraysize);
+    /* argv: probe script, the disk's pdev_path, NULL terminator */
+    arraysize = 3;
+    nr = 0;
+    GCNEW_ARRAY(aes->args, arraysize);
+    aes->args[nr++] = dev->rds->drbd_probe_script;
+    aes->args[nr++] = disk->pdev_path;
+    aes->args[nr++] = NULL;
+    assert(nr <= arraysize);
+    /* completion (the script's status) goes to match_async_exec_cb() */
+    aes->ao = dev->rds->ao;
+    aes->what = GCSPRINTF("%s %s", aes->args[0], aes->args[1]);
+    aes->timeout_ms = LIBXL_HOTPLUG_TIMEOUT * 1000;
+    aes->callback = match_async_exec_cb;
+    aes->stdfds[0] = -1;
+    aes->stdfds[1] = -1;
+    aes->stdfds[2] = -1;
+
+    rc = libxl__async_exec_start(gc, aes);
+    if (rc)
+        goto out;
+
+    return;
+    /* the script could not be started: complete the device op with rc */
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+static void match_async_exec_cb(libxl__egc *egc,
+                                libxl__async_exec_state *aes,
+                                int status)
+{
+    int rc;
+    libxl__ao_device *aodev = CONTAINER_OF(aes, *aodev, aes);
+    libxl__remus_device *dev = CONTAINER_OF(aodev, *dev, aodev);
+    libxl__remus_drbd_disk *drbd_disk;
+    const libxl_device_disk *disk = dev->backend_dev;
+    /* completion callback of the script started by match_async_exec() */
+    STATE_AO_GC(aodev->ao);
+    /* any non-zero status is reported as "these ops do not match" */
+    if (status) {
+        rc = ERROR_REMUS_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    /* ops matched, set up the device */
+    dev->set_up = 1;
+    /* the fd opened here is the one the checkpoint ioctls are issued on */
+    GCNEW(drbd_disk);
+    dev->concrete_data = drbd_disk;
+    drbd_disk->ackwait = 0;
+    drbd_disk->ctl_fd = open(disk->pdev_path, O_RDONLY);
+    if (drbd_disk->ctl_fd < 0) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+    /* NOTE(review): set_up is already 1 if open() failed - confirm */
+    rc = 0;
+
+out:
+    aodev->rc = rc;
+    aodev->callback(egc, aodev);
+}
+
+/* teardown(): close the control fd, then complete the op with rc 0. */
+static void drbd_teardown(libxl__remus_device *dev)
+{
+    libxl__remus_drbd_disk *rdd = dev->concrete_data;
+    STATE_AO_GC(dev->rds->ao);
+    close(rdd->ctl_fd);
+    dev->aodev.rc = 0;
+    dev->aodev.callback(dev->rds->egc, &dev->aodev);
+}
+
+/*----- checkpointing APIs -----*/
+
+/* callbacks */
+static void chekpoint_async_call_done(libxl__egc *egc,
+                                      libxl__ev_child *child,
+                                      pid_t pid, int status);
+
+/* API implementations */
+
+/* postsuspend(): the ioctl will not wait or block, so this op is sync */
+static void drbd_postsuspend(libxl__remus_device *dev)
+{
+    libxl__ao_device *aodev = &dev->aodev;
+    libxl__remus_drbd_disk *disk = dev->concrete_data;
+    STATE_AO_GC(dev->rds->ao);
+    /* skip the send while ackwait is still set from an earlier one;
+     * NOTE(review): "<= 0" also sets ackwait on ioctl error - confirm */
+    if (!disk->ackwait &&
+        ioctl(disk->ctl_fd, DRBD_SEND_CHECKPOINT, 0) <= 0)
+        disk->ackwait = 1;
+
+    aodev->rc = 0;
+    aodev->callback(dev->rds->egc, aodev);
+}
+
+
+static void drbd_preresume_async(libxl__remus_device *dev);
+
+/* preresume(): run drbd_preresume_async() in a forked child. */
+static void drbd_preresume(libxl__remus_device *dev)
+{
+    STATE_AO_GC(dev->rds->ao);
+    drbd_async_call(dev, drbd_preresume_async, chekpoint_async_call_done);
+}
+
+/*
+ * Child-side body of preresume(): issue DRBD_WAIT_CHECKPOINT_ACK if
+ * ackwait is set. The exit status is always 0 and becomes the new
+ * ackwait value in chekpoint_async_call_done().
+ */
+static void drbd_preresume_async(libxl__remus_device *dev)
+{
+    const libxl__remus_drbd_disk *disk = dev->concrete_data;
+    if (disk->ackwait)
+        ioctl(disk->ctl_fd, DRBD_WAIT_CHECKPOINT_ACK, 0);
+    _exit(0);
+}
+
+/*
+ * Child-exit callback for drbd_preresume(): a normal exit stores the
+ * exit status in ackwait and yields rc 0, anything else ERROR_FAIL.
+ * pid is unused.
+ */
+static void chekpoint_async_call_done(libxl__egc *egc,
+                                      libxl__ev_child *child,
+                                      pid_t pid, int status)
+{
+    libxl__ao_device *aodev = CONTAINER_OF(child, *aodev, child);
+    libxl__remus_device *dev = CONTAINER_OF(aodev, *dev, aodev);
+    libxl__remus_drbd_disk *disk = dev->concrete_data;
+    STATE_AO_GC(aodev->ao);
+
+    if (WIFEXITED(status)) {
+        disk->ackwait = WEXITSTATUS(status);
+        aodev->rc = 0;
+    } else {
+        aodev->rc = ERROR_FAIL;
+    }
+
+    /* complete the device op with the rc chosen above */
+    aodev->callback(egc, aodev);
+}
+
+const libxl__remus_device_subkind_ops remus_device_drbd_disk = {
+    .kind = LIBXL__REMUS_DEVICE_DISK,
+    .init = drbd_init,
+    .cleanup = drbd_cleanup,
+    .setup = drbd_setup,               /* starts the probe script */
+    .teardown = drbd_teardown,
+    .postsuspend = drbd_postsuspend,   /* completes synchronously */
+    .preresume = drbd_preresume,       /* runs in a forked child */
+};
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.