[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v10] remus drbd: Implement remus drbd replicated disk



On Wed, Jun 4, 2014 at 8:39 PM, Yang Hongyang <yanghy@xxxxxxxxxxxxxx> wrote:
Implement remus-drbd-replicated-checkpointing-disk based on
generic remus devices framework.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
---
 tools/hotplug/Linux/Makefile         |   1 +
 tools/hotplug/Linux/block-drbd-probe |  84 ++++++++++
 tools/libxl/Makefile                 |   2 +-
 tools/libxl/libxl_internal.h         |   1 +
 tools/libxl/libxl_remus_device.c     |  23 ++-
 tools/libxl/libxl_remus_disk_drbd.c  | 290 +++++++++++++++++++++++++++++++++++
 6 files changed, 394 insertions(+), 7 deletions(-)
 create mode 100755 tools/hotplug/Linux/block-drbd-probe
 create mode 100644 tools/libxl/libxl_remus_disk_drbd.c

diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile
index 13e1f5f..5dd8599 100644
--- a/tools/hotplug/Linux/Makefile
+++ b/tools/hotplug/Linux/Makefile
@@ -23,6 +23,7 @@ XEN_SCRIPTS += xen-hotplug-cleanup
ÂXEN_SCRIPTS += external-device-migrate
ÂXEN_SCRIPTS += vscsi
ÂXEN_SCRIPTS += block-iscsi
+XEN_SCRIPTS += block-drbd-probe
ÂXEN_SCRIPTS += $(XEN_SCRIPTS-y)

ÂXEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
diff --git a/tools/hotplug/Linux/block-drbd-probe b/tools/hotplug/Linux/block-drbd-probe
new file mode 100755
index 0000000..163ad04
--- /dev/null
+++ b/tools/hotplug/Linux/block-drbd-probe
@@ -0,0 +1,84 @@
+#! /bin/bash
+#
+# Copyright (C) 2014 FUJITSU LIMITED
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ÂSee the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA Â02111-1307 ÂUSA
+#
+# Usage:
+#     block-drbd-probe devicename
+#
+# Return value:
+#     0: the device is a drbd device
+#     1: the device is not a drbd device
+#     2: unknown error
+#     3: the drbd device does not use protocol D
+#     4: the drbd device is not ready
+
+drbd_res=
+
+function get_res_name()
+{
+ Â Âlocal drbd_dev=$1
+ Â Âlocal drbd_dev_list=($(drbdadm sh-dev all))
+ Â Âlocal drbd_res_list=($(drbdadm sh-resource all))
+ Â Âlocal temp_drbd_dev temp_drbd_res
+ Â Âlocal found=0
+
+ Â Âfor temp_drbd_dev in ${drbd_dev_list[@]}; do
+ Â Â Â Âif [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
+ Â Â Â Â Â Âfound=1
+ Â Â Â Â Â Âbreak
+ Â Â Â Âfi
+ Â Âdone
+
+ Â Âif [[ $found -eq 0 ]]; then
+ Â Â Â Âreturn 1
+ Â Âfi
+
+ Â Âfor temp_drbd_res in ${drbd_res_list[@]}; do
+ Â Â Â Âtemp_drbd_dev=$(drbdadm sh-dev $temp_drbd_res)
+ Â Â Â Âif [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
+ Â Â Â Â Â Âdrbd_res="$temp_drbd_res"
+ Â Â Â Â Â Âreturn 0
+ Â Â Â Âfi
+ Â Âdone
+
+ Â Â# OOPS
+ Â Âreturn 2
+}
+
+get_res_name $1
+if [[ $? -ne 0 ]]; then
+ Â Âexit $?
+fi
+
+# check protocol
+drbdsetup $1 show | grep -q "protocol D;"
+if [[ $? -ne 0 ]]; then
+ Â Âexit 3
+fi
+
+# check connect status
+state=$(drbdadm cstate "$drbd_res")
+if [[ "$state" != "Connected" ]]; then
+ Â Âexit 4
+fi
+
+# check role
+role=$(drbdadm role "$drbd_res")
+if [[ "$role" != "Primary/Secondary" ]]; then
+ Â Âexit 4
+fi
+
+exit 0
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 7a722a8..6f4d9b4 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -56,7 +56,7 @@ else
ÂLIBXL_OBJS-y += libxl_nonetbuffer.o
Âendif

-LIBXL_OBJS-y += libxl_remus_device.o
+LIBXL_OBJS-y += libxl_remus_device.o libxl_remus_disk_drbd.o

ÂLIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
ÂLIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index f221f97..47a4ab9 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2519,6 +2519,7 @@ struct libxl__remus_device_state {

  Âlibxl_device_nic *nics;
  Âint num_nics;
+ Â Âlibxl_device_disk *disks;
  Âint num_disks;

  Â/* for counting devices that have been handled */
diff --git a/tools/libxl/libxl_remus_device.c b/tools/libxl/libxl_remus_device.c
index 5f07266..040441a 100644
--- a/tools/libxl/libxl_remus_device.c
+++ b/tools/libxl/libxl_remus_device.c
@@ -19,8 +19,10 @@
Â#include "libxl_internal.h"

Âextern libxl__remus_device_ops remus_device_nic;
+extern libxl__remus_device_ops remus_device_drbd_disk;
Âstatic libxl__remus_device_ops *dev_ops[] = {
  Â&remus_device_nic,
+ Â Â&remus_device_drbd_disk,
Â};

Âstatic void device_common_cb(libxl__egc *egc,
@@ -194,6 +196,13 @@ static void device_teardown_cb(libxl__egc *egc,
    Ârds->nics = NULL;
    Ârds->num_nics = 0;

+ Â Â Â Â/* clean disk */
+ Â Â Â Âfor (i = 0; i < rds->num_disks; i++)
+ Â Â Â Â Â Âlibxl_device_disk_dispose(&rds->disks[i]);
+ Â Â Â Âfree(rds->disks);
+ Â Â Â Ârds->disks = NULL;
+ Â Â Â Ârds->num_disks = 0;
+
    Â/* clean device ops */
    Âfor (i = 0; i < ARRAY_SIZE(dev_ops); i++) {
      Âops = dev_ops[i];
@@ -269,15 +278,15 @@ void libxl__remus_device_setup(libxl__egc *egc, libxl__remus_state *rs)
  Ârds->num_nics = 0;
  Ârds->num_disks = 0;

- Â Â/* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */
-
  Âif (rs->netbufscript) {
    Ârds->nics = libxl_device_nic_list(CTX, rs->domid, &rds->num_nics);
  Â}
+ Â Ârds->disks = libxl_device_disk_list(CTX, rs->domid, &rds->num_disks);

- Â ÂGCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks);
+ Â Âif (rds->num_nics == 0 && rds->num_disks == 0)
+ Â Â Â Âgoto out;

- Â Â/* TBD: CALL libxl__remus_device_init to init remus devices */
+ Â ÂGCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks);

  Âif (rs->netbufscript && rds->nics) {
    Âfor (i = 0; i < rds->num_nics; i++) {
@@ -286,8 +295,10 @@ void libxl__remus_device_setup(libxl__egc *egc, libxl__remus_state *rs)
    Â}
  Â}

- Â Âif (rds->num_nics == 0 && rds->num_disks == 0)
- Â Â Â Âgoto out;
+ Â Âfor (i = 0; i < rds->num_disks; i++) {
+ Â Â Â Âlibxl__remus_device_init(egc, rds,
+ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â LIBXL__REMUS_DEVICE_DISK, &rds->disks[i]);
+ Â Â}

  Âreturn;

diff --git a/tools/libxl/libxl_remus_disk_drbd.c b/tools/libxl/libxl_remus_disk_drbd.c
new file mode 100644
index 0000000..f35a406
--- /dev/null
+++ b/tools/libxl/libxl_remus_disk_drbd.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ÂSee the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+/*** drbd implementation ***/
+const int DRBD_SEND_CHECKPOINT = 20;
+const int DRBD_WAIT_CHECKPOINT_ACK = 30;
+
+typedef struct libxl__remus_drbd_disk {
+ Â Âlibxl__remus_device remus_dev;
+ Â Âint ctl_fd;
+ Â Âint ackwait;
+ Â Âconst char *path;
+} libxl__remus_drbd_disk;
+
+typedef struct libxl__remus_drbd_state {
+ Â Âlibxl__ao *ao;
+ Â Âchar *drbd_probe_script;
+} libxl__remus_drbd_state;
+
+static void drbd_async_call(libxl__remus_device *dev,
+ Â Â Â Â Â Â Â Â Â Â Â Â Â Âvoid func(libxl__remus_device *),
+ Â Â Â Â Â Â Â Â Â Â Â Â Â Âlibxl__ev_child_callback callback)
+{
+ Â Âint pid = -1;
+ Â ÂSTATE_AO_GC(dev->rds->ao);
+
+ Â Â/* Fork and call */
+ Â Âpid = libxl__ev_child_fork(gc, &dev->child, callback);
+ Â Âif (pid == -1) {
+ Â Â Â ÂLOG(ERROR, "unable to fork");
+ Â Â Â Âgoto out;
+ Â Â}
+
+ Â Âif (!pid) {
+ Â Â Â Â/* child */
+ Â Â Â Âfunc(dev);
+ Â Â Â Â/* notreached */
+ Â Â Â Âabort();
+ Â Â}
+
+ Â Âreturn;
+
+out:
+ Â Âdev->callback(dev->rds->egc, dev, ERROR_FAIL);
+}
+
+static void chekpoint_async_call_done(libxl__egc *egc,
+ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âlibxl__ev_child *child,
+ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âpid_t pid, int status)
+{
+ Â Âlibxl__remus_device *dev = CONTAINER_OF(child, *dev, child);
+ Â Âlibxl__remus_drbd_disk *rdd = dev->data;
+ Â ÂSTATE_AO_GC(dev->rds->ao);
+
+ Â Âif (WIFEXITED(status)) {
+ Â Â Â Ârdd->ackwait = WEXITSTATUS(status);
+ Â Â Â Âdev->callback(egc, dev, 0);
+ Â Â} else {
+ Â Â Â Âdev->callback(egc, dev, ERROR_FAIL);
+ Â Â}
+}
+
+static void drbd_postsuspend_async(libxl__remus_device *dev)
+{
+ Â Âlibxl__remus_drbd_disk *rdd = dev->data;
+ Â Âint ackwait = rdd->ackwait;
+
+ Â Âif (!ackwait) {
+ Â Â Â Âif (ioctl(rdd->ctl_fd, DRBD_SEND_CHECKPOINT, 0) <= 0)
+ Â Â Â Â Â Âackwait = 1;
+ Â Â}
+
+ Â Â_exit(ackwait);
+}
+
+static void drbd_postsuspend(libxl__remus_device *dev)
+{
+ Â Âdrbd_async_call(dev, drbd_postsuspend_async, chekpoint_async_call_done);
+}
+
+static void drbd_preresume_async(libxl__remus_device *dev)
+{
+ Â Âlibxl__remus_drbd_disk *rdd = dev->data;
+ Â Âint ackwait = rdd->ackwait;
+
+ Â Âif (ackwait) {
+ Â Â Â Âioctl(rdd->ctl_fd, DRBD_WAIT_CHECKPOINT_ACK, 0);
+ Â Â Â Âackwait = 0;
+ Â Â}
+
+ Â Â_exit(ackwait);
+}
+
+static void drbd_preresume(libxl__remus_device *dev)
+{
+ Â Âdrbd_async_call(dev, drbd_preresume_async, chekpoint_async_call_done);
+}
+


Please get rid of the async execution here: it is being used just to execute a
single system call. Moreover, a fork & exec per system call, per checkpoint, would
add more overhead than could ever be gained through async execution.

The setup and teardown paths, however, can keep using the async helper
drbd_async_call, since they involve invoking scripts.

Apart from that, the rest of the code looks fine structurally.


shriram
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.