[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v11 5/6] remus drbd: Implement remus drbd replicated disk




On Jun 13, 2014 7:39 AM, "Yang Hongyang" <yanghy@xxxxxxxxxxxxxx> wrote:
>
> Implement remus-drbd-replicated-checkpointing-disk based on
> generic remus devices framework.
>
> Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
> Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
> ---
>  tools/hotplug/Linux/Makefile         |   1 +
>  tools/hotplug/Linux/block-drbd-probe |  84 ++++++++++++
>  tools/libxl/Makefile                 |   2 +-
>  tools/libxl/libxl_internal.h         |   1 +
>  tools/libxl/libxl_remus_device.c     |  23 +++-
>  tools/libxl/libxl_remus_disk_drbd.c  | 249 +++++++++++++++++++++++++++++++++++
>  6 files changed, 353 insertions(+), 7 deletions(-)
>  create mode 100755 tools/hotplug/Linux/block-drbd-probe
>  create mode 100644 tools/libxl/libxl_remus_disk_drbd.c
>
> diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile
> index 13e1f5f..5dd8599 100644
> --- a/tools/hotplug/Linux/Makefile
> +++ b/tools/hotplug/Linux/Makefile
> @@ -23,6 +23,7 @@ XEN_SCRIPTS += xen-hotplug-cleanup
> ÂXEN_SCRIPTS += external-device-migrate
> ÂXEN_SCRIPTS += vscsi
> ÂXEN_SCRIPTS += block-iscsi
> +XEN_SCRIPTS += block-drbd-probe
> ÂXEN_SCRIPTS += $(XEN_SCRIPTS-y)
>
> ÂXEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
> diff --git a/tools/hotplug/Linux/block-drbd-probe b/tools/hotplug/Linux/block-drbd-probe
> new file mode 100755
> index 0000000..163ad04
> --- /dev/null
> +++ b/tools/hotplug/Linux/block-drbd-probe
> @@ -0,0 +1,84 @@
> +#! /bin/bash
> +#
> +# Copyright (C) 2014 FUJITSU LIMITED
> +#
> +# This library is free software; you can redistribute it and/or
> +# modify it under the terms of version 2.1 of the GNU Lesser General Public
> +# License as published by the Free Software Foundation.
> +#
> +# This library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ÂSee the GNU
> +# Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with this library; if not, write to the Free Software
> +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA Â02111-1307 ÂUSA
> +#
> +# Usage:
> +#     block-drbd-probe devicename
> +#
> +# Return value:
> +#     0: the device is drbd device
> +#     1: the device is not drbd device
> +#     2: unkown error
> +#     3: the drbd device does not use protocol D
> +#     4: the drbd device is not ready
> +
> +drbd_res=
> +
> +function get_res_name()
> +{
> + Â Âlocal drbd_dev=$1
> + Â Âlocal drbd_dev_list=($(drbdadm sh-dev all))
> + Â Âlocal drbd_res_list=($(drbdadm sh-resource all))
> + Â Âlocal temp_drbd_dev temp_drbd_res
> + Â Âlocal found=0
> +
> + Â Âfor temp_drbd_dev in ${drbd_dev_list[@]}; do
> + Â Â Â Âif [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
> + Â Â Â Â Â Âfound=1
> + Â Â Â Â Â Âbreak
> + Â Â Â Âfi
> + Â Âdone
> +
> + Â Âif [[ $found -eq 0 ]]; then
> + Â Â Â Âreturn 1
> + Â Âfi
> +
> + Â Âfor temp_drbd_res in ${drbd_res_list[@]}; do
> + Â Â Â Âtemp_drbd_dev=$(drbdadm sh-dev $temp_drbd_res)
> + Â Â Â Âif [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
> + Â Â Â Â Â Âdrbd_res="$temp_drbd_res"
> + Â Â Â Â Â Âreturn 0
> + Â Â Â Âfi
> + Â Âdone
> +
> + Â Â# OOPS
> + Â Âreturn 2
> +}
> +
> +get_res_name $1
> +if [[ $? -ne 0 ]]; then
> + Â Âexit $?
> +fi
> +
> +# check protocol
> +drbdsetup $1 show | grep -q "protocol D;"
> +if [[ $? -ne 0 ]]; then
> + Â Âexit 3
> +fi
> +
> +# check connect status
> +state=$(drbdadm cstate "$drbd_res")
> +if [[ "$state" != "Connected" ]]; then
> + Â Âexit 4
> +fi
> +
> +# check role
> +role=$(drbdadm role "$drbd_res")
> +if [[ "$role" != "Primary/Secondary" ]]; then
> + Â Âexit 4
> +fi
> +
> +exit 0
> diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
> index 7a722a8..6f4d9b4 100644
> --- a/tools/libxl/Makefile
> +++ b/tools/libxl/Makefile
> @@ -56,7 +56,7 @@ else
> ÂLIBXL_OBJS-y += libxl_nonetbuffer.o
> Âendif
>
> -LIBXL_OBJS-y += libxl_remus_device.o
> +LIBXL_OBJS-y += libxl_remus_device.o libxl_remus_disk_drbd.o
>
> ÂLIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
> ÂLIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index 47648cd..2424be0 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -2580,6 +2580,7 @@ struct libxl__remus_device_state {
>
> Â Â Âlibxl_device_nic *nics;
> Â Â Âint num_nics;
> + Â Âlibxl_device_disk *disks;
> Â Â Âint num_disks;
>
> Â Â Â/* for counting devices that have been handled */
> diff --git a/tools/libxl/libxl_remus_device.c b/tools/libxl/libxl_remus_device.c
> index c447d7d..9637b1e 100644
> --- a/tools/libxl/libxl_remus_device.c
> +++ b/tools/libxl/libxl_remus_device.c
> @@ -19,8 +19,10 @@
> Â#include "libxl_internal.h"
>
> Âextern libxl__remus_device_ops remus_device_nic;
> +extern libxl__remus_device_ops remus_device_drbd_disk;
> Âstatic libxl__remus_device_ops *dev_ops[] = {
> Â Â Â&remus_device_nic,
> + Â Â&remus_device_drbd_disk,
> Â};
>
> Âstatic void device_common_cb(libxl__egc *egc,
> @@ -197,6 +199,13 @@ static void device_teardown_cb(libxl__egc *egc,
> Â Â Â Â Ârds->nics = NULL;
> Â Â Â Â Ârds->num_nics = 0;
>
> + Â Â Â Â/* clean disk */
> + Â Â Â Âfor (i = 0; i < rds->num_disks; i++)
> + Â Â Â Â Â Âlibxl_device_disk_dispose(&rds->disks[i]);
> + Â Â Â Âfree(rds->disks);
> + Â Â Â Ârds->disks = NULL;
> + Â Â Â Ârds->num_disks = 0;
> +
> Â Â Â Â Â/* clean device ops */
> Â Â Â Â Âfor (i = 0; i < ARRAY_SIZE(dev_ops); i++) {
> Â Â Â Â Â Â Âops = dev_ops[i];
> @@ -272,15 +281,15 @@ void libxl__remus_device_setup(libxl__egc *egc, libxl__remus_state *rs)
> Â Â Ârds->num_nics = 0;
> Â Â Ârds->num_disks = 0;
>
> - Â Â/* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */
> -
> Â Â Âif (rs->netbufscript) {
> Â Â Â Â Ârds->nics = libxl_device_nic_list(CTX, rs->domid, &rds->num_nics);
> Â Â Â}
> + Â Ârds->disks = libxl_device_disk_list(CTX, rs->domid, &rds->num_disks);
>
> - Â ÂGCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks);
> + Â Âif (rds->num_nics == 0 && rds->num_disks == 0)
> + Â Â Â Âgoto out;
>
> - Â Â/* TBD: CALL libxl__remus_device_init to init remus devices */
> + Â ÂGCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks);
>
> Â Â Âif (rs->netbufscript && rds->nics) {
> Â Â Â Â Âfor (i = 0; i < rds->num_nics; i++) {
> @@ -289,8 +298,10 @@ void libxl__remus_device_setup(libxl__egc *egc, libxl__remus_state *rs)
> Â Â Â Â Â}
> Â Â Â}
>
> - Â Âif (rds->num_nics == 0 && rds->num_disks == 0)
> - Â Â Â Âgoto out;
> + Â Âfor (i = 0; i < rds->num_disks; i++) {
> + Â Â Â Âlibxl__remus_device_init(egc, rds,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â LIBXL__REMUS_DEVICE_DISK, &rds->disks[i]);
> + Â Â}
>
> Â Â Âreturn;
>
> diff --git a/tools/libxl/libxl_remus_disk_drbd.c b/tools/libxl/libxl_remus_disk_drbd.c
> new file mode 100644
> index 0000000..69fdd34
> --- /dev/null
> +++ b/tools/libxl/libxl_remus_disk_drbd.c
> @@ -0,0 +1,249 @@
> +/*
> + * Copyright (C) 2014 FUJITSU LIMITED
> + * Author Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU Lesser General Public License as published
> + * by the Free Software Foundation; version 2.1 only. with the special
> + * exception on linking described in file LICENSE.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ÂSee the
> + * GNU Lesser General Public License for more details.
> + */
> +
> +#include "libxl_osdeps.h" /* must come before any other headers */
> +
> +#include "libxl_internal.h"
> +
> +/*** drbd implementation ***/
> +const int DRBD_SEND_CHECKPOINT = 20;
> +const int DRBD_WAIT_CHECKPOINT_ACK = 30;
> +
> +typedef struct libxl__remus_drbd_disk {
> + Â Âlibxl__remus_device remus_dev;
> + Â Âint ctl_fd;
> + Â Âint ackwait;
> + Â Âconst char *path;
> +} libxl__remus_drbd_disk;
> +
> +typedef struct libxl__remus_drbd_state {
> + Â Âlibxl__ao *ao;
> + Â Âchar *drbd_probe_script;
> +} libxl__remus_drbd_state;
> +
> +static void drbd_async_call(libxl__remus_device *dev,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Âvoid func(libxl__remus_device *),
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Âlibxl__ev_child_callback callback)
> +{
> + Â Âint pid = -1;
> + Â ÂSTATE_AO_GC(dev->rds->ao);
> +
> + Â Â/* Fork and call */
> + Â Âpid = libxl__ev_child_fork(gc, &dev->child, callback);
> + Â Âif (pid == -1) {
> + Â Â Â ÂLOG(ERROR, "unable to fork");
> + Â Â Â Âgoto out;
> + Â Â}
> +
> + Â Âif (!pid) {
> + Â Â Â Â/* child */
> + Â Â Â Âfunc(dev);
> + Â Â Â Â/* notreached */
> + Â Â Â Âabort();
> + Â Â}
> +
> + Â Âreturn;
> +
> +out:
> + Â Âdev->callback(dev->rds->egc, dev, ERROR_FAIL);
> +}
> +
> +static void chekpoint_async_call_done(libxl__egc *egc,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âlibxl__ev_child *child,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âpid_t pid, int status)
> +{
> + Â Âlibxl__remus_device *dev = CONTAINER_OF(child, *dev, child);
> + Â Âlibxl__remus_drbd_disk *rdd = dev->data;
> + Â ÂSTATE_AO_GC(dev->rds->ao);
> +
> + Â Âif (WIFEXITED(status)) {
> + Â Â Â Ârdd->ackwait = WEXITSTATUS(status);
> + Â Â Â Âdev->callback(egc, dev, 0);
> + Â Â} else {
> + Â Â Â Âdev->callback(egc, dev, ERROR_FAIL);
> + Â Â}
> +}
> +
> +/* this op will not wait and block, so implement as sync op */
> +static void drbd_postsuspend(libxl__remus_device *dev)
> +{
> + Â Âlibxl__remus_drbd_disk *rdd = dev->data;
> +
> + Â Âif (!rdd->ackwait) {
> + Â Â Â Âif (ioctl(rdd->ctl_fd, DRBD_SEND_CHECKPOINT, 0) <= 0)
> + Â Â Â Â Â Ârdd->ackwait = 1;
> + Â Â}
> +
> + Â Âdev->callback(dev->rds->egc, dev, 0);
> +}
> +
> +static void drbd_preresume_async(libxl__remus_device *dev)
> +{
> + Â Âlibxl__remus_drbd_disk *rdd = dev->data;
> + Â Âint ackwait = rdd->ackwait;
> +
> + Â Âif (ackwait) {
> + Â Â Â Âioctl(rdd->ctl_fd, DRBD_WAIT_CHECKPOINT_ACK, 0);
> + Â Â Â Âackwait = 0;
> + Â Â}
> +
> + Â Â_exit(ackwait);
> +}
> +
> +static void drbd_preresume(libxl__remus_device *dev)
> +{
> + Â Âdrbd_async_call(dev, drbd_preresume_async, chekpoint_async_call_done);
> +}
> +
> +static int drbd_init(libxl__remus_device_ops *self,
> + Â Â Â Â Â Â Â Â Â Â libxl__remus_state *rs)
> +{
> + Â Âlibxl__remus_drbd_state *drbd_state;
> +
> + Â ÂSTATE_AO_GC(rs->ao);
> +
> + Â ÂGCNEW(drbd_state);
> +    self->data = drbd_state;
> +    drbd_state->ao = ao;
> + Â Âdrbd_state->drbd_probe_script = GCSPRINTF("%s/block-drbd-probe",
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âlibxl__xen_script_dir_path());
> +
> +
> + Â Âreturn 0;
> +}
> +
> +static void drbd_destroy(libxl__remus_device_ops *self)
> +{
> + Â Âreturn;
> +}
> +
> +static void match_async_exec_cb(libxl__egc *egc,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âlibxl__async_exec_state *aes,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âint status)
> +{
> + Â Âlibxl__remus_device *dev = CONTAINER_OF(aes, *dev, aes);
> +
> + Â Âif (status) {
> + Â Â Â Âdev->callback(egc, dev, ERROR_NOT_MATCH);
> + Â Â} else {
> + Â Â Â Âdev->callback(egc, dev, 0);
> + Â Â}
> +}
> +
> +static void match_async_exec(libxl__egc *egc, libxl__remus_device *dev)
> +{
> + Â Âint arraysize, nr = 0;
> + Â Âconst libxl_device_disk *disk = dev->backend_dev;
> + Â Âlibxl__remus_drbd_state *drbd_state = dev->ops->data;
> + Â Âlibxl__async_exec_state *aes = &dev->aes;
> + Â ÂSTATE_AO_GC(drbd_state->ao);
> +
> + Â Â/* setup env & args */
> + Â Âarraysize = 1;
> + Â ÂGCNEW_ARRAY(aes->env, arraysize);
> + Â Âaes->env[nr++] = NULL;
> + Â Âassert(nr <= arraysize);
> +
> + Â Âarraysize = 3;
> + Â Ânr = 0;
> + Â ÂGCNEW_ARRAY(aes->args, arraysize);
> + Â Âaes->args[nr++] = drbd_state->drbd_probe_script;
> + Â Âaes->args[nr++] = disk->pdev_path;
> + Â Âaes->args[nr++] = NULL;
> + Â Âassert(nr <= arraysize);
> +
> + Â Âaes->ao = drbd_state->ao;
> + Â Âaes->what = GCSPRINTF("%s %s", aes->args[0], aes->args[1]);
> + Â Âaes->timeout_ms = LIBXL_HOTPLUG_TIMEOUT * 1000;
> + Â Âaes->callback = match_async_exec_cb;
> + Â Âaes->stdfds[0] = -1;
> + Â Âaes->stdfds[1] = -1;
> + Â Âaes->stdfds[2] = -1;
> +
> + Â Âif (libxl__async_exec_start(gc, aes))
> + Â Â Â Âgoto out;
> +
> + Â Âreturn;
> +
> +out:
> + Â Âdev->callback(egc, dev, ERROR_FAIL);
> +}
> +
> +static void match_async_call_done(libxl__egc *egc,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âlibxl__ev_child *child,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âpid_t pid, int status)
> +{
> + Â Âlibxl__remus_device *dev = CONTAINER_OF(child, *dev, child);
> + Â ÂSTATE_AO_GC(dev->rds->ao);
> +
> + Â Âif (WIFEXITED(status)) {
> + Â Â Â Âif (-WEXITSTATUS(status) == ERROR_NOT_MATCH) {
> + Â Â Â Â Â Âdev->callback(egc, dev, ERROR_NOT_MATCH);
> + Â Â Â Â} else {
> + Â Â Â Â Â Âmatch_async_exec(egc, dev);
> + Â Â Â Â}
> + Â Â} else {
> + Â Â Â Âdev->callback(egc, dev, ERROR_FAIL);
> + Â Â}
> +}
> +
> +static void drbd_match_async(libxl__remus_device *dev)
> +{
> + Â Âif (dev->kind != LIBXL__REMUS_DEVICE_DISK)
> + Â Â Â Â_exit(-ERROR_NOT_MATCH);
> +
> + Â Â_exit(0);
> +}
> +
> +static void drbd_match(libxl__remus_device_ops *self,
> + Â Â Â Â Â Â Â Â Â Â Âlibxl__remus_device *dev)
> +{
> + Â Âdrbd_async_call(dev, drbd_match_async, match_async_call_done);
> +}
> +
> +static void drbd_setup(libxl__remus_device *dev)
> +{
> + Â Âlibxl__remus_drbd_disk *drbd_disk;
> + Â Âconst libxl_device_disk *disk = dev->backend_dev;
> + Â ÂSTATE_AO_GC(dev->rds->ao);
> +
> + Â ÂGCNEW(drbd_disk);
> +    dev->data = drbd_disk;
> +    drbd_disk->path = disk->pdev_path;
> + Â Âdrbd_disk->ackwait = 0;
> + Â Âdrbd_disk->ctl_fd = open(drbd_disk->path, O_RDONLY);
> + Â Âif (drbd_disk->ctl_fd < 0)
> + Â Â Â Âdev->callback(dev->rds->egc, dev, ERROR_FAIL);
> + Â Âelse
> + Â Â Â Âdev->callback(dev->rds->egc, dev, 0);
> +}
> +
> +static void drbd_teardown(libxl__remus_device *dev)
> +{
> + Â Âlibxl__remus_drbd_disk *drbd_disk = dev->data;
> +
> + Â Âclose(drbd_disk->ctl_fd);
> + Â Âdev->callback(dev->rds->egc, dev, 0);
> +}
> +
> +libxl__remus_device_ops remus_device_drbd_disk = {
> + Â Â.init = drbd_init,
> + Â Â.destroy = drbd_destroy,
> + Â Â.postsuspend = drbd_postsuspend,
> + Â Â.preresume = drbd_preresume,
> + Â Â.match = drbd_match,
> + Â Â.setup = drbd_setup,
> + Â Â.teardown = drbd_teardown,
> +};
> --
> 1.9.1
>

I am fine with this patch. My only feedback is to add an option to the xl remus command to explicitly disable disk buffering, similar to the existing switch that disables network buffering.

Acked-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.