[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH COLO v5 26/29] COLO nic: implement COLO nic subkind
implement COLO nic subkind. Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx> --- tools/hotplug/Linux/Makefile | 1 + tools/hotplug/Linux/colo-proxy-setup | 128 ++++++++++++++ tools/libxl/Makefile | 1 + tools/libxl/libxl_colo_nic.c | 313 +++++++++++++++++++++++++++++++++++ tools/libxl/libxl_internal.h | 5 + tools/libxl/libxl_types.idl | 2 + 6 files changed, 450 insertions(+) create mode 100755 tools/hotplug/Linux/colo-proxy-setup create mode 100644 tools/libxl/libxl_colo_nic.c diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile index d94a9cb..1c28bea 100644 --- a/tools/hotplug/Linux/Makefile +++ b/tools/hotplug/Linux/Makefile @@ -25,6 +25,7 @@ XEN_SCRIPTS += vscsi XEN_SCRIPTS += block-iscsi XEN_SCRIPTS += block-drbd-probe XEN_SCRIPTS += $(XEN_SCRIPTS-y) +XEN_SCRIPTS += colo-proxy-setup SUBDIRS-$(CONFIG_SYSTEMD) += systemd diff --git a/tools/hotplug/Linux/colo-proxy-setup b/tools/hotplug/Linux/colo-proxy-setup new file mode 100755 index 0000000..850f672 --- /dev/null +++ b/tools/hotplug/Linux/colo-proxy-setup @@ -0,0 +1,128 @@ +#! /bin/bash + +dir=$(dirname "$0") +. "$dir/xen-hotplug-common.sh" +. "$dir/hotplugpath.sh" +. 
"$dir/xen-network-ft.sh" + +findCommand "$@" + +if [ "$command" != "setup" -a "$command" != "teardown" ] +then + echo "Invalid command: $command" + log err "Invalid command: $command" + exit 1 +fi + +evalVariables "$@" + +: ${vifname:?} +: ${forwarddev:?} +: ${mode:?} +: ${forwardbr:?} +: ${index:?} +: ${bridge:?} + +if [ "$mode" != "primary" -a "$mode" != "secondary" ] +then + echo "Invalid mode: $mode" + log err "Invalid mode: $mode" + exit 1 +fi + +if [ $index -lt 0 ] || [ $index -gt 100 ]; then + echo "index overflow" + exit 1 +fi + +function setup_primary() +{ + do_without_error tc qdisc add dev $vifname root handle 1: prio + do_without_error tc filter add dev $vifname parent 1: protocol ip prio 10 \ + u32 match u32 0 0 flowid 1:2 action mirred egress mirror dev $forwarddev + do_without_error tc filter add dev $vifname parent 1: protocol arp prio 11 \ + u32 match u32 0 0 flowid 1:2 action mirred egress mirror dev $forwarddev + do_without_error tc filter add dev $vifname parent 1: protocol ipv6 prio \ + 12 u32 match u32 0 0 flowid 1:2 action mirred egress mirror \ + dev $forwarddev + + do_without_error modprobe nf_conntrack_ipv4 + do_without_error modprobe xt_PMYCOLO sec_dev=$forwarddev + + do_without_error /usr/local/sbin/iptables -t mangle -I PREROUTING -m physdev --physdev-in \ + $vifname -j PMYCOLO --index $index + do_without_error /usr/local/sbin/ip6tables -t mangle -I PREROUTING -m physdev --physdev-in \ + $vifname -j PMYCOLO --index $index + do_without_error /usr/local/sbin/arptables -I INPUT -i $forwarddev -j MARK --set-mark $index +} + +function teardown_primary() +{ + do_without_error tc filter del dev $vifname parent 1: protocol ip prio 10 u32 match u32 \ + 0 0 flowid 1:2 action mirred egress mirror dev $forwarddev + do_without_error tc filter del dev $vifname parent 1: protocol arp prio 11 u32 match u32 \ + 0 0 flowid 1:2 action mirred egress mirror dev $forwarddev + do_without_error tc filter del dev $vifname parent 1: protocol ipv6 prio 12 u32 
match u32 \ + 0 0 flowid 1:2 action mirred egress mirror dev $forwarddev + do_without_error tc qdisc del dev $vifname root handle 1: prio + + do_without_error /usr/local/sbin/iptables -t mangle -F + do_without_error /usr/local/sbin/ip6tables -t mangle -F + do_without_error /usr/local/sbin/arptables -F + do_without_error rmmod xt_PMYCOLO +} + +function setup_secondary() +{ + do_without_error brctl delif $bridge $vifname + do_without_error brctl addif $forwardbr $vifname + do_without_error brctl addif $forwardbr $forwarddev + do_without_error modprobe xt_SECCOLO + + do_without_error /usr/local/sbin/iptables -t mangle -I PREROUTING -m physdev --physdev-in \ + $vifname -j SECCOLO --index $index + do_without_error /usr/local/sbin/ip6tables -t mangle -I PREROUTING -m physdev --physdev-in \ + $vifname -j SECCOLO --index $index +} + +function teardown_secondary() +{ + do_without_error brctl delif $forwardbr $forwarddev + do_without_error brctl delif $forwardbr $vifname + do_without_error brctl addif $bridge $vifname + + do_without_error /usr/local/sbin/iptables -t mangle -F + do_without_error /usr/local/sbin/ip6tables -t mangle -F + do_without_error rmmod xt_SECCOLO +} + +case "$command" in + setup) + if [ "$mode" = "primary" ] + then + setup_primary + else + setup_secondary + fi + + success + ;; + teardown) + if [ "$mode" = "primary" ] + then + teardown_primary + else + teardown_secondary + fi + ;; +esac + +if [ "$mode" = "primary" ] +then + log debug "Successful colo-proxy-setup $command for $vifname." \ + " vifname: $vifname, index: $index, forwarddev: $forwarddev." +else + log debug "Successful colo-proxy-setup $command for $vifname." \ + " vifname: $vifname, index: $index, forwarddev: $forwarddev,"\ + " forwardbr: $forwardbr." 
+fi diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index c74ba79..84d8278 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -60,6 +60,7 @@ LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o LIBXL_OBJS-y += libxl_colo_qdisk.o LIBXL_OBJS-y += libxl_colo_proxy.o +LIBXL_OBJS-y += libxl_colo_nic.o LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o diff --git a/tools/libxl/libxl_colo_nic.c b/tools/libxl/libxl_colo_nic.c new file mode 100644 index 0000000..3d2e493 --- /dev/null +++ b/tools/libxl/libxl_colo_nic.c @@ -0,0 +1,313 @@ +/* + * Copyright (C) 2014 FUJITSU LIMITED + * Author: Wen Congyang <wency@xxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ */ + +#include "libxl_osdeps.h" /* must come before any other headers */ + +#include "libxl_internal.h" + +typedef struct libxl__colo_device_nic { + int devid; + const char *vif; +} libxl__colo_device_nic; + +enum { + primary, + secondary, +}; + + +/* ========== init() and cleanup() ========== */ +int init_subkind_colo_nic(libxl__checkpoint_devices_state *cds) +{ + return 0; +} + +void cleanup_subkind_colo_nic(libxl__checkpoint_devices_state *cds) +{ +} + +/* ========== helper functions ========== */ +static void colo_save_setup_script_cb(libxl__egc *egc, + libxl__async_exec_state *aes, + int status); +static void colo_save_teardown_script_cb(libxl__egc *egc, + libxl__async_exec_state *aes, + int status); + +/* + * If the device has a vifname, then use that instead of + * the vifX.Y format. + * it must ONLY be used for remus because if driver domains + * were in use it would constitute a security vulnerability. + */ +static const char *get_vifname(libxl__checkpoint_device *dev, + const libxl_device_nic *nic) +{ + const char *vifname = NULL; + const char *path; + int rc; + + STATE_AO_GC(dev->cds->ao); + + /* Convenience aliases */ + const uint32_t domid = dev->cds->domid; + + path = GCSPRINTF("%s/backend/vif/%d/%d/vifname", + libxl__xs_get_dompath(gc, 0), domid, nic->devid); + rc = libxl__xs_read_checked(gc, XBT_NULL, path, &vifname); + if (!rc && !vifname) { + vifname = libxl__device_nic_devname(gc, domid, + nic->devid, + nic->nictype); + } + + return vifname; +} + +/* + * the script needs the following env & args + * $vifname + * $forwarddev + * $mode(primary/secondary) + * $forwardbr + * $index + * $bridge + * setup/teardown as command line arg. 
+ */ +static void setup_async_exec(libxl__checkpoint_device *dev, char *op, int side, + char *colo_proxy_script) +{ + int arraysize, nr = 0; + char **env = NULL, **args = NULL; + libxl__colo_device_nic *colo_nic = dev->concrete_data; + libxl__checkpoint_devices_state *cds = dev->cds; + libxl__async_exec_state *aes = &dev->aodev.aes; + const libxl_device_nic *nic = dev->backend_dev; + libxl__colo_save_state *css = CONTAINER_OF(dev->cds, *css, cds); + + STATE_AO_GC(cds->ao); + + /* Convenience aliases */ + const char *const vif = colo_nic->vif; + + arraysize = 13; + GCNEW_ARRAY(env, arraysize); + env[nr++] = "vifname"; + env[nr++] = libxl__strdup(gc, vif); + env[nr++] = "forwarddev"; + env[nr++] = libxl__strdup(gc, nic->forwarddev); + env[nr++] = "mode"; + if (side == primary) + env[nr++] = "primary"; + else + env[nr++] = "secondary"; + env[nr++] = "forwardbr"; + env[nr++] = libxl__strdup(gc, nic->forwardbr); + env[nr++] = "index"; + env[nr++] = GCSPRINTF("%d", css->cps.index); + env[nr++] = "bridge"; + env[nr++] = libxl__strdup(gc, nic->bridge); + env[nr++] = NULL; + assert(nr == arraysize); + + arraysize = 3; nr = 0; + GCNEW_ARRAY(args, arraysize); + args[nr++] = colo_proxy_script; + args[nr++] = op; + args[nr++] = NULL; + assert(nr == arraysize); + + aes->ao = dev->cds->ao; + aes->what = GCSPRINTF("%s %s", args[0], args[1]); + aes->env = env; + aes->args = args; + aes->timeout_ms = LIBXL_HOTPLUG_TIMEOUT * 1000; + aes->stdfds[0] = -1; + aes->stdfds[1] = -1; + aes->stdfds[2] = -1; + + if (!strcmp(op, "teardown")) + aes->callback = colo_save_teardown_script_cb; + else + aes->callback = colo_save_setup_script_cb; +} + +/* ========== setup() and teardown() ========== */ +static void colo_nic_setup(libxl__egc *egc, libxl__checkpoint_device *dev, + int side, char *colo_proxy_script) +{ + int rc; + libxl__colo_device_nic *colo_nic; + const libxl_device_nic *nic = dev->backend_dev; + + STATE_AO_GC(dev->cds->ao); + + /* + * There's no subkind of nic devices, so nic ops is 
always matched + * with nic devices, we begin to setup the nic device + */ + dev->matched = 1; + + if (!nic->forwarddev || !nic->forwardbr) { + rc = ERROR_FAIL; + goto out; + } + + GCNEW(colo_nic); + dev->concrete_data = colo_nic; + colo_nic->devid = nic->devid; + colo_nic->vif = get_vifname(dev, nic); + if (!colo_nic->vif) { + rc = ERROR_FAIL; + goto out; + } + + setup_async_exec(dev, "setup", side, colo_proxy_script); + rc = libxl__async_exec_start(gc, &dev->aodev.aes); + if (rc) + goto out; + + return; + +out: + dev->aodev.rc = rc; + dev->aodev.callback(egc, &dev->aodev); +} + +static void colo_save_setup_script_cb(libxl__egc *egc, + libxl__async_exec_state *aes, + int status) +{ + libxl__ao_device *aodev = CONTAINER_OF(aes, *aodev, aes); + libxl__checkpoint_device *dev = CONTAINER_OF(aodev, *dev, aodev); + libxl__colo_device_nic *colo_nic = dev->concrete_data; + libxl__checkpoint_devices_state *cds = dev->cds; + const char *out_path_base, *hotplug_error = NULL; + int rc; + + STATE_AO_GC(cds->ao); + + /* Convenience aliases */ + const uint32_t domid = cds->domid; + const int devid = colo_nic->devid; + const char *const vif = colo_nic->vif; + + out_path_base = GCSPRINTF("%s/colo_proxy/%d", + libxl__xs_libxl_path(gc, domid), devid); + + rc = libxl__xs_read_checked(gc, XBT_NULL, + GCSPRINTF("%s/hotplug-error", out_path_base), + &hotplug_error); + if (rc) + goto out; + + if (hotplug_error) { + LOG(ERROR, "colo_proxy script %s setup failed for vif %s: %s", + aes->args[0], vif, hotplug_error); + rc = ERROR_FAIL; + goto out; + } + + if (status) { + rc = ERROR_FAIL; + goto out; + } + + rc = 0; + +out: + aodev->rc = rc; + aodev->callback(egc, aodev); +} + +static void colo_nic_teardown(libxl__egc *egc, libxl__checkpoint_device *dev, + int side, char *colo_proxy_script) +{ + int rc; + STATE_AO_GC(dev->cds->ao); + + setup_async_exec(dev, "teardown", side, colo_proxy_script); + + rc = libxl__async_exec_start(gc, &dev->aodev.aes); + if (rc) + goto out; + + return; + +out: + 
dev->aodev.rc = rc; + dev->aodev.callback(egc, &dev->aodev); +} + +static void colo_save_teardown_script_cb(libxl__egc *egc, + libxl__async_exec_state *aes, + int status) +{ + int rc; + libxl__ao_device *aodev = CONTAINER_OF(aes, *aodev, aes); + + if (status) + rc = ERROR_FAIL; + else + rc = 0; + + aodev->rc = rc; + aodev->callback(egc, aodev); +} + +/* ======== primary ======== */ +static void colo_nic_save_setup(libxl__egc *egc, libxl__checkpoint_device *dev) +{ + libxl__colo_save_state *css = CONTAINER_OF(dev->cds, *css, cds); + + colo_nic_setup(egc, dev, primary, css->colo_proxy_script); +} + +static void colo_nic_save_teardown(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + libxl__colo_save_state *css = CONTAINER_OF(dev->cds, *css, cds); + + colo_nic_teardown(egc, dev, primary, css->colo_proxy_script); +} + +const libxl__checkpoint_device_instance_ops colo_save_device_nic = { + .kind = LIBXL__DEVICE_KIND_VIF, + .setup = colo_nic_save_setup, + .teardown = colo_nic_save_teardown, +}; + +/* ======== secondary ======== */ +static void colo_nic_restore_setup(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + libxl__colo_restore_state *crs = CONTAINER_OF(dev->cds, *crs, cds); + + colo_nic_setup(egc, dev, secondary, crs->colo_proxy_script); +} + +static void colo_nic_restore_teardown(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + libxl__colo_restore_state *crs = CONTAINER_OF(dev->cds, *crs, cds); + + colo_nic_teardown(egc, dev, secondary, crs->colo_proxy_script); +} + +const libxl__checkpoint_device_instance_ops colo_restore_device_nic = { + .kind = LIBXL__DEVICE_KIND_VIF, + .setup = colo_nic_restore_setup, + .teardown = colo_nic_restore_teardown, +}; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index b091958..63101b4 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2723,6 +2723,8 @@ void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds); int 
init_subkind_qdisk(libxl__checkpoint_devices_state *cds); void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds); int colo_qdisk_preresume(libxl_ctx *ctx, domid_t domid); +int init_subkind_colo_nic(libxl__checkpoint_devices_state *cds); +void cleanup_subkind_colo_nic(libxl__checkpoint_devices_state *cds); typedef void libxl__checkpoint_callback(libxl__egc *, libxl__checkpoint_devices_state *, @@ -2847,6 +2849,7 @@ struct libxl__colo_save_state { libxl__checkpoint_devices_state cds; int send_fd; int recv_fd; + char *colo_proxy_script; /* private */ libxl__datacopier_state dc; @@ -3197,6 +3200,7 @@ struct libxl__colo_restore_state { int pae; int superpages; libxl__colo_callback *callback; + char *colo_proxy_script; /* private, colo restore checkpoint state */ libxl__domain_create_cb *saved_cb; @@ -3219,6 +3223,7 @@ struct libxl__domain_create_state { /* private to domain_create */ int guest_domid; int checkpointed_stream; + const char *colo_proxy_script; libxl__domain_build_state build_state; libxl__colo_restore_state crs; libxl__bootloader_state bl; diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index 87c52b9..4013cd3 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -530,6 +530,8 @@ libxl_device_nic = Struct("device_nic", [ ("rate_bytes_per_interval", uint64), ("rate_interval_usecs", uint32), ("gatewaydev", string), + ("forwarddev", string), + ("forwardbr", string) ]) libxl_device_pci = Struct("device_pci", [ -- 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |