[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 06/13 V6] remus: implement the API to setup network buffering



On Tue, Jan 21, 2014 at 1:05 AM, Lai Jiangshan <laijs@xxxxxxxxxxxxxx> wrote:
From: Shriram Rajagopalan <rshriram@xxxxxxxxx>

The following steps are taken during setup:
 a) call the hotplug script for each vif to setup its network buffer

 b) establish a dedicated remus context containing libnl related
    state (netlink sockets, qdisc caches, etc.,)

 c) Obtain handles to plug qdiscs installed on the IFB devices
    chosen by the hotplug scripts.

Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Reviewed-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
 docs/misc/xenstore-paths.markdown |   4 +
 tools/libxl/Makefile              |   2 +
 tools/libxl/libxl_dom.c           |   7 +-
 tools/libxl/libxl_internal.h      |  11 +
 tools/libxl/libxl_netbuffer.c     | 419 ++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_nonetbuffer.c   |   6 +
 tools/libxl/libxl_remus.c         |  35 ++++
 7 files changed, 479 insertions(+), 5 deletions(-)
 create mode 100644 tools/libxl/libxl_remus.c

diff --git a/docs/misc/xenstore-paths.markdown b/docs/misc/xenstore-paths.markdown
index 70ab7f4..7a0d2c9 100644
--- a/docs/misc/xenstore-paths.markdown
+++ b/docs/misc/xenstore-paths.markdown
@@ -385,6 +385,10 @@ The guest's virtual time offset from UTC in seconds.

 The device model version for a domain.

+#### /libxl/$DOMID/remus/netbuf/$DEVID/ifb = STRING [n,INTERNAL]
+
+IFB device used by Remus to buffer network output from the associated vif.
+
 [BLKIF]: http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,blkif.h.html
 [FBIF]: http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,fbif.h.html
 [HVMPARAMS]: http://xenbits.xen.org/docs/unstable/hypercall/include,public,hvm,params.h.html
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 84a467c..218f55e 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -52,6 +52,8 @@ else
 LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif

+LIBXL_OBJS-y += libxl_remus.o
+
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 8d63f90..e3e9f6f 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -753,9 +753,6 @@ int libxl__toolstack_restore(uint32_t domid, const uint8_t *buf,

 /*==================== Domain suspend (save) ====================*/

-static void domain_suspend_done(libxl__egc *egc,
-                        libxl__domain_suspend_state *dss, int rc);
-
 /*----- complicated callback, called by xc_domain_save -----*/

 /*
@@ -1508,8 +1505,8 @@ static void save_device_model_datacopier_done(libxl__egc *egc,
     dss->save_dm_callback(egc, dss, our_rc);
 }

-static void domain_suspend_done(libxl__egc *egc,
-                        libxl__domain_suspend_state *dss, int rc)
+void domain_suspend_done(libxl__egc *egc,
+                         libxl__domain_suspend_state *dss, int rc)
 {
     STATE_AO_GC(dss->ao);

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 2f64382..0430307 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2313,6 +2313,17 @@ typedef struct libxl__remus_state {

 _hidden int libxl__netbuffer_enabled(libxl__gc *gc);

+_hidden void domain_suspend_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state *dss,
+                                 int rc);
+
+_hidden void libxl__remus_setup_done(libxl__egc *egc,
+                                     libxl__domain_suspend_state *dss,
+                                     int rc);
+
+_hidden void libxl__remus_netbuf_setup(libxl__egc *egc,
+                                       libxl__domain_suspend_state *dss);
+
 struct libxl__domain_suspend_state {
     /* set by caller of libxl__domain_suspend */
     libxl__ao *ao;
diff --git a/tools/libxl/libxl_netbuffer.c b/tools/libxl/libxl_netbuffer.c
index 8e23d75..0be876c 100644
--- a/tools/libxl/libxl_netbuffer.c
+++ b/tools/libxl/libxl_netbuffer.c
@@ -17,11 +17,430 @@

 #include "libxl_internal.h"

+#include <netlink/cache.h>
+#include <netlink/socket.h>
+#include <netlink/attr.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/qdisc.h>
+#include <netlink/route/qdisc/plug.h>
+
+typedef struct libxl__remus_netbuf_state {
+    struct rtnl_qdisc **netbuf_qdisc_list;
+    struct nl_sock *nlsock;
+    struct nl_cache *qdisc_cache;
+    const char **vif_list;
+    const char **ifb_list;
+    uint32_t num_netbufs;
+    uint32_t unused;
+} libxl__remus_netbuf_state;
+
 int libxl__netbuffer_enabled(libxl__gc *gc)
 {
     return 1;
 }

+/* If the device has a vifname, then use that instead of
+ * the vifX.Y format.
+ */
+static const char *get_vifname(libxl__gc *gc, uint32_t domid,
+                               libxl_device_nic *nic)
+{
+    const char *vifname = NULL;
+    const char *path;
+    int rc;
+
+    path = libxl__sprintf(gc, "%s/backend/vif/%d/%d/vifname",
+                          libxl__xs_get_dompath(gc, 0), domid, nic->devid);
+    rc = libxl__xs_read_checked(gc, XBT_NULL, path, &vifname);
+    if (rc < 0) {
+        /* use the default name */
+        vifname = libxl__device_nic_devname(gc, domid,
+                                            nic->devid,
+                                            nic->nictype);
+    }
+
+    return vifname;

IanJ's feedback last time was that the error code rc needs to be checked.
If its a failure, then return NULL to the caller. If its a ENOENT, then use the
default name.

+}
+
+static const char **get_guest_vif_list(libxl__gc *gc, uint32_t domid,
+                                       int *num_vifs)
+{
+    libxl_device_nic *nics = NULL;
+    int nb, i = 0;
+    const char **vif_list = NULL;
+
+    *num_vifs = 0;
+    nics = libxl_device_nic_list(CTX, domid, &nb);
+    if (!nics)
+        return NULL;
+
+    /* Ensure that none of the vifs are backed by driver domains */
+    for (i = 0; i < nb; i++) {
+        if (nics[i].backend_domid != LIBXL_TOOLSTACK_DOMID) {
+            LOG(ERROR, "vif %s has driver domain (%u) as its backend. "
+                "Network buffering is not supported with driver domains",
+                get_vifname(gc, domid, &nics[i]), nics[i].backend_domid);

And if the previous feedback were to be incorporated, then get_vifname's return
value should be assigned to a variable and checked before printing or using it for
other purposes.
 
+            *num_vifs = -1;
+            goto out;
+        }
+    }
+
+    GCNEW_ARRAY(vif_list, nb);
+    for (i = 0; i < nb; ++i) {
+        vif_list[i] = get_vifname(gc, domid, &nics[i]);
+        if (!vif_list[i]) {
+            vif_list = NULL;
+            goto out;
+        }
+    }
+    *num_vifs = nb;
+
+ out:
+    for (i = 0; i < nb; i++)
+        libxl_device_nic_dispose(&nics[i]);
+    free(nics);
+    return vif_list;
+}
+
+static void free_qdiscs(libxl__remus_netbuf_state *netbuf_state)
+{
+    int i;
+    struct rtnl_qdisc *qdisc = NULL;
+
+    /* free qdiscs */
+    for (i = 0; i < netbuf_state->num_netbufs; i++) {
+        qdisc = netbuf_state->netbuf_qdisc_list[i];
+        if (!qdisc)
+            break;
+
+        nl_object_put((struct nl_object *)qdisc);
+    }
+
+    /* free qdisc cache */
+    nl_cache_clear(netbuf_state->qdisc_cache);
+    nl_cache_free(netbuf_state->qdisc_cache);
+
+    /* close nlsock */
+    nl_close(netbuf_state->nlsock);
+
+    /* free nlsock */
+    nl_socket_free(netbuf_state->nlsock);
+}
+

This code (free_qdiscs) is new. Have you tested it?
While the control flow looks pretty sane, libnl has been evolving
a bit ever since the 3.* series.

If init_qdisc fails, it calls free_qdisc(). If any other setup stage after 
network buffering fails, it would invoke the teardown code, which also 
calls free_qdisc(). This may end up in a segfault.

I suggest adding a simple check to see if nlsock/qdisc_cache are NULL
before attempting to execute the rest of the function.  And after you free the 
qdisc_cache & nlsock, set them to NULL. 
 
+static int init_qdiscs(libxl__gc *gc,
+                       libxl__remus_state *remus_state)
+{
+    int i, ret, ifindex;
+    struct rtnl_link *ifb = NULL;
+    struct rtnl_qdisc *qdisc = NULL;
+
+    /* Convenience aliases */
+    libxl__remus_netbuf_state * const netbuf_state = remus_state->netbuf_state;
+    const int num_netbufs = netbuf_state->num_netbufs;
+    const char ** const ifb_list = netbuf_state->ifb_list;
+
+    /* Now that we have brought up IFB devices with plug qdisc for
+     * each vif, lets get a netlink handle on the plug qdisc for use
+     * during checkpointing.
+     */
+    netbuf_state->nlsock = nl_socket_alloc();
+    if (!netbuf_state->nlsock) {
+        LOG(ERROR, "cannot allocate nl socket");
+        goto out;
+    }
+
+    ret = nl_connect(netbuf_state->nlsock, NETLINK_ROUTE);
+    if (ret) {
+        LOG(ERROR, "failed to open netlink socket: %s",
+            nl_geterror(ret));
+        goto out;
+    }
+
+    /* get list of all qdiscs installed on network devs. */
+    ret = rtnl_qdisc_alloc_cache(netbuf_state->nlsock,
+                                 &netbuf_state->qdisc_cache);
+    if (ret) {
+        LOG(ERROR, "failed to allocate qdisc cache: %s",
+            nl_geterror(ret));
+        goto out;
+    }
+
+    /* list of handles to plug qdiscs */
+    GCNEW_ARRAY(netbuf_state->netbuf_qdisc_list, num_netbufs);
+
+    for (i = 0; i < num_netbufs; ++i) {
+
+        /* get a handle to the IFB interface */
+        ifb = NULL;
+        ret = rtnl_link_get_kernel(netbuf_state->nlsock, 0,
+                                   ifb_list[i], &ifb);
+        if (ret) {
+            LOG(ERROR, "cannot obtain handle for %s: %s", ifb_list[i],
+                nl_geterror(ret));
+            goto out;
+        }
+
+        ifindex = rtnl_link_get_ifindex(ifb);
+        if (!ifindex) {
+            LOG(ERROR, "interface %s has no index", ifb_list[i]);
+            goto out;
+        }
+
+        /* Get a reference to the root qdisc installed on the IFB, by
+         * querying the qdisc list we obtained earlier. The netbufscript
+         * sets up the plug qdisc as the root qdisc, so we don't have to
+         * search the entire qdisc tree on the IFB dev.
+
+         * There is no need to explicitly free this qdisc as its just a
+         * reference from the qdisc cache we allocated earlier.
+         */
+        qdisc = rtnl_qdisc_get_by_parent(netbuf_state->qdisc_cache, ifindex,
+                                         TC_H_ROOT);
+
+        if (qdisc) {
+            const char *tc_kind = rtnl_tc_get_kind(TC_CAST(qdisc));
+            /* Sanity check: Ensure that the root qdisc is a plug qdisc. */
+            if (!tc_kind || strcmp(tc_kind, "plug")) {
+                nl_object_put((struct nl_object *)qdisc);
+                LOG(ERROR, "plug qdisc is not installed on %s", ifb_list[i]);
+                goto out;
+            }
+            netbuf_state->netbuf_qdisc_list[i] = qdisc;
+        } else {
+            LOG(ERROR, "Cannot get qdisc handle from ifb %s", ifb_list[i]);
+            goto out;
+        }
+        rtnl_link_put(ifb);
+    }
+
+    return 0;
+
+ out:
+    if (ifb)
+        rtnl_link_put(ifb);
+    free_qdiscs(netbuf_state);
+    return ERROR_FAIL;
+}
+
+static void netbuf_setup_timeout_cb(libxl__egc *egc,
+                                    libxl__ev_time *ev,
+                                    const struct timeval *requested_abs)
+{
+    libxl__remus_state *remus_state = CONTAINER_OF(ev, *remus_state, timeout);
+
+    /* Convenience aliases */
+    const int devid = remus_state->dev_id;
+    libxl__remus_netbuf_state *const netbuf_state = remus_state->netbuf_state;
+    const char *const vif = netbuf_state->vif_list[devid];
+
+    STATE_AO_GC(remus_state->dss->ao);
+
+    libxl__ev_time_deregister(gc, &remus_state->timeout);
+    assert(libxl__ev_child_inuse(&remus_state->child));
+
+    LOG(DEBUG, "killing hotplug script %s (on vif %s) because of timeout",
+        remus_state->netbufscript, vif);
+
+    if (kill(remus_state->child.pid, SIGKILL)) {
+        LOGEV(ERROR, errno, "unable to kill hotplug script %s [%ld]",
+              remus_state->netbufscript,
+              (unsigned long)remus_state->child.pid);
+    }
+
+    return;
+}
+
+/* the script needs the following env & args
+ * $vifname
+ * $XENBUS_PATH (/libxl/<domid>/remus/netbuf/<devid>/)
+ * $IFB (for teardown)
+ * setup/teardown as command line arg.
+ * In return, the script writes the name of IFB device (during setup) to be
+ * used for output buffering into XENBUS_PATH/ifb
+ */
+static int exec_netbuf_script(libxl__gc *gc, libxl__remus_state *remus_state,
+                              char *op, libxl__ev_child_callback *death)
+{
+    int arraysize = 7, nr = 0;
+    char **env = NULL, **args = NULL;
+    pid_t pid;
+
+    /* Convenience aliases */
+    libxl__ev_child *const child = &remus_state->child;
+    libxl__ev_time *const timeout = &remus_state->timeout;
+    char *const script = libxl__strdup(gc, remus_state->netbufscript);
+    const uint32_t domid = remus_state->dss->domid;
+    const int devid = remus_state->dev_id;
+    libxl__remus_netbuf_state *const netbuf_state = remus_state->netbuf_state;
+    const char *const vif = netbuf_state->vif_list[devid];
+    const char *const ifb = netbuf_state->ifb_list[devid];
+

Please set arraysize to 7 here, instead of the beginning of the function. 
Its more readable that way.

+    GCNEW_ARRAY(env, arraysize);
+    env[nr++] = "vifname";
+    env[nr++] = libxl__strdup(gc, vif);
+    env[nr++] = "XENBUS_PATH";
+    env[nr++] = GCSPRINTF("%s/remus/netbuf/%d",
+                          libxl__xs_libxl_path(gc, domid), devid);
+    if (!strcmp(op, "teardown")) {
+        env[nr++] = "IFB";
+        env[nr++] = libxl__strdup(gc, ifb);
+    }
+    env[nr++] = NULL;
+    assert(nr <= arraysize);
+
+    arraysize = 3; nr = 0;
+    GCNEW_ARRAY(args, arraysize);
+    args[nr++] = script;
+    args[nr++] = op;
+    args[nr++] = NULL;
+    assert(nr == arraysize);
+
+    /* Set hotplug timeout */
+    if (libxl__ev_time_register_rel(gc, timeout,
+                                    netbuf_setup_timeout_cb,
+                                    LIBXL_HOTPLUG_TIMEOUT * 1000)) {
+        LOG(ERROR, "unable to register timeout for "
+            "netbuf setup script %s on vif %s", script, vif);
+        return ERROR_FAIL;
+    }
+
+    LOG(DEBUG, "Calling netbuf script: %s %s on vif %s",
+        script, op, vif);
+
+    /* Fork and exec netbuf script */
+    pid = libxl__ev_child_fork(gc, child, death);
+    if (pid == -1) {
+        LOG(ERROR, "unable to fork netbuf script %s", script);
+        return ERROR_FAIL;
+    }
+
+    if (!pid) {
+        /* child: Launch netbuf script */
+        libxl__exec(gc, -1, -1, -1, args[0], args, env);
+        /* notreached */
+        abort();
+    }
+
+    return 0;
+}
+


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.