[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 06/13 V6] remus: implement the API to setup network buffering
At 01/27/2014 06:30 AM, Shriram Rajagopalan Wrote: > On Tue, Jan 21, 2014 at 1:05 AM, Lai Jiangshan <laijs@xxxxxxxxxxxxxx> wrote: > >> From: Shriram Rajagopalan <rshriram@xxxxxxxxx> >> >> The following steps are taken during setup: >> a) call the hotplug script for each vif to setup its network buffer >> >> b) establish a dedicated remus context containing libnl related >> state (netlink sockets, qdisc caches, etc.,) >> >> c) Obtain handles to plug qdiscs installed on the IFB devices >> chosen by the hotplug scripts. >> >> Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx> >> Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx> >> Reviewed-by: Wen Congyang <wency@xxxxxxxxxxxxxx> >> --- >> docs/misc/xenstore-paths.markdown | 4 + >> tools/libxl/Makefile | 2 + >> tools/libxl/libxl_dom.c | 7 +- >> tools/libxl/libxl_internal.h | 11 + >> tools/libxl/libxl_netbuffer.c | 419 >> ++++++++++++++++++++++++++++++++++++++ >> tools/libxl/libxl_nonetbuffer.c | 6 + >> tools/libxl/libxl_remus.c | 35 ++++ >> 7 files changed, 479 insertions(+), 5 deletions(-) >> create mode 100644 tools/libxl/libxl_remus.c >> >> diff --git a/docs/misc/xenstore-paths.markdown >> b/docs/misc/xenstore-paths.markdown >> index 70ab7f4..7a0d2c9 100644 >> --- a/docs/misc/xenstore-paths.markdown >> +++ b/docs/misc/xenstore-paths.markdown >> @@ -385,6 +385,10 @@ The guest's virtual time offset from UTC in seconds. >> >> The device model version for a domain. >> >> +#### /libxl/$DOMID/remus/netbuf/$DEVID/ifb = STRING [n,INTERNAL] >> + >> +IFB device used by Remus to buffer network output from the associated vif. >> + >> [BLKIF]: >> http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,blkif.h.html >> [FBIF]: >> http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,fbif.h.html >> [HVMPARAMS]: >> http://xenbits.xen.org/docs/unstable/hypercall/include,public,hvm,params.h.html >> diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile >> index 84a467c..218f55e 100644 >> --- a/tools/libxl/Makefile >> +++ b/tools/libxl/Makefile >> @@ -52,6 +52,8 @@ else >> LIBXL_OBJS-y += libxl_nonetbuffer.o >> endif >> >> +LIBXL_OBJS-y += libxl_remus.o >> + >> LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o >> LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o >> >> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c >> index 8d63f90..e3e9f6f 100644 >> --- a/tools/libxl/libxl_dom.c >> +++ b/tools/libxl/libxl_dom.c >> @@ -753,9 +753,6 @@ int libxl__toolstack_restore(uint32_t domid, const >> uint8_t *buf, >> >> /*==================== Domain suspend (save) ====================*/ >> >> -static void domain_suspend_done(libxl__egc *egc, >> - libxl__domain_suspend_state *dss, int rc); >> - >> /*----- complicated callback, called by xc_domain_save -----*/ >> >> /* >> @@ -1508,8 +1505,8 @@ static void >> save_device_model_datacopier_done(libxl__egc *egc, >> dss->save_dm_callback(egc, dss, our_rc); >> } >> >> -static void domain_suspend_done(libxl__egc *egc, >> - libxl__domain_suspend_state *dss, int rc) >> +void domain_suspend_done(libxl__egc *egc, >> + libxl__domain_suspend_state *dss, int rc) >> { >> STATE_AO_GC(dss->ao); >> >> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h >> index 2f64382..0430307 100644 >> --- a/tools/libxl/libxl_internal.h >> +++ b/tools/libxl/libxl_internal.h >> @@ -2313,6 +2313,17 @@ typedef struct libxl__remus_state { >> >> _hidden int libxl__netbuffer_enabled(libxl__gc *gc); >> >> +_hidden void domain_suspend_done(libxl__egc *egc, >> + libxl__domain_suspend_state *dss, >> + int rc); >> + >> +_hidden void libxl__remus_setup_done(libxl__egc *egc, >> + libxl__domain_suspend_state *dss, >> + int rc); >> + >> +_hidden void libxl__remus_netbuf_setup(libxl__egc *egc, >> + libxl__domain_suspend_state *dss); >> + >> struct libxl__domain_suspend_state { >> /* set by caller of libxl__domain_suspend */ >> libxl__ao *ao; >> diff --git a/tools/libxl/libxl_netbuffer.c b/tools/libxl/libxl_netbuffer.c >> index 8e23d75..0be876c 100644 >> --- a/tools/libxl/libxl_netbuffer.c >> +++ b/tools/libxl/libxl_netbuffer.c >> @@ -17,11 +17,430 @@ >> >> #include "libxl_internal.h" >> >> +#include <netlink/cache.h> >> +#include <netlink/socket.h> >> +#include <netlink/attr.h> >> +#include <netlink/route/link.h> >> +#include <netlink/route/route.h> >> +#include <netlink/route/qdisc.h> >> +#include <netlink/route/qdisc/plug.h> >> + >> +typedef struct libxl__remus_netbuf_state { >> + struct rtnl_qdisc **netbuf_qdisc_list; >> + struct nl_sock *nlsock; >> + struct nl_cache *qdisc_cache; >> + const char **vif_list; >> + const char **ifb_list; >> + uint32_t num_netbufs; >> + uint32_t unused; >> +} libxl__remus_netbuf_state; >> + >> int libxl__netbuffer_enabled(libxl__gc *gc) >> { >> return 1; >> } >> >> +/* If the device has a vifname, then use that instead of >> + * the vifX.Y format. >> + */ >> +static const char *get_vifname(libxl__gc *gc, uint32_t domid, >> + libxl_device_nic *nic) >> +{ >> + const char *vifname = NULL; >> + const char *path; >> + int rc; >> + >> + path = libxl__sprintf(gc, "%s/backend/vif/%d/%d/vifname", >> + libxl__xs_get_dompath(gc, 0), domid, >> nic->devid); >> + rc = libxl__xs_read_checked(gc, XBT_NULL, path, &vifname); >> + if (rc < 0) { >> + /* use the default name */ >> + vifname = libxl__device_nic_devname(gc, domid, >> + nic->devid, >> + nic->nictype); >> + } >> + >> + return vifname; >> > > IanJ's feedback last time was that the error code rc needs to be checked. > If its a failure, then return NULL to the caller. If its a ENOENT, then use > the > default name. OK. This should be if (!rc && !vifname) { /* use the default name */ .... } > > +} >> + >> +static const char **get_guest_vif_list(libxl__gc *gc, uint32_t domid, >> + int *num_vifs) >> +{ >> + libxl_device_nic *nics = NULL; >> + int nb, i = 0; >> + const char **vif_list = NULL; >> + >> + *num_vifs = 0; >> + nics = libxl_device_nic_list(CTX, domid, &nb); >> + if (!nics) >> + return NULL; >> + >> + /* Ensure that none of the vifs are backed by driver domains */ >> + for (i = 0; i < nb; i++) { >> + if (nics[i].backend_domid != LIBXL_TOOLSTACK_DOMID) { >> + LOG(ERROR, "vif %s has driver domain (%u) as its backend. " >> + "Network buffering is not supported with driver domains", >> + get_vifname(gc, domid, &nics[i]), nics[i].backend_domid); >> > > And if the previous feedback were to be incorporated, then get_vifname's > return > value should be assigned to a variable and checked before printing or using > it for > other purposes. OK > > >> + *num_vifs = -1; >> + goto out; >> + } >> + } >> + >> + GCNEW_ARRAY(vif_list, nb); >> + for (i = 0; i < nb; ++i) { >> + vif_list[i] = get_vifname(gc, domid, &nics[i]); >> + if (!vif_list[i]) { >> + vif_list = NULL; >> + goto out; >> + } >> + } >> + *num_vifs = nb; >> + >> + out: >> + for (i = 0; i < nb; i++) >> + libxl_device_nic_dispose(&nics[i]); >> + free(nics); >> + return vif_list; >> +} >> + >> +static void free_qdiscs(libxl__remus_netbuf_state *netbuf_state) >> +{ >> + int i; >> + struct rtnl_qdisc *qdisc = NULL; >> + >> + /* free qdiscs */ >> + for (i = 0; i < netbuf_state->num_netbufs; i++) { >> + qdisc = netbuf_state->netbuf_qdisc_list[i]; >> + if (!qdisc) >> + break; >> + >> + nl_object_put((struct nl_object *)qdisc); >> + } >> + >> + /* free qdisc cache */ >> + nl_cache_clear(netbuf_state->qdisc_cache); >> + nl_cache_free(netbuf_state->qdisc_cache); >> + >> + /* close nlsock */ >> + nl_close(netbuf_state->nlsock); >> + >> + /* free nlsock */ >> + nl_socket_free(netbuf_state->nlsock); >> +} >> + >> > > This code (free_qdiscs) is new. Have you tested it? > While the control flow looks pretty sane, libnl has been evolving > a bit ever since the 3.* series. > > If init_qdisc fails, it calls free_qdisc(). If any other setup stage after > network buffering fails, it would invoke the teardown code, which also > calls free_qdisc(). This may end up in a segfault. > > I suggest adding a simple check to see if nlsock/qdisc_cache are NULL > before attempting to execute the rest of the function. And after you free > the > qdisc_cache & nlsock, set them to NULL. Yes, free_qdisc() may be called twice. Will fix it in the next version. > > >> +static int init_qdiscs(libxl__gc *gc, >> + libxl__remus_state *remus_state) >> +{ >> + int i, ret, ifindex; >> + struct rtnl_link *ifb = NULL; >> + struct rtnl_qdisc *qdisc = NULL; >> + >> + /* Convenience aliases */ >> + libxl__remus_netbuf_state * const netbuf_state = >> remus_state->netbuf_state; >> + const int num_netbufs = netbuf_state->num_netbufs; >> + const char ** const ifb_list = netbuf_state->ifb_list; >> + >> + /* Now that we have brought up IFB devices with plug qdisc for >> + * each vif, lets get a netlink handle on the plug qdisc for use >> + * during checkpointing. >> + */ >> + netbuf_state->nlsock = nl_socket_alloc(); >> + if (!netbuf_state->nlsock) { >> + LOG(ERROR, "cannot allocate nl socket"); >> + goto out; >> + } >> + >> + ret = nl_connect(netbuf_state->nlsock, NETLINK_ROUTE); >> + if (ret) { >> + LOG(ERROR, "failed to open netlink socket: %s", >> + nl_geterror(ret)); >> + goto out; >> + } >> + >> + /* get list of all qdiscs installed on network devs. */ >> + ret = rtnl_qdisc_alloc_cache(netbuf_state->nlsock, >> + &netbuf_state->qdisc_cache); >> + if (ret) { >> + LOG(ERROR, "failed to allocate qdisc cache: %s", >> + nl_geterror(ret)); >> + goto out; >> + } >> + >> + /* list of handles to plug qdiscs */ >> + GCNEW_ARRAY(netbuf_state->netbuf_qdisc_list, num_netbufs); >> + >> + for (i = 0; i < num_netbufs; ++i) { >> + >> + /* get a handle to the IFB interface */ >> + ifb = NULL; >> + ret = rtnl_link_get_kernel(netbuf_state->nlsock, 0, >> + ifb_list[i], &ifb); >> + if (ret) { >> + LOG(ERROR, "cannot obtain handle for %s: %s", ifb_list[i], >> + nl_geterror(ret)); >> + goto out; >> + } >> + >> + ifindex = rtnl_link_get_ifindex(ifb); >> + if (!ifindex) { >> + LOG(ERROR, "interface %s has no index", ifb_list[i]); >> + goto out; >> + } >> + >> + /* Get a reference to the root qdisc installed on the IFB, by >> + * querying the qdisc list we obtained earlier. The netbufscript >> + * sets up the plug qdisc as the root qdisc, so we don't have to >> + * search the entire qdisc tree on the IFB dev. >> + >> + * There is no need to explicitly free this qdisc as its just a >> + * reference from the qdisc cache we allocated earlier. >> + */ >> + qdisc = rtnl_qdisc_get_by_parent(netbuf_state->qdisc_cache, >> ifindex, >> + TC_H_ROOT); >> + >> + if (qdisc) { >> + const char *tc_kind = rtnl_tc_get_kind(TC_CAST(qdisc)); >> + /* Sanity check: Ensure that the root qdisc is a plug qdisc. >> */ >> + if (!tc_kind || strcmp(tc_kind, "plug")) { >> + nl_object_put((struct nl_object *)qdisc); >> + LOG(ERROR, "plug qdisc is not installed on %s", >> ifb_list[i]); >> + goto out; >> + } >> + netbuf_state->netbuf_qdisc_list[i] = qdisc; >> + } else { >> + LOG(ERROR, "Cannot get qdisc handle from ifb %s", >> ifb_list[i]); >> + goto out; >> + } >> + rtnl_link_put(ifb); >> + } >> + >> + return 0; >> + >> + out: >> + if (ifb) >> + rtnl_link_put(ifb); >> + free_qdiscs(netbuf_state); >> + return ERROR_FAIL; >> +} >> + >> +static void netbuf_setup_timeout_cb(libxl__egc *egc, >> + libxl__ev_time *ev, >> + const struct timeval *requested_abs) >> +{ >> + libxl__remus_state *remus_state = CONTAINER_OF(ev, *remus_state, >> timeout); >> + >> + /* Convenience aliases */ >> + const int devid = remus_state->dev_id; >> + libxl__remus_netbuf_state *const netbuf_state = >> remus_state->netbuf_state; >> + const char *const vif = netbuf_state->vif_list[devid]; >> + >> + STATE_AO_GC(remus_state->dss->ao); >> + >> + libxl__ev_time_deregister(gc, &remus_state->timeout); >> + assert(libxl__ev_child_inuse(&remus_state->child)); >> + >> + LOG(DEBUG, "killing hotplug script %s (on vif %s) because of timeout", >> + remus_state->netbufscript, vif); >> + >> + if (kill(remus_state->child.pid, SIGKILL)) { >> + LOGEV(ERROR, errno, "unable to kill hotplug script %s [%ld]", >> + remus_state->netbufscript, >> + (unsigned long)remus_state->child.pid); >> + } >> + >> + return; >> +} >> + >> +/* the script needs the following env & args >> + * $vifname >> + * $XENBUS_PATH (/libxl/<domid>/remus/netbuf/<devid>/) >> + * $IFB (for teardown) >> + * setup/teardown as command line arg. >> + * In return, the script writes the name of IFB device (during setup) to >> be >> + * used for output buffering into XENBUS_PATH/ifb >> + */ >> +static int exec_netbuf_script(libxl__gc *gc, libxl__remus_state >> *remus_state, >> + char *op, libxl__ev_child_callback *death) >> +{ >> + int arraysize = 7, nr = 0; >> + char **env = NULL, **args = NULL; >> + pid_t pid; >> + >> + /* Convenience aliases */ >> + libxl__ev_child *const child = &remus_state->child; >> + libxl__ev_time *const timeout = &remus_state->timeout; >> + char *const script = libxl__strdup(gc, remus_state->netbufscript); >> + const uint32_t domid = remus_state->dss->domid; >> + const int devid = remus_state->dev_id; >> + libxl__remus_netbuf_state *const netbuf_state = >> remus_state->netbuf_state; >> + const char *const vif = netbuf_state->vif_list[devid]; >> + const char *const ifb = netbuf_state->ifb_list[devid]; >> + >> > > Please set arraysize to 7 here, instead of the beginning of the function. > Its more readable that way. OK. Thanks Wen Congyang > > + GCNEW_ARRAY(env, arraysize); >> + env[nr++] = "vifname"; >> + env[nr++] = libxl__strdup(gc, vif); >> + env[nr++] = "XENBUS_PATH"; >> + env[nr++] = GCSPRINTF("%s/remus/netbuf/%d", >> + libxl__xs_libxl_path(gc, domid), devid); >> + if (!strcmp(op, "teardown")) { >> + env[nr++] = "IFB"; >> + env[nr++] = libxl__strdup(gc, ifb); >> + } >> + env[nr++] = NULL; >> + assert(nr <= arraysize); >> + >> + arraysize = 3; nr = 0; >> + GCNEW_ARRAY(args, arraysize); >> + args[nr++] = script; >> + args[nr++] = op; >> + args[nr++] = NULL; >> + assert(nr == arraysize); >> + >> + /* Set hotplug timeout */ >> + if (libxl__ev_time_register_rel(gc, timeout, >> + netbuf_setup_timeout_cb, >> + LIBXL_HOTPLUG_TIMEOUT * 1000)) { >> + LOG(ERROR, "unable to register timeout for " >> + "netbuf setup script %s on vif %s", script, vif); >> + return ERROR_FAIL; >> + } >> + >> + LOG(DEBUG, "Calling netbuf script: %s %s on vif %s", >> + script, op, vif); >> + >> + /* Fork and exec netbuf script */ >> + pid = libxl__ev_child_fork(gc, child, death); >> + if (pid == -1) { >> + LOG(ERROR, "unable to fork netbuf script %s", script); >> + return ERROR_FAIL; >> + } >> + >> + if (!pid) { >> + /* child: Launch netbuf script */ >> + libxl__exec(gc, -1, -1, -1, args[0], args, env); >> + /* notreached */ >> + abort(); >> + } >> + >> + return 0; >> +} >> + >> >> > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxx > http://lists.xen.org/xen-devel > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |