[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 2 of 2] libxl: make libxl__domain_suspend_callback be asynchronous



# HG changeset patch
# User Shriram Rajagopalan <rshriram@xxxxxxxxx>
# Date 1385310114 28800
# Node ID 2eddacbd701dce5d3bb395ff3c19c5bc7c9cfa68
# Parent  c7fdc18830cd46018ced3afb1cde23121a716a6e
libxl: make libxl__domain_suspend_callback be asynchronous

Mark the suspend callback as asynchronous in the helper stub generator
(libxl_save_msgs_gen.pl).

libxl__domain_suspend_common_callback, the common synchronous core,
which used to be provided directly as the callback function for the
helper machinery, becomes libxl__domain_suspend_callback.  It still
receives the helper's untyped "void *data" argument, but it now returns
void and reports failure through
libxl__xc_domain_saverestore_async_callback_done.  The function is also
refactored to use the previously introduced wait_for_suspend_req_ack and
wait_for_guest_suspend code, which relies on libxl's event timer
machinery instead of usleep calls to implement the wait loops.

The Remus version of the suspend callback is no longer called directly by
the helper machinery.  Instead, after a domain has been successfully
suspended, a Remus callback (libxl__domain_suspend_callback_remus) is
called, in which Remus-specific tasks (e.g., creating a new network
buffer) are executed.

Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
Cc: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>

diff -r c7fdc18830cd -r 2eddacbd701d tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c   Sun Nov 24 08:17:13 2013 -0800
+++ b/tools/libxl/libxl_dom.c   Sun Nov 24 08:21:54 2013 -0800
@@ -1206,16 +1206,13 @@ static void guest_suspended(libxl__domai
                                                      &dss->shs, ok);
 }
 
-int libxl__domain_suspend_common_callback(void *user)
+void libxl__domain_suspend_callback(void *data)
 {
-    libxl__save_helper_state *shs = user;
+    libxl__save_helper_state *shs = data;
     libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
     STATE_AO_GC(dss->ao);
     unsigned long hvm_s_state = 0, hvm_pvdrv = 0;
     int ret;
-    char *state = "suspend";
-    int watchdog;
-    xs_transaction_t t;
 
     /* Convenience aliases */
     const uint32_t domid = dss->domid;
@@ -1231,117 +1228,41 @@ int libxl__domain_suspend_common_callbac
         ret = xc_evtchn_notify(dss->xce, dss->suspend_eventchn);
         if (ret < 0) {
             LOG(ERROR, "xc_evtchn_notify failed ret=%d", ret);
-            return 0;
+            goto err;
         }
         ret = xc_await_suspend(CTX->xch, dss->xce, dss->suspend_eventchn);
         if (ret < 0) {
             LOG(ERROR, "xc_await_suspend failed ret=%d", ret);
-            return 0;
+            goto err;
         }
         dss->guest_responded = 1;
-        goto guest_suspended;
+        guest_suspended(dss);
+        return;
     }
 
+    dss->watchdog = 60;
+    dss->watchdog_starting = 1;
     if (dss->hvm && (!hvm_pvdrv || hvm_s_state)) {
         LOG(DEBUG, "Calling xc_domain_shutdown on HVM domain");
         ret = xc_domain_shutdown(CTX->xch, domid, SHUTDOWN_suspend);
         if (ret < 0) {
             LOGE(ERROR, "xc_domain_shutdown failed");
-            return 0;
+            goto err;
         }
         /* The guest does not (need to) respond to this sort of request. */
         dss->guest_responded = 1;
+        wait_for_guest_suspend(dss);
     } else {
         LOG(DEBUG, "issuing %s suspend request via XenBus control node",
             dss->hvm ? "PVHVM" : "PV");
+        libxl__domain_pvcontrol_write(gc, XBT_NULL, domid, "suspend");
+        LOG(DEBUG, "wait for the guest to acknowledge suspend request");
+        wait_for_suspend_req_ack(dss);
+    }
+    return;
 
-        libxl__domain_pvcontrol_write(gc, XBT_NULL, domid, "suspend");
-
-        LOG(DEBUG, "wait for the guest to acknowledge suspend request");
-        watchdog = 60;
-        while (!strcmp(state, "suspend") && watchdog > 0) {
-            usleep(100000);
-
-            state = libxl__domain_pvcontrol_read(gc, XBT_NULL, domid);
-            if (!state) state = "";
-
-            watchdog--;
-        }
-
-        /*
-         * Guest appears to not be responding. Cancel the suspend
-         * request.
-         *
-         * We re-read the suspend node and clear it within a
-         * transaction in order to handle the case where we race
-         * against the guest catching up and acknowledging the request
-         * at the last minute.
-         */
-        if (!strcmp(state, "suspend")) {
-            LOG(ERROR, "guest didn't acknowledge suspend, cancelling request");
-        retry_transaction:
-            t = xs_transaction_start(CTX->xsh);
-
-            state = libxl__domain_pvcontrol_read(gc, t, domid);
-            if (!state) state = "";
-
-            if (!strcmp(state, "suspend"))
-                libxl__domain_pvcontrol_write(gc, t, domid, "");
-
-            if (!xs_transaction_end(CTX->xsh, t, 0))
-                if (errno == EAGAIN)
-                    goto retry_transaction;
-
-        }
-
-        /*
-         * Final check for guest acknowledgement. The guest may have
-         * acknowledged while we were cancelling the request in which
-         * case we lost the race while cancelling and should continue.
-         */
-        if (!strcmp(state, "suspend")) {
-            LOG(ERROR, "guest didn't acknowledge suspend, request cancelled");
-            return 0;
-        }
-
-        LOG(DEBUG, "guest acknowledged suspend request");
-        dss->guest_responded = 1;
-    }
-
-    LOG(DEBUG, "wait for the guest to suspend");
-    watchdog = 60;
-    while (watchdog > 0) {
-        xc_domaininfo_t info;
-
-        usleep(100000);
-        ret = xc_domain_getinfolist(CTX->xch, domid, 1, &info);
-        if (ret == 1 && info.domain == domid &&
-            (info.flags & XEN_DOMINF_shutdown)) {
-            int shutdown_reason;
-
-            shutdown_reason = (info.flags >> XEN_DOMINF_shutdownshift)
-                & XEN_DOMINF_shutdownmask;
-            if (shutdown_reason == SHUTDOWN_suspend) {
-                LOG(DEBUG, "guest has suspended");
-                goto guest_suspended;
-            }
-        }
-
-        watchdog--;
-    }
-
-    LOG(ERROR, "guest did not suspend");
-    return 0;
-
- guest_suspended:
-    if (dss->hvm) {
-        ret = libxl__domain_suspend_device_model(gc, dss);
-        if (ret) {
-            LOG(ERROR, "libxl__domain_suspend_device_model failed ret=%d", ret);
-            return 0;
-        }
-    }
-    return 1;
+ err:
+    libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, 0);
 }
 
 static inline char *physmap_path(libxl__gc *gc, uint32_t domid,
@@ -1497,31 +1418,6 @@ static int libxl__domain_suspend_callbac
     return 1;
 }
 
-static int libxl__remus_domain_suspend_callback(void *data)
-{
-    libxl__save_helper_state *shs = data;
-    libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
-    libxl__remus_ctx *remus_ctx = dss->remus_ctx;
-    STATE_AO_GC(dss->ao);
-
-    /* REMUS TODO: Issue disk checkpoint reqs. */
-    int ok = libxl__domain_suspend_common_callback(data);
-
-    if (!remus_ctx->netbuf_ctx || !ok) goto out;
-
-    /* The domain was suspended successfully. Start a new network
-     * buffer for the next epoch. If this operation fails, then act
-     * as though domain suspend failed -- libxc exits its infinite
-     * loop and ultimately, the replication stops.
-     */
-    if (libxl__remus_netbuf_start_new_epoch(gc, dss->domid,
-                                            remus_ctx))
-        ok = 0;
-
- out:
-    return ok;
-}
-
 static int libxl__remus_domain_resume_callback(void *data)
 {
     libxl__save_helper_state *shs = data;
@@ -1680,11 +1576,10 @@ void libxl__domain_suspend(libxl__egc *e
 
     memset(callbacks, 0, sizeof(*callbacks));
     if (dss->remus_ctx != NULL) {
-        callbacks->suspend = libxl__remus_domain_suspend_callback;
         callbacks->postcopy = libxl__remus_domain_resume_callback;
         callbacks->checkpoint = libxl__remus_domain_checkpoint_callback;
-    } else
-        callbacks->suspend = libxl__domain_suspend_common_callback;
+    }
+    callbacks->suspend = libxl__domain_suspend_callback;
 
     callbacks->switch_qemu_logdirty = libxl__domain_suspend_common_switch_qemu_logdirty;
     dss->shs.callbacks.save.toolstack_save = libxl__toolstack_save;
diff -r c7fdc18830cd -r 2eddacbd701d tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h      Sun Nov 24 08:17:13 2013 -0800
+++ b/tools/libxl/libxl_internal.h      Sun Nov 24 08:21:54 2013 -0800
@@ -2650,7 +2650,7 @@ _hidden void libxl__xc_domain_save_done(
 void libxl__xc_domain_saverestore_async_callback_done(libxl__egc *egc,
                            libxl__save_helper_state *shs, int return_value);
 
-_hidden int libxl__domain_suspend_common_callback(void *data);
+_hidden void libxl__domain_suspend_callback(void *data);
 _hidden void libxl__domain_suspend_common_switch_qemu_logdirty
                                (int domid, unsigned int enable, void *data);
 _hidden int libxl__toolstack_save(uint32_t domid, uint8_t **buf,
diff -r c7fdc18830cd -r 2eddacbd701d tools/libxl/libxl_save_msgs_gen.pl
--- a/tools/libxl/libxl_save_msgs_gen.pl        Sun Nov 24 08:17:13 2013 -0800
+++ b/tools/libxl/libxl_save_msgs_gen.pl        Sun Nov 24 08:21:54 2013 -0800
@@ -23,7 +23,7 @@ our @msgs = (
                                                  STRING doing_what),
                                                 'unsigned long', 'done',
                                                 'unsigned long', 'total'] ],
-    [  3, 'scxW',   "suspend", [] ],         
+    [  3, 'scxA',   "suspend", [] ],         
     [  4, 'scxW',   "postcopy", [] ],        
     [  5, 'scxA',   "checkpoint", [] ],      
     [  6, 'scxA',   "switch_qemu_logdirty",  [qw(int domid

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.