[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH V8 7/8] libxl: control network buffering in remus callbacks



This patch implements the core network buffering logic.
It does the following:
 a) create a new network buffer when the domain is suspended
    (remus_domain_suspend_callback_common_done)
 b) release the previous network buffer pertaining to the
    committed checkpoint (remus_checkpoint_dm_saved)

Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Reviewed-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
 tools/libxl/libxl_dom.c | 77 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 71 insertions(+), 6 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 0695f3e..1272bf6 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1432,7 +1432,24 @@ static void libxl__remus_domain_suspend_callback(void *data)
 static void remus_domain_suspend_callback_common_done(libxl__egc *egc,
                                 libxl__domain_suspend_state *dss, int ok)
 {
-    /* REMUS TODO: Issue disk and network checkpoint reqs. */
+    /* Convenience aliases */
+    libxl__remus_state *const remus_state = dss->remus_state;
+
+    STATE_AO_GC(dss->ao);
+
+    /* REMUS TODO: Issue disk checkpoint reqs. */
+    if (!remus_state->netbuf_state || !ok) goto out;
+
+    /* The domain was suspended successfully. Start a new network
+     * buffer for the next epoch. If this operation fails, then act
+     * as though domain suspend failed -- libxc exits its infinite
+     * loop and ultimately, the replication stops.
+     */
+    if (libxl__remus_netbuf_start_new_epoch(gc, dss->domid,
+                                            remus_state))
+        ok = 0;
+
+out:
     libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
 }
 
@@ -1446,7 +1463,7 @@ static int libxl__remus_domain_resume_callback(void *data)
     if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1))
         return 0;
 
-    /* REMUS TODO: Deal with disk. Start a new network output buffer */
+    /* REMUS TODO: Deal with disk. */
     return 1;
 }
 
@@ -1454,6 +1471,8 @@ static int libxl__remus_domain_resume_callback(void *data)
 
 static void remus_checkpoint_dm_saved(libxl__egc *egc,
                                       libxl__domain_suspend_state *dss, int rc);
+static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev,
+                                  const struct timeval *requested_abs);
 
 static void libxl__remus_domain_checkpoint_callback(void *data)
 {
@@ -1473,10 +1492,56 @@ static void libxl__remus_domain_checkpoint_callback(void *data)
 static void remus_checkpoint_dm_saved(libxl__egc *egc,
                                       libxl__domain_suspend_state *dss, int rc)
 {
-    /* REMUS TODO: Wait for disk and memory ack, release network buffer */
-    /* REMUS TODO: make this asynchronous */
-    assert(!rc); /* REMUS TODO handle this error properly */
-    usleep(dss->interval * 1000);
+    /*
+     * REMUS TODO: Wait for disk and explicit memory ack (through restore
+     * callback from remote) before releasing network buffer.
+     */
+    /* Convenience aliases */
+    libxl__remus_state *const remus_state = dss->remus_state;
+
+    STATE_AO_GC(dss->ao);
+
+    if (rc) {
+        LOG(ERROR, "Failed to save device model. Terminating Remus..");
+        goto out;
+    }
+
+    if (remus_state->netbuf_state) {
+        rc = libxl__remus_netbuf_release_prev_epoch(gc, dss->domid,
+                                                    remus_state);
+        if (rc) {
+            LOG(ERROR, "Failed to release network buffer."
+                " Terminating Remus..");
+            goto out;
+        }
+    }
+
+    /* Set checkpoint interval timeout */
+    rc = libxl__ev_time_register_rel(gc, &remus_state->timeout,
+                                     remus_next_checkpoint,
+                                     dss->interval);
+    if (rc) {
+        LOG(ERROR, "unable to register timeout for next epoch."
+            " Terminating Remus..");
+        goto out;
+    }
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev,
+                                  const struct timeval *requested_abs)
+{
+    libxl__remus_state *remus_state = CONTAINER_OF(ev, *remus_state, timeout);
+
+    /* Convenience aliases */
+    libxl__domain_suspend_state *const dss = remus_state->dss;
+
+    STATE_AO_GC(dss->ao);
+
+    libxl__ev_time_deregister(gc, &remus_state->timeout);
     libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 1);
 }
 
-- 
1.8.3.2


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.