[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 15/19] libxl: suspend: Async xenstore pvcontrol wait



On Tue, 2014-03-04 at 14:56 +0000, Ian Jackson wrote:
> When negotiating guest suspend via the xenstore pvcontrol protocol
> (ie when the guest does NOT support the evtchn fast suspend protocol):
> 
> Replace the use of loops and usleep with a call to libxl__xswait.
> 
> Also, replace the xenstore transaction loop with one using
> libxl__xs_transaction_start et al.
> 
> There is not intended to be any semantic change, other than to make
> the algorithm properly asynchronous.
> 
> Signed-off-by: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
> CC: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
> CC: Ian Campbell <ian.campbell@xxxxxxxxxx>
> ---
>  tools/libxl/libxl_dom.c      |   93 
> ++++++++++++++++++++++++++----------------
>  tools/libxl/libxl_internal.h |    1 +
>  2 files changed, 59 insertions(+), 35 deletions(-)
> 
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> index 8aceba9..dde7e33 100644
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -1028,6 +1028,8 @@ static void domain_suspend_common_wait_guest(libxl__egc 
> *egc,
>                                               libxl__domain_suspend_state 
> *dss);
>  static void domain_suspend_common_guest_suspended(libxl__egc *egc,
>                                           libxl__domain_suspend_state *dss);
> +static void domain_suspend_common_pvcontrol_suspending(libxl__egc *egc,
> +      libxl__xswait_state *xswa, int rc, const char *state);
>  static void domain_suspend_common_failed(libxl__egc *egc,
>                                           libxl__domain_suspend_state *dss);
>  static void domain_suspend_common_done(libxl__egc *egc,
> @@ -1047,10 +1049,6 @@ static void domain_suspend_callback_common(libxl__egc 
> *egc,
>      STATE_AO_GC(dss->ao);
>      unsigned long hvm_s_state = 0, hvm_pvdrv = 0;
>      int ret;
> -    char *state = "suspend";
> -    int watchdog;
> -    xs_transaction_t t;
> -    int rc;
>  
>      /* Convenience aliases */
>      const uint32_t domid = dss->domid;
> @@ -1096,59 +1094,81 @@ static void domain_suspend_callback_common(libxl__egc 
> *egc,
>  
>      libxl__domain_pvcontrol_write(gc, XBT_NULL, domid, "suspend");
>  
> -    LOG(DEBUG, "wait for the guest to acknowledge suspend request");
> -    watchdog = 60;
> -    while (!domain_suspend_pvcontrol_acked(state) && watchdog > 0) {
> -        usleep(100000);
> +    dss->pvcontrol.path = libxl__domain_pvcontrol_xspath(gc, domid);
> +    if (!dss->pvcontrol.path) goto err;
>  
> -        state = libxl__domain_pvcontrol_read(gc, XBT_NULL, domid);
> +    dss->pvcontrol.ao = ao;
> +    dss->pvcontrol.what = "guest acknowledgement of suspend request";
> +    dss->pvcontrol.timeout_ms = 60 * 1000;
> +    dss->pvcontrol.callback = domain_suspend_common_pvcontrol_suspending;
> +    libxl__xswait_start(gc, &dss->pvcontrol);
> +    return;
>  
> -        watchdog--;
> -    }
> + err:
> +    domain_suspend_common_failed(egc, dss);
> +}
>  
> -    /*
> -     * Guest appears to not be responding. Cancel the suspend
> -     * request.
> -     *
> -     * We re-read the suspend node and clear it within a
> -     * transaction in order to handle the case where we race
> -     * against the guest catching up and acknowledging the request
> -     * at the last minute.
> -     */
> -    if (!domain_suspend_pvcontrol_acked(state)) {
> -        LOG(ERROR, "guest didn't acknowledge suspend, cancelling request");
> +static void domain_suspend_common_pvcontrol_suspending(libxl__egc *egc,
> +      libxl__xswait_state *xswa, int rc, const char *state)
> +{
> +    libxl__domain_suspend_state *dss = CONTAINER_OF(xswa, *dss, pvcontrol);
> +    STATE_AO_GC(dss->ao);
> +    xs_transaction_t t = 0;
> +
> +    if (!rc && !domain_suspend_pvcontrol_acked(state))
> +        /* keep waiting */
> +        return;
> +
> +    libxl__xswait_stop(gc, &dss->pvcontrol);
> +
> +    if (rc == ERROR_TIMEDOUT) {
> +        /*
> +         * Guest appears to not be responding. Cancel the suspend
> +         * request.
> +         *
> +         * We re-read the suspend node and clear it within a
> +         * transaction in order to handle the case where we race
> +         * against the guest catching up and acknowledging the request
> +         * at the last minute.
> +         */
>          for (;;) {
>              rc = libxl__xs_transaction_start(gc, &t);
>              if (rc) goto err;
>  
> -            state = libxl__domain_pvcontrol_read(gc, t, domid);
> +            rc = libxl__xs_read_checked(gc, t, xswa->path, &state);
> +            if (rc) goto err;
> +
> +            if (domain_suspend_pvcontrol_acked(state))
> +                break;
>  
> -            if (!domain_suspend_pvcontrol_acked(state))
> -                libxl__domain_pvcontrol_write(gc, t, domid, "");
> +            rc = libxl__xs_write_checked(gc, t, xswa->path, "");
> +            if (rc) goto err;
>  
>              rc = libxl__xs_transaction_commit(gc, &t);
> -            if (!rc) break;
> +            if (!rc) {
> +                LOG(ERROR,
> +                    "guest didn't acknowledge suspend, cancelling request");
> +                goto err;
> +            }
>              if (rc<0) goto err;
>          }
> -    }
> -
> -    /*
> -     * Final check for guest acknowledgement. The guest may have
> -     * acknowledged while we were cancelling the request in which
> -     * case we lost the race while cancelling and should continue.

This behaviour has gone completely now?

> -     */
> -    if (!domain_suspend_pvcontrol_acked(state)) {
> -        LOG(ERROR, "guest didn't acknowledge suspend, request cancelled");
> +    } else if (rc) {
> +        /* some error in xswait's read of xenstore, already logged */
>          goto err;
>      }
>  
> +    assert(domain_suspend_pvcontrol_acked(state));
>      LOG(DEBUG, "guest acknowledged suspend request");
> +
> +    libxl__xs_transaction_abort(gc, &t);

Is this right/proper in the success case?

If rc != TIMEOUT t may not have been started, although it is initialised
so I suppose this is intended to be ok.

>      dss->guest_responded = 1;
>      domain_suspend_common_wait_guest(egc,dss);
>      return;
>  
>   err:
> +    libxl__xs_transaction_abort(gc, &t);
>      domain_suspend_common_failed(egc, dss);
> +    return;
>  }
>  
>  static void domain_suspend_common_wait_guest(libxl__egc *egc,
> @@ -1215,6 +1235,8 @@ static void domain_suspend_common_done(libxl__egc *egc,
>                                         libxl__domain_suspend_state *dss,
>                                         bool ok)
>  {
> +    EGC_GC;
> +    assert(!libxl__xswait_inuse(&dss->pvcontrol));
>      dss->callback_common_done(egc, dss, ok);
>  }
>  
> @@ -1400,6 +1422,7 @@ void libxl__domain_suspend(libxl__egc *egc, 
> libxl__domain_suspend_state *dss)
>          &dss->shs.callbacks.save.a;
>  
>      logdirty_init(&dss->logdirty);
> +    libxl__xswait_init(&dss->pvcontrol);
>  
>      switch (type) {
>      case LIBXL_DOMAIN_TYPE_HVM: {
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index 97dbd8e..5ac02cb 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -2422,6 +2422,7 @@ struct libxl__domain_suspend_state {
>      int hvm;
>      int xcflags;
>      int guest_responded;
> +    libxl__xswait_state pvcontrol;
>      const char *dm_savefile;
>      int interval; /* checkpoint interval (for Remus) */
>      libxl__save_helper_state shs;



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.