|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 02/13] libxl: react correctly to bootloader pty master POLLHUP
On Thu, 2012-08-02 at 18:27 +0100, Ian Jackson wrote:
> Receive POLLHUP on the bootloader master pty is not an error.
> Hopefully it means that the bootloader has exited and therefore the
> pty slave side has no process group any more. (At least NetBSD
> indicates POLLHUP on the master in this case.)
>
> So send the bootloader SIGTERM; if it has already exited then this has
> no effect (except that on some versions of NetBSD it erroneously
> returns ESRCH and we print a harmless warning) and we will then
> collect the bootloader's exit status and be satisfied.
>
> However, we remember that we have done this so that if we got POLLHUP
> for some other reason than that the bootloader exited we report
> something resembling a useful message.
>
> In order to implement this we need to provide a way for users of
> datacopier to handle POLLHUP rather than treating it as fatal.
>
> We rename bootloader_abort to bootloader_stop since it now no longer
> only applies to error situations.
>
> Signed-off-by: Roger Pau Monne <roger.pau@xxxxxxxxxx>
> Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
>
> -
> Changes in v5:
> * Correctly call dc->callback_pollhup, not dc->callback,
> in datacopier_pollhup_handled.
Therefore:
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
>
> Changes in v4:
> * Track whether we sent SIGTERM due to POLLHUP so we can report
> messages properly.
>
> Changes in v3:
> * datacopier provides new interface for handling POLLHUP
> * Do not ignore errors on the xenconsole pty
> * Rename bootloader_abort.
> ---
> tools/libxl/libxl_aoutils.c | 23 +++++++++++++++++++++++
> tools/libxl/libxl_bootloader.c | 39 +++++++++++++++++++++++++++++----------
> tools/libxl/libxl_internal.h | 7 +++++--
> 3 files changed, 57 insertions(+), 12 deletions(-)
>
> diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
> index 99972a2..983a60a 100644
> --- a/tools/libxl/libxl_aoutils.c
> +++ b/tools/libxl/libxl_aoutils.c
> @@ -97,11 +97,31 @@ void libxl__datacopier_prefixdata(libxl__egc *egc,
> libxl__datacopier_state *dc,
> LIBXL_TAILQ_INSERT_TAIL(&dc->bufs, buf, entry);
> }
>
> +static int datacopier_pollhup_handled(libxl__egc *egc,
> + libxl__datacopier_state *dc,
> + short revents, int onwrite)
> +{
> + STATE_AO_GC(dc->ao);
> +
> + if (dc->callback_pollhup && (revents & POLLHUP)) {
> + LOG(DEBUG, "received POLLHUP on %s during copy of %s",
> + onwrite ? dc->writewhat : dc->readwhat,
> + dc->copywhat);
> + libxl__datacopier_kill(dc);
> + dc->callback_pollhup(egc, dc, onwrite, -1);
> + return 1;
> + }
> + return 0;
> +}
> +
> static void datacopier_readable(libxl__egc *egc, libxl__ev_fd *ev,
> int fd, short events, short revents) {
> libxl__datacopier_state *dc = CONTAINER_OF(ev, *dc, toread);
> STATE_AO_GC(dc->ao);
>
> + if (datacopier_pollhup_handled(egc, dc, revents, 0))
> + return;
> +
> if (revents & ~POLLIN) {
> LOG(ERROR, "unexpected poll event 0x%x (should be POLLIN)"
> " on %s during copy of %s", revents, dc->readwhat, dc->copywhat);
> @@ -163,6 +183,9 @@ static void datacopier_writable(libxl__egc *egc,
> libxl__ev_fd *ev,
> libxl__datacopier_state *dc = CONTAINER_OF(ev, *dc, towrite);
> STATE_AO_GC(dc->ao);
>
> + if (datacopier_pollhup_handled(egc, dc, revents, 1))
> + return;
> +
> if (revents & ~POLLOUT) {
> LOG(ERROR, "unexpected poll event 0x%x (should be POLLOUT)"
> " on %s during copy of %s", revents, dc->writewhat,
> dc->copywhat);
> diff --git a/tools/libxl/libxl_bootloader.c b/tools/libxl/libxl_bootloader.c
> index ef5a91b..bfc1b56 100644
> --- a/tools/libxl/libxl_bootloader.c
> +++ b/tools/libxl/libxl_bootloader.c
> @@ -215,6 +215,7 @@ void libxl__bootloader_init(libxl__bootloader_state *bl)
> libxl__domaindeathcheck_init(&bl->deathcheck);
> bl->keystrokes.ao = bl->ao; libxl__datacopier_init(&bl->keystrokes);
> bl->display.ao = bl->ao; libxl__datacopier_init(&bl->display);
> + bl->got_pollhup = 0;
> }
>
> static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl)
> @@ -275,7 +276,7 @@ static void bootloader_local_detached_cb(libxl__egc *egc,
> }
>
> /* might be called at any time, provided it's init'd */
> -static void bootloader_abort(libxl__egc *egc,
> +static void bootloader_stop(libxl__egc *egc,
> libxl__bootloader_state *bl, int rc)
> {
> STATE_AO_GC(bl->ao);
> @@ -285,8 +286,8 @@ static void bootloader_abort(libxl__egc *egc,
> libxl__datacopier_kill(&bl->display);
> if (libxl__ev_child_inuse(&bl->child)) {
> r = kill(bl->child.pid, SIGTERM);
> - if (r) LOGE(WARN, "after failure, failed to kill bootloader [%lu]",
> - (unsigned long)bl->child.pid);
> + if (r) LOGE(WARN, "%sfailed to kill bootloader [%lu]",
> + rc ? "after failure, " : "", (unsigned
> long)bl->child.pid);
> }
> bl->rc = rc;
> }
> @@ -508,7 +509,10 @@ static void bootloader_gotptys(libxl__egc *egc,
> libxl__openpty_state *op)
> bl->keystrokes.maxsz = BOOTLOADER_BUF_OUT;
> bl->keystrokes.copywhat =
> GCSPRINTF("bootloader input for domain %"PRIu32, bl->domid);
> - bl->keystrokes.callback = bootloader_keystrokes_copyfail;
> + bl->keystrokes.callback = bootloader_keystrokes_copyfail;
> + bl->keystrokes.callback_pollhup = bootloader_keystrokes_copyfail;
> + /* pollhup gets called with errnoval==-1 which is not otherwise
> + * possible since errnos are nonnegative, so it's unambiguous */
> rc = libxl__datacopier_start(&bl->keystrokes);
> if (rc) goto out;
>
> @@ -516,7 +520,8 @@ static void bootloader_gotptys(libxl__egc *egc,
> libxl__openpty_state *op)
> bl->display.maxsz = BOOTLOADER_BUF_IN;
> bl->display.copywhat =
> GCSPRINTF("bootloader output for domain %"PRIu32, bl->domid);
> - bl->display.callback = bootloader_display_copyfail;
> + bl->display.callback = bootloader_display_copyfail;
> + bl->display.callback_pollhup = bootloader_display_copyfail;
> rc = libxl__datacopier_start(&bl->display);
> if (rc) goto out;
>
> @@ -562,30 +567,42 @@ static void bootloader_gotptys(libxl__egc *egc,
> libxl__openpty_state *op)
>
> /* perhaps one of these will be called, but perhaps not */
> static void bootloader_copyfail(libxl__egc *egc, const char *which,
> - libxl__bootloader_state *bl, int onwrite, int errnoval)
> + libxl__bootloader_state *bl, int ondisplay, int onwrite, int
> errnoval)
> {
> STATE_AO_GC(bl->ao);
> + int rc = ERROR_FAIL;
> +
> + if (errnoval==-1) {
> + /* POLLHUP */
> + if (!!ondisplay != !!onwrite) {
> + rc = 0;
> + bl->got_pollhup = 1;
> + } else {
> + LOG(ERROR, "unexpected POLLHUP on %s", which);
> + }
> + }
> if (!onwrite && !errnoval)
> LOG(ERROR, "unexpected eof copying %s", which);
> - bootloader_abort(egc, bl, ERROR_FAIL);
> +
> + bootloader_stop(egc, bl, rc);
> }
> static void bootloader_keystrokes_copyfail(libxl__egc *egc,
> libxl__datacopier_state *dc, int onwrite, int errnoval)
> {
> libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, keystrokes);
> - bootloader_copyfail(egc, "bootloader input", bl, onwrite, errnoval);
> + bootloader_copyfail(egc, "bootloader input", bl, 0, onwrite, errnoval);
> }
> static void bootloader_display_copyfail(libxl__egc *egc,
> libxl__datacopier_state *dc, int onwrite, int errnoval)
> {
> libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, display);
> - bootloader_copyfail(egc, "bootloader output", bl, onwrite, errnoval);
> + bootloader_copyfail(egc, "bootloader output", bl, 1, onwrite, errnoval);
> }
>
> static void bootloader_domaindeath(libxl__egc *egc, libxl__domaindeathcheck
> *dc)
> {
> libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, deathcheck);
> - bootloader_abort(egc, bl, ERROR_FAIL);
> + bootloader_stop(egc, bl, ERROR_FAIL);
> }
>
> static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
> @@ -599,6 +616,8 @@ static void bootloader_finished(libxl__egc *egc,
> libxl__ev_child *child,
> libxl__datacopier_kill(&bl->display);
>
> if (status) {
> + if (bl->got_pollhup && WIFSIGNALED(status) &&
> WTERMSIG(status)==SIGTERM)
> + LOG(ERROR, "got POLLHUP, sent SIGTERM");
> LOG(ERROR, "bootloader failed - consult logfile %s", bl->logfile);
> libxl_report_child_exitstatus(CTX, XTL_ERROR, "bootloader",
> pid, status);
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index 58004b3..2d6c71a 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -2076,7 +2076,9 @@ typedef struct libxl__datacopier_buf
> libxl__datacopier_buf;
> * errnoval==0 means we got eof and all data was written
> * errnoval!=0 means we had a read error, logged
> * onwrite==-1 means some other internal failure, errnoval not valid, logged
> - * in all cases copier is killed before calling this callback */
> + * If we get POLLHUP, we call callback_pollhup(..., onwrite, -1);
> + * or if callback_pollhup==0 this is an internal failure, as above.
> + * In all cases copier is killed before calling this callback */
> typedef void libxl__datacopier_callback(libxl__egc *egc,
> libxl__datacopier_state *dc, int onwrite, int errnoval);
>
> @@ -2095,6 +2097,7 @@ struct libxl__datacopier_state {
> const char *copywhat, *readwhat, *writewhat; /* for error msgs */
> FILE *log; /* gets a copy of everything */
> libxl__datacopier_callback *callback;
> + libxl__datacopier_callback *callback_pollhup;
> /* remaining fields are private to datacopier */
> libxl__ev_fd toread, towrite;
> ssize_t used;
> @@ -2279,7 +2282,7 @@ struct libxl__bootloader_state {
> int nargs, argsspace;
> const char **args;
> libxl__datacopier_state keystrokes, display;
> - int rc;
> + int rc, got_pollhup;
> };
>
> _hidden void libxl__bootloader_init(libxl__bootloader_state *bl);
> --
> 1.7.2.5
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |