[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 18/21] libxl: do not leak spawned middle children
libxl__spawn_spawn would, when libxl__spawn_detach was called, make the spawn become idle immediately. However it still has a child process which needs to be waited for: the `detachable' spawned child. This is wrong because the ultimate in-libxl caller may return to the application, with a child process still forked but not reaped libxl contrary to the documented behaviour of libxl. Instead, replace libxl__spawn_detach with libxl__spawn_initiate_detach which is asynchronous. The detachable spawned children are abolished; instead, we defer calling back to the in-libxl user until the middle child has been reaped. Also, remove erroneous comment suggesting that `death' callback parameter to libxl__ev_child_fork may be NULL. It may not, and there are no callers which pass NULL. Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx> Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx> Changes in v4: * Clarify semantics of sub-states of Attached. * Fix reference to `Failing' sub-state in comment to `Failed'. Changes in v3 of series: * Now also remove erroneous comment about NULL fork death callback. --- tools/libxl/libxl_dm.c | 14 ++++- tools/libxl/libxl_exec.c | 130 +++++++++++++++++++++++++----------------- tools/libxl/libxl_internal.h | 43 +++++++------- 3 files changed, 110 insertions(+), 77 deletions(-) diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c index 340fcfa..b3de145 100644 --- a/tools/libxl/libxl_dm.c +++ b/tools/libxl/libxl_dm.c @@ -908,6 +908,8 @@ static void device_model_confirm(libxl__egc *egc, libxl__spawn_state *spawn, const char *xsdata); static void device_model_startup_failed(libxl__egc *egc, libxl__spawn_state *spawn); +static void device_model_detached(libxl__egc *egc, + libxl__spawn_state *spawn); /* our "next step" function, called from those callbacks and elsewhere */ static void device_model_spawn_outcome(libxl__egc *egc, @@ -1015,6 +1017,7 @@ retry_transaction: spawn->midproc_cb = libxl__spawn_record_pid; spawn->confirm_cb = device_model_confirm; spawn->failure_cb = device_model_startup_failed; + spawn->detached_cb = device_model_detached; rc = libxl__spawn_spawn(egc, spawn); if (rc < 0) @@ -1048,9 +1051,7 @@ static void device_model_confirm(libxl__egc *egc, libxl__spawn_state *spawn, if (strcmp(xsdata, "running")) return; - libxl__spawn_detach(gc, spawn); - - device_model_spawn_outcome(egc, dmss, 0); + libxl__spawn_initiate_detach(gc, spawn); } static void device_model_startup_failed(libxl__egc *egc, @@ -1060,6 +1061,13 @@ static void device_model_startup_failed(libxl__egc *egc, device_model_spawn_outcome(egc, dmss, ERROR_FAIL); } +static void device_model_detached(libxl__egc *egc, + libxl__spawn_state *spawn) +{ + libxl__dm_spawn_state *dmss = CONTAINER_OF(spawn, *dmss, spawn); + device_model_spawn_outcome(egc, dmss, 0); +} + static void device_model_spawn_outcome(libxl__egc *egc, libxl__dm_spawn_state *dmss, int rc) diff --git a/tools/libxl/libxl_exec.c b/tools/libxl/libxl_exec.c index cfa379c..0477386 100644 --- a/tools/libxl/libxl_exec.c +++ b/tools/libxl/libxl_exec.c @@ -238,15 +238,22 @@ err: /* * Full set of possible states of a libxl__spawn_state and its _detachable: * - * ss-> ss-> ss-> | ssd-> ssd-> - * timeout xswatch ssd | mid ss - * - Undefined undef undef no | - - - * - Idle Idle Idle no | - - - * - Active Active Active yes | Active yes - * - Partial Active/Idle Active/Idle maybe | Active/Idle yes (if exists) - * - Detached - - - | Active no + * detaching failed mid timeout xswatch + * - Undefined undef undef - undef undef + * - Idle any any Idle Idle Idle + * - Attached OK 0 0 Active Active Active + * - Attached Failed 0 1 Active Idle Idle + * - Detaching 1 maybe Active Idle Idle + * - Partial any any Idle Active/Idle Active/Idle * - * When in state Detached, the middle process has been sent a SIGKILL. + * When in states Detaching or Attached Failed, the middle process has + * been sent a SIGKILL. + * + * The difference between Attached OK and Attached Failed is not + * directly visible to callers - callers see these two the same, + * although of course Attached OK will hopefully eventually result in + * a call to detached_cb, whereas Attached Failed will end up + * in a call to failure_cb. */ /* Event callbacks. */ @@ -257,19 +264,18 @@ static void spawn_timeout(libxl__egc *egc, libxl__ev_time *ev, static void spawn_middle_death(libxl__egc *egc, libxl__ev_child *childw, pid_t pid, int status); -/* Precondition: Partial. Results: Detached. */ +/* Precondition: Partial. Results: Idle. */ static void spawn_cleanup(libxl__gc *gc, libxl__spawn_state *ss); -/* Precondition: Partial; caller has logged failure reason. - * Results: Caller notified of failure; - * after return, ss may be completely invalid as caller may reuse it */ -static void spawn_failed(libxl__egc *egc, libxl__spawn_state *ss); +/* Precondition: Attached or Detaching; caller has logged failure reason. + * Results: Detaching, or Attached Failed */ +static void spawn_fail(libxl__egc *egc, libxl__spawn_state *ss); void libxl__spawn_init(libxl__spawn_state *ss) { + libxl__ev_child_init(&ss->mid); libxl__ev_time_init(&ss->timeout); libxl__ev_xswatch_init(&ss->xswatch); - ss->ssd = 0; } int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *ss) @@ -280,8 +286,7 @@ int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *ss) int status, rc; libxl__spawn_init(ss); - ss->ssd = libxl__zalloc(NOGC, sizeof(*ss->ssd)); - libxl__ev_child_init(&ss->ssd->mid); + ss->failed = ss->detaching = 0; rc = libxl__ev_time_register_rel(gc, &ss->timeout, spawn_timeout, ss->timeout_ms); @@ -291,7 +296,7 @@ int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *ss) spawn_watch_event, ss->xspath); if (rc) goto out_err; - pid_t middle = libxl__ev_child_fork(gc, &ss->ssd->mid, spawn_middle_death); + pid_t middle = libxl__ev_child_fork(gc, &ss->mid, spawn_middle_death); if (middle ==-1) { rc = ERROR_FAIL; goto out_err; } if (middle) { @@ -344,54 +349,64 @@ int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *ss) static void spawn_cleanup(libxl__gc *gc, libxl__spawn_state *ss) { + assert(!libxl__ev_child_inuse(&ss->mid)); + libxl__ev_time_deregister(gc, &ss->timeout); + libxl__ev_xswatch_deregister(gc, &ss->xswatch); +} + +static void spawn_detach(libxl__gc *gc, libxl__spawn_state *ss) +/* Precondition: Attached or Detaching, but caller must have just set + * at least one of detaching or failed. + * Results: Detaching or Attached Failed */ +{ int r; + assert(libxl__ev_child_inuse(&ss->mid)); libxl__ev_time_deregister(gc, &ss->timeout); libxl__ev_xswatch_deregister(gc, &ss->xswatch); - libxl__spawn_state_detachable *ssd = ss->ssd; - if (ssd) { - if (libxl__ev_child_inuse(&ssd->mid)) { - pid_t child = ssd->mid.pid; - r = kill(child, SIGKILL); - if (r && errno != ESRCH) - LOGE(WARN, "%s: failed to kill intermediate child (pid=%lu)", - ss->what, (unsigned long)child); - } + pid_t child = ss->mid.pid; + r = kill(child, SIGKILL); + if (r && errno != ESRCH) + LOGE(WARN, "%s: failed to kill intermediate child (pid=%lu)", + ss->what, (unsigned long)child); +} - /* disconnect the ss and ssd from each other */ - ssd->ss = 0; - ss->ssd = 0; - } +void libxl__spawn_initiate_detach(libxl__gc *gc, libxl__spawn_state *ss) +{ + ss->detaching = 1; + spawn_detach(gc, ss); } -static void spawn_failed(libxl__egc *egc, libxl__spawn_state *ss) +static void spawn_fail(libxl__egc *egc, libxl__spawn_state *ss) +/* Caller must have logged. Must be last thing in calling function, + * as it may make the callback. Precondition: Attached or Detaching. */ { EGC_GC; - spawn_cleanup(gc, ss); - ss->failure_cb(egc, ss); /* must be last; callback may do anything to ss */ + ss->failed = 1; + spawn_detach(gc, ss); } static void spawn_timeout(libxl__egc *egc, libxl__ev_time *ev, const struct timeval *requested_abs) { - /* Before event, was Active; is now Partial. */ + /* Before event, was Attached. */ EGC_GC; libxl__spawn_state *ss = CONTAINER_OF(ev, *ss, timeout); LOG(ERROR, "%s: startup timed out", ss->what); - spawn_failed(egc, ss); /* must be last */ + spawn_fail(egc, ss); /* must be last */ } static void spawn_watch_event(libxl__egc *egc, libxl__ev_xswatch *xsw, const char *watch_path, const char *event_path) { - /* On entry, is Active. */ + /* On entry, is Attached. */ EGC_GC; libxl__spawn_state *ss = CONTAINER_OF(xsw, *ss, xswatch); char *p = libxl__xs_read(gc, 0, ss->xspath); if (!p && errno != ENOENT) { LOG(ERROR, "%s: xenstore read of %s failed", ss->what, ss->xspath); - spawn_failed(egc, ss); /* must be last */ + spawn_fail(egc, ss); /* must be last */ return; } ss->confirm_cb(egc, ss, p); /* must be last */ @@ -399,20 +414,22 @@ static void spawn_watch_event(libxl__egc *egc, libxl__ev_xswatch *xsw, static void spawn_middle_death(libxl__egc *egc, libxl__ev_child *childw, pid_t pid, int status) - /* Before event, was Active or Detached; - * is now Active or Detached except that ssd->mid is Idle */ + /* On entry, is Attached or Detaching */ { EGC_GC; - libxl__spawn_state_detachable *ssd = CONTAINER_OF(childw, *ssd, mid); - libxl__spawn_state *ss = ssd->ss; - - if (!WIFEXITED(status)) { + libxl__spawn_state *ss = CONTAINER_OF(childw, *ss, mid); + + if ((ss->failed || ss->detaching) && + ((WIFEXITED(status) && WEXITSTATUS(status)==0) || + (WIFSIGNALED(status) && WTERMSIG(status)==SIGKILL))) { + /* as expected */ + } else if (!WIFEXITED(status)) { + int loglevel = ss->detaching ? XTL_WARN : XTL_ERROR; const char *what = - GCSPRINTF("%s intermediate process (startup monitor)", - ss ? ss->what : "(detached)"); - int loglevel = ss ? XTL_ERROR : XTL_WARN; + GCSPRINTF("%s intermediate process (startup monitor)", ss->what); libxl_report_child_exitstatus(CTX, loglevel, what, pid, status); - } else if (ss) { /* otherwise it was supposed to be a daemon by now */ + ss->failed = 1; + } else { if (!status) LOG(ERROR, "%s [%ld]: unexpectedly exited with exit status 0," " when we were waiting for it to confirm startup", @@ -430,15 +447,22 @@ static void spawn_middle_death(libxl__egc *egc, libxl__ev_child *childw, LOG(ERROR, "%s [%ld]: died during startup due to unknown fatal" " signal number %d", ss->what, (unsigned long)pid, sig); } - ss->ssd = 0; /* detatch the ssd to make the ss be in state Partial */ - spawn_failed(egc, ss); /* must be last use of ss */ + ss->failed = 1; } - free(ssd); -} -void libxl__spawn_detach(libxl__gc *gc, libxl__spawn_state *ss) -{ spawn_cleanup(gc, ss); + + if (ss->failed && !ss->detaching) { + ss->failure_cb(egc, ss); /* must be last */ + return; + } + + if (ss->failed && ss->detaching) + LOG(WARN,"%s underlying machinery seemed to fail," + " but its function seems to have been successful", ss->what); + + assert(ss->detaching); + ss->detached_cb(egc, ss); } /* diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 85f4bc6..9df0db5 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -690,8 +690,7 @@ static inline int libxl__ev_xswatch_isregistered(const libxl__ev_xswatch *xw) * the libxl event machinery. * * The parent may signal the child but it must not reap it. That will - * be done by the event machinery. death may be NULL in which case - * the child is still reaped but its death is ignored. + * be done by the event machinery. * * It is not possible to "deregister" the child death event source. * It will generate exactly one event callback; until then the childw @@ -998,8 +997,8 @@ _hidden int libxl__device_pci_destroy_all(libxl__gc *gc, uint32_t domid); * * Higher-level double-fork and separate detach eg as for device models * - * Each libxl__spawn_state is in one of the conventional states - * Undefined, Idle, Active + * Each libxl__spawn_state is in one of these states + * Undefined, Idle, Attached, Detaching */ typedef struct libxl__obsolete_spawn_starting libxl__spawn_starting; @@ -1040,15 +1039,15 @@ _hidden void libxl__spawn_init(libxl__spawn_state*); * intermediate or final child; an error message will have been * logged. * - * confirm_cb and failure_cb will not be called reentrantly from - * within libxl__spawn_spawn. + * confirm_cb, failure_cb and detached_cb will not be called + * reentrantly from within libxl__spawn_spawn. * * what: string describing the spawned process, used for logging * * Logs errors. A copy of "what" is taken. * Return values: * < 0 error, *spawn is now Idle and need not be detached - * +1 caller is the parent, *spawn is Active and must eventually be detached + * +1 caller is the parent, *spawn is Attached and must be detached * 0 caller is now the inner child, should probably call libxl__exec * * The spawn state must be Undefined or Idle on entry. @@ -1056,12 +1055,15 @@ _hidden void libxl__spawn_init(libxl__spawn_state*); _hidden int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *spawn); /* - * libxl__spawn_detach - Detaches the daemonic child. + * libxl__spawn_request_detach - Detaches the daemonic child. * * Works by killing the intermediate process from spawn_spawn. * After this function returns, failures of either child are no * longer reported via failure_cb. * + * This is not synchronous: there will be a further callback when + * the detach is complete. + * * If called before the inner child has been created, this may prevent * it from running at all. Thus this should be called only when the * inner child has notified that it is ready. Normally it will be @@ -1069,10 +1071,10 @@ _hidden int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *spawn); * * Logs errors. * - * The spawn state must be Active or Idle on entry and will be Idle + * The spawn state must be Attached entry and will be Detaching * on return. */ -_hidden void libxl__spawn_detach(libxl__gc *gc, libxl__spawn_state*); +_hidden void libxl__spawn_initiate_detach(libxl__gc *gc, libxl__spawn_state*); /* * If successful, this should return 0. @@ -1109,15 +1111,11 @@ typedef void libxl__spawn_failure_cb(libxl__egc*, libxl__spawn_state*); typedef void libxl__spawn_confirm_cb(libxl__egc*, libxl__spawn_state*, const char *xsdata); -typedef struct { - /* Private to the spawn implementation. - */ - /* This separate struct, from malloc, allows us to "detach" - * the child and reap it later, when our user has gone - * away and freed its libxl__spawn_state */ - struct libxl__spawn_state *ss; - libxl__ev_child mid; -} libxl__spawn_state_detachable; +/* + * Called when the detach (requested by libxl__spawn_initiate_detach) has + * completed. On entry to the callback the spawn state is Idle. + */ +typedef void libxl__spawn_detached_cb(libxl__egc*, libxl__spawn_state*); struct libxl__spawn_state { /* must be filled in by user and remain valid */ @@ -1129,15 +1127,18 @@ struct libxl__spawn_state { libxl__spawn_midproc_cb *midproc_cb; libxl__spawn_failure_cb *failure_cb; libxl__spawn_confirm_cb *confirm_cb; + libxl__spawn_detached_cb *detached_cb; /* remaining fields are private to libxl_spawn_... */ + int detaching; /* we are in Detaching */ + int failed; /* might be true whenever we are not Idle */ + libxl__ev_child mid; /* always in use whenever we are not Idle */ libxl__ev_time timeout; libxl__ev_xswatch xswatch; - libxl__spawn_state_detachable *ssd; }; static inline int libxl__spawn_inuse(libxl__spawn_state *ss) - { return !!ss->ssd; } + { return libxl__ev_child_inuse(&ss->mid); } /* * libxl_spawner_record_pid - Record given pid in xenstore -- 1.7.2.5 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |