[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] Domain core-dumping fixes
Hi John, Please don't reset a domain after unpaused the domain. Could you move the reset into xend? e.g. try: log.info("Domain core dump requested for domain %s (%d) " "live=%d crash=%d.", dominfo.getName(), dominfo.getDomid(), live, crash) dominfo.dumpCore(filename) if crash: self.domain_destroy(domid) + elif reset: + self.domain_reset(domid) Best regards, Kan Wed, 11 Mar 2009 18:45:49 -0700, John Levon wrote: ># HG changeset patch ># User John Levon <john.levon@xxxxxxx> ># Date 1236822336 25200 ># Node ID 88b3a560b0fb2a5adca969d9b192220d64dfd105 ># Parent e92a56f3581975496d5d9f250823e46493e58548 >Domain core-dumping fixes > >The code was attempting to use the domain's current number of pages >(info.nr_pages) as a maximum index. We then walk the memory map and can >easily over-write past the end of the nr_pages-sized array, if the >domain has more pages mapped in than earlier (live dump). Restrict >ourselves to the current number of pages. > >Also fix the dump core method in xend to actually implement the crash >and live options. In particular this means that xend clients other than >xm now get non-live dumps by default. > >Signed-off-by: John Levon <john.levon@xxxxxxx> > >diff --git a/tools/libxc/xc_core.c b/tools/libxc/xc_core.c >--- a/tools/libxc/xc_core.c >+++ b/tools/libxc/xc_core.c >@@ -518,7 +518,17 @@ xc_domain_dumpcore_via_callback(int xc_h > if ( sts != 0 ) > goto out; > >+ /* >+ * Note: this is the *current* number of pages and may change under >+ * a live dump-core. We'll just take this value, and if more pages >+ * exist, we'll skip them. If there's less, then we'll just not use >+ * all the array... >+ * >+ * We don't want to use the total potential size of the memory map >+ * since that is usually much higher than info.nr_pages. >+ */ > nr_pages = info.nr_pages; >+ > if ( !auto_translated_physmap ) > { > /* obtain p2m table */ >@@ -770,7 +780,7 @@ xc_domain_dumpcore_via_callback(int xc_h > > pfn_start = memory_map[map_idx].addr >> PAGE_SHIFT; > pfn_end = pfn_start + (memory_map[map_idx].size >> PAGE_SHIFT); >- for ( i = pfn_start; i < pfn_end; i++ ) >+ for ( i = pfn_start; i < pfn_end && j < nr_pages; i++ ) > { > uint64_t gmfn; > void *vaddr; >diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h >--- a/tools/libxc/xenctrl.h >+++ b/tools/libxc/xenctrl.h >@@ -158,7 +158,7 @@ typedef struct xc_dominfo { > paused:1, blocked:1, running:1, > hvm:1, debugged:1; > unsigned int shutdown_reason; /* only meaningful if shutdown==1 */ >- unsigned long nr_pages; >+ unsigned long nr_pages; /* current number, not maximum */ > unsigned long shared_info_frame; > uint64_t cpu_time; > unsigned long max_memkb; >diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/ >XendDomain.py >--- a/tools/python/xen/xend/XendDomain.py >+++ b/tools/python/xen/xend/XendDomain.py >@@ -1252,13 +1252,23 @@ class XendDomain: > POWER_STATE_NAMES[DOM_STATE_PAUSED], > POWER_STATE_NAMES[dominfo._stateGet()]) > >- try: >- log.info("Domain core dump requested for domain %s (%d) " >- "live=%d crash=%d.", >- dominfo.getName(), dominfo.getDomid(), live, crash) >- return dominfo.dumpCore(filename) >- except Exception, ex: >- raise XendError(str(ex)) >+ dopause = (not live and dominfo._stateGet() == DOM_STATE_RUNNING) >+ if dopause: >+ dominfo.pause() >+ >+ try: >+ try: >+ log.info("Domain core dump requested for domain %s (%d) " >+ "live=%d crash=%d.", >+ dominfo.getName(), dominfo.getDomid(), live, crash) >+ dominfo.dumpCore(filename) >+ if crash: >+ self.domain_destroy(domid) >+ except Exception, ex: >+ raise XendError(str(ex)) >+ finally: >+ if dopause and not crash: >+ dominfo.unpause() > > def domain_destroy(self, domid): > """Terminate domain immediately. >diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/ >xend/XendDomainInfo.py >--- a/tools/python/xen/xend/XendDomainInfo.py >+++ b/tools/python/xen/xend/XendDomainInfo.py >@@ -2036,26 +2036,31 @@ class XendDomainInfo: > @raise: XendError if core dumping failed. > """ > >+ if not corefile: >+ this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime()) >+ corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time, >+ self.info['name_label'], self.domid) >+ >+ if os.path.isdir(corefile): >+ raise XendError("Cannot dump core in a directory: %s" % >+ corefile) >+ > try: >- if not corefile: >- this_time = time.strftime("%Y-%m%d-%H%M.%S", time. >localtime()) >- corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time, >- self.info['name_label'], self.domid) >- >- if os.path.isdir(corefile): >- raise XendError("Cannot dump core in a directory: %s" % >- corefile) >- >- self._writeVm(DUMPCORE_IN_PROGRESS, 'True') >- xc.domain_dumpcore(self.domid, corefile) >+ try: >+ self._writeVm(DUMPCORE_IN_PROGRESS, 'True') >+ xc.domain_dumpcore(self.domid, corefile) >+ except RuntimeError, ex: >+ corefile_incomp = corefile+'-incomplete' >+ try: >+ os.rename(corefile, corefile_incomp) >+ except: >+ pass >+ >+ log.error("core dump failed: id = %s name = %s: %s", >+ self.domid, self.info['name_label'], str(ex)) >+ raise XendError("Failed to dump core: %s" % str(ex)) >+ finally: > self._removeVm(DUMPCORE_IN_PROGRESS) >- except RuntimeError, ex: >- corefile_incomp = corefile+'-incomplete' >- os.rename(corefile, corefile_incomp) >- self._removeVm(DUMPCORE_IN_PROGRESS) >- log.exception("XendDomainInfo.dumpCore failed: id = %s name = >%s", >- self.domid, self.info['name_label']) >- raise XendError("Failed to dump core: %s" % str(ex)) > > # > # Device creation/deletion functions >diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py >--- a/tools/python/xen/xm/main.py >+++ b/tools/python/xen/xm/main.py >@@ -1351,22 +1351,10 @@ def xm_dump_core(args): > else: > filename = None > >- if not live: >- ds = server.xend.domain.pause(dom, True) >- >- try: >- print "Dumping core of domain: %s ..." % str(dom) >- server.xend.domain.dump(dom, filename, live, crash) >- >- if crash: >- print "Destroying domain: %s ..." % str(dom) >- server.xend.domain.destroy(dom) >- elif reset: >- print "Resetting domain: %s ..." % str(dom) >- server.xend.domain.reset(dom) >- finally: >- if not live and not crash and not reset and ds == DOM_STATE_RUNNING: >- server.xend.domain.unpause(dom) >+ print "Dumping core of domain: %s ..." % str(dom) >+ server.xend.domain.dump(dom, filename, live, crash) >+ if reset: >+ server.xend.domain.reset(dom) > > def xm_rename(args): > arg_check(args, "rename", 2) > >_______________________________________________ >Xen-devel mailing list >Xen-devel@xxxxxxxxxxxxxxxxxxx >http://lists.xensource.com/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |