[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH][RFC] problem with rename-restart behavior in xend
While fixing the /vm/<uuid> leak discussed in this thread http://lists.xensource.com/archives/html/xen-devel/2008-01/msg00297.html I found that the 'rename-restart' option in the guest config file was not working properly. On a 3.2 system with 'on_crash=rename-restart' in guest config, after crashing the guest I observed jfehlig4: # xm li Name ID Mem VCPUs State Time(s) Domain-0 0 1233 4 r----- 962.6 Domain-8159ab1d-bb0d-6853-4264-5f129efdd4af 23 384 1 ----c- 14.1 Notice the domain did not restart due to _stateGet returning XEN_API_VM_POWER_STATE_RUNNING. From xend.log [2008-01-23 15:07:10 27490] ERROR (XendDomainInfo:111) Domain construction failed Traceback (most recent call last): File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", line 109, in create_from_dict vm.start() File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", line 444, in start raise XendError('VM already running') XendError: VM already running Patch xend_crashed_ps.diff fixes that problem but I wanted to get some comments on the approach. Essentially it introduces a power state of crashed. At first I wasn't too keen on the idea, but after some thought I think it is analogous to the paused power state, i.e. the domain has some resources assigned to it but it is no longer executing instructions. This patch is not complete but I wanted to see how folks felt about it before proceeding. The patch needs to include corresponding changes to XenAPI C bindings, documentation, etc. to be complete. Now, after getting past this problem I noticed that the crashed domain was restarted but with the same name :-(. E.g. jfehlig4: # xm li Name ID Mem VCPUs State Time(s) Domain-0 0 1233 4 r----- 937.9 Domain-e64b12a0-0493-44d7-afde-55c776513426 21 384 1 ----c- 14.3 Domain-e64b12a0-0493-44d7-afde-55c776513426 22 384 1 r----- 7.3 Patch xend_rename-restart.diff fixes this problem and is rather straight-forward I believe. 
With both patches the expected behavior is observed: jfehlig4: # xm li Name ID Mem VCPUs State Time(s) Domain-0 0 1233 4 r----- 1002.4 Domain-84e3e790-83bc-d6de-6638-5321916adaee 24 384 1 ----c- 13.9 sles10_64 25 384 1 -b---- 17.0 If there are no objections to the crashed power state notion I will complete the patch and resubmit both. BTW, I'm making progress on the xenstore leak but all of these other bugs keep getting in the way :-). Cheers, Jim diff -r 1c826ea72a80 tools/python/xen/xend/XendAPIConstants.py --- a/tools/python/xen/xend/XendAPIConstants.py Wed Jan 23 15:42:52 2008 +0000 +++ b/tools/python/xen/xend/XendAPIConstants.py Wed Jan 23 15:34:30 2008 -0700 @@ -25,6 +25,7 @@ XEN_API_VM_POWER_STATE = [ 'Running', 'Suspended', 'Halted', + 'Crashed', 'Unknown' ] @@ -33,7 +34,8 @@ XEN_API_VM_POWER_STATE_RUNNING = 2 XEN_API_VM_POWER_STATE_RUNNING = 2 XEN_API_VM_POWER_STATE_SUSPENDED = 3 XEN_API_VM_POWER_STATE_SHUTTINGDOWN = 4 -XEN_API_VM_POWER_STATE_UNKNOWN = 5 +XEN_API_VM_POWER_STATE_CRASHED = 5 +XEN_API_VM_POWER_STATE_UNKNOWN = 6 XEN_API_ON_NORMAL_EXIT = [ 'destroy', diff -r 1c826ea72a80 tools/python/xen/xend/XendConstants.py --- a/tools/python/xen/xend/XendConstants.py Wed Jan 23 15:42:52 2008 +0000 +++ b/tools/python/xen/xend/XendConstants.py Wed Jan 23 15:33:35 2008 -0700 @@ -61,6 +61,7 @@ DOM_STATES = [ 'running', 'suspended', 'shutdown', + 'crashed', 'unknown', ] @@ -69,6 +70,7 @@ DOM_STATE_RUNNING = XEN_API_VM_POWER_STA DOM_STATE_RUNNING = XEN_API_VM_POWER_STATE_RUNNING DOM_STATE_SUSPENDED = XEN_API_VM_POWER_STATE_SUSPENDED DOM_STATE_SHUTDOWN = XEN_API_VM_POWER_STATE_SHUTTINGDOWN +DOM_STATE_CRASHED = XEN_API_VM_POWER_STATE_CRASHED DOM_STATE_UNKNOWN = XEN_API_VM_POWER_STATE_UNKNOWN DOM_STATES_OLD = [ diff -r 1c826ea72a80 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Wed Jan 23 15:42:52 2008 +0000 +++ b/tools/python/xen/xend/XendDomain.py Wed Jan 23 15:37:51 2008 -0700 @@ -43,6 +43,7 @@ from xen.xend.XendConstants import 
DOM_S from xen.xend.XendConstants import DOM_STATE_HALTED, DOM_STATE_PAUSED from xen.xend.XendConstants import DOM_STATE_RUNNING, DOM_STATE_SUSPENDED from xen.xend.XendConstants import DOM_STATE_SHUTDOWN, DOM_STATE_UNKNOWN +from xen.xend.XendConstants import DOM_STATE_CRASHED from xen.xend.XendConstants import TRIGGER_TYPE from xen.xend.XendDevices import XendDevices from xen.xend.XendAPIConstants import * @@ -69,6 +70,7 @@ POWER_STATE_NAMES = dict([(x, XEN_API_VM DOM_STATE_RUNNING, DOM_STATE_SUSPENDED, DOM_STATE_SHUTDOWN, + DOM_STATE_CRASHED, DOM_STATE_UNKNOWN]]) POWER_STATE_ALL = 'all' @@ -1191,7 +1193,7 @@ class XendDomain: if dominfo.getDomid() == DOM0_ID: raise XendError("Cannot pause privileged domain %s" % domid) ds = dominfo._stateGet() - if ds not in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): + if ds not in (DOM_STATE_RUNNING, DOM_STATE_PAUSED, DOM_STATE_CRASHED): raise VMBadState("Domain '%s' is not started" % domid, POWER_STATE_NAMES[DOM_STATE_RUNNING], POWER_STATE_NAMES[ds]) @@ -1216,7 +1218,7 @@ class XendDomain: if dominfo.getDomid() == DOM0_ID: raise XendError("Cannot dump core for privileged domain %s" % domid) - if dominfo._stateGet() not in (DOM_STATE_PAUSED, DOM_STATE_RUNNING): + if dominfo._stateGet() not in (DOM_STATE_PAUSED, DOM_STATE_RUNNING, DOM_STATE_CRASHED): raise VMBadState("Domain '%s' is not started" % domid, POWER_STATE_NAMES[DOM_STATE_PAUSED], POWER_STATE_NAMES[dominfo._stateGet()]) diff -r 1c826ea72a80 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Wed Jan 23 15:42:52 2008 +0000 +++ b/tools/python/xen/xend/XendDomainInfo.py Wed Jan 23 15:38:24 2008 -0700 @@ -414,7 +414,7 @@ class XendDomainInfo: """ from xen.xend import XendDomain - if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED, XEN_API_VM_POWER_STATE_SUSPENDED): + if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED, XEN_API_VM_POWER_STATE_SUSPENDED, XEN_API_VM_POWER_STATE_CRASHED): try: XendTask.log_progress(0, 30, self._constructDomain) 
XendTask.log_progress(31, 60, self._initDomain) @@ -648,7 +648,7 @@ class XendDomainInfo: return rc def getDeviceSxprs(self, deviceClass): - if self._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): + if self._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED, DOM_STATE_CRASHED): return self.getDeviceController(deviceClass).sxprs() else: sxprs = [] @@ -2268,6 +2268,9 @@ class XendDomainInfo: return XEN_API_VM_POWER_STATE_SUSPENDED else: return XEN_API_VM_POWER_STATE_HALTED + elif info['crashed']: + # Crashed + return XEN_API_VM_POWER_STATE_CRASHED else: # We are either RUNNING or PAUSED if info['paused']: diff -r 6050851a599f tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Wed Jan 23 15:39:26 2008 -0700 +++ b/tools/python/xen/xend/XendDomainInfo.py Wed Jan 23 15:41:12 2008 -0700 @@ -1393,9 +1393,10 @@ class XendDomainInfo: self._writeVm('xend/previous_restart_time', str(now)) + new_dom_info = self.info try: if rename: - self._preserveForRestart() + new_dom_info = self._preserveForRestart() else: self._unwatchVm() self.destroyDomain() @@ -1409,7 +1410,7 @@ class XendDomainInfo: new_dom = None try: new_dom = XendDomain.instance().domain_create_from_dict( - self.info) + new_dom_info) new_dom.waitForDevices() new_dom.unpause() rst_cnt = self._readVm('xend/restart_count') @@ -1440,11 +1441,15 @@ class XendDomainInfo: new_name, new_uuid) self._unwatchVm() self._releaseDevices() + new_dom_info = self.info.copy() + new_dom_info['name_label'] = self.info['name_label'] + new_dom_info['uuid'] = self.info['uuid'] self.info['name_label'] = new_name self.info['uuid'] = new_uuid self.vmpath = XS_VMROOT + new_uuid self._storeVmDetails() self._preserve() + return new_dom_info def _preserve(self): _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |