[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] Fix xend xenstore handling
On Fri, Dec 21, 2007 at 03:08:03PM +0000, Daniel P. Berrange wrote: > > This is the cause of the "reboot loop" xend failures I reported earlier. > > Note that I've only tested this patch against 3.1, however the code, > > and the fix, is the same in unstable. I realise it's late in the cycle > > but this bug is bad enough to need fixing IMHO. > > I think the entire code segment from the point at which xstransact() is > created needs to be in a try...finally block to be safe against the > code throwing exceptions, otherwise you could very occasionally get the > final xs.abort() being missed in error conditions. OK, how about this one? (I'm starting testing it now) regards john # HG changeset patch # User john.levon@xxxxxxx # Date 1198250774 28800 # Node ID c847f62cbad09ccbf0ba63ee30204228c16d60fb # Parent 5092403708afc964a908cf1a193f3fc68fb4b950 Fix xend xenstore handling. xend can get into a situation where two processes are attempting to interact with the xenstore socket, with disastrous results. Fix the two bad users of xstransact, add a big warning, and fix the destructor so future mistakes will be detected earlier. Signed-off-by: John Levon <john.levon@xxxxxxx> diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -1529,18 +1529,19 @@ class XendDomainInfo: log.debug("Releasing devices") t = xstransact("%s/device" % self.dompath) - for devclass in XendDevices.valid_devices(): - for dev in t.list(devclass): - try: - log.debug("Removing %s", dev); - self.destroyDevice(devclass, dev, False); - except: - # Log and swallow any exceptions in removal -- - # there's nothing more we can do. 
+ try: + for devclass in XendDevices.valid_devices(): + for dev in t.list(devclass): + try: + log.debug("Removing %s", dev); + self.destroyDevice(devclass, dev, False); + except: + # Log and swallow any exceptions in removal -- + # there's nothing more we can do. log.exception("Device release failed: %s; %s; %s", self.info['name_label'], devclass, dev) - - + finally: + t.abort() def getDeviceController(self, name): """Get the device controller for this domain, and if it @@ -1852,16 +1853,18 @@ class XendDomainInfo: # build list of phantom devices to be removed after normal devices plist = [] if self.domid is not None: - from xen.xend.xenstore.xstransact import xstransact t = xstransact("%s/device/vbd" % GetDomainPath(self.domid)) - for dev in t.list(): - backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \ - % (self.dompath, dev)) - if backend_phantom_vbd is not None: - frontend_phantom_vbd = xstransact.Read("%s/frontend" \ - % backend_phantom_vbd) - plist.append(backend_phantom_vbd) - plist.append(frontend_phantom_vbd) + try: + for dev in t.list(): + backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \ + % (self.dompath, dev)) + if backend_phantom_vbd is not None: + frontend_phantom_vbd = xstransact.Read("%s/frontend" \ + % backend_phantom_vbd) + plist.append(backend_phantom_vbd) + plist.append(frontend_phantom_vbd) + finally: + t.abort() return plist def _cleanup_phantom_devs(self, plist): diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py +++ b/tools/python/xen/xend/server/pciif.py @@ -22,8 +22,6 @@ from xen.xend import sxp from xen.xend import sxp from xen.xend.XendError import VmError from xen.xend.XendLogging import log - -from xen.xend.xenstore.xstransact import xstransact from xen.xend.server.DevController import DevController diff --git a/tools/python/xen/xend/xenstore/xstransact.py b/tools/python/xen/xend/xenstore/xstransact.py --- 
a/tools/python/xen/xend/xenstore/xstransact.py +++ b/tools/python/xen/xend/xenstore/xstransact.py @@ -7,8 +7,16 @@ from xen.xend.xenstore.xsutil import xshandle +class xstransact: + """WARNING: Be very careful if you're instantiating an xstransact object + yourself (i.e. not using the capitalized static helpers like .Read()). + It is essential that you clean up the object in place via + t.commit/abort(): GC can happen at any time, including contexts where + it's not safe to use the shared xenstore socket fd. In particular, + if xend forks, and GC occurs, we can have two processes trying to + use the same xenstore fd, and all hell breaks loose. + """ -class xstransact: def __init__(self, path = ""): @@ -22,8 +30,9 @@ class xstransact: self.in_transaction = True def __del__(self): + # see above. if self.in_transaction: - xshandle().transaction_end(self.transaction, True) + raise RuntimeError("ERROR: GC of live transaction") def commit(self): if not self.in_transaction: _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |