|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH 6/7] XendCheckpoint: implement colo
In colo mode, XendCheckpoint.py will communicate with both master and
xc_restore. This patch implements this communication. In colo mode,
the signature is "GuestColoRestore".
Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx>
Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
tools/python/xen/xend/XendCheckpoint.py | 138 +++++++++++++++++++++++---------
1 file changed, 101 insertions(+), 37 deletions(-)
diff --git a/tools/python/xen/xend/XendCheckpoint.py
b/tools/python/xen/xend/XendCheckpoint.py
index fa09757..261d9d1 100644
--- a/tools/python/xen/xend/XendCheckpoint.py
+++ b/tools/python/xen/xend/XendCheckpoint.py
@@ -25,6 +25,7 @@ from xen.xend.XendConstants import *
from xen.xend import XendNode
SIGNATURE = "LinuxGuestRecord"
+COLO_SIGNATURE = "GuestColoRestore"
QEMU_SIGNATURE = "QemuDeviceModelRecord"
dm_batch = 512
XC_SAVE = "xc_save"
@@ -203,10 +204,15 @@ def restore(xd, fd, dominfo = None, paused = False,
relocating = False):
signature = read_exact(fd, len(SIGNATURE),
"not a valid guest state file: signature read")
- if signature != SIGNATURE:
+ if signature != SIGNATURE and signature != COLO_SIGNATURE:
raise XendError("not a valid guest state file: found '%s'" %
signature)
+    if signature == COLO_SIGNATURE:
+        colo = True
+    else:
+        colo = False
+
l = read_exact(fd, sizeof_int,
"not a valid guest state file: config size read")
vmconfig_size = unpack("!i", l)[0]
@@ -305,6 +311,7 @@ def restore(xd, fd, dominfo = None, paused = False,
relocating = False):
log.debug("[xc_restore]: %s", string.join(cmd))
-    handler = RestoreInputHandler()
+    handler = RestoreInputHandler(colo)
+    restore_handler = RestoreHandler(fd, colo, dominfo, handler)
-    forkHelper(cmd, fd, handler.handler, True)
+    forkHelper(cmd, fd, handler.handler, True, restore_handler)
@@ -321,35 +328,9 @@ def restore(xd, fd, dominfo = None, paused = False,
relocating = False):
raise XendError('Could not read store MFN')
if not is_hvm and handler.console_mfn is None:
- raise XendError('Could not read console MFN')
-
- restore_image.setCpuid()
-
- # xc_restore will wait for source to close connection
-
- dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
-
- #
- # We shouldn't hold the domains_lock over a waitForDevices
- # As this function sometime gets called holding this lock,
- # we must release it and re-acquire it appropriately
- #
- from xen.xend import XendDomain
+ raise XendError('Could not read console MFN')
- lock = True;
- try:
- XendDomain.instance().domains_lock.release()
- except:
- lock = False;
-
- try:
- dominfo.waitForDevices() # Wait for backends to set up
- finally:
- if lock:
- XendDomain.instance().domains_lock.acquire()
-
- if not paused:
- dominfo.unpause()
+        restore_handler.resume(True, paused, None)
return dominfo
except Exception, exn:
@@ -358,23 +339,106 @@ def restore(xd, fd, dominfo = None, paused = False,
relocating = False):
raise exn
+class RestoreHandler:
+ def __init__(self, fd, colo, dominfo, inputHandler):
+ self.fd = fd
+ self.colo = colo
+ self.firsttime = True
+ self.inputHandler = inputHandler
+ self.dominfo = dominfo
+
+ def resume(self, finish, paused, child):
+ fd = self.fd
+ dominfo = self.dominfo
+ handler = self.inputHandler
+ restore_image.setCpuid()
+ dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
+
+ if self.colo and not finish:
+ # notify master that checkpoint finishes
+ write_exact(fd, "finish", "failed to write finish done")
+ buf = read_exact(fd, 6, "failed to read resume flag")
+ if buf != "resume":
+ return False
+
+ from xen.xend import XendDomain
+
+ if self.firsttime:
+ lock = True;
+ try:
+ XendDomain.instance().domains_lock.release()
+ except:
+ lock = False;
+
+ try:
+ dominfo.waitForDevices() # Wait for backends to set up
+ finally:
+ if lock:
+ XendDomain.instance().domains_lock.acquire()
+ if not paused:
+ dominfo.unpause()
+ else:
+ # colo
+ xc.domain_resume(dominfo.domid, 0)
+ ResumeDomain(dominfo.domid)
+
+ if self.colo and not finish:
+ child.tochild.write("resume\n")
+ child.tochild.flush()
+ buf = child.fromchild.readline()
+ if buf != "resume\n":
+ return False
+ if self.firsttime:
+ util.runcmd("/etc/xen/scripts/HA_fw_runtime.sh slaver")
+ # notify master side VM resumed
+ write_exact(fd, "resume", "failed to write resume done");
+
+ # wait new checkpoint
+ buf = read_exact(fd, 8, "failed to read continue flag")
+ if buf != "continue":
+ return False
+
+ child.tochild.write("suspend\n")
+ buf = child.fromchild.readline()
+ if buf != "suspend\n":
+ return False
+
+ # notify master side suspend done.
+ write_exact(fd, "suspend", "failed to write suspend done")
+ buf = read_exact(fd, 5, "failed to read start flag")
+ if buf != "start":
+ return False
+
+ child.tochild.write("start\n")
+ child.tochild.flush()
+
+ self.firsttime = False
+
class RestoreInputHandler:
- def __init__(self):
+ def __init__(self, colo):
self.store_mfn = None
self.console_mfn = None
- def handler(self, line, _):
+ def handler(self, line, child, restorehandler):
+ if line == "finish\n":
+ # colo
+ return restorehandler.resume(False, False, child)
+
m = re.match(r"^(store-mfn) (\d+)$", line)
if m:
self.store_mfn = int(m.group(2))
- else:
- m = re.match(r"^(console-mfn) (\d+)$", line)
- if m:
- self.console_mfn = int(m.group(2))
+ return True
+
+ m = re.match(r"^(console-mfn) (\d+)$", line)
+ if m:
+ self.console_mfn = int(m.group(2))
+ return True
+
+ return False
-def forkHelper(cmd, fd, inputHandler, closeToChild):
+def forkHelper(cmd, fd, inputHandler, closeToChild, restorehandler):
child = xPopen3(cmd, True, -1, [fd])
if closeToChild:
@@ -392,7 +456,7 @@ def forkHelper(cmd, fd, inputHandler, closeToChild):
else:
line = line.rstrip()
log.debug('%s', line)
- inputHandler(line, child.tochild)
+ inputHandler(line, child, restorehandler)
except IOError, exn:
raise XendError('Error reading from child process for %s: %s' %
--
1.8.0
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |