[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC][PATCH 05/13] Kemari: Kemari sender
This is an updated version of the following patch. Followed the changes in live migration code. http://lists.xensource.com/archives/html/xen-devel/2009-03/msg00374.html Signed-off-by: Yoshisato Yanagisawa <yanagisawa.yoshisato@xxxxxxxxxxxxx> Signed-off-by: Yoshi Tamura <tamura.yoshiaki@xxxxxxxxxxxxx> --- tools/libxc/xc_dom_kemari_save.c | 1139 +++++++++++++++++++++++++++++++++++++++ tools/xcutils/xc_kemari_save.c | 518 +++++++++++++++++ 2 files changed, 1657 insertions(+) diff -r b249f3e979a5 -r cf6a910e3663 tools/xcutils/xc_kemari_save.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/xcutils/xc_kemari_save.c Wed Mar 11 18:03:47 2009 +0900 @@ -0,0 +1,518 @@ +/* + * xc_kemari_save.c + * + * Save the state of a running Linux session. + * + * Copyright (c) 2008 Nippon Telegraph and Telephone Corporation. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file "COPYING" in the main directory of + * this archive for more details. + * + * This source code is based on xc_save.c. + * Copied qemu_destroy_buffer and init_qemu_maps from xc_save.c. + * + * Copyright (C) 2005 by Christian Limpach + * + */ + + +#include <err.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <netinet/tcp.h> + +#include <xs.h> +#include <xenctrl.h> +#include <xenguest.h> +#include <xc_private.h> +#include <xen/kemari.h> + +static volatile sig_atomic_t run = 1; +static int xc_handle, xce_handle, io_fd; +static struct kemari_ring *ring = NULL; +static uint32_t kemari_ring_size = 0; +static pid_t qemu_pid; +static int is_finalized = 0; +static int domid; + +/* For HVM guests, there are two sources of dirty pages: the Xen shadow + * log-dirty bitmap, which we get with a hypercall, and qemu's version. + * The protocol for getting page-dirtying data from qemu uses a + * double-buffered shared memory interface directly between xc_save and + * qemu-dm. + * + * xc_save calculates the size of the bitmaps and notifies qemu-dm + * through the store that it wants to share the bitmaps. qemu-dm then + * starts filling in the 'active' buffer. + * + * To change the buffers over, xc_save writes the other buffer number to + * the store and waits for qemu to acknowledge that it is now writing to + * the new active buffer. xc_save can then process and clear the old + * active buffer. */ + +static char *qemu_active_path; +static char *qemu_next_active_path; +static int qemu_shmid = -1; +static struct xs_handle *xs; + + +/* Mark the shared-memory segment for destruction */ +static void qemu_destroy_buffer(void) +{ + if (qemu_shmid != -1) + shmctl(qemu_shmid, IPC_RMID, NULL); + qemu_shmid = -1; +} + +static char *kemari_qemu_info = NULL; +static void qemu_save_image(int next_active) +{ + kemari_qemu_info[0] = next_active; + kemari_qemu_info[1] = 0; + xen_wmb(); + kill(qemu_pid, SIGUSR1); +} + +static void qemu_end_flip(void) +{ + while (kemari_qemu_info[1] == 0) + xen_rmb(); +} + +static void qemu_end_save(void) +{ + while (kemari_qemu_info[2] == 0) + xen_rmb(); +} + +static void qemu_image_sent(void) +{ + /* after QEMU image sent */ + kemari_qemu_info[2] = 0; + xen_wmb(); +} + +static void *init_qemu_maps(int domid, unsigned int bitmap_size) +{ + key_t key; + char key_ascii[17] = {0,}; + void *seg; + char *path, *p; + + /* Make a shared-memory segment */ + do { + key = rand(); /* No security, just a sequence of numbers */ + qemu_shmid = shmget(key, 2 * bitmap_size + PAGE_SIZE, + IPC_CREAT|IPC_EXCL|S_IRUSR|S_IWUSR); + if (qemu_shmid == -1 && errno != EEXIST) + errx(1, "can't get shmem to talk to qemu-dm"); + } while (qemu_shmid == -1); + + /* Remember to tidy up after ourselves */ + atexit(qemu_destroy_buffer); + + /* Map it into our address space */ + seg = shmat(qemu_shmid, NULL, 0); + if (seg == (void *) -1) + errx(1, "can't map shmem to talk to qemu-dm"); + memset(seg, 0, 2 * bitmap_size + PAGE_SIZE); + + /* Write the size of it into the first 32 bits */ + *(uint32_t *)seg = bitmap_size; + + /* Tell qemu about it */ + if ((xs = xs_daemon_open()) == NULL) + errx(1, "Couldn't contact xenstore"); + if (!(path = strdup("/local/domain/0/device-model/"))) + errx(1, "can't get domain path in store"); + if (!(path = realloc(path, strlen(path) + + 10 + + strlen("/logdirty/next-active") + 1))) + errx(1, "no memory for constructing xenstore path"); + snprintf(path + strlen(path), 11, "%i", domid); + strcat(path, "/logdirty/"); + p = path + strlen(path); + + strcpy(p, "key"); + snprintf(key_ascii, 17, "%16.16llx", (unsigned long long) key); + if (!xs_write(xs, XBT_NULL, path, key_ascii, 16)) + errx(1, "can't write key (%s) to store path (%s)\n", key_ascii, path); + + /* Watch for qemu's indication of the active buffer, and request it + * to start writing to buffer 0 */ + strcpy(p, "active"); + if (!xs_watch(xs, path, "qemu-active-buffer")) + errx(1, "can't set watch in store (%s)\n", path); + if (!(qemu_active_path = strdup(path))) + errx(1, "no memory for copying xenstore path"); + + strcpy(p, "next-active"); + if (!(qemu_next_active_path = strdup(path))) + errx(1, "no memory for copying xenstore path"); + + kemari_qemu_info = seg + 2 * bitmap_size; + xen_wmb(); + qemu_save_image(0); + + free(path); + return seg; +} + +static void close_handler(int sig_type) +{ + run = 0; +} + +static int handle_event(int domid, unsigned int flags) +{ + int ret = 1, rcv_port; + + if ((rcv_port = xc_evtchn_pending(xce_handle)) < 0) { + ERROR("Failed to read from event fd"); + goto out; + } + + if (xc_kemari_update(xc_handle, io_fd, domid, ring, flags, + qemu_save_image, qemu_end_flip, qemu_end_save, qemu_image_sent) != 0) { + xc_domain_pause(xc_handle, domid); + kill(qemu_pid, SIGSTOP); + ERROR("xc_kemari_update failed"); + goto out; + } + + if (xc_evtchn_unmask(xce_handle, rcv_port) < 0) { + ERROR("Failed to write to event fd"); + goto out; + } + + ret = 0; +out: + return ret; +} + +static void set_signal_handler(void (*handler)(int)) +{ + struct sigaction act; + + act.sa_handler = handler; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + sigaction(SIGQUIT, &act, 0); + sigaction(SIGINT, &act, 0); + sigaction(SIGHUP, &act, 0); + sigaction(SIGTERM, &act, 0); +} + +static int attach_ports(int domid) +{ + struct xs_handle *xs_handle; + char **list, *data; + unsigned int list_size, data_size; + char path[128]; + uint32_t port; + int i, ret = 1; + + if ((xs_handle = xs_daemon_open()) == NULL) + errx(1, "Couldn't contact xenstore"); + + /* + * attach block port. + */ + snprintf(path, sizeof(path), "/local/domain/%d/device/vbd", domid); + list = xs_directory(xs_handle, XBT_NULL, path, &list_size); + if (list == NULL) + errx(1, "xs_directory (%s) failed", path); + + for (i = 0; i < list_size; i++) { + snprintf(path, sizeof(path), + "/local/domain/%d/device/vbd/%s/event-channel", domid, list[i]); + data = xs_read(xs_handle, XBT_NULL, path, &data_size); + if (data == NULL) + continue; + port = strtoul(data, NULL, 10); + if ((ret = xc_kemari_control(xc_handle, domid, XEN_KEMARI_OP_attach, + &port, NULL, + NULL, KEMARI_TAP_OUT)) != 0) { + ERROR("Error when attaching blk_port (%d) on kemari", port); + goto out; + } + free(data); + DPRINTF("blk_port %d attached\n", port); + } + free(list); + + /* + * attach net port. + */ + snprintf(path, sizeof(path), "/local/domain/%d/device/vif", domid); + list = xs_directory(xs_handle, XBT_NULL, path, &list_size); + if (list == NULL) + errx(1, "xs_directory (%s) failed", path); + + for (i = 0; i < list_size; i++) { + snprintf(path, sizeof(path), + "/local/domain/%d/device/vif/%s/event-channel", domid, list[i]); + data = xs_read(xs_handle, XBT_NULL, path, &data_size); + if (data == NULL) + continue; + port = strtoul(data, NULL, 10); + if ((ret = xc_kemari_control(xc_handle, domid, XEN_KEMARI_OP_attach, + &port, NULL, + NULL, KEMARI_TAP_OUT)) != 0) { + ERROR("Error when attaching net_port (%d) on kemari", port); + goto out; + } + free(data); + DPRINTF("net_port %d attached\n", port); + } + free(list); + + /* attach success */ + ret = 0; + +out: + xs_daemon_close(xs_handle); + + return ret; +} + +static pid_t get_qemu_pid(int domid) +{ + struct xs_handle *xs_handle; + char path[128]; + char *data; + unsigned int data_size; + pid_t pid = 0; + + if ((xs_handle = xs_daemon_open()) == NULL) + errx(1, "Couldn't contact xenstore"); + + snprintf(path, sizeof(path), + "/local/domain/%d/image/device-model-pid", domid); + data = xs_read(xs_handle, XBT_NULL, path, &data_size); + if (data == NULL) { + ERROR("Could not find QEMU pid for domid %d", domid); + goto out; + } + pid = strtoul(data, NULL, 10); + free(data); + +out: + xs_daemon_close(xs_handle); + + return pid; +} + +static void finalize(void) +{ + int ret; + + if (is_finalized) + return; + + set_signal_handler(SIG_IGN); + if (ring != NULL) + munmap(ring, kemari_ring_size * PAGE_SIZE); + + if ((ret = xc_kemari_control(xc_handle, domid, XEN_KEMARI_OP_off, + NULL, NULL, NULL, 0)) != 0) { + ERROR("Error when turning off kemari"); + } else { + DPRINTF("successufully execute KEMARI_OP_off\n"); + } + + if ( xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_OFF, + NULL, 0, NULL, 0, NULL) < 0 ) { + ERROR("Warning - couldn't disable shadow mode"); + } + + if (!run) + xc_domain_destroy(xc_handle, domid); + + xc_interface_close(xc_handle); + + is_finalized = 1; +} + +int +main(int argc, char **argv) +{ + unsigned int maxit, max_f, flags; + int ret; + int evtchn_fd; + uint32_t port, kemari_port; + uint64_t kemari_mfn; + fd_set inset; + + if (argc != 6) + errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]); + + xc_handle = xc_interface_open(); + if (xc_handle < 0) + errx(1, "failed to open control interface"); + + io_fd = atoi(argv[1]); + domid = atoi(argv[2]); + maxit = atoi(argv[3]); + max_f = atoi(argv[4]); + flags = atoi(argv[5]); + + set_signal_handler(close_handler); + if ((qemu_pid = get_qemu_pid(domid)) == 0) + errx(1, "failed to get qemu pid"); + atexit(finalize); + + if (io_fd == -1) /* means test mode */ + { + io_fd = open("/dev/null", O_RDWR); + flags |= XCFLAGS_DEBUG; + } + else + { + int one = 1; + if (setsockopt(io_fd, IPPROTO_TCP, TCP_NODELAY, + &one, sizeof(one)) < 0) { + ERROR("failed to set TCP_NODELAY"); + } + } + + if ((xce_handle = xc_evtchn_open()) < 0) { + errx(1, "failed to open control interface"); + } + + evtchn_fd = xc_evtchn_fd(xce_handle); + + if ( xc_shadow_control(xc_handle, domid, + XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL, 0, NULL) < 0 ) + { + int frc; + /* log-dirty already enabled? There's no test op, + so attempt to disable then reenable it */ + frc = xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_OFF, + NULL, 0, NULL, 0, NULL); + if ( frc >= 0 ) + { + frc = xc_shadow_control(xc_handle, domid, + XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL, 0, NULL); + } + + if ( frc < 0 ) + { + err(errno, "Couldn't enable shadow mode (rc %d)", frc); + } + } + + if ((ret = xc_kemari_control(xc_handle, domid, XEN_KEMARI_OP_enable, + &kemari_port, &kemari_ring_size, + &kemari_mfn, 0) != 0)) { + errx(1, "Error when turning on kemari"); + } + + DPRINTF("kemari_port=%u, kemari_mfn=%llu, kemari_ring_size=%u\n", + kemari_port, kemari_mfn, kemari_ring_size); + + if (attach_ports(domid) != 0) { + ERROR("attaching port failed "); + goto out; + } + + if ((port = xc_evtchn_bind_interdomain(xce_handle, domid, + kemari_port)) < 0) { + ERROR("xc_evtchn_bind_interdomain failed "); + goto out; + } + + if ((ring = xc_map_foreign_range(xc_handle, DOMID_XEN, + kemari_ring_size * PAGE_SIZE, PROT_READ | PROT_WRITE, + kemari_mfn)) == 0) { + ERROR("xc_map_foreign_range failed"); + goto out; + } + + if (xc_domain_pause(xc_handle, domid) < 0) { + ERROR("Domain appears not to have paused"); + goto out; + } + + ret = xc_kemari_save(xc_handle, io_fd, domid, ring, flags, + !!(flags & XCFLAGS_HVM), + &init_qemu_maps); + if (ret != 0) { + ERROR("xc_kemari_save failed"); + goto out; + } + + FD_ZERO(&inset); + FD_SET(evtchn_fd, &inset); + + if (xc_domain_unpause(xc_handle, domid) < 0) { + ERROR("Domain appears not to have unpaused"); + goto out; + } + + DPRINTF("running start"); + + while (run) { + + if (select(evtchn_fd + 1, &inset, NULL, NULL, NULL) < 0) { + if (errno == EINTR) + continue; + ERROR("Error when waiting events by select()"); + break; + } + + if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset)) { + + if ((ret = handle_event(domid, flags)) != 0) { + ERROR("Error when handling events"); + break; + } + + /* usleep(10000); */ + + if (xc_evtchn_notify(xce_handle, port) < 0) { + ERROR("xc_evtchn_notify failed"); + /* goto out; */ + break; + } + + if(xc_domain_unpause(xc_handle, domid) < 0) { + ERROR("xc_domain_unpause"); + /* goto out; */ + break; + } + + } + } + + out: + close(io_fd); + finalize(); + + return ret; +} + + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ + diff -r b249f3e979a5 -r cf6a910e3663 tools/libxc/xc_dom_kemari_save.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_dom_kemari_save.c Wed Mar 11 18:03:47 2009 +0900 @@ -0,0 +1,1139 @@ +/****************************************************************************** + * xc_dom_kemari_save.c + * + * Save the state of a running Linux session. + * + * Copyright (c) 2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * This source code is based on xc_domain_save.c. + * Copied BITS_PER_LONG, BITS_TO_LONGS, BITMAP_SIZE, BITMAP_SHIFT, + * RATE_IS_MAX, test_bit, clear_bit, set_bit, tv_delta, noncached_write, + * initialize_mbit_rate, and ratewrite from xc_domain_save.c + * + * Copyright (c) 2003, K A Fraser. + */ + +#include <inttypes.h> +#include <time.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/time.h> + +#include "xc_private.h" +#include "xc_dom.h" +#include "xg_private.h" +#include "xg_save_restore.h" + +#include <xen/hvm/params.h> +#include "xc_e820.h" + +#ifdef __MINIOS__ +/* + * Caution: atomicity of following alternative libc functions are broken. + */ +static ssize_t sendfile(int out_fd, int in_fd, off_t *offset, size_t count) +{ + char buf[1024]; + int len, wrote_len = 0; + + if (offset != NULL) { + ERROR("Sorry sendfile for stubdomain should not have offset"); + errno = EIO; + return -1; + } + + while (count > 0) { + len = (count < sizeof(buf))?count:sizeof(buf); + len = read(in_fd, buf, len); + if (len < 0) + return -1; + if (write_exact(out_fd, buf, len)) + return -1; + wrote_len += len; + count -= len; + } + return wrote_len; +} + +#define IOV_MAX 1024 +struct iovec { + void *iov_base; /* Base address. */ + size_t iov_len; /* Length. */ +}; +static ssize_t writev(int d, const struct iovec *iov, int iovcnt) +{ + int i; + int len, wrote_len; + + if (iovcnt < 0 || iovcnt > IOV_MAX) { + errno = EINVAL; + return -1; + } + + for (i = 0, wrote_len = 0; i < iovcnt; i++) { + len = write(d, iov[i].iov_base, iov[i].iov_len); + if (len < 0) + return -1; + + wrote_len += len; + if (wrote_len < 0) { /* integer overflow */ + errno = EINVAL; + return -1; + } + + if (len != iov[i].iov_len) + return wrote_len; + } + + return wrote_len; +} +#else /* !__MINIOS__ */ +#include <sys/sendfile.h> +#include <sys/uio.h> +#endif /* __MINIOS__ */ + +/* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */ +static unsigned long *qemu_bitmaps[2]; +static int qemu_active; +static int qemu_non_active; + +/* number of pfns this guest has (i.e. number of entries in the P2M) */ +static unsigned long p2m_size; + +/* page frame numbers */ +static unsigned long *pfn_type = NULL; + +/* The new domain's shared-info frame number. */ +static unsigned long shared_info_frame; + +/* + * guest memory + */ +#define GUEST_MEM_ENTRY_SIZE 1024 /* up to 4MB at a time. */ +static unsigned char ** guest_memory = NULL; +static unsigned long ** guest_memory_status = NULL; +static unsigned long guest_memory_size = 0; + +static inline int map_guest_mem(int xc_handle, uint32_t domid, + unsigned long base) +{ + int j; + unsigned char * region_base; + unsigned long * pfn_base; + + pfn_base = guest_memory_status[base]; + + memset(pfn_base, 0, GUEST_MEM_ENTRY_SIZE); + for (j = 0; j < GUEST_MEM_ENTRY_SIZE; j++) { + pfn_base[j] = base * GUEST_MEM_ENTRY_SIZE + j; + } + region_base = xc_map_foreign_batch( + xc_handle, domid, PROT_READ, pfn_base, GUEST_MEM_ENTRY_SIZE); + if ( region_base == NULL ) + { + PERROR("map failed at guest memory frame 0x%lx - 0x%lx (%lu)", + base * GUEST_MEM_ENTRY_SIZE, (base + 1)* GUEST_MEM_ENTRY_SIZE - 1, + base); + return -1; + } + + /* Look for and skip completely empty batches. */ + for ( j = 0; j < GUEST_MEM_ENTRY_SIZE; j++ ) + pfn_base[j] &= XEN_DOMCTL_PFINFO_LTAB_MASK; + for ( j = 0; j < GUEST_MEM_ENTRY_SIZE; j++ ) + if ( pfn_base[j] != XEN_DOMCTL_PFINFO_XTAB ) + break; + if ( j == GUEST_MEM_ENTRY_SIZE ) + { + munmap(region_base, GUEST_MEM_ENTRY_SIZE*PAGE_SIZE); + guest_memory[base] = NULL; + return 1; + } + + guest_memory[base] = region_base; + + return 0; +} + +static inline unsigned char * search_guest_mem(int xc_handle, uint32_t domid, + unsigned long mfn) +{ + unsigned long base = mfn / GUEST_MEM_ENTRY_SIZE; + unsigned long offset = mfn % GUEST_MEM_ENTRY_SIZE; + + if (base >= guest_memory_size) { + ERROR("Error base(%lu) is greater than guest_memory_size(%lu)\n", + base, guest_memory_size); + return NULL; + } + + if ( guest_memory_status[base][offset] == XEN_DOMCTL_PFINFO_XTAB ) { + /* reload XTAB place */ + munmap(guest_memory[base], GUEST_MEM_ENTRY_SIZE*PAGE_SIZE); + guest_memory[base] = NULL; + DPRINTF("guest_memory[%lu] (frame 0x%lx - 0x%lx) will be remapped\n", + base, base * GUEST_MEM_ENTRY_SIZE, + (base + 1) * GUEST_MEM_ENTRY_SIZE - 1); + } + + if (guest_memory[base] == NULL) + if (map_guest_mem(xc_handle, domid, offset)) + return NULL; + + return guest_memory[base] + offset * PAGE_SIZE; + /* Since I don't care of XEN_DOMCTL_PFINFO_LTAB_MASK, + this program may cause some accidents. */ +} + +static inline int init_guest_mem(int xc_handle, uint32_t dom) +{ + int i; + + guest_memory_size = p2m_size / GUEST_MEM_ENTRY_SIZE + 1; + DPRINTF("guest_memory_size: %lu\n", guest_memory_size); + + /* mapped memory */ + guest_memory = xg_memalign(PAGE_SIZE, + guest_memory_size * sizeof(guest_memory[0])); + if (guest_memory == NULL) + { + PERROR("failed to allocate guest_memory"); + return -1; + } + if ( lock_pages(guest_memory, guest_memory_size * sizeof(guest_memory[0]))) + { + ERROR("Unable to lock guest_memory array"); + return -1; + } + + /* memory status */ + guest_memory_status = xg_memalign(PAGE_SIZE, + guest_memory_size * sizeof(guest_memory_status[0])); + if ( guest_memory_status == NULL ) + { + ERROR("failed to alloc memory for guest_memory_status"); + errno = ENOMEM; + return -1; + } + if ( lock_pages(guest_memory_status, + guest_memory_size * sizeof(guest_memory_status[0]))) + { + ERROR("Unable to lock guest_memory_status array"); + return -1; + } + + for (i = 0; i < guest_memory_size; i++) { + guest_memory_status[i] = xg_memalign(PAGE_SIZE, + GUEST_MEM_ENTRY_SIZE * sizeof(guest_memory_status[0][0])); + if (guest_memory_status[i] == NULL) { + ERROR("failed to alloc memory for guest_memory_status[%d]", i); + errno = ENOMEM; + return -1; + } + if ( lock_pages(guest_memory_status, + guest_memory_size * sizeof(guest_memory_status[0][0]))) + { + ERROR("Unable to lock guest_memory_status[%d]", i); + return -1; + } + } + + for (i = 0; i < guest_memory_size; i++) + if (map_guest_mem(xc_handle, dom, i) < 0) + return -1; + + return 0; +} + +static int writev_exact(int fd, const struct iovec *iov, size_t count) +{ + int i; + size_t sum; + for (i = 0, sum = 0; i < count; i++) + sum += iov[i].iov_len; + + if (writev(fd, iov, count) != sum) + return -1; + else + return 0; +} + +/* grep fodder: machine_to_phys */ + + +/* +** During (live) save/migrate, we maintain a number of bitmaps to track +** which pages we have to send, to fixup, and to skip. +*/ + +#define BITS_PER_LONG (sizeof(unsigned long) * 8) +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define BITMAP_SIZE (BITS_TO_LONGS(p2m_size) * sizeof(unsigned long)) + +#define BITMAP_ENTRY(_nr,_bmap) \ + ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] + +#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) + +static inline int test_bit (int nr, volatile void * addr) +{ + return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; +} + +static inline void clear_bit (int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); +} + +static inline void set_bit ( int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr)); +} + +static uint64_t tv_delta(struct timeval *new, struct timeval *old) +{ + return (((new->tv_sec - old->tv_sec)*1000000) + + (new->tv_usec - old->tv_usec)); +} + +static int noncached_write(int fd, void *buffer, int len) +{ + static int write_count = 0; + int rc = (write_exact(fd, buffer, len) == 0) ? len : -1; + + write_count += len; + if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) ) + { + /* Time to discard cache - dont care if this fails */ + discard_file_cache(fd, 0 /* no flush */); + write_count = 0; + } + + return rc; +} + +#ifdef ADAPTIVE_SAVE + +/* +** We control the rate at which we transmit (or save) to minimize impact +** on running domains (including the target if we're doing live migrate). +*/ + +#define MAX_MBIT_RATE 500 /* maximum transmit rate for migrate */ +#define START_MBIT_RATE 100 /* initial transmit rate for migrate */ + +/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */ +#define RATE_TO_BTU 781250 + +/* Amount in bytes we allow ourselves to send in a burst */ +#define BURST_BUDGET (100*1024) + +/* We keep track of the current and previous transmission rate */ +static int mbit_rate, ombit_rate = 0; + +/* Have we reached the maximum transmission rate? */ +#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE) + +static inline void initialize_mbit_rate() +{ + mbit_rate = START_MBIT_RATE; +} + +static int ratewrite(int io_fd, void *buf, int n) +{ + static int budget = 0; + static int burst_time_us = -1; + static struct timeval last_put = { 0 }; + struct timeval now; + struct timespec delay; + long long delta; + + if ( START_MBIT_RATE == 0 ) + return noncached_write(io_fd, buf, n); + + budget -= n; + if ( budget < 0 ) + { + if ( mbit_rate != ombit_rate ) + { + burst_time_us = RATE_TO_BTU / mbit_rate; + ombit_rate = mbit_rate; + DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n", + mbit_rate, BURST_BUDGET, burst_time_us); + } + if ( last_put.tv_sec == 0 ) + { + budget += BURST_BUDGET; + gettimeofday(&last_put, NULL); + } + else + { + while ( budget < 0 ) + { + gettimeofday(&now, NULL); + delta = tv_delta(&now, &last_put); + while ( delta > burst_time_us ) + { + budget += BURST_BUDGET; + last_put.tv_usec += burst_time_us; + if ( last_put.tv_usec > 1000000 ) + { + last_put.tv_usec -= 1000000; + last_put.tv_sec++; + } + delta -= burst_time_us; + } + if ( budget > 0 ) + break; + delay.tv_sec = 0; + delay.tv_nsec = 1000 * (burst_time_us - delta); + while ( delay.tv_nsec > 0 ) + if ( nanosleep(&delay, &delay) == 0 ) + break; + } + } + } + return noncached_write(io_fd, buf, n); +} + +#else /* ! ADAPTIVE SAVE */ + +#define RATE_IS_MAX() (0) +#define ratewrite(_io_fd, _buf, _n) noncached_write((_io_fd), (_buf), (_n)) +#define initialize_mbit_rate() + +#endif + +static int print_stats(int xc_handle, uint32_t domid, int pages_sent, + xc_shadow_op_stats_t *stats, int print) +{ + static struct timeval wall_last; + static long long d0_cpu_last; + static long long d1_cpu_last; + + struct timeval wall_now; + long long wall_delta; + long long d0_cpu_now, d0_cpu_delta; + long long d1_cpu_now, d1_cpu_delta; + + gettimeofday(&wall_now, NULL); + + d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000; + d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000; + + if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) + DPRINTF("ARRHHH!!\n"); + + wall_delta = tv_delta(&wall_now,&wall_last)/1000; + if ( wall_delta == 0 ) + wall_delta = 1; + + d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; + d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; + + if ( print ) + DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " + "dirtied %dMb/s %" PRId32 " pages\n", + wall_delta, + (int)((d0_cpu_delta*100)/wall_delta), + (int)((d1_cpu_delta*100)/wall_delta), + (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))), + (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))), + stats->dirty_count); + +#ifdef ADAPTIVE_SAVE + if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate ) + { + mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) + + 50; + if ( mbit_rate > MAX_MBIT_RATE ) + mbit_rate = MAX_MBIT_RATE; + } +#endif + + d0_cpu_last = d0_cpu_now; + d1_cpu_last = d1_cpu_now; + wall_last = wall_now; + + return 0; +} + +static int send_qemu_image(int xc_handle, int io_fd, uint32_t dom) +{ + char path[128]; + struct stat st; + struct { + int minusfour; + uint32_t image_size; + } chunk = { -1, 0 }; + int qemu_fd; + int rc = -1; + + snprintf(path, sizeof(path), "/dev/shm/qemu-save.%d", dom); + if ((qemu_fd = open(path, O_RDONLY)) == -1) + { + PERROR("Error when opening qemu image %s", path); + goto out; + } + + if (fstat(qemu_fd, &st) == -1) + { + PERROR("Error fstat qemu file %s", path); + goto out; + } + chunk.image_size = st.st_size; + + if ( write_exact(io_fd, &chunk, sizeof(chunk)) ) + { + PERROR("Error when writing header for qemu image"); + goto out; + } + + if ( sendfile(io_fd, qemu_fd, NULL, chunk.image_size) != + chunk.image_size) + { + PERROR("Error when writing qemu image"); + goto out; + } + close(qemu_fd); + + rc = 0; +out: + return rc; +} + +static int send_hvm_params(int xc_handle, int io_fd, uint32_t dom) +{ + struct { + int id; + uint32_t pad; + uint64_t data; + } chunk = { 0, 0 }; + + chunk.id = -3; + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT, + &chunk.data); + + if ( (chunk.data != 0) && + write_exact(io_fd, &chunk, sizeof(chunk)) ) + { + PERROR("Error when writing the ident_pt for EPT guest"); + return -1; + } + + chunk.id = -4; + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS, + &chunk.data); + + if ( (chunk.data != 0) && + write_exact(io_fd, &chunk, sizeof(chunk)) ) + { + PERROR("Error when writing the vm86 TSS for guest"); + return -1; + } + + return 0; +} + +static int send_hvm_context(int xc_handle, int io_fd, + struct kemari_ring *ring, uint32_t dom) +{ + uint32_t buf_size = ring->hvm_ctxt.buf_size; + uint32_t rec_size = ring->hvm_ctxt.rec_size; + uint8_t *hvm_buf = (uint8_t *)ring + ring->hvm_ctxt.buf_offset; + int rc = -1; + + /* Get HVM context from Xen and save it too */ + if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, + buf_size)) == -1 ) + { + ERROR("HVM:Could not get hvm buffer"); + goto out; + } + + if ( write_exact(io_fd, &rec_size, sizeof(uint32_t)) ) + { + PERROR("error write hvm buffer size"); + goto out; + } + + if ( write_exact(io_fd, hvm_buf, rec_size) ) + { + PERROR("write HVM info failed!\n"); + goto out; + } + rc = 0; + +out: + return rc; +} + +int xc_kemari_save(int xc_handle, int io_fd, uint32_t dom, + void *kemari_ring, uint32_t flags, + int hvm, void *(*init_qemu_maps)(int, unsigned)) +{ + int rc = 1, i, j, iter = 0; + int debug = (flags & XCFLAGS_DEBUG); + int sent_last_iter, skip_this_iter; + xc_dominfo_t info; + struct kemari_ring *ring = (struct kemari_ring *)kemari_ring; + + /* base of the region in which domain memory is mapped */ + unsigned char *region_base = NULL; + + /* bitmap of pages: + - that should be sent this iteration (unless later marked as skip); + - to skip this iteration because already dirty; + - to fixup by sending at the end if not already resent; */ + unsigned long *to_send = NULL, *to_fix = NULL; + + xc_shadow_op_stats_t stats; + + unsigned long needed_to_fix = 0; + unsigned long total_sent = 0; + + /* HVM: magic frames for ioreqs and xenstore comms. */ + uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */ + + /* callback irq */ + uint64_t callback_irq = 0; + + if ( !hvm ) + { + ERROR("HVM domain is required for the kemari migration."); + return 1; + } + + initialize_mbit_rate(); + + if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 ) + { + ERROR("Could not get domain info"); + return 1; + } + + shared_info_frame = info.shared_info_frame; + DPRINTF("xc_kemari_save: shared_info_frame: %lu\n", shared_info_frame); + + /* Get the size of the P2M table */ + p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom) + 1; + DPRINTF("xc_kemari_save: p2m_size: %lu\n", p2m_size); + + /* Domain is still running at this point */ + { + /* Get qemu-dm logging dirty pages too */ + void *seg = init_qemu_maps(dom, BITMAP_SIZE); + qemu_bitmaps[0] = seg; + qemu_bitmaps[1] = seg + BITMAP_SIZE; + qemu_active = 0; + qemu_non_active = 1; + } + + /* pretend we sent all the pages last iteration */ + sent_last_iter = p2m_size; + + /* Setup to_send / to_fix bitmaps */ + to_send = xg_memalign(PAGE_SIZE, ROUNDUP(BITMAP_SIZE, PAGE_SHIFT)); + to_fix = calloc(1, BITMAP_SIZE); + + if ( !to_send || !to_fix ) + { + ERROR("Couldn't allocate to_send array"); + goto out; + } + + memset(to_send, 0xff, BITMAP_SIZE); + + if ( lock_pages(to_send, BITMAP_SIZE) ) + { + ERROR("Unable to lock to_send"); + return 1; + } + + pfn_type = xg_memalign(PAGE_SIZE, ROUNDUP( + MAX_BATCH_SIZE * sizeof(*pfn_type), PAGE_SHIFT)); + if ( pfn_type == NULL ) + { + ERROR("failed to alloc memory for pfn_type arrays"); + errno = ENOMEM; + goto out; + } + memset(pfn_type, 0, + ROUNDUP(MAX_BATCH_SIZE * sizeof(*pfn_type), PAGE_SHIFT)); + + if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) ) + { + ERROR("Unable to lock pfn_type array"); + goto out; + } + + /* Start writing out the saved-domain record. */ + if ( write_exact(io_fd, &p2m_size, sizeof(unsigned long)) ) + { + PERROR("write: p2m_size"); + goto out; + } + + /* send shared_info_frame */ + if ( write_exact(io_fd, &shared_info_frame, sizeof(unsigned long)) ) + { + PERROR("write: shared_info_frame"); + goto out; + } + + /* Save magic-page locations. */ + memset(magic_pfns, 0, sizeof(magic_pfns)); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, + &magic_pfns[0]); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, + &magic_pfns[1]); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, + &magic_pfns[2]); + DPRINTF("kemari_restore: magic_pfns 0: %lld, 1: %lld, 2: %lld\n", + magic_pfns[0], magic_pfns[1], magic_pfns[2]); + if ( write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) ) + { + PERROR("Error when writing to state file (7)"); + goto out; + } + + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_CALLBACK_IRQ, + &callback_irq); + DPRINTF("kemari_restore: callback irq %llx", callback_irq); + if ( write_exact(io_fd, &callback_irq, sizeof(callback_irq)) ) + { + PERROR("Error when writing to state file (8)"); + goto out; + } + + print_stats(xc_handle, dom, 0, &stats, 0); + + /* Now write out each data page, canonicalising page tables as we go... */ + { + unsigned int prev_pc, sent_this_iter, N, batch, run; + + iter++; + sent_this_iter = 0; + skip_this_iter = 0; + prev_pc = 0; + N = 0; + + DPRINTF("Saving memory pages: iter %d 0%%", iter); + + while ( N < p2m_size ) + { + unsigned int this_pc = (N * 100) / p2m_size; + + if ( (this_pc - prev_pc) >= 5 ) + { + DPRINTF("\b\b\b\b%3d%%", this_pc); + prev_pc = this_pc; + } + + /* load pfn_type[] with the mfn of all the pages we're doing in + this batch. */ + for ( batch = 0; + (batch < MAX_BATCH_SIZE) && (N < p2m_size); + N++ ) + { + int n = N; + + if ( debug ) + { + DPRINTF("%d pfn= %08lx mfn= %08lx %d", + iter, (unsigned long)n, + (long unsigned int)0, + test_bit(n, to_send)); + DPRINTF("\n"); + } + + if ( !( (test_bit(n, to_send)) || (test_bit(n, to_fix))) ) + continue; + +#if 0 + /* Skip PFNs that aren't really there */ + if (((n >= 0xa0 && n < 0xc0) /* VGA hole */ + || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) + && n < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ ) { + if (n >= shared_info_frame && n <= shared_info_frame + 32) { + /* DPRINTF("shared_info_frame or grant: %d\n", n); */ + } else { + continue; + } + } +#endif + + /* + ** we get here if: + ** 1. page is marked to_send & hasn't already been re-dirtied + ** 2. add in pages that still need fixup (net bufs) + */ + + /* Hypercall interfaces operate in PFNs for HVM guests + * and MFNs for PV guests */ + pfn_type[batch] = n; + + if ( !is_mapped(pfn_type[batch]) ) + { + /* + ** not currently in psuedo-physical map -- set bit + ** in to_fix since we must send this page in last_iter + ** unless its sent sooner anyhow, or it never enters + ** pseudo-physical map (e.g. for ballooned down doms) + */ + set_bit(n, to_fix); + continue; + } + + if ( test_bit(n, to_fix) && + !test_bit(n, to_send) ) + { + needed_to_fix++; + DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n", + iter, n, pfn_type[batch]); + } + + clear_bit(n, to_fix); + + batch++; + } + + if ( batch == 0 ) + goto skip; /* vanishingly unlikely... */ + + region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ, pfn_type, batch); + if ( region_base == NULL ) + { + ERROR("map batch failed"); + goto out; + } + + { + /* Look for and skip completely empty batches. */ + for ( j = 0; j < batch; j++ ) + if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) != + XEN_DOMCTL_PFINFO_XTAB ) + break; + if ( j == batch ) + { + munmap(region_base, batch*PAGE_SIZE); + continue; /* bail on this batch: no valid pages */ + } + } + + if ( write_exact(io_fd, &batch, sizeof(unsigned int)) ) + { + PERROR("Error when writing to state file (2)"); + goto out; + } + + if ( write_exact(io_fd, pfn_type, sizeof(unsigned long)*batch) ) + { + PERROR("Error when writing to state file (3)"); + goto out; + } + + /* entering this loop, pfn_type is now in pfns (Not mfns) */ + run = 0; + for ( j = 0; j < batch; j++ ) + { + unsigned long pfn, pagetype; + + pfn = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK; + + if ( pagetype != 0 ) + { + /* If the page is not a normal data page, write out any + run of pages we may have previously acumulated */ + if ( run ) + { + if ( ratewrite(io_fd, + (char*)region_base+(PAGE_SIZE*(j-run)), + PAGE_SIZE*run) != PAGE_SIZE*run ) + { + ERROR("Error when writing to state file (4a)" + " (errno %d)", errno); + goto out; + } + run = 0; + } + } + + /* skip pages that aren't present */ + if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ) + continue; + + pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; + + if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && + (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) ) + { + DPRINTF("canonicalize_pagetable pagetype = %lx pfn = %lu\n", pagetype, pfn); + } + else + { + /* We have a normal page: accumulate it for writing. */ + run++; + } + } /* end of the write out for this batch */ + + if ( run ) + { + /* write out the last accumulated run of pages */ + if ( ratewrite(io_fd, + (char*)region_base+(PAGE_SIZE*(j-run)), + PAGE_SIZE*run) != PAGE_SIZE*run ) + { + ERROR("Error when writing to state file (4c)" + " (errno %d)", errno); + goto out; + } + } + + sent_this_iter += batch; + + munmap(region_base, batch*PAGE_SIZE); + + } /* end of this while loop for this iteration */ + + skip: + + total_sent += sent_this_iter; + + DPRINTF("\r %d: sent %d, skipped %d, ", + iter, sent_this_iter, skip_this_iter ); + + { + print_stats( xc_handle, dom, sent_this_iter, &stats, 1); + + DPRINTF("Total pages sent= %ld (%.2fx)\n", + total_sent, ((float)total_sent)/p2m_size ); + DPRINTF("(of which %ld were fixups)\n", needed_to_fix ); + } + } /* end of infinite for loop */ + + DPRINTF("All memory is saved\n"); + + if (send_hvm_params(xc_handle, io_fd, dom) < 0) + goto out; + + /* Zero terminate */ + i = 0; + if ( write_exact(io_fd, &i, sizeof(int)) ) + { + PERROR("Error when writing to state file (6')"); + goto out; + } + + if (send_hvm_context(xc_handle, io_fd, ring, dom) < 0) + goto out; + + if (!debug) + { + int rcv_status; + if ( read_exact(io_fd, &rcv_status, sizeof(int))) { + ERROR("Error when reading receiver status"); + goto out; + } + DPRINTF("status received: %d\n", rcv_status); + } + + if (init_guest_mem(xc_handle, dom) < 0) + goto out; + + /* HVM guests are done now */ + rc = 0; + + out: + + /* Flush last write and discard cache for file. */ + discard_file_cache(io_fd, 1 /* flush */); + + free(to_send); + free(to_fix); + + DPRINTF("Save exit rc=%d\n",rc); + + return !!rc; +} + + +int xc_kemari_update(int xc_handle, int io_fd, uint32_t dom, + void *kemari_ring, uint32_t flags, + void (*qemu_save_image)(int), + void (*qemu_end_flip)(void), + void (*qemu_end_save)(void), + void (*qemu_image_sent)(void)) +{ + int rc = 1, k; + int debug = (flags & XCFLAGS_DEBUG); + uint32_t i, j, index = 0; + unsigned int batch = 0; + struct kemari_ring *ring = (struct kemari_ring *)kemari_ring; + struct kemari_ent *buf; + struct iovec iov[MAX_BATCH_SIZE + 2]; /* 2 for batch and pfn_type */ + int iovcnt = 2; + +#define ADD_IOV(base, len) do { \ + iov[iovcnt].iov_base = base; \ + iov[iovcnt].iov_len = len; \ + iovcnt++; \ +} while (0) + + + + /* flip active qemu */ + qemu_active = qemu_non_active; + qemu_non_active = qemu_active ? 0 : 1; + qemu_save_image(qemu_active); + + /* + * main iteration starts from here + */ + while (ring->cons < ring->prod) { + + kemari_ring_read(ring, &buf); + + for (i = buf->u.index.start, j = buf->u.index.end; i < j; i++) { + + int next, offset = 0; + + index = i * BITS_PER_LONG; + + kemari_ring_read(ring, &buf); + + while (buf->u.dirty_bitmap && offset < BITS_PER_LONG) { + int n; + next = ffs(buf->u.dirty_bitmap); + buf->u.dirty_bitmap >>= next; + offset += next; + n = offset + index - 1; +#if 0 + if (((n >= 0xa0 && n < 0xc0) /* VGA hole */ + || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) + && n < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ ) { + if (n >= shared_info_frame && n <= shared_info_frame + 32) { + ; + } else { + continue; + } + } +#endif + ADD_IOV(search_guest_mem(xc_handle, dom, n), PAGE_SIZE); + pfn_type[batch] = n; + batch++; + } + + if ((batch + BITS_PER_LONG - 1 < MAX_BATCH_SIZE) && + !(ring->cons == ring->prod)) + continue; + + /* Pull in the dirty bits from qemu-dm too */ + qemu_end_flip(); + for ( k = 0; k < BITMAP_SIZE / BITS_PER_LONG; k++) { + if (qemu_bitmaps[qemu_non_active][k] != 0) { + unsigned int bmp = qemu_bitmaps[qemu_non_active][k]; + + index = k * BITS_PER_LONG; + while (bmp && offset < BITS_PER_LONG) { + int n, next, offset = 0; + next = ffs(bmp); + bmp >>= next; + offset += next; + n = offset + index - 1; + + ADD_IOV(search_guest_mem(xc_handle, dom, n), PAGE_SIZE); + pfn_type[batch] = n; + batch++; + } + qemu_bitmaps[qemu_non_active][k] = 0; + } + if (batch >= MAX_BATCH_SIZE) { + ERROR("Sorry, reached MAX_BATCH_SIZE. " + "We will fix this lator."); + goto out; + } + } + + PPRINTF("batch %d\n", batch); + + /* send pages */ + iov[0].iov_base = &batch; + iov[0].iov_len = sizeof(batch); + + iov[1].iov_base = pfn_type; + iov[1].iov_len = sizeof(pfn_type[0]) * batch; + + for (k = 0; k < iovcnt / IOV_MAX + 1; k++) { + int count = (iovcnt<IOV_MAX*(k+1))?(iovcnt-IOV_MAX*k):IOV_MAX; + if (writev_exact(io_fd, &iov[IOV_MAX * k], count)) { + ERROR("Error when writing pages state file (2--4)" + " (errno %d)", errno); + goto out; + } + } + + batch = 0; + } + } + + if (send_hvm_params(xc_handle, io_fd, dom) < 0) + goto out; + qemu_end_save(); + if (!debug && send_qemu_image(xc_handle, io_fd, dom) < 0) + goto out; + qemu_image_sent(); + + /* Zero terminate */ + i = 0; + if ( write_exact(io_fd, &i, sizeof(int)) ) + { + PERROR("Error when writing to state file (6')"); + goto out; + } + + if (send_hvm_context(xc_handle, io_fd, ring, dom) < 0) + goto out; + + if (!debug) + { + int rcv_status; + if ( read_exact(io_fd, &rcv_status, sizeof(int))) { + ERROR("Error when reading receiver status"); + goto out; + } + } + + rc = 0; +out: + + return rc; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |