[Xen-changelog] [xen-unstable] Merge with xen-ia64-unstable.hg
# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1169144704 0
# Node ID e2ca6bd160460095de13f01768ae730dbcc0ba7f
# Parent 58637a0a7c7e66ecaeebb7608df44f6bbf0afc04
# Parent 7c653e58cbe424a3e1499f92297082dda6c005f8
Merge with xen-ia64-unstable.hg
---
 tools/python/xen/xend/XendRoot.py | 322 -------
 buildconfigs/linux-defconfig_xen0_x86_32 | 4
 buildconfigs/linux-defconfig_xen0_x86_64 | 4
 buildconfigs/mk.linux-2.6-xen | 4
 linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c | 18
 tools/ioemu/hw/cirrus_vga.c | 52 +
 tools/ioemu/hw/ide.c | 3
 tools/ioemu/hw/pci.c | 3
 tools/ioemu/target-i386-dm/helper2.c | 15
 tools/ioemu/vl.c | 34
 tools/libfsimage/common/Makefile | 2
 tools/libxc/Makefile | 2
 tools/libxc/xc_domain.c | 44 +
 tools/libxc/xc_hvm_restore.c | 360 ++++++++
 tools/libxc/xc_hvm_save.c | 727 +++++++++++++++++
 tools/libxc/xenctrl.h | 24
 tools/libxc/xenguest.h | 20
 tools/python/setup.py | 13
 tools/python/xen/lowlevel/scf/scf.c | 156 +++
 tools/python/xen/lowlevel/xc/xc.c | 22
 tools/python/xen/util/xmlrpclib2.py | 1
 tools/python/xen/web/httpserver.py | 12
 tools/python/xen/xend/Vifctl.py | 4
 tools/python/xen/xend/XendCheckpoint.py | 87 +-
 tools/python/xen/xend/XendConfig.py | 45 -
 tools/python/xen/xend/XendDomain.py | 12
 tools/python/xen/xend/XendDomainInfo.py | 64 -
 tools/python/xen/xend/XendNode.py | 4
 tools/python/xen/xend/XendOptions.py | 373 ++++++++
 tools/python/xen/xend/XendProtocol.py | 6
 tools/python/xen/xend/balloon.py | 6
 tools/python/xen/xend/image.py | 25
 tools/python/xen/xend/osdep.py | 10
 tools/python/xen/xend/server/DevController.py | 8
 tools/python/xen/xend/server/SrvRoot.py | 2
 tools/python/xen/xend/server/SrvServer.py | 58 -
 tools/python/xen/xend/server/XMLRPCServer.py | 17
 tools/python/xen/xend/server/netif.py | 13
 tools/python/xen/xend/server/relocate.py | 14
 tools/python/xen/xend/server/tests/test_controllers.py | 10
 tools/python/xen/xend/server/tpmif.py | 8
 tools/python/xen/xend/server/vfbif.py | 4
 tools/python/xen/xm/create.py | 6
 tools/python/xen/xm/main.py | 13
 tools/python/xen/xm/opts.py | 8
 tools/python/xen/xm/tests/test_create.py | 6
 tools/xcutils/xc_restore.c | 19
 tools/xcutils/xc_save.c | 5
 xen/arch/x86/domain.c | 1
 xen/arch/x86/domctl.c | 4
 xen/arch/x86/hvm/hvm.c | 11
 xen/arch/x86/hvm/i8254.c | 153 +++
 xen/arch/x86/hvm/intercept.c | 303 +++++++
 xen/arch/x86/hvm/rtc.c | 2
 xen/arch/x86/hvm/vioapic.c | 132 +++
 xen/arch/x86/hvm/vlapic.c | 74 +
 xen/arch/x86/hvm/vmx/vmx.c | 296 ++++++
 xen/arch/x86/hvm/vpic.c | 83 +
 xen/arch/x86/hvm/vpt.c | 6
 xen/arch/x86/mm.c | 81 -
 xen/arch/x86/mm/shadow/common.c | 7
 xen/arch/x86/mm/shadow/multi.c | 2
 xen/common/domain.c | 9
 xen/common/domctl.c | 73 +
 xen/include/asm-x86/hvm/domain.h | 17
 xen/include/asm-x86/hvm/hvm.h | 38
 xen/include/asm-x86/hvm/support.h | 127 ++
 xen/include/asm-x86/hvm/vpt.h | 2
 xen/include/public/arch-x86/xen.h | 65 +
 xen/include/public/domctl.h | 16
 xen/include/xlat.lst | 2
 71 files changed, 3596 insertions(+), 577 deletions(-)

diff -r 58637a0a7c7e -r e2ca6bd16046 buildconfigs/linux-defconfig_xen0_x86_32
--- a/buildconfigs/linux-defconfig_xen0_x86_32 Wed Jan 17 21:45:34 2007 -0700
+++ b/buildconfigs/linux-defconfig_xen0_x86_32 Thu Jan 18 18:25:04 2007 +0000
@@ -548,7 +548,7 @@ CONFIG_MEGARAID_NEWGEN=y
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
 CONFIG_SCSI_SATA=y
-# CONFIG_SCSI_SATA_AHCI is not set
+CONFIG_SCSI_SATA_AHCI=y
 # CONFIG_SCSI_SATA_SVW is not set
 CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
@@ -713,7 +713,7 @@ CONFIG_SK98LIN=y
 CONFIG_SK98LIN=y
 #
CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y -# CONFIG_BNX2 is not set +CONFIG_BNX2=y # # Ethernet (10000 Mbit) diff -r 58637a0a7c7e -r e2ca6bd16046 buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Wed Jan 17 21:45:34 2007 -0700 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Thu Jan 18 18:25:04 2007 +0000 @@ -498,7 +498,7 @@ CONFIG_MEGARAID_NEWGEN=y # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set CONFIG_SCSI_SATA=y -# CONFIG_SCSI_SATA_AHCI is not set +CONFIG_SCSI_SATA_AHCI=y # CONFIG_SCSI_SATA_SVW is not set CONFIG_SCSI_ATA_PIIX=y # CONFIG_SCSI_SATA_MV is not set @@ -663,7 +663,7 @@ CONFIG_SK98LIN=y CONFIG_SK98LIN=y # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y -# CONFIG_BNX2 is not set +CONFIG_BNX2=y # # Ethernet (10000 Mbit) diff -r 58637a0a7c7e -r e2ca6bd16046 buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Wed Jan 17 21:45:34 2007 -0700 +++ b/buildconfigs/mk.linux-2.6-xen Thu Jan 18 18:25:04 2007 +0000 @@ -8,6 +8,10 @@ LINUX_DIR = build-linux-$(LINUX_VER)- IMAGE_TARGET ?= vmlinuz INSTALL_BOOT_PATH ?= $(DESTDIR) + +ifeq ($(XEN_TARGET_ARCH),ia64) +INSTALL_BOOT_PATH := $(DESTDIR)/boot +endif LINUX_VER3 := $(LINUX_SERIES).$(word 3, $(subst ., ,$(LINUX_VER))) diff -r 58637a0a7c7e -r e2ca6bd16046 linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c Wed Jan 17 21:45:34 2007 -0700 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c Thu Jan 18 18:25:04 2007 +0000 @@ -56,7 +56,9 @@ static void *syscall_page; int __init sysenter_setup(void) { - syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); + void *page = (void *)get_zeroed_page(GFP_ATOMIC); + + syscall_page = page; #ifdef CONFIG_XEN if (boot_cpu_has(X86_FEATURE_SEP)) { @@ -70,16 +72,16 @@ int __init sysenter_setup(void) } #endif - if (boot_cpu_has(X86_FEATURE_SEP)) { - memcpy(syscall_page, - &vsyscall_sysenter_start, - &vsyscall_sysenter_end - &vsyscall_sysenter_start); + if (!boot_cpu_has(X86_FEATURE_SEP)) { + memcpy(page, + &vsyscall_int80_start, + &vsyscall_int80_end - &vsyscall_int80_start); return 0; } - memcpy(syscall_page, - &vsyscall_int80_start, - &vsyscall_int80_end - &vsyscall_int80_start); + memcpy(page, + &vsyscall_sysenter_start, + &vsyscall_sysenter_end - &vsyscall_sysenter_start); return 0; } diff -r 58637a0a7c7e -r e2ca6bd16046 tools/ioemu/hw/cirrus_vga.c --- a/tools/ioemu/hw/cirrus_vga.c Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/ioemu/hw/cirrus_vga.c Thu Jan 18 18:25:04 2007 +0000 @@ -3010,11 +3010,44 @@ static CPUWriteMemoryFunc *cirrus_mmio_w cirrus_mmio_writel, }; +void cirrus_stop_acc(CirrusVGAState *s) +{ + if (s->map_addr){ + int error; + s->map_addr = 0; + error = unset_vram_mapping(s->cirrus_lfb_addr, + s->cirrus_lfb_end); + fprintf(stderr, "cirrus_stop_acc:unset_vram_mapping.\n"); + + munmap(s->vram_ptr, VGA_RAM_SIZE); + } +} + +void cirrus_restart_acc(CirrusVGAState *s) +{ + if (s->cirrus_lfb_addr && s->cirrus_lfb_end) { + void *vram_pointer, *old_vram; + fprintf(stderr, "cirrus_vga_load:re-enable vga acc.lfb_addr=0x%lx, lfb_end=0x%lx.\n", + s->cirrus_lfb_addr, s->cirrus_lfb_end); + vram_pointer = set_vram_mapping(s->cirrus_lfb_addr ,s->cirrus_lfb_end); + if (!vram_pointer){ + fprintf(stderr, "cirrus_vga_load:NULL vram_pointer\n"); + } else { + old_vram = vga_update_vram((VGAState *)s, vram_pointer, + VGA_RAM_SIZE); + qemu_free(old_vram); + s->map_addr = s->cirrus_lfb_addr; + s->map_end = s->cirrus_lfb_end; + } + } +} + /* load/save state */ static void cirrus_vga_save(QEMUFile 
*f, void *opaque) { CirrusVGAState *s = opaque; + uint8_t vga_acc; qemu_put_be32s(f, &s->latch); qemu_put_8s(f, &s->sr_index); @@ -3049,11 +3082,20 @@ static void cirrus_vga_save(QEMUFile *f, qemu_put_be32s(f, &s->hw_cursor_y); /* XXX: we do not save the bitblt state - we assume we do not save the state when the blitter is active */ + + vga_acc = (!!s->map_addr); + qemu_put_8s(f, &vga_acc); + qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr); + qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end); + qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); + if (vga_acc) + cirrus_stop_acc(s); } static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id) { CirrusVGAState *s = opaque; + uint8_t vga_acc = 0; if (version_id != 1) return -EINVAL; @@ -3091,6 +3133,14 @@ static int cirrus_vga_load(QEMUFile *f, qemu_get_be32s(f, &s->hw_cursor_x); qemu_get_be32s(f, &s->hw_cursor_y); + + qemu_get_8s(f, &vga_acc); + qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_addr); + qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_end); + qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE); + if (vga_acc){ + cirrus_restart_acc(s); + } /* force refresh */ s->graphic_mode = -1; @@ -3297,6 +3347,8 @@ void pci_cirrus_vga_init(PCIBus *bus, Di ds, vga_ram_base, vga_ram_offset, vga_ram_size); cirrus_init_common(s, device_id, 1); + register_savevm("cirrus_vga_pci", 0, 1, generic_pci_save, generic_pci_load, d); + /* setup memory space */ /* memory #0 LFB */ /* memory #1 memory-mapped I/O */ diff -r 58637a0a7c7e -r e2ca6bd16046 tools/ioemu/hw/ide.c --- a/tools/ioemu/hw/ide.c Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/ioemu/hw/ide.c Thu Jan 18 18:25:04 2007 +0000 @@ -2512,6 +2512,9 @@ void pci_piix3_ide_init(PCIBus *bus, Blo pic_set_irq_new, isa_pic, 15); ide_init_ioport(&d->ide_if[0], 0x1f0, 0x3f6); ide_init_ioport(&d->ide_if[2], 0x170, 0x376); + + register_savevm("ide_pci", 0, 1, generic_pci_save, generic_pci_load, d); + #ifdef DMA_MULTI_THREAD dma_create_thread(); #endif //DMA_MULTI_THREAD diff -r 58637a0a7c7e -r e2ca6bd16046 tools/ioemu/hw/pci.c --- a/tools/ioemu/hw/pci.c Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/ioemu/hw/pci.c Thu Jan 18 18:25:04 2007 +0000 @@ -40,6 +40,8 @@ static int pci_irq_index; static int pci_irq_index; static PCIBus *first_bus; +static void pci_update_mappings(PCIDevice *d); + PCIBus *pci_register_bus(pci_set_irq_fn set_irq, void *pic, int devfn_min) { PCIBus *bus; @@ -71,6 +73,7 @@ int generic_pci_load(QEMUFile* f, void * return -EINVAL; qemu_get_buffer(f, s->config, 256); + pci_update_mappings(s); return 0; } diff -r 58637a0a7c7e -r e2ca6bd16046 tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/ioemu/target-i386-dm/helper2.c Thu Jan 18 18:25:04 2007 +0000 @@ -546,6 +546,7 @@ int main_loop(void) { extern int vm_running; extern int shutdown_requested; + extern int suspend_requested; CPUState *env = cpu_single_env; int evtchn_fd = xc_evtchn_fd(xce_handle); @@ -563,12 +564,24 @@ int main_loop(void) qemu_system_reset(); reset_requested = 0; } + if (suspend_requested) { + fprintf(logfile, "device model received suspend signal!\n"); + break; + } } /* Wait up to 10 msec. 
*/ main_loop_wait(10); } - destroy_hvm_domain(); + if (!suspend_requested) + destroy_hvm_domain(); + else { + char qemu_file[20]; + sprintf(qemu_file, "/tmp/xen.qemu-dm.%d", domid); + if (qemu_savevm(qemu_file) < 0) + fprintf(stderr, "qemu save fail.\n"); + } + return 0; } diff -r 58637a0a7c7e -r e2ca6bd16046 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/ioemu/vl.c Thu Jan 18 18:25:04 2007 +0000 @@ -4441,6 +4441,11 @@ int qemu_loadvm(const char *filename) qemu_fseek(f, cur_pos + record_len, SEEK_SET); } fclose(f); + + /* del tmp file */ + if (unlink(filename) == -1) + fprintf(stderr, "delete tmp qemu state file failed.\n"); + ret = 0; the_end: if (saved_vm_running) @@ -5027,6 +5032,7 @@ static QEMUResetEntry *first_reset_entry static QEMUResetEntry *first_reset_entry; int reset_requested; int shutdown_requested; +int suspend_requested; static int powerdown_requested; void qemu_register_reset(QEMUResetHandler *func, void *opaque) @@ -5806,6 +5812,14 @@ int set_mm_mapping(int xc_handle, uint32 } return 0; +} + +void suspend(int sig) +{ + fprintf(logfile, "suspend sig handler called with requested=%d!\n", suspend_requested); + if (sig != SIGUSR1) + fprintf(logfile, "suspend signal dismatch, get sig=%d!\n", sig); + suspend_requested = 1; } #if defined(__i386__) || defined(__x86_64__) @@ -6714,6 +6728,26 @@ int main(int argc, char **argv) vm_start(); } } + + /* register signal for the suspend request when save */ + { + struct sigaction act; + sigset_t set; + act.sa_handler = suspend; + act.sa_flags = SA_RESTART; + sigemptyset(&act.sa_mask); + + sigaction(SIGUSR1, &act, NULL); + + /* control panel mask some signals when spawn qemu, need unmask here*/ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + sigaddset(&set, SIGTERM); + if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1) + fprintf(stderr, "unblock signal fail, possible issue for HVM save!\n"); + + } + main_loop(); quit_timers(); return 0; diff -r 58637a0a7c7e -r e2ca6bd16046 tools/libfsimage/common/Makefile --- a/tools/libfsimage/common/Makefile Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/libfsimage/common/Makefile Thu Jan 18 18:25:04 2007 +0000 @@ -1,7 +1,7 @@ XEN_ROOT = ../../.. XEN_ROOT = ../../.. 
include $(XEN_ROOT)/tools/Rules.mk -MAJOR = 1.1 +MAJOR = 1.0 MINOR = 0 CFLAGS += -Werror -Wp,-MD,.$(@F).d diff -r 58637a0a7c7e -r e2ca6bd16046 tools/libxc/Makefile --- a/tools/libxc/Makefile Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/libxc/Makefile Thu Jan 18 18:25:04 2007 +0000 @@ -27,7 +27,7 @@ GUEST_SRCS-$(CONFIG_X86) += xc_linux_bui GUEST_SRCS-$(CONFIG_X86) += xc_linux_build.c GUEST_SRCS-$(CONFIG_IA64) += xc_linux_build.c GUEST_SRCS-$(CONFIG_MIGRATE) += xc_linux_restore.c xc_linux_save.c -GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c +GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_restore.c xc_hvm_save.c -include $(XEN_TARGET_ARCH)/Makefile diff -r 58637a0a7c7e -r e2ca6bd16046 tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/libxc/xc_domain.c Thu Jan 18 18:25:04 2007 +0000 @@ -233,6 +233,50 @@ int xc_domain_getinfolist(int xc_handle, unlock_pages(info, max_domains*sizeof(xc_domaininfo_t)); return ret; +} + +/* get info from hvm guest for save */ +int xc_domain_hvm_getcontext(int xc_handle, + uint32_t domid, + hvm_domain_context_t *hvm_ctxt) +{ + int rc; + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_gethvmcontext; + domctl.domain = (domid_t)domid; + set_xen_guest_handle(domctl.u.hvmcontext.ctxt, hvm_ctxt); + + if ( (rc = mlock(hvm_ctxt, sizeof(*hvm_ctxt))) != 0 ) + return rc; + + rc = do_domctl(xc_handle, &domctl); + + safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt)); + + return rc; +} + +/* set info to hvm guest for restore */ +int xc_domain_hvm_setcontext(int xc_handle, + uint32_t domid, + hvm_domain_context_t *hvm_ctxt) +{ + int rc; + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_sethvmcontext; + domctl.domain = domid; + set_xen_guest_handle(domctl.u.hvmcontext.ctxt, hvm_ctxt); + + if ( (rc = mlock(hvm_ctxt, sizeof(*hvm_ctxt))) != 0 ) + return rc; + + rc = do_domctl(xc_handle, &domctl); + + safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt)); + + return rc; } int xc_vcpu_getcontext(int xc_handle, diff -r 58637a0a7c7e -r e2ca6bd16046 tools/libxc/xc_hvm_restore.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_hvm_restore.c Thu Jan 18 18:25:04 2007 +0000 @@ -0,0 +1,360 @@ +/****************************************************************************** + * xc_hvm_restore.c + * + * Restore the state of a HVM guest. + * + * Copyright (c) 2003, K A Fraser. + * Copyright (c) 2006 Intel Corperation + * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + */ + +#include <stdlib.h> +#include <unistd.h> + +#include "xg_private.h" +#include "xg_save_restore.h" + +#include <xen/hvm/ioreq.h> +#include <xen/hvm/params.h> +#include <xen/hvm/e820.h> + +/* max mfn of the whole machine */ +static unsigned long max_mfn; + +/* virtual starting address of the hypervisor */ +static unsigned long hvirt_start; + +/* #levels of page tables used by the currrent guest */ +static unsigned int pt_levels; + +/* total number of pages used by the current guest */ +static unsigned long max_pfn; + +/* A table mapping each PFN to its new MFN. */ +static xen_pfn_t *p2m = NULL; + +static ssize_t +read_exact(int fd, void *buf, size_t count) +{ + int r = 0, s; + unsigned char *b = buf; + + while (r < count) { + s = read(fd, &b[r], count - r); + if ((s == -1) && (errno == EINTR)) + continue; + if (s <= 0) { + break; + } + r += s; + } + + return (r == count) ? 1 : 0; +} + +int xc_hvm_restore(int xc_handle, int io_fd, + uint32_t dom, unsigned long nr_pfns, + unsigned int store_evtchn, unsigned long *store_mfn, + unsigned int console_evtchn, unsigned long *console_mfn, + unsigned int pae, unsigned int apic) +{ + DECLARE_DOMCTL; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + char *region_base; + + unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; + + xc_dominfo_t info; + unsigned int rc = 1, n, i; + uint32_t rec_len, nr_vcpus; + hvm_domain_context_t hvm_ctxt; + unsigned long long v_end, memsize; + unsigned long shared_page_nr; + + unsigned long mfn, pfn; + unsigned int prev_pc, this_pc; + int verify = 0; + + /* Types of the pfns in the current region */ + unsigned long region_pfn_type[MAX_BATCH_SIZE]; + + /* hvm guest mem size (Mb) */ + memsize = (unsigned long long)*store_mfn; + v_end = memsize << 20; + + DPRINTF("xc_hvm_restore:dom=%d, nr_pfns=0x%lx, store_evtchn=%d, *store_mfn=%ld, console_evtchn=%d, *console_mfn=%ld, pae=%u, apic=%u.\n", + dom, nr_pfns, store_evtchn, *store_mfn, console_evtchn, *console_mfn, pae, apic); + + max_pfn = nr_pfns; + + if(!get_platform_info(xc_handle, dom, + &max_mfn, &hvirt_start, &pt_levels)) { + ERROR("Unable to get platform info."); + return 1; + } + + DPRINTF("xc_hvm_restore start: max_pfn = %lx, max_mfn = %lx, hvirt_start=%lx, pt_levels=%d\n", + max_pfn, + max_mfn, + hvirt_start, + pt_levels); + + if (mlock(&ctxt, sizeof(ctxt))) { + /* needed for build dom0 op, but might as well do early */ + ERROR("Unable to mlock ctxt"); + return 1; + } + + + p2m = malloc(max_pfn * sizeof(xen_pfn_t)); + + if (p2m == NULL) { + ERROR("memory alloc failed"); + errno = ENOMEM; + goto out; + } + + /* Get the domain's shared-info frame. */ + domctl.cmd = XEN_DOMCTL_getdomaininfo; + domctl.domain = (domid_t)dom; + if (xc_domctl(xc_handle, &domctl) < 0) { + ERROR("Could not get information on new domain"); + goto out; + } + shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; + + if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) { + errno = ENOMEM; + goto out; + } + + for ( i = 0; i < max_pfn; i++ ) + p2m[i] = i; + for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < max_pfn; i++ ) + p2m[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; + + /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */ + rc = xc_domain_memory_populate_physmap( + xc_handle, dom, (max_pfn > 0xa0) ? 
0xa0 : max_pfn, + 0, 0, &p2m[0x00]); + if ( (rc == 0) && (max_pfn > 0xc0) ) + rc = xc_domain_memory_populate_physmap( + xc_handle, dom, max_pfn - 0xc0, 0, 0, &p2m[0xc0]); + if ( rc != 0 ) + { + PERROR("Could not allocate memory for HVM guest.\n"); + goto out; + } + + + /**********XXXXXXXXXXXXXXXX******************/ + if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { + ERROR("Could not get domain info"); + return 1; + } + + domctl.cmd = XEN_DOMCTL_getdomaininfo; + domctl.domain = (domid_t)dom; + if (xc_domctl(xc_handle, &domctl) < 0) { + ERROR("Could not get information on new domain"); + goto out; + } + + for ( i = 0; i < max_pfn; i++) + p2m[i] = i; + + prev_pc = 0; + + n = 0; + while (1) { + + int j; + + this_pc = (n * 100) / max_pfn; + if ( (this_pc - prev_pc) >= 5 ) + { + PPRINTF("\b\b\b\b%3d%%", this_pc); + prev_pc = this_pc; + } + + if (!read_exact(io_fd, &j, sizeof(int))) { + ERROR("HVM restore Error when reading batch size"); + goto out; + } + + PPRINTF("batch %d\n",j); + + if (j == -1) { + verify = 1; + DPRINTF("Entering page verify mode\n"); + continue; + } + + if (j == 0) + break; /* our work here is done */ + + if (j > MAX_BATCH_SIZE) { + ERROR("Max batch size exceeded. Giving up."); + goto out; + } + + if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { + ERROR("Error when reading region pfn types"); + goto out; + } + + region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_WRITE, region_pfn_type, j); + + for ( i = 0; i < j; i++ ) + { + void *page; + + pfn = region_pfn_type[i]; + if ( pfn > max_pfn ) + { + ERROR("pfn out of range"); + goto out; + } + + if ( pfn >= 0xa0 && pfn < 0xc0) { + ERROR("hvm restore:pfn in vga hole"); + goto out; + } + + + mfn = p2m[pfn]; + + /* In verify mode, we use a copy; otherwise we work in place */ + page = verify ? 
(void *)buf : (region_base + i*PAGE_SIZE); + + if (!read_exact(io_fd, page, PAGE_SIZE)) { + ERROR("Error when reading page (%x)", i); + goto out; + } + + if (verify) { + + int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); + + if (res) { + + int v; + + DPRINTF("************** pfn=%lx mfn=%lx gotcs=%08lx " + "actualcs=%08lx\n", pfn, p2m[pfn], + csum_page(region_base + i*PAGE_SIZE), + csum_page(buf)); + + for (v = 0; v < 4; v++) { + + unsigned long *p = (unsigned long *) + (region_base + i*PAGE_SIZE); + if (buf[v] != p[v]) + DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); + } + } + } + + } /* end of 'batch' for loop */ + munmap(region_base, j*PAGE_SIZE); + n+= j; /* crude stats */ + + }/*while 1*/ + +/* xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);*/ + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); + + if ( v_end > HVM_BELOW_4G_RAM_END ) + shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1; + else + shared_page_nr = (v_end >> PAGE_SHIFT) - 1; + + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr); + + /* caculate the store_mfn , wrong val cause hang when introduceDomain */ + *store_mfn = (v_end >> PAGE_SHIFT) - 2; + DPRINTF("hvm restore:calculate new store_mfn=0x%lx,v_end=0x%llx..\n", *store_mfn, v_end); + + /* restore hvm context including pic/pit/shpage */ + if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) { + ERROR("error read hvm context size!\n"); + goto out; + } + if (rec_len != sizeof(hvm_ctxt)) { + ERROR("hvm context size dismatch!\n"); + goto out; + } + + if (!read_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt))) { + ERROR("error read hvm context!\n"); + goto out; + } + + if (( rc = xc_domain_hvm_setcontext(xc_handle, dom, &hvm_ctxt))) { + ERROR("error set hvm context!\n"); + goto out; + } + + if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) { + ERROR("error read nr vcpu !\n"); + goto out; + } + DPRINTF("hvm restore:get nr_vcpus=%d.\n", nr_vcpus); + + for (i =0; i < nr_vcpus; i++) { + if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) { + ERROR("error read vcpu context size!\n"); + goto out; + } + if (rec_len != sizeof(ctxt)) { + ERROR("vcpu context size dismatch!\n"); + goto out; + } + + if (!read_exact(io_fd, &(ctxt), sizeof(ctxt))) { + ERROR("error read vcpu context.\n"); + goto out; + } + + if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) { + ERROR("Could not set vcpu context, rc=%d", rc); + goto out; + } + } + + rc = 0; + goto out; + + out: + if ( (rc != 0) && (dom != 0) ) + xc_domain_destroy(xc_handle, dom); + free(p2m); + + DPRINTF("Restore exit with rc=%d\n", rc); + + return rc; +} diff -r 58637a0a7c7e -r e2ca6bd16046 tools/libxc/xc_hvm_save.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_hvm_save.c Thu Jan 18 18:25:04 2007 +0000 @@ -0,0 +1,727 @@ +/****************************************************************************** + * xc_hvm_save.c + * + * Save the state of a running HVM guest. + * + * Copyright (c) 2003, K A Fraser. + * Copyright (c) 2006 Intel Corperation + * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <inttypes.h> +#include <time.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/time.h> + +#include "xc_private.h" +#include "xg_private.h" +#include "xg_save_restore.h" + +/* +** Default values for important tuning parameters. Can override by passing +** non-zero replacement values to xc_hvm_save(). +** +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. +** +*/ +#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ +#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ + +/* max mfn of the whole machine */ +static unsigned long max_mfn; + +/* virtual starting address of the hypervisor */ +static unsigned long hvirt_start; + +/* #levels of page tables used by the currrent guest */ +static unsigned int pt_levels; + +/* total number of pages used by the current guest */ +static unsigned long max_pfn; + +/* +** During (live) save/migrate, we maintain a number of bitmaps to track +** which pages we have to send, to fixup, and to skip. +*/ + +#define BITS_PER_LONG (sizeof(unsigned long) * 8) +#define BITMAP_SIZE ((max_pfn + BITS_PER_LONG - 1) / 8) + +#define BITMAP_ENTRY(_nr,_bmap) \ + ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] + +#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) + +static inline int test_bit (int nr, volatile void * addr) +{ + return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; +} + +static inline void clear_bit (int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); +} + +static inline int permute( int i, int nr, int order_nr ) +{ + /* Need a simple permutation function so that we scan pages in a + pseudo random order, enabling us to get a better estimate of + the domain's page dirtying rate as we go (there are often + contiguous ranges of pfns that have similar behaviour, and we + want to mix them up. */ + + /* e.g. 
nr->oder 15->4 16->4 17->5 */ + /* 512MB domain, 128k pages, order 17 */ + + /* + QPONMLKJIHGFEDCBA + QPONMLKJIH + GFEDCBA + */ + + /* + QPONMLKJIHGFEDCBA + EDCBA + QPONM + LKJIHGF + */ + + do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } + while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ + + return i; +} + +static uint64_t tv_to_us(struct timeval *new) +{ + return (new->tv_sec * 1000000) + new->tv_usec; +} + +static uint64_t llgettimeofday(void) +{ + struct timeval now; + gettimeofday(&now, NULL); + return tv_to_us(&now); +} + +static uint64_t tv_delta(struct timeval *new, struct timeval *old) +{ + return ((new->tv_sec - old->tv_sec)*1000000 ) + + (new->tv_usec - old->tv_usec); +} + + +#define RATE_IS_MAX() (0) +#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n)) +#define initialize_mbit_rate() + +static inline ssize_t write_exact(int fd, void *buf, size_t count) +{ + if(write(fd, buf, count) != count) + return 0; + return 1; +} + +static int print_stats(int xc_handle, uint32_t domid, int pages_sent, + xc_shadow_op_stats_t *stats, int print) +{ + static struct timeval wall_last; + static long long d0_cpu_last; + static long long d1_cpu_last; + + struct timeval wall_now; + long long wall_delta; + long long d0_cpu_now, d0_cpu_delta; + long long d1_cpu_now, d1_cpu_delta; + + gettimeofday(&wall_now, NULL); + + d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000; + d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000; + + if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) + DPRINTF("ARRHHH!!\n"); + + wall_delta = tv_delta(&wall_now,&wall_last)/1000; + + if (wall_delta == 0) wall_delta = 1; + + d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; + d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; + + if (print) + DPRINTF( + "delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " + "dirtied %dMb/s %" PRId32 " pages\n", + wall_delta, + (int)((d0_cpu_delta*100)/wall_delta), + (int)((d1_cpu_delta*100)/wall_delta), + (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))), + (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))), + stats->dirty_count); + + d0_cpu_last = d0_cpu_now; + d1_cpu_last = d1_cpu_now; + wall_last = wall_now; + + return 0; +} + +static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn, + unsigned long *arr, int runs) +{ + long long start, now; + xc_shadow_op_stats_t stats; + int j; + + start = llgettimeofday(); + + for (j = 0; j < runs; j++) { + int i; + + xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN, + arr, max_pfn, NULL, 0, NULL); + DPRINTF("#Flush\n"); + for ( i = 0; i < 40; i++ ) { + usleep(50000); + now = llgettimeofday(); + xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK, + NULL, 0, NULL, 0, &stats); + + DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n", + ((now-start)+500)/1000, + stats.fault_count, stats.dirty_count); + } + } + + return -1; +} + +static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, + int dom, xc_dominfo_t *info, + vcpu_guest_context_t *ctxt) +{ + int i = 0; + + if (!(*suspend)(dom)) { + ERROR("Suspend request failed"); + return -1; + } + + retry: + + if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) { + ERROR("Could not get domain info"); + return -1; + } + + if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt)) + ERROR("Could not get vcpu context"); + + + if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend) + return 0; // success + + if (info->paused) { + // try unpausing 
domain, wait, and retest + xc_domain_unpause( xc_handle, dom ); + + ERROR("Domain was paused. Wait and re-test."); + usleep(10000); // 10ms + + goto retry; + } + + + if( ++i < 100 ) { + ERROR("Retry suspend domain."); + usleep(10000); // 10ms + goto retry; + } + + ERROR("Unable to suspend domain."); + + return -1; +} + +int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags, int (*suspend)(int)) +{ + xc_dominfo_t info; + + int rc = 1, i, last_iter, iter = 0; + int live = (flags & XCFLAGS_LIVE); + int debug = (flags & XCFLAGS_DEBUG); + int sent_last_iter, skip_this_iter; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + /* A table containg the type of each PFN (/not/ MFN!). */ + unsigned long *pfn_type = NULL; + unsigned long *pfn_batch = NULL; + + /* A copy of hvm domain context */ + hvm_domain_context_t hvm_ctxt; + + /* Live mapping of shared info structure */ + shared_info_t *live_shinfo = NULL; + + /* base of the region in which domain memory is mapped */ + unsigned char *region_base = NULL; + + uint32_t nr_pfns, rec_size, nr_vcpus; + unsigned long *page_array = NULL; + + /* power of 2 order of max_pfn */ + int order_nr; + + /* bitmap of pages: + - that should be sent this iteration (unless later marked as skip); + - to skip this iteration because already dirty; */ + unsigned long *to_send = NULL, *to_skip = NULL; + + xc_shadow_op_stats_t stats; + + unsigned long total_sent = 0; + + DPRINTF("xc_hvm_save:dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, live=%d, debug=%d.\n", + dom, max_iters, max_factor, flags, + live, debug); + + /* If no explicit control parameters given, use defaults */ + if(!max_iters) + max_iters = DEF_MAX_ITERS; + if(!max_factor) + max_factor = DEF_MAX_FACTOR; + + initialize_mbit_rate(); + + if(!get_platform_info(xc_handle, dom, + &max_mfn, &hvirt_start, &pt_levels)) { + ERROR("HVM:Unable to get platform info."); + return 1; + } + + if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { + ERROR("HVM:Could not get domain info"); + return 1; + } + nr_vcpus = info.nr_online_vcpus; + + if (mlock(&ctxt, sizeof(ctxt))) { + ERROR("HVM:Unable to mlock ctxt"); + return 1; + } + + /* Only have to worry about vcpu 0 even for SMP */ + if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { + ERROR("HVM:Could not get vcpu context"); + goto out; + } + shared_info_frame = info.shared_info_frame; + + /* A cheesy test to see whether the domain contains valid state. 
*/ + if (ctxt.ctrlreg[3] == 0) + { + ERROR("Domain is not in a valid HVM guest state"); + goto out; + } + + /* cheesy sanity check */ + if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) { + ERROR("Invalid HVM state record -- pfn count out of range: %lu", + (info.max_memkb >> (PAGE_SHIFT - 10))); + goto out; + } + + /* Map the shared info frame */ + if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, shared_info_frame))) { + ERROR("HVM:Couldn't map live_shinfo"); + goto out; + } + + max_pfn = live_shinfo->arch.max_pfn; + + DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages); + + /* nr_pfns: total pages excluding vga acc mem + * max_pfn: nr_pfns + 0x20 vga hole(0xa0~0xc0) + * getdomaininfo.tot_pages: all the allocated pages for this domain + */ + if (live) { + ERROR("hvm domain doesn't support live migration now.\n"); + goto out; + + if (xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL, 0, NULL) < 0) { + ERROR("Couldn't enable shadow mode"); + goto out; + } + + /* excludes vga acc mem */ + nr_pfns = info.nr_pages - 0x800; + + last_iter = 0; + DPRINTF("hvm domain live migration debug start: logdirty enable.\n"); + } else { + /* This is a non-live suspend. Issue the call back to get the + domain suspended */ + + last_iter = 1; + + /* suspend hvm domain */ + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) { + ERROR("HVM Domain appears not to have suspended"); + goto out; + } + nr_pfns = info.nr_pages; + DPRINTF("after suspend hvm domain nr_pages=0x%x.\n", nr_pfns); + } + + DPRINTF("after 1st handle hvm domain nr_pfns=0x%x, nr_pages=0x%lx, max_memkb=0x%lx, live=%d.\n", + nr_pfns, + info.nr_pages, + info.max_memkb, + live); + + nr_pfns = info.nr_pages; + + /*XXX: caculate the VGA hole*/ + max_pfn = nr_pfns + 0x20; + + skip_this_iter = 0;/*XXX*/ + /* pretend we sent all the pages last iteration */ + sent_last_iter = max_pfn; + + /* calculate the power of 2 order of max_pfn, e.g. + 15->4 16->4 17->5 */ + for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++) + continue; + + /* Setup to_send / to_fix and to_skip bitmaps */ + to_send = malloc(BITMAP_SIZE); + to_skip = malloc(BITMAP_SIZE); + + if (!to_send ||!to_skip) { + ERROR("Couldn't allocate to_send array"); + goto out; + } + + memset(to_send, 0xff, BITMAP_SIZE); + + if (lock_pages(to_send, BITMAP_SIZE)) { + ERROR("Unable to lock to_send"); + return 1; + } + + /* (to fix is local only) */ + if (lock_pages(to_skip, BITMAP_SIZE)) { + ERROR("Unable to lock to_skip"); + return 1; + } + + analysis_phase(xc_handle, dom, max_pfn, to_skip, 0); + + /* get all the HVM domain pfns */ + if ( (page_array = (unsigned long *) malloc (sizeof(unsigned long) * max_pfn)) == NULL) { + ERROR("HVM:malloc fail!\n"); + goto out; + } + + for ( i = 0; i < max_pfn; i++) + page_array[i] = i; + + + /* We want zeroed memory so use calloc rather than malloc. */ + pfn_type = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type)); + pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch)); + + if ((pfn_type == NULL) || (pfn_batch == NULL)) { + ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); + errno = ENOMEM; + goto out; + } + + if (lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type))) { + ERROR("Unable to lock"); + goto out; + } + + /* Start writing out the saved-domain record. 
*/ + if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { + ERROR("write: max_pfn"); + goto out; + } + + while(1) { + + unsigned int prev_pc, sent_this_iter, N, batch; + + iter++; + sent_this_iter = 0; + skip_this_iter = 0; + prev_pc = 0; + N=0; + + DPRINTF("Saving HVM domain memory pages: iter %d 0%%", iter); + + while( N < max_pfn ){ + + unsigned int this_pc = (N * 100) / max_pfn; + + if ((this_pc - prev_pc) >= 5) { + DPRINTF("\b\b\b\b%3d%%", this_pc); + prev_pc = this_pc; + } + + /* slightly wasteful to peek the whole array evey time, + but this is fast enough for the moment. */ + if (!last_iter && xc_shadow_control( + xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, + to_skip, max_pfn, NULL, 0, NULL) != max_pfn) { + ERROR("Error peeking HVM shadow bitmap"); + goto out; + } + + + /* load pfn_type[] with the mfn of all the pages we're doing in + this batch. */ + for (batch = 0; batch < MAX_BATCH_SIZE && N < max_pfn ; N++) { + + int n = permute(N, max_pfn, order_nr); + + if (debug) { + DPRINTF("%d pfn= %08lx mfn= %08lx %d \n", + iter, (unsigned long)n, page_array[n], + test_bit(n, to_send)); + } + + if (!last_iter && test_bit(n, to_send)&& test_bit(n, to_skip)) + skip_this_iter++; /* stats keeping */ + + if (!((test_bit(n, to_send) && !test_bit(n, to_skip)) || + (test_bit(n, to_send) && last_iter))) + continue; + + if (n >= 0xa0 && n < 0xc0) { +/* DPRINTF("get a vga hole pfn= %x.\n", n);*/ + continue; + } + /* + ** we get here if: + ** 1. page is marked to_send & hasn't already been re-dirtied + ** 2. (ignore to_skip in last iteration) + */ + + pfn_batch[batch] = n; + pfn_type[batch] = page_array[n]; + + batch++; + } + + if (batch == 0) + goto skip; /* vanishingly unlikely... */ + + /* map_foreign use pfns now !*/ + if ((region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ, pfn_batch, batch)) == 0) { + ERROR("map batch failed"); + goto out; + } + + /* write num of pfns */ + if(!write_exact(io_fd, &batch, sizeof(unsigned int))) { + ERROR("Error when writing to state file (2)"); + goto out; + } + + /* write all the pfns */ + if(!write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch)) { + ERROR("Error when writing to state file (3)"); + goto out; + } + + if (ratewrite(io_fd, region_base, PAGE_SIZE * batch) != PAGE_SIZE * batch) { + ERROR("ERROR when writting to state file (4)"); + goto out; + } + + + sent_this_iter += batch; + + munmap(region_base, batch*PAGE_SIZE); + + } /* end of this while loop for this iteration */ + + skip: + + total_sent += sent_this_iter; + + DPRINTF("\r %d: sent %d, skipped %d, ", + iter, sent_this_iter, skip_this_iter ); + + if (last_iter) { + print_stats( xc_handle, dom, sent_this_iter, &stats, 1); + + DPRINTF("Total pages sent= %ld (%.2fx)\n", + total_sent, ((float)total_sent)/max_pfn ); + } + + if (last_iter && debug){ + int minusone = -1; + memset(to_send, 0xff, BITMAP_SIZE); + debug = 0; + DPRINTF("Entering debug resend-all mode\n"); + + /* send "-1" to put receiver into debug mode */ + if(!write_exact(io_fd, &minusone, sizeof(int))) { + ERROR("Error when writing to state file (6)"); + goto out; + } + + continue; + } + + if (last_iter) break; + + if (live) { + + + if( + ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || + (iter >= max_iters) || + (sent_this_iter+skip_this_iter < 50) || + (total_sent > max_pfn*max_factor) ) { + + DPRINTF("Start last iteration for HVM domain\n"); + last_iter = 1; + + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, + &ctxt)) { + ERROR("Domain appears not to have suspended"); + goto out; + } + + 
DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n", + info.shared_info_frame, + (unsigned long)ctxt.user_regs.eip, + (unsigned long)ctxt.user_regs.edx); + } + + if (xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, + max_pfn, NULL, 0, &stats) != max_pfn) { + ERROR("Error flushing shadow PT"); + goto out; + } + + sent_last_iter = sent_this_iter; + + print_stats(xc_handle, dom, sent_this_iter, &stats, 1); + + } + + + } /* end of while 1 */ + + + DPRINTF("All HVM memory is saved\n"); + + /* Zero terminate */ + i = 0; + if (!write_exact(io_fd, &i, sizeof(int))) { + ERROR("Error when writing to state file (6)"); + goto out; + } + + /* save hvm hypervisor state including pic/pit/shpage */ + if (mlock(&hvm_ctxt, sizeof(hvm_ctxt))) { + ERROR("Unable to mlock ctxt"); + return 1; + } + + if (xc_domain_hvm_getcontext(xc_handle, dom, &hvm_ctxt)){ + ERROR("HVM:Could not get hvm context"); + goto out; + } + + rec_size = sizeof(hvm_ctxt); + if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) { + ERROR("error write hvm ctxt size"); + goto out; + } + + if ( !write_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt)) ) { + ERROR("write HVM info failed!\n"); + } + + /* save vcpu/vmcs context */ + if (!write_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) { + ERROR("error write nr vcpus"); + goto out; + } + + /*XXX: need a online map to exclude down cpu */ + for (i = 0; i < nr_vcpus; i++) { + + if (xc_vcpu_getcontext(xc_handle, dom, i, &ctxt)) { + ERROR("HVM:Could not get vcpu context"); + goto out; + } + + rec_size = sizeof(ctxt); + DPRINTF("write %d vcpucontext of total %d.\n", i, nr_vcpus); + if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) { + ERROR("error write vcpu ctxt size"); + goto out; + } + + if (!write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) { + ERROR("write vmcs failed!\n"); + goto out; + } + } + + /* Success! */ + rc = 0; + + out: + + if (live) { + if(xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_OFF, + NULL, 0, NULL, 0, NULL) < 0) { + DPRINTF("Warning - couldn't disable shadow mode"); + } + } + + free(page_array); + + free(pfn_type); + free(pfn_batch); + free(to_send); + free(to_skip); + + return !!rc; +} diff -r 58637a0a7c7e -r e2ca6bd16046 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/libxc/xenctrl.h Thu Jan 18 18:25:04 2007 +0000 @@ -313,6 +313,30 @@ int xc_domain_getinfolist(int xc_handle, xc_domaininfo_t *info); /** + * This function returns information about the context of a hvm domain + * @parm xc_handle a handle to an open hypervisor interface + * @parm domid the domain to get information from + * @parm hvm_ctxt a pointer to a structure to store the execution context of the + * hvm domain + * @return 0 on success, -1 on failure + */ +int xc_domain_hvm_getcontext(int xc_handle, + uint32_t domid, + hvm_domain_context_t *hvm_ctxt); + +/** + * This function will set the context for hvm domain + * + * @parm xc_handle a handle to an open hypervisor interface + * @parm domid the domain to set the hvm domain context for + * @parm hvm_ctxt pointer to the the hvm context with the values to set + * @return 0 on success, -1 on failure + */ +int xc_domain_hvm_setcontext(int xc_handle, + uint32_t domid, + hvm_domain_context_t *hvm_ctxt); + +/** * This function returns information about the execution context of a * particular vcpu of a domain. 
* diff -r 58637a0a7c7e -r e2ca6bd16046 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/libxc/xenguest.h Thu Jan 18 18:25:04 2007 +0000 @@ -11,6 +11,7 @@ #define XCFLAGS_LIVE 1 #define XCFLAGS_DEBUG 2 +#define XCFLAGS_HVM 4 /** @@ -25,6 +26,13 @@ int xc_linux_save(int xc_handle, int io_ uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, int (*suspend)(int domid)); +/** + * This function will save a hvm domain running unmodified guest. + * @return 0 on success, -1 on failure + */ +int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, + int (*suspend)(int domid)); /** * This function will restore a saved domain running Linux. @@ -41,6 +49,18 @@ int xc_linux_restore(int xc_handle, int unsigned long nr_pfns, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn); + +/** + * This function will restore a saved hvm domain running unmodified guest. + * + * @parm store_mfn pass mem size & returned with the mfn of the store page + * @return 0 on success, -1 on failure + */ +int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom, + unsigned long nr_pfns, unsigned int store_evtchn, + unsigned long *store_mfn, unsigned int console_evtchn, + unsigned long *console_mfn, + unsigned int pae, unsigned int apic); /** * This function will create a domain for a paravirtualized Linux diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/setup.py --- a/tools/python/setup.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/setup.py Thu Jan 18 18:25:04 2007 +0000 @@ -30,12 +30,23 @@ xs = Extension("xs", libraries = libraries, sources = [ "xen/lowlevel/xs/xs.c" ]) +scf = Extension("scf", + extra_compile_args = extra_compile_args, + include_dirs = include_dirs + [ "xen/lowlevel/scf" ], + library_dirs = library_dirs, + libraries = libraries, + sources = [ "xen/lowlevel/scf/scf.c" ]) + acm = Extension("acm", extra_compile_args = extra_compile_args, include_dirs = include_dirs + [ "xen/lowlevel/acm" ], library_dirs = library_dirs, libraries = libraries, sources = [ "xen/lowlevel/acm/acm.c" ]) + +modules = [ xc, xs, acm ] +if os.uname()[0] == 'SunOS': + modules.append(scf) setup(name = 'xen', version = '3.0', @@ -56,7 +67,7 @@ setup(name = 'xen', 'xen.xm.tests' ], ext_package = "xen.lowlevel", - ext_modules = [ xc, xs, acm ] + ext_modules = modules ) os.chdir('logging') diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/lowlevel/scf/scf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/python/xen/lowlevel/scf/scf.c Thu Jan 18 18:25:04 2007 +0000 @@ -0,0 +1,156 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <Python.h> + +#include <libscf.h> +#include <stdio.h> + +#define XEND_FMRI "svc:/system/xen/xend:default" +#define XEND_PG "config" + +static PyObject *scf_exc; + +static void * +scf_exception(const char *err, const char *value) +{ + int scferr = scf_error(); + const char *scfstrerr = scf_strerror(scferr); + PyObject *obj = Py_BuildValue("(isss)", scferr, err, scfstrerr, value); + PyErr_SetObject(scf_exc, obj); + return (NULL); +} + +static PyObject * +pyscf_get_bool(PyObject *o, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = { "name", NULL }; + scf_simple_prop_t *prop; + uint8_t *val; + char *name; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name)) + return (NULL); + + prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name); + + if (prop == NULL) + return (scf_exception("scf_simple_prop_get() failed", name)); + + if ((val = scf_simple_prop_next_boolean(prop)) == NULL) + return (scf_exception("scf_simple_prop_next_boolean() failed", + name)); + + if (*val) { + scf_simple_prop_free(prop); + Py_INCREF(Py_True); + return (Py_True); + } + + scf_simple_prop_free(prop); + Py_INCREF(Py_False); + return (Py_False); +} + +static PyObject * +pyscf_get_int(PyObject *o, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = { "name", NULL }; + scf_simple_prop_t *prop; + PyObject *obj; + int64_t *val; + char *name; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name)) + return (NULL); + + prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name); + + if (prop == NULL) + return (scf_exception("scf_simple_prop_get() failed", name)); + + if ((val = scf_simple_prop_next_integer(prop)) == NULL) + return (scf_exception("scf_simple_prop_next_integer() failed", + name)); + + obj = PyInt_FromLong((long)*val); + scf_simple_prop_free(prop); + return (obj); +} + +static PyObject * +pyscf_get_string(PyObject *o, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = { "name", NULL }; + scf_simple_prop_t *prop; + PyObject *obj; + char *name; + char *str; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name)) + return (NULL); + + prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name); + + if (prop == NULL) + return (scf_exception("scf_simple_prop_get() failed", name)); + + if ((str = scf_simple_prop_next_astring(prop)) == NULL) { + scf_simple_prop_free(prop); + return (scf_exception("scf_simple_prop_next_astring() failed", + name)); + } + + obj = PyString_FromString(str); + scf_simple_prop_free(prop); + return (obj); +} + +PyDoc_STRVAR(pyscf_get_bool__doc__, + "get_bool(name) - get the value of the named boolean property"); +PyDoc_STRVAR(pyscf_get_int__doc__, + "get_int(name) - get the value of the named integer property"); +PyDoc_STRVAR(pyscf_get_string__doc__, + "get_string(name) - get the value of the named string property"); + +static struct PyMethodDef pyscf_module_methods[] = { + { "get_bool", (PyCFunction) pyscf_get_bool, + METH_VARARGS|METH_KEYWORDS, pyscf_get_bool__doc__ }, + { "get_int", (PyCFunction) pyscf_get_int, + METH_VARARGS|METH_KEYWORDS, pyscf_get_int__doc__ }, + { "get_string", (PyCFunction) pyscf_get_string, + 
METH_VARARGS|METH_KEYWORDS, pyscf_get_string__doc__ }, + { NULL, NULL, 0, NULL } +}; + +PyMODINIT_FUNC +initscf(void) +{ + PyObject *m; + m = Py_InitModule("scf", pyscf_module_methods); + + scf_exc = PyErr_NewException("scf.error", NULL, NULL); + Py_INCREF(scf_exc); + PyModule_AddObject(m, "error", scf_exc); + PyModule_AddIntConstant(m, "SCF_ERROR_NOT_FOUND", SCF_ERROR_NOT_FOUND); +} diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/lowlevel/xc/xc.c Thu Jan 18 18:25:04 2007 +0000 @@ -158,6 +158,20 @@ static PyObject *pyxc_domain_destroy(XcO static PyObject *pyxc_domain_destroy(XcObject *self, PyObject *args) { return dom_op(self, args, xc_domain_destroy); +} + +static PyObject *pyxc_domain_shutdown(XcObject *self, PyObject *args) +{ + uint32_t dom, reason; + + if (!PyArg_ParseTuple(args, "ii", &dom, &reason)) + return NULL; + + if (xc_domain_shutdown(self->xc_handle, dom, reason) != 0) + return pyxc_error_to_exception(); + + Py_INCREF(zero); + return zero; } @@ -1027,6 +1041,14 @@ static PyMethodDef pyxc_methods[] = { METH_VARARGS, "\n" "Destroy a domain.\n" " dom [int]: Identifier of domain to be destroyed.\n\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + + { "domain_shutdown", + (PyCFunction)pyxc_domain_shutdown, + METH_VARARGS, "\n" + "Shutdown a domain.\n" + " dom [int, 0]: Domain identifier to use.\n" + " reason [int, 0]: Reason for shutdown.\n" "Returns: [int] 0 on success; -1 on error.\n" }, { "vcpu_setaffinity", diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/util/xmlrpclib2.py --- a/tools/python/xen/util/xmlrpclib2.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/util/xmlrpclib2.py Thu Jan 18 18:25:04 2007 +0000 @@ -256,6 +256,7 @@ class UnixXMLRPCRequestHandler(XMLRPCReq class UnixXMLRPCServer(TCPXMLRPCServer): address_family = socket.AF_UNIX + allow_address_reuse = True def __init__(self, addr, allowed, xenapi, logRequests = 1): mkdir.parents(os.path.dirname(addr), stat.S_IRWXU, True) diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/web/httpserver.py --- a/tools/python/xen/web/httpserver.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/web/httpserver.py Thu Jan 18 18:25:04 2007 +0000 @@ -294,8 +294,6 @@ class HttpServer: backlog = 5 - closed = False - def __init__(self, root, interface, port=8080): self.root = root self.interface = interface @@ -303,6 +301,7 @@ class HttpServer: # ready indicates when we are ready to begin accept connections # it should be set after a successful bind self.ready = False + self.closed = False def run(self): self.bind() @@ -316,7 +315,6 @@ class HttpServer: def stop(self): self.close() - def bind(self): self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -334,7 +332,10 @@ class HttpServer: def close(self): self.closed = True - try: + self.ready = False + try: + # shutdown socket explicitly to allow reuse + self.socket.shutdown(socket.SHUT_RDWR) self.socket.close() except: pass @@ -344,6 +345,9 @@ class HttpServer: def getResource(self, req): return self.root.getRequestResource(req) + + def shutdown(self): + self.close() class UnixHttpServer(HttpServer): diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/Vifctl.py --- a/tools/python/xen/xend/Vifctl.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/Vifctl.py Thu Jan 18 18:25:04 2007 +0000 @@ -20,7 +20,7 @@ """ import os -import XendRoot +import XendOptions def network(op): @@ -30,7 +30,7 @@ def network(op): """ if op 
not in ['start', 'stop']: raise ValueError('Invalid operation: ' + op) - script = XendRoot.instance().get_network_script() + script = XendOptions.instance().get_network_script() if script: script.insert(1, op) os.spawnv(os.P_WAIT, script[0], script) diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/XendCheckpoint.py Thu Jan 18 18:25:04 2007 +0000 @@ -22,11 +22,14 @@ from xen.xend.XendConstants import * from xen.xend.XendConstants import * SIGNATURE = "LinuxGuestRecord" +QEMU_SIGNATURE = "QemuDeviceModelRecord" +dm_batch = 512 XC_SAVE = "xc_save" XC_RESTORE = "xc_restore" sizeof_int = calcsize("i") +sizeof_unsigned_int = calcsize("I") sizeof_unsigned_long = calcsize("L") @@ -69,6 +72,11 @@ def save(fd, dominfo, network, live, dst "could not write guest state file: config len") write_exact(fd, config, "could not write guest state file: config") + image_cfg = dominfo.info.get('image', {}) + hvm = image_cfg.has_key('hvm') + + if hvm: + log.info("save hvm domain") # xc_save takes three customization parameters: maxit, max_f, and # flags the last controls whether or not save is 'live', while the # first two further customize behaviour when 'live' save is @@ -76,7 +84,7 @@ def save(fd, dominfo, network, live, dst # libxenguest; see the comments and/or code in xc_linux_save() for # more information. cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd), - str(dominfo.getDomid()), "0", "0", str(int(live)) ] + str(dominfo.getDomid()), "0", "0", str(int(live) | (int(hvm) << 2)) ] log.debug("[xc_save]: %s", string.join(cmd)) def saveInputHandler(line, tochild): @@ -90,11 +98,28 @@ def save(fd, dominfo, network, live, dst log.info("Domain %d suspended.", dominfo.getDomid()) dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3, domain_name) + #send signal to device model for save + if hvm: + log.info("release_devices for hvm domain") + dominfo._releaseDevices(True) tochild.write("done\n") tochild.flush() log.debug('Written done') forkHelper(cmd, fd, saveInputHandler, False) + + # put qemu device model state + if hvm: + write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature") + qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(), os.O_RDONLY) + while True: + buf = os.read(qemu_fd, dm_batch) + if len(buf): + write_exact(fd, buf, "could not write device model state") + else: + break + os.close(qemu_fd) + os.remove("/tmp/xen.qemu-dm.%d" % dominfo.getDomid()) dominfo.destroyDomain() try: @@ -149,19 +174,49 @@ def restore(xd, fd, dominfo = None, paus nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 + # if hvm, pass mem size to calculate the store_mfn + hvm = 0 + apic = 0 + pae = 0 + image_cfg = dominfo.info.get('image', {}) + hvm = image_cfg.has_key('hvm') + if hvm: + # the 'memory' in config has been removed + hvm = dominfo.info['memory_static_min'] + apic = dominfo.info['image']['hvm'].get('apic', 0) + pae = dominfo.info['image']['hvm'].get('pae', 0) + log.info("restore hvm domain %d, mem=%d, apic=%d, pae=%d", + dominfo.domid, hvm, apic, pae) + try: - l = read_exact(fd, sizeof_unsigned_long, - "not a valid guest state file: pfn count read") + if hvm: + l = read_exact(fd, sizeof_unsigned_int, + "not a valid hvm guest state file: pfn count read") + nr_pfns = unpack("I", l)[0] # native sizeof int + else: + l = read_exact(fd, sizeof_unsigned_long, + "not a valid guest state file: pfn count read") + max_pfn = unpack("L", l)[0] # native sizeof long if max_pfn > 
16*1024*1024: # XXX raise XendError( "not a valid guest state file: pfn count out of range") - balloon.free(xc.pages_to_kib(nr_pfns)) + shadow = dominfo.info['shadow_memory'] + log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, " + "nr_pfns=0x%x.", dominfo.info['shadow_memory'], + dominfo.info['memory_static_max'], + dominfo.info['memory_static_min'], nr_pfns) + + + balloon.free(xc.pages_to_kib(nr_pfns) + shadow * 1024) + + shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow) + dominfo.info['shadow_memory'] = shadow_cur cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), fd, dominfo.getDomid(), max_pfn, - store_port, console_port]) + store_port, console_port, hvm, pae, apic]) log.debug("[xc_restore]: %s", string.join(cmd)) handler = RestoreInputHandler() @@ -171,10 +226,30 @@ def restore(xd, fd, dominfo = None, paus if handler.store_mfn is None or handler.console_mfn is None: raise XendError('Could not read store/console MFN') - os.read(fd, 1) # Wait for source to close connection dominfo.waitForDevices() # Wait for backends to set up if not paused: dominfo.unpause() + + # get qemu state and create a tmp file for dm restore + if hvm: + qemu_signature = read_exact(fd, len(QEMU_SIGNATURE), + "invalid device model signature read") + if qemu_signature != QEMU_SIGNATURE: + raise XendError("not a valid device model state: found '%s'" % + qemu_signature) + qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(), + os.O_WRONLY | os.O_CREAT | os.O_TRUNC) + while True: + buf = os.read(fd, dm_batch) + if len(buf): + write_exact(qemu_fd, buf, + "could not write dm state to tmp file") + else: + break + os.close(qemu_fd) + + + os.read(fd, 1) # Wait for source to close connection dominfo.completeRestore(handler.store_mfn, handler.console_mfn) diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/XendConfig.py Thu Jan 18 18:25:04 2007 +0000 @@ -77,6 +77,25 @@ def scrub_password(data): return re.sub(r'\(vncpasswd\s+[^\)]+\)','(vncpasswd XXXXXX)', data) else: return data + +# +# CPU fields: +# +# vcpus_number -- the maximum number of vcpus that this domain may ever have. +# aka XendDomainInfo.getVCpuCount(). +# vcpus -- the legacy configuration name for above. +# max_vcpu_id -- vcpus_number - 1. This is given to us by Xen. +# +# cpus -- the list of pCPUs available to each vCPU. +# +# vcpu_avail: a bitmap telling the guest domain whether it may use each of +# its VCPUs. This is translated to +# <dompath>/cpu/<id>/availability = {online,offline} for use +# by the guest domain. +# online_vpcus -- the number of VCPUs currently up, as reported by Xen. This +# is changed by changing vcpu_avail, and waiting for the +# domain to respond. 
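
To make the vcpu_avail convention described in the comment above concrete, here is a small illustrative sketch of how such a bitmask would translate into the per-VCPU availability values written under <dompath>/cpu/<id>/availability. The helper name and the print loop are only for the example; xend performs the actual xenstore writes through its xstransact machinery.

    def vcpu_availability(vcpu_avail, vcpus_number):
        """Map the vcpu_avail bitmask to 'online'/'offline' per VCPU.

        Bit i set means VCPU i may be used by the guest."""
        avail = {}
        for i in range(vcpus_number):
            if vcpu_avail & (1 << i):
                avail[i] = 'online'
            else:
                avail[i] = 'offline'
        return avail

    # Example: vcpus_number=4 with bits 0, 1 and 3 set (mask 0xb).
    for vcpu, state in sorted(vcpu_availability(0xb, 4).items()):
        print 'cpu/%d/availability = %s' % (vcpu, state)
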
+# # Mapping from XendConfig configuration keys to the old @@ -185,7 +204,7 @@ LEGACY_CFG_TYPES = { 'uuid': str, 'name': str, 'vcpus': int, - 'vcpu_avail': int, + 'vcpu_avail': long, 'memory': int, 'shadow_memory': int, 'maxmem': int, @@ -355,9 +374,6 @@ class XendConfig(dict): 'cpu_weight': 256, 'cpu_cap': 0, 'vcpus_number': 1, - 'online_vcpus': 1, - 'max_vcpu_id': 0, - 'vcpu_avail': 1, 'console_refs': [], 'vif_refs': [], 'vbd_refs': [], @@ -389,7 +405,7 @@ class XendConfig(dict): event) def _vcpus_sanity_check(self): - if self.get('vcpus_number') != None: + if 'vcpus_number' in self and 'vcpu_avail' not in self: self['vcpu_avail'] = (1 << self['vcpus_number']) - 1 def _uuid_sanity_check(self): @@ -405,7 +421,7 @@ class XendConfig(dict): def _dominfo_to_xapi(self, dominfo): self['domid'] = dominfo['domid'] self['online_vcpus'] = dominfo['online_vcpus'] - self['max_vcpu_id'] = dominfo['max_vcpu_id'] + self['vcpus_number'] = dominfo['max_vcpu_id'] + 1 self['memory_dynamic_min'] = (dominfo['mem_kb'] + 1023)/1024 self['memory_dynamic_max'] = (dominfo['maxmem_kb'] + 1023)/1024 self['cpu_time'] = dominfo['cpu_time']/1e9 @@ -636,9 +652,6 @@ class XendConfig(dict): self['memory_dynamic_max'] = self['memory_static_max'] self['memory_dynamic_min'] = self['memory_static_min'] - # make sure max_vcpu_id is set correctly - self['max_vcpu_id'] = self['vcpus_number'] - 1 - # set device references in the configuration self['devices'] = cfg.get('devices', {}) @@ -720,13 +733,11 @@ class XendConfig(dict): _set_cfg_if_exists('on_xend_stop') _set_cfg_if_exists('on_xend_start') _set_cfg_if_exists('vcpu_avail') - _set_cfg_if_exists('max_vcpu_id') # needed for vcpuDomDetails _set_cfg_if_exists('cpu_weight') _set_cfg_if_exists('cpu_cap') # Parse and store runtime configuration _set_cfg_if_exists('start_time') - _set_cfg_if_exists('online_vcpus') _set_cfg_if_exists('cpu_time') _set_cfg_if_exists('shutdown_reason') _set_cfg_if_exists('up_time') @@ -1115,19 +1126,17 @@ class XendConfig(dict): # configuration log.debug("update_with_image_sxp(%s)" % scrub_password(image_sxp)) - kernel_args = "" + # user-specified args must come last: previous releases did this and + # some domU kernels rely upon the ordering. + kernel_args = sxp.child_value(image_sxp, 'args', '') # attempt to extract extra arguments from SXP config arg_ip = sxp.child_value(image_sxp, 'ip') if arg_ip and not re.search(r'ip=[^ ]+', kernel_args): - kernel_args += 'ip=%s ' % arg_ip + kernel_args = 'ip=%s ' % arg_ip + kernel_args arg_root = sxp.child_value(image_sxp, 'root') if arg_root and not re.search(r'root=', kernel_args): - kernel_args += 'root=%s ' % arg_root - - # user-specified args must come last: previous releases did this and - # some domU kernels rely upon the ordering. 
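
The update_with_image_sxp() change above enforces the ordering rule spelled out in that comment: ip= and root= values derived from the SXP config are prepended, so whatever the user passed in 'args' stays last. A standalone sketch of the same rule, with hard-coded values standing in for the sxp lookups:

    import re

    def build_kernel_args(user_args, arg_ip=None, arg_root=None):
        """Prepend ip=/root= only when the user has not already set them,
        keeping the user-specified args last (some domU kernels rely on
        the ordering)."""
        kernel_args = user_args
        if arg_ip and not re.search(r'ip=[^ ]+', kernel_args):
            kernel_args = 'ip=%s ' % arg_ip + kernel_args
        if arg_root and not re.search(r'root=', kernel_args):
            kernel_args = 'root=%s ' % arg_root + kernel_args
        return kernel_args

    # build_kernel_args('console=xvc0 ro', 'dhcp', '/dev/xvda1')
    #   -> 'root=/dev/xvda1 ip=dhcp console=xvc0 ro'
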
- kernel_args += sxp.child_value(image_sxp, 'args', '') + kernel_args = 'root=%s ' % arg_root + kernel_args if bootloader: self['_temp_using_bootloader'] = '1' diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/XendDomain.py Thu Jan 18 18:25:04 2007 +0000 @@ -32,7 +32,7 @@ import xen.lowlevel.xc import xen.lowlevel.xc -from xen.xend import XendRoot, XendCheckpoint, XendDomainInfo +from xen.xend import XendOptions, XendCheckpoint, XendDomainInfo from xen.xend.PrettyPrint import prettyprint from xen.xend.XendConfig import XendConfig from xen.xend.XendError import XendError, XendInvalidDomain, VmError @@ -51,7 +51,7 @@ from xen.xend import uuid from xen.xend import uuid xc = xen.lowlevel.xc.xc() -xroot = XendRoot.instance() +xoptions = XendOptions.instance() __all__ = [ "XendDomain" ] @@ -214,7 +214,7 @@ class XendDomain: @rtype: String @return: Path. """ - dom_path = xroot.get_xend_domains_path() + dom_path = xoptions.get_xend_domains_path() if domuuid: dom_path = os.path.join(dom_path, domuuid) return dom_path @@ -361,7 +361,7 @@ class XendDomain: def _setDom0CPUCount(self): """Sets the number of VCPUs dom0 has. Retreived from the - Xend configuration, L{XendRoot}. + Xend configuration, L{XendOptions}. @requires: Expects to be protected by domains_lock. @rtype: None @@ -369,7 +369,7 @@ class XendDomain: dom0 = self.privilegedDomain() # get max number of vcpus to use for dom0 from config - target = int(xroot.get_dom0_vcpus()) + target = int(xoptions.get_dom0_vcpus()) log.debug("number of vcpus to use is %d", target) # target == 0 means use all processors @@ -1164,7 +1164,7 @@ class XendDomain: dominfo.checkLiveMigrateMemory() if port == 0: - port = xroot.get_xend_relocation_port() + port = xoptions.get_xend_relocation_port() try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((dst, port)) diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Jan 18 18:25:04 2007 +0000 @@ -38,7 +38,7 @@ from xen.util import security from xen.util import security from xen.xend import balloon, sxp, uuid, image, arch, osdep -from xen.xend import XendRoot, XendNode, XendConfig +from xen.xend import XendOptions, XendNode, XendConfig from xen.xend.XendConfig import scrub_password from xen.xend.XendBootloader import bootloader @@ -54,29 +54,10 @@ BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp' BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp' xc = xen.lowlevel.xc.xc() -xroot = XendRoot.instance() +xoptions = XendOptions.instance() log = logging.getLogger("xend.XendDomainInfo") #log.setLevel(logging.TRACE) - - -# -# There are a number of CPU-related fields: -# -# vcpus: the number of virtual CPUs this domain is configured to use. -# vcpu_avail: a bitmap telling the guest domain whether it may use each of -# its VCPUs. This is translated to -# <dompath>/cpu/<id>/availability = {online,offline} for use -# by the guest domain. -# cpumap: a list of bitmaps, one for each VCPU, giving the physical -# CPUs that that VCPU may use. -# cpu: a configuration setting requesting that VCPU 0 is pinned to -# the specified physical CPU. -# -# vcpus and vcpu_avail settings persist with the VM (i.e. they are persistent -# across save, restore, migrate, and restart). The other settings are only -# specific to the domain, so are lost when the VM moves. 
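
Looking back at the XendCheckpoint hunks a little earlier: for HVM guests the device-model state is framed very simply as the QemuDeviceModelRecord signature followed by the raw bytes of the temporary file qemu-dm writes when it is signalled at suspend time, copied in dm_batch-sized chunks. A stripped-down sketch of the save-side copy loop follows; write_exact()'s error handling is dropped and plain os.write() is used instead, so this is illustrative only.

    import os

    QEMU_SIGNATURE = "QemuDeviceModelRecord"
    dm_batch = 512

    def put_qemu_state(out_fd, domid):
        """Append the device-model record to an already-open save fd."""
        os.write(out_fd, QEMU_SIGNATURE)
        path = "/tmp/xen.qemu-dm.%d" % domid
        qemu_fd = os.open(path, os.O_RDONLY)
        while True:
            buf = os.read(qemu_fd, dm_batch)
            if not buf:
                break
            os.write(out_fd, buf)
        os.close(qemu_fd)
        os.remove(path)

The restore path in the same file is the mirror image: it checks the signature, then copies bytes from the migration fd into a fresh /tmp/xen.qemu-dm.<domid> that is later handed to qemu-dm via -loadvm.
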
-# def create(config): @@ -451,6 +432,16 @@ class XendDomainInfo: self._removeVm('xend/previous_restart_time') self.storeDom("control/shutdown", reason) + ## shutdown hypercall for hvm domain desides xenstore write + image_cfg = self.info.get('image', {}) + hvm = image_cfg.has_key('hvm') + if hvm: + for code in DOMAIN_SHUTDOWN_REASONS.keys(): + if DOMAIN_SHUTDOWN_REASONS[code] == reason: + break + xc.domain_shutdown(self.domid, code) + + def pause(self): """Pause domain @@ -614,7 +605,7 @@ class XendDomainInfo: ['name', self.info['name_label']], ['vcpu_count', self.info['vcpus_number']]] - for i in range(0, self.info['max_vcpu_id']+1): + for i in range(0, self.info['vcpus_number']): info = xc.vcpu_getinfo(self.domid, i) sxpr.append(['vcpu', @@ -739,7 +730,7 @@ class XendDomainInfo: 'domid': str(self.domid), 'vm': self.vmpath, 'name': self.info['name_label'], - 'console/limit': str(xroot.get_console_limit() * 1024), + 'console/limit': str(xoptions.get_console_limit() * 1024), 'memory/target': str(self.info['memory_static_min'] * 1024) } @@ -898,8 +889,9 @@ class XendDomainInfo: self._writeDom(self._vcpuDomDetails()) else: self.info['vcpus_number'] = vcpus - self.info['online_vcpus'] = vcpus xen.xend.XendDomain.instance().managed_config_save(self) + log.info("Set VCPU count on domain %s to %d", self.info['name_label'], + vcpus) def getLabel(self): return security.get_security_info(self.info, 'label') @@ -976,7 +968,7 @@ class XendDomainInfo: self.info['name_label'], self.domid) self._writeVm(LAST_SHUTDOWN_REASON, 'crash') - if xroot.get_enable_dump(): + if xoptions.get_enable_dump(): try: self.dumpCore() except XendError: @@ -1228,8 +1220,11 @@ class XendDomainInfo: if self.image: self.image.createDeviceModel() - def _releaseDevices(self): + def _releaseDevices(self, suspend = False): """Release all domain's devices. Nothrow guarantee.""" + if suspend and self.image: + self.image.destroy(suspend) + return while True: t = xstransact("%s/device" % self.dompath) @@ -1381,7 +1376,7 @@ class XendDomainInfo: # this is done prior to memory allocation to aide in memory # distribution for NUMA systems. if self.info['cpus'] is not None and len(self.info['cpus']) > 0: - for v in range(0, self.info['max_vcpu_id']+1): + for v in range(0, self.info['vcpus_number']): xc.vcpu_setaffinity(self.domid, v, self.info['cpus']) # Use architecture- and image-specific calculations to determine @@ -1395,6 +1390,7 @@ class XendDomainInfo: self.info['shadow_memory'] * 1024, self.info['memory_static_max'] * 1024) + log.debug("_initDomain:shadow_memory=0x%x, memory_static_max=0x%x, memory_static_min=0x%x.", self.info['shadow_memory'], self.info['memory_static_max'], self.info['memory_static_min'],) # Round shadow up to a multiple of a MiB, as shadow_mem_control # takes MiB and we must not round down and end up under-providing. shadow = ((shadow + 1023) / 1024) * 1024 @@ -1494,6 +1490,16 @@ class XendDomainInfo: self.console_mfn = console_mfn self._introduceDomain() + image_cfg = self.info.get('image', {}) + hvm = image_cfg.has_key('hvm') + if hvm: + self.image = image.create(self, + self.info, + self.info['image'], + self.info['devices']) + if self.image: + self.image.createDeviceModel(True) + self.image.register_shutdown_watch() self._storeDomDetails() self._registerWatches() self.refreshShutdown() @@ -2028,8 +2034,8 @@ class XendDomainInfo: # TODO: spec says that key is int, however, python does not allow # non-string keys to dictionaries. 
vcpu_util = {} - if 'max_vcpu_id' in self.info and self.domid != None: - for i in range(0, self.info['max_vcpu_id']+1): + if 'vcpus_number' in self.info and self.domid != None: + for i in range(0, self.info['vcpus_number']): info = xc.vcpu_getinfo(self.domid, i) vcpu_util[str(i)] = info['cpu_time']/1000000000.0 diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/XendNode.py Thu Jan 18 18:25:04 2007 +0000 @@ -24,7 +24,7 @@ from xen.util import Brctl from xen.xend import uuid from xen.xend.XendError import XendError, NetworkAlreadyConnected -from xen.xend.XendRoot import instance as xendroot +from xen.xend.XendOptions import instance as xendoptions from xen.xend.XendStorageRepository import XendStorageRepository from xen.xend.XendLogging import log from xen.xend.XendPIF import * @@ -45,7 +45,7 @@ class XendNode: """ self.xc = xen.lowlevel.xc.xc() - self.state_store = XendStateStore(xendroot().get_xend_state_path()) + self.state_store = XendStateStore(xendoptions().get_xend_state_path()) # load host state from XML file saved_host = self.state_store.load_state('host') diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/XendOptions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/python/xen/xend/XendOptions.py Thu Jan 18 18:25:04 2007 +0000 @@ -0,0 +1,373 @@ +#============================================================================ +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#============================================================================ +# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> +# Copyright (C) 2005 XenSource Ltd +#============================================================================ + +"""Xend root class. +Creates the servers and handles configuration. + +Other classes get config variables by importing this module, +using instance() to get a XendOptions instance, and then +the config functions (e.g. get_xend_port()) to get +configured values. +""" + +import os +import os.path +import string +import sys + +from xen.xend import sxp, osdep, XendLogging +from xen.xend.XendError import XendError + +if os.uname()[0] == 'SunOS': + from xen.lowlevel import scf + +class XendOptions: + """Configuration options.""" + + """Where network control scripts live.""" + network_script_dir = osdep.scripts_dir + + """Where block control scripts live.""" + block_script_dir = osdep.scripts_dir + + """Default path to the log file. """ + logfile_default = "/var/log/xen/xend.log" + + """Default level of information to be logged.""" + loglevel_default = 'DEBUG' + + """Default Xen-API server configuration. 
""" + xen_api_server_default = [['unix']] + + """Default for the flag indicating whether xend should run an http server + (deprecated).""" + xend_http_server_default = 'no' + + xend_tcp_xmlrpc_server_default = 'no' + + xend_unix_xmlrpc_server_default = 'yes' + + """Default interface address xend listens at. """ + xend_address_default = '' + + """Default for the flag indicating whether xend should run a relocation server.""" + xend_relocation_server_default = 'no' + + """Default interface address the xend relocation server listens at. """ + xend_relocation_address_default = '' + + """Default port xend serves HTTP at. """ + xend_port_default = 8000 + + """Default port xend serves relocation at. """ + xend_relocation_port_default = 8002 + + xend_relocation_hosts_allow_default = '' + + """Default for the flag indicating whether xend should run a unix-domain + server (deprecated).""" + xend_unix_server_default = 'no' + + """Default external migration tool """ + external_migration_tool_default = '' + + """Default path the unix-domain server listens at.""" + xend_unix_path_default = '/var/lib/xend/xend-socket' + + dom0_min_mem_default = 0 + + dom0_vcpus_default = 0 + + vncpasswd_default = None + + """Default interface to listen for VNC connections on""" + xend_vnc_listen_default = '127.0.0.1' + + """Default session storage path.""" + xend_domains_path_default = '/var/lib/xend/domains' + + """Default xend management state storage.""" + xend_state_path_default = '/var/lib/xend/state' + + """Default type of backend network interfaces""" + netback_type = osdep.netback_type + + """Default script to configure a backend network interface""" + vif_script = osdep.vif_script + + def __init__(self): + self.configure() + + def _logError(self, fmt, *args): + """Logging function to log to stderr. We use this for XendOptions log + messages because they may be logged before the logger has been + configured. Other components can safely use the logger. + """ + print >>sys.stderr, "xend [ERROR]", fmt % args + + + def configure(self): + self.set_config() + XendLogging.init(self.get_config_string("logfile", + self.logfile_default), + self.get_config_string("loglevel", + self.loglevel_default)) + + def set_config(self): + raise NotImplementedError() + + def get_config_bool(self, name, val=None): + raise NotImplementedError() + + def get_config_int(self, name, val=None): + raise NotImplementedError() + + def get_config_string(self, name, val=None): + raise NotImplementedError() + + def get_xen_api_server(self): + raise NotImplementedError() + + def get_xend_http_server(self): + """Get the flag indicating whether xend should run an http server. + """ + return self.get_config_bool("xend-http-server", self.xend_http_server_default) + + def get_xend_tcp_xmlrpc_server(self): + return self.get_config_bool("xend-tcp-xmlrpc-server", + self.xend_tcp_xmlrpc_server_default) + + def get_xend_unix_xmlrpc_server(self): + return self.get_config_bool("xend-unix-xmlrpc-server", + self.xend_unix_xmlrpc_server_default) + + def get_xend_relocation_server(self): + """Get the flag indicating whether xend should run a relocation server. + """ + return self.get_config_bool("xend-relocation-server", + self.xend_relocation_server_default) + + def get_xend_port(self): + """Get the port xend listens at for its HTTP interface. + """ + return self.get_config_int('xend-port', self.xend_port_default) + + def get_xend_relocation_port(self): + """Get the port xend listens at for connection to its relocation server. 
+ """ + return self.get_config_int('xend-relocation-port', + self.xend_relocation_port_default) + + def get_xend_relocation_hosts_allow(self): + return self.get_config_string("xend-relocation-hosts-allow", + self.xend_relocation_hosts_allow_default) + + def get_xend_address(self): + """Get the address xend listens at for its HTTP port. + This defaults to the empty string which allows all hosts to connect. + If this is set to 'localhost' only the localhost will be able to connect + to the HTTP port. + """ + return self.get_config_string('xend-address', self.xend_address_default) + + def get_xend_relocation_address(self): + """Get the address xend listens at for its relocation server port. + This defaults to the empty string which allows all hosts to connect. + If this is set to 'localhost' only the localhost will be able to connect + to the relocation port. + """ + return self.get_config_string('xend-relocation-address', self.xend_relocation_address_default) + + def get_xend_unix_server(self): + """Get the flag indicating whether xend should run a unix-domain server. + """ + return self.get_config_bool("xend-unix-server", self.xend_unix_server_default) + + def get_xend_unix_path(self): + """Get the path the xend unix-domain server listens at. + """ + return self.get_config_string("xend-unix-path", self.xend_unix_path_default) + + def get_xend_domains_path(self): + """ Get the path for persistent domain configuration storage + """ + return self.get_config_string("xend-domains-path", self.xend_domains_path_default) + + def get_xend_state_path(self): + """ Get the path for persistent domain configuration storage + """ + return self.get_config_string("xend-state-path", self.xend_state_path_default) + + def get_network_script(self): + """@return the script used to alter the network configuration when + Xend starts and stops, or None if no such script is specified.""" + + s = self.get_config_string('network-script') + + if s: + result = s.split(" ") + result[0] = os.path.join(self.network_script_dir, result[0]) + return result + else: + return None + + def get_external_migration_tool(self): + """@return the name of the tool to handle virtual TPM migration.""" + return self.get_config_string('external-migration-tool', self.external_migration_tool_default) + + def get_enable_dump(self): + return self.get_config_bool('enable-dump', 'no') + + def get_vif_script(self): + return self.get_config_string('vif-script', self.vif_script) + + def get_dom0_min_mem(self): + return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default) + + def get_dom0_vcpus(self): + return self.get_config_int('dom0-cpus', self.dom0_vcpus_default) + + def get_console_limit(self): + return self.get_config_int('console-limit', 1024) + + def get_vnclisten_address(self): + return self.get_config_string('vnc-listen', self.xend_vnc_listen_default) + + def get_vncpasswd_default(self): + return self.get_config_string('vncpasswd', + self.vncpasswd_default) + +class XendOptionsFile(XendOptions): + + """Default path to the config file.""" + config_default = "/etc/xen/xend-config.sxp" + + """Environment variable used to override config_default.""" + config_var = "XEND_CONFIG" + + def set_config(self): + """If the config file exists, read it. If not, ignore it. + + The config file is a sequence of sxp forms. 
+ """ + self.config_path = os.getenv(self.config_var, self.config_default) + if os.path.exists(self.config_path): + try: + fin = file(self.config_path, 'rb') + try: + config = sxp.parse(fin) + finally: + fin.close() + if config is None: + config = ['xend-config'] + else: + config.insert(0, 'xend-config') + self.config = config + except Exception, ex: + self._logError('Reading config file %s: %s', + self.config_path, str(ex)) + raise + else: + self._logError('Config file does not exist: %s', + self.config_path) + self.config = ['xend-config'] + + def get_config_value(self, name, val=None): + """Get the value of an atomic configuration element. + + @param name: element name + @param val: default value (optional, defaults to None) + @return: value + """ + return sxp.child_value(self.config, name, val=val) + + def get_config_bool(self, name, val=None): + v = string.lower(str(self.get_config_value(name, val))) + if v in ['yes', 'y', '1', 'on', 'true', 't']: + return True + if v in ['no', 'n', '0', 'off', 'false', 'f']: + return False + raise XendError("invalid xend config %s: expected bool: %s" % (name, v)) + + def get_config_int(self, name, val=None): + v = self.get_config_value(name, val) + try: + return int(v) + except Exception: + raise XendError("invalid xend config %s: expected int: %s" % (name, v)) + + def get_config_string(self, name, val=None): + return self.get_config_value(name, val) + + def get_xen_api_server(self): + """Get the Xen-API server configuration. + """ + return self.get_config_value('xen-api-server', + self.xen_api_server_default) + +if os.uname()[0] == 'SunOS': + class XendOptionsSMF(XendOptions): + + def set_config(self): + pass + + def get_config_bool(self, name, val=None): + try: + return scf.get_bool(name) + except scf.error, e: + if e[0] == scf.SCF_ERROR_NOT_FOUND: + return val + else: + raise XendError("option %s: %s:%s" % (name, e[1], e[2])) + + def get_config_int(self, name, val=None): + try: + return scf.get_int(name) + except scf.error, e: + if e[0] == scf.SCF_ERROR_NOT_FOUND: + return val + else: + raise XendError("option %s: %s:%s" % (name, e[1], e[2])) + + def get_config_string(self, name, val=None): + try: + return scf.get_string(name) + except scf.error, e: + if e[0] == scf.SCF_ERROR_NOT_FOUND: + return val + else: + raise XendError("option %s: %s:%s" % (name, e[1], e[2])) + + def get_xen_api_server(self): + # When the new server is a supported configuration, we should + # expand this. + return [["unix"]] + +def instance(): + """Get an instance of XendOptions. + Use this instead of the constructor. 
+ """ + global inst + try: + inst + except: + if os.uname()[0] == 'SunOS': + inst = XendOptionsSMF() + else: + inst = XendOptionsFile() + return inst diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/XendProtocol.py --- a/tools/python/xen/xend/XendProtocol.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/XendProtocol.py Thu Jan 18 18:25:04 2007 +0000 @@ -24,7 +24,7 @@ from encode import * from encode import * from xen.xend import sxp -from xen.xend import XendRoot +from xen.xend import XendOptions DEBUG = 0 @@ -34,7 +34,7 @@ HTTP_NO_CONTENT = 2 HTTP_NO_CONTENT = 204 -xroot = XendRoot.instance() +xoptions = XendOptions.instance() class XendError(RuntimeError): @@ -218,7 +218,7 @@ class UnixXendClientProtocol(HttpXendCli def __init__(self, path=None): if path is None: - path = xroot.get_xend_unix_path() + path = xoptions.get_xend_unix_path() self.path = path def makeConnection(self, _): diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/XendRoot.py --- a/tools/python/xen/xend/XendRoot.py Wed Jan 17 21:45:34 2007 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,322 +0,0 @@ -#============================================================================ -# This library is free software; you can redistribute it and/or -# modify it under the terms of version 2.1 of the GNU Lesser General Public -# License as published by the Free Software Foundation. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -#============================================================================ -# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> -# Copyright (C) 2005 XenSource Ltd -#============================================================================ - -"""Xend root class. -Creates the servers and handles configuration. - -Other classes get config variables by importing this module, -using instance() to get a XendRoot instance, and then -the config functions (e.g. get_xend_port()) to get -configured values. -""" - -import os -import os.path -import string -import sys - -from xen.xend import sxp, osdep, XendLogging -from xen.xend.XendError import XendError - -class XendRoot: - """Root of the management classes.""" - - """Default path to the config file.""" - config_default = "/etc/xen/xend-config.sxp" - - """Environment variable used to override config_default.""" - config_var = "XEND_CONFIG" - - """Where network control scripts live.""" - network_script_dir = osdep.scripts_dir - - """Where block control scripts live.""" - block_script_dir = osdep.scripts_dir - - """Default path to the log file. """ - logfile_default = "/var/log/xen/xend.log" - - """Default level of information to be logged.""" - loglevel_default = 'DEBUG' - - """Default Xen-API server configuration. """ - xen_api_server_default = [['unix']] - - """Default for the flag indicating whether xend should run an http server - (deprecated).""" - xend_http_server_default = 'no' - - xend_tcp_xmlrpc_server_default = 'no' - - xend_unix_xmlrpc_server_default = 'yes' - - """Default interface address xend listens at. 
""" - xend_address_default = '' - - """Default for the flag indicating whether xend should run a relocation server.""" - xend_relocation_server_default = 'no' - - """Default interface address the xend relocation server listens at. """ - xend_relocation_address_default = '' - - """Default port xend serves HTTP at. """ - xend_port_default = '8000' - - """Default port xend serves relocation at. """ - xend_relocation_port_default = '8002' - - xend_relocation_hosts_allow_default = '' - - """Default for the flag indicating whether xend should run a unix-domain - server (deprecated).""" - xend_unix_server_default = 'no' - - """Default external migration tool """ - external_migration_tool_default = '' - - """Default path the unix-domain server listens at.""" - xend_unix_path_default = '/var/lib/xend/xend-socket' - - dom0_min_mem_default = '0' - - dom0_vcpus_default = '0' - - vncpasswd_default = None - - """Default interface to listen for VNC connections on""" - xend_vnc_listen_default = '127.0.0.1' - - """Default session storage path.""" - xend_domains_path_default = '/var/lib/xend/domains' - - """Default xend management state storage.""" - xend_state_path_default = '/var/lib/xend/state' - - components = {} - - def __init__(self): - self.config_path = None - self.config = None - self.configure() - - - def _logError(self, fmt, *args): - """Logging function to log to stderr. We use this for XendRoot log - messages because they may be logged before the logger has been - configured. Other components can safely use the logger. - """ - print >>sys.stderr, "xend [ERROR]", fmt % args - - - def configure(self): - self.set_config() - XendLogging.init(self.get_config_value("logfile", - self.logfile_default), - self.get_config_value("loglevel", - self.loglevel_default)) - - - def set_config(self): - """If the config file exists, read it. If not, ignore it. - - The config file is a sequence of sxp forms. - """ - self.config_path = os.getenv(self.config_var, self.config_default) - if os.path.exists(self.config_path): - try: - fin = file(self.config_path, 'rb') - try: - config = sxp.parse(fin) - finally: - fin.close() - if config is None: - config = ['xend-config'] - else: - config.insert(0, 'xend-config') - self.config = config - except Exception, ex: - self._logError('Reading config file %s: %s', - self.config_path, str(ex)) - raise - else: - self._logError('Config file does not exist: %s', - self.config_path) - self.config = ['xend-config'] - - def get_config(self, name=None): - """Get the configuration element with the given name, or - the whole configuration if no name is given. - - @param name: element name (optional) - @return: config or none - """ - if name is None: - val = self.config - else: - val = sxp.child(self.config, name) - return val - - def get_config_value(self, name, val=None): - """Get the value of an atomic configuration element. 
- - @param name: element name - @param val: default value (optional, defaults to None) - @return: value - """ - return sxp.child_value(self.config, name, val=val) - - def get_config_bool(self, name, val=None): - v = string.lower(str(self.get_config_value(name, val))) - if v in ['yes', 'y', '1', 'on', 'true', 't']: - return True - if v in ['no', 'n', '0', 'off', 'false', 'f']: - return False - raise XendError("invalid xend config %s: expected bool: %s" % (name, v)) - - def get_config_int(self, name, val=None): - v = self.get_config_value(name, val) - try: - return int(v) - except Exception: - raise XendError("invalid xend config %s: expected int: %s" % (name, v)) - - def get_xen_api_server(self): - """Get the Xen-API server configuration. - """ - return self.get_config_value('xen-api-server', - self.xen_api_server_default) - - def get_xend_http_server(self): - """Get the flag indicating whether xend should run an http server. - """ - return self.get_config_bool("xend-http-server", self.xend_http_server_default) - - def get_xend_tcp_xmlrpc_server(self): - return self.get_config_bool("xend-tcp-xmlrpc-server", - self.xend_tcp_xmlrpc_server_default) - - def get_xend_unix_xmlrpc_server(self): - return self.get_config_bool("xend-unix-xmlrpc-server", - self.xend_unix_xmlrpc_server_default) - - def get_xend_relocation_server(self): - """Get the flag indicating whether xend should run a relocation server. - """ - return self.get_config_bool("xend-relocation-server", - self.xend_relocation_server_default) - - def get_xend_port(self): - """Get the port xend listens at for its HTTP interface. - """ - return self.get_config_int('xend-port', self.xend_port_default) - - def get_xend_relocation_port(self): - """Get the port xend listens at for connection to its relocation server. - """ - return self.get_config_int('xend-relocation-port', - self.xend_relocation_port_default) - - def get_xend_relocation_hosts_allow(self): - return self.get_config_value("xend-relocation-hosts-allow", - self.xend_relocation_hosts_allow_default) - - def get_xend_address(self): - """Get the address xend listens at for its HTTP port. - This defaults to the empty string which allows all hosts to connect. - If this is set to 'localhost' only the localhost will be able to connect - to the HTTP port. - """ - return self.get_config_value('xend-address', self.xend_address_default) - - def get_xend_relocation_address(self): - """Get the address xend listens at for its relocation server port. - This defaults to the empty string which allows all hosts to connect. - If this is set to 'localhost' only the localhost will be able to connect - to the relocation port. - """ - return self.get_config_value('xend-relocation-address', self.xend_relocation_address_default) - - def get_xend_unix_server(self): - """Get the flag indicating whether xend should run a unix-domain server. - """ - return self.get_config_bool("xend-unix-server", self.xend_unix_server_default) - - def get_xend_unix_path(self): - """Get the path the xend unix-domain server listens at. 
- """ - return self.get_config_value("xend-unix-path", self.xend_unix_path_default) - - def get_xend_domains_path(self): - """ Get the path for persistent domain configuration storage - """ - return self.get_config_value("xend-domains-path", self.xend_domains_path_default) - - def get_xend_state_path(self): - """ Get the path for persistent domain configuration storage - """ - return self.get_config_value("xend-state-path", self.xend_state_path_default) - - def get_network_script(self): - """@return the script used to alter the network configuration when - Xend starts and stops, or None if no such script is specified.""" - - s = self.get_config_value('network-script') - - if s: - result = s.split(" ") - result[0] = os.path.join(self.network_script_dir, result[0]) - return result - else: - return None - - def get_external_migration_tool(self): - """@return the name of the tool to handle virtual TPM migration.""" - return self.get_config_value('external-migration-tool', self.external_migration_tool_default) - - def get_enable_dump(self): - return self.get_config_bool('enable-dump', 'no') - - def get_vif_script(self): - return self.get_config_value('vif-script', 'vif-bridge') - - def get_dom0_min_mem(self): - return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default) - - def get_dom0_vcpus(self): - return self.get_config_int('dom0-cpus', self.dom0_vcpus_default) - - def get_console_limit(self): - return self.get_config_int('console-limit', 1024) - - def get_vnclisten_address(self): - return self.get_config_value('vnc-listen', self.xend_vnc_listen_default) - - def get_vncpasswd_default(self): - return self.get_config_value('vncpasswd', - self.vncpasswd_default) - -def instance(): - """Get an instance of XendRoot. - Use this instead of the constructor. - """ - global inst - try: - inst - except: - inst = XendRoot() - return inst diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/balloon.py --- a/tools/python/xen/xend/balloon.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/balloon.py Thu Jan 18 18:25:04 2007 +0000 @@ -22,7 +22,7 @@ import xen.lowlevel.xc import xen.lowlevel.xc import XendDomain -import XendRoot +import XendOptions from XendLogging import log from XendError import VmError @@ -107,11 +107,11 @@ def free(need_mem): # usage, so we recheck the required alloc each time around the loop, but # track the last used value so that we don't trigger too many watches. - xroot = XendRoot.instance() + xoptions = XendOptions.instance() xc = xen.lowlevel.xc.xc() try: - dom0_min_mem = xroot.get_dom0_min_mem() * 1024 + dom0_min_mem = xoptions.get_dom0_min_mem() * 1024 retries = 0 sleep_time = SLEEP_TIME_GROWTH diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/image.py Thu Jan 18 18:25:04 2007 +0000 @@ -173,7 +173,7 @@ class ImageHandler: """Build the domain. 
Define in subclass.""" raise NotImplementedError() - def createDeviceModel(self): + def createDeviceModel(self, restore = False): """Create device model for the domain (define in subclass if needed).""" pass @@ -377,11 +377,12 @@ class HVMImageHandler(ImageHandler): # xm config file def parseDeviceModelArgs(self, imageConfig, deviceConfig): dmargs = [ 'boot', 'fda', 'fdb', 'soundhw', - 'localtime', 'serial', 'stdvga', 'isa', 'vcpus', + 'localtime', 'serial', 'stdvga', 'isa', 'acpi', 'usb', 'usbdevice', 'keymap' ] - ret = [] hvmDeviceConfig = imageConfig['hvm']['devices'] - + + ret = ['-vcpus', str(self.vm.getVCpuCount())] + for a in dmargs: v = hvmDeviceConfig.get(a) @@ -461,14 +462,14 @@ class HVMImageHandler(ImageHandler): vnclisten = imageConfig.get('vnclisten') if not(vnclisten): - vnclisten = (xen.xend.XendRoot.instance(). + vnclisten = (xen.xend.XendOptions.instance(). get_vnclisten_address()) if vnclisten: ret += ['-vnclisten', vnclisten] vncpasswd = vncpasswd_vmconfig if vncpasswd is None: - vncpasswd = (xen.xend.XendRoot.instance(). + vncpasswd = (xen.xend.XendOptions.instance(). get_vncpasswd_default()) if vncpasswd is None: raise VmError('vncpasswd is not set up in ' + @@ -478,7 +479,7 @@ class HVMImageHandler(ImageHandler): return ret - def createDeviceModel(self): + def createDeviceModel(self, restore = False): if self.pid: return # Execute device model. @@ -487,6 +488,8 @@ class HVMImageHandler(ImageHandler): args = args + ([ "-d", "%d" % self.vm.getDomid(), "-m", "%s" % (self.getRequiredInitialReservation() / 1024)]) args = args + self.dmargs + if restore: + args = args + ([ "-loadvm", "/tmp/xen.qemu-dm.%d" % self.vm.getDomid() ]) env = dict(os.environ) if self.display: env['DISPLAY'] = self.display @@ -505,12 +508,16 @@ class HVMImageHandler(ImageHandler): self.register_reboot_feature_watch() self.pid = self.vm.gatherDom(('image/device-model-pid', int)) - def destroy(self): + def destroy(self, suspend = False): self.unregister_shutdown_watch() self.unregister_reboot_feature_watch(); if self.pid: try: - os.kill(self.pid, signal.SIGKILL) + sig = signal.SIGKILL + if suspend: + log.info("use sigusr1 to signal qemu %d", self.pid) + sig = signal.SIGUSR1 + os.kill(self.pid, sig) except OSError, exn: log.exception(exn) try: diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/osdep.py --- a/tools/python/xen/xend/osdep.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/osdep.py Thu Jan 18 18:25:04 2007 +0000 @@ -33,9 +33,19 @@ _pygrub_path = { "SunOS": "/usr/lib/xen/bin/pygrub" } +_netback_type = { + "SunOS": "SUNW_mac" +} + +_vif_script = { + "SunOS": "vif-vnic" +} + def _get(var, default=None): return var.get(os.uname()[0], default) scripts_dir = _get(_scripts_dir, "/etc/xen/scripts") xend_autorestart = _get(_xend_autorestart) pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub") +netback_type = _get(_netback_type, "netfront") +vif_script = _get(_vif_script, "vif-bridge") diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/server/DevController.py Thu Jan 18 18:25:04 2007 +0000 @@ -19,7 +19,7 @@ from threading import Event from threading import Event import types -from xen.xend import sxp, XendRoot +from xen.xend import sxp, XendOptions from xen.xend.XendError import VmError from xen.xend.XendLogging import log @@ -50,7 +50,7 @@ xenbusState = { 'Closed' : 6, } -xroot = XendRoot.instance() +xoptions = XendOptions.instance() 
xenbusState.update(dict(zip(xenbusState.values(), xenbusState.keys()))) @@ -324,7 +324,7 @@ class DevController: Make sure that the migration has finished and only then return from the call. """ - tool = xroot.get_external_migration_tool() + tool = xoptions.get_external_migration_tool() if tool: log.info("Calling external migration tool for step %d" % step) fd = os.popen("%s -type %s -step %d -host %s -domname %s" % @@ -341,7 +341,7 @@ class DevController: """ Recover from device migration. The given step was the last one that was successfully executed. """ - tool = xroot.get_external_migration_tool() + tool = xoptions.get_external_migration_tool() if tool: log.info("Calling external migration tool") fd = os.popen("%s -type %s -step %d -host %s -domname %s -recover" % diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/server/SrvRoot.py --- a/tools/python/xen/xend/server/SrvRoot.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/server/SrvRoot.py Thu Jan 18 18:25:04 2007 +0000 @@ -25,7 +25,7 @@ class SrvRoot(SrvDir): """Server sub-components. Each entry is (name, class), where 'name' is the entry name and 'class' is the name of its class. """ - #todo Get this list from the XendRoot config. + #todo Get this list from the XendOptions config. subdirs = [ ('node', 'SrvNode' ), ('domain', 'SrvDomainDir' ), diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/server/SrvServer.py --- a/tools/python/xen/xend/server/SrvServer.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/server/SrvServer.py Thu Jan 18 18:25:04 2007 +0000 @@ -48,7 +48,7 @@ from threading import Thread from xen.web.httpserver import HttpServer, UnixHttpServer -from xen.xend import XendNode, XendRoot, XendAPI +from xen.xend import XendNode, XendOptions, XendAPI from xen.xend import Vifctl from xen.xend.XendLogging import log from xen.xend.XendClient import XEN_API_SOCKET @@ -57,7 +57,7 @@ from SrvRoot import SrvRoot from SrvRoot import SrvRoot from XMLRPCServer import XMLRPCServer -xroot = XendRoot.instance() +xoptions = XendOptions.instance() class XendServers: @@ -65,6 +65,7 @@ class XendServers: def __init__(self, root): self.servers = [] self.root = root + self.running = False self.cleaningUp = False self.reloadingConfig = False @@ -79,6 +80,7 @@ class XendServers: server.shutdown() except: pass + self.running = False def reloadConfig(self, signum = 0, frame = None): log.debug("SrvServer.reloadConfig()") @@ -107,12 +109,11 @@ class XendServers: if server.ready: continue - thread = Thread(target=server.run, name=server.__class__.__name__) - if isinstance(server, HttpServer): - thread.setDaemon(True) + thread = Thread(target=server.run, + name=server.__class__.__name__) + thread.setDaemon(True) thread.start() threads.append(thread) - # check for when all threads have initialized themselves and then # close the status pipe @@ -143,47 +144,32 @@ class XendServers: status.close() status = None - # Interruptible Thread.join - Python Bug #1167930 - # Replaces: for t in threads: t.join() - # Reason: The above will cause python signal handlers to be - # blocked so we're not able to catch SIGTERM in any - # way for cleanup - runningThreads = threads - while len(runningThreads) > 0: - try: - for t in threads: - t.join(1.0) - runningThreads = [t for t in threads - if t.isAlive() and not t.isDaemon()] - if self.cleaningUp and len(runningThreads) > 0: - log.debug("Waiting for %s." 
% - [x.getName() for x in runningThreads]) - except: - pass - + # loop to keep main thread alive until it receives a SIGTERM + self.running = True + while self.running: + time.sleep(100000000) + if self.reloadingConfig: log.info("Restarting all XML-RPC and Xen-API servers...") self.cleaningUp = False self.reloadingConfig = False - xroot.set_config() - new_servers = [x for x in self.servers - if isinstance(x, HttpServer)] - self.servers = new_servers + xoptions.set_config() + self.servers = [] _loadConfig(self, self.root, True) else: break def _loadConfig(servers, root, reload): - if not reload and xroot.get_xend_http_server(): + if xoptions.get_xend_http_server(): servers.add(HttpServer(root, - xroot.get_xend_address(), - xroot.get_xend_port())) - if not reload and xroot.get_xend_unix_server(): - path = xroot.get_xend_unix_path() + xoptions.get_xend_address(), + xoptions.get_xend_port())) + if xoptions.get_xend_unix_server(): + path = xoptions.get_xend_unix_path() log.info('unix path=' + path) servers.add(UnixHttpServer(root, path)) - api_cfg = xroot.get_xen_api_server() + api_cfg = xoptions.get_xen_api_server() if api_cfg: try: addrs = [(str(x[0]).split(':'), @@ -218,10 +204,10 @@ def _loadConfig(servers, root, reload): except TypeError, exn: log.error('Xen-API server configuration %s is invalid.', api_cfg) - if xroot.get_xend_tcp_xmlrpc_server(): + if xoptions.get_xend_tcp_xmlrpc_server(): servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False, True)) - if xroot.get_xend_unix_xmlrpc_server(): + if xoptions.get_xend_unix_xmlrpc_server(): servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False)) diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/server/XMLRPCServer.py --- a/tools/python/xen/xend/server/XMLRPCServer.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/server/XMLRPCServer.py Thu Jan 18 18:25:04 2007 +0000 @@ -179,21 +179,24 @@ class XMLRPCServer: # Custom runloop so we can cleanup when exiting. # ----------------------------------------------------------------- try: - self.server.socket.settimeout(1.0) while self.running: self.server.handle_request() finally: - self.cleanup() + self.shutdown() def cleanup(self): - log.debug("XMLRPCServer.cleanup()") - try: - self.server.socket.close() + log.debug('XMLRPCServer.cleanup()') + try: + if hasattr(self, 'server'): + # shutdown socket explicitly to allow reuse + self.server.socket.shutdown(socket.SHUT_RDWR) + self.server.socket.close() except Exception, exn: log.exception(exn) pass def shutdown(self): self.running = False - self.ready = False - + if self.ready: + self.ready = False + self.cleanup() diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/server/netif.py --- a/tools/python/xen/xend/server/netif.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/server/netif.py Thu Jan 18 18:25:04 2007 +0000 @@ -24,10 +24,10 @@ import random import random import re -from xen.xend import XendRoot +from xen.xend import XendOptions from xen.xend.server.DevController import DevController -xroot = XendRoot.instance() +xoptions = XendOptions.instance() def randomMAC(): """Generate a random MAC address. 
@@ -138,8 +138,8 @@ class NetifController(DevController): def getDeviceDetails(self, config): """@see DevController.getDeviceDetails""" - script = os.path.join(xroot.network_script_dir, - config.get('script', xroot.get_vif_script())) + script = os.path.join(xoptions.network_script_dir, + config.get('script', xoptions.get_vif_script())) typ = config.get('type') bridge = config.get('bridge') mac = config.get('mac') @@ -150,9 +150,8 @@ class NetifController(DevController): devid = self.allocateDeviceID() - # The default type is 'netfront'. if not typ: - typ = 'netfront' + typ = xoptions.netback_type if not mac: mac = randomMAC() @@ -190,7 +189,7 @@ class NetifController(DevController): (script, ip, bridge, mac, typ, vifname, rate, uuid) = devinfo if script: - network_script_dir = xroot.network_script_dir + os.sep + network_script_dir = xoptions.network_script_dir + os.sep result['script'] = script.replace(network_script_dir, "") if ip: result['ip'] = ip diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/server/relocate.py --- a/tools/python/xen/xend/server/relocate.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/server/relocate.py Thu Jan 18 18:25:04 2007 +0000 @@ -24,7 +24,7 @@ from xen.web import protocol, tcp, unix from xen.xend import sxp from xen.xend import XendDomain -from xen.xend import XendRoot +from xen.xend import XendOptions from xen.xend.XendError import XendError from xen.xend.XendLogging import log @@ -114,15 +114,15 @@ class RelocationProtocol(protocol.Protoc def listenRelocation(): - xroot = XendRoot.instance() - if xroot.get_xend_unix_server(): + xoptions = XendOptions.instance() + if xoptions.get_xend_unix_server(): path = '/var/lib/xend/relocation-socket' unix.UnixListener(path, RelocationProtocol) - if xroot.get_xend_relocation_server(): - port = xroot.get_xend_relocation_port() - interface = xroot.get_xend_relocation_address() + if xoptions.get_xend_relocation_server(): + port = xoptions.get_xend_relocation_port() + interface = xoptions.get_xend_relocation_address() - hosts_allow = xroot.get_xend_relocation_hosts_allow() + hosts_allow = xoptions.get_xend_relocation_hosts_allow() if hosts_allow == '': hosts_allow = None else: diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/server/tests/test_controllers.py --- a/tools/python/xen/xend/server/tests/test_controllers.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/server/tests/test_controllers.py Thu Jan 18 18:25:04 2007 +0000 @@ -2,9 +2,9 @@ import re import re import unittest -import xen.xend.XendRoot +import xen.xend.XendOptions -xen.xend.XendRoot.XendRoot.config_default = '/dev/null' +xen.xend.XendOptions.XendOptions.config_default = '/dev/null' from xen.xend.server import netif @@ -13,7 +13,7 @@ FAKE_DEVID = 63 FAKE_DEVID = 63 -xroot = xen.xend.XendRoot.instance() +xoptions = xen.xend.XendOptions.instance() class test_controllers(unittest.TestCase): @@ -36,8 +36,8 @@ class test_controllers(unittest.TestCase self.assertEqual(backdets['handle'], str(FAKE_DEVID)) self.assertEqual(backdets['script'], - os.path.join(xroot.network_script_dir, - xroot.get_vif_script())) + os.path.join(xoptions.network_script_dir, + xoptions.get_vif_script())) self.assertValidMac(backdets['mac'], expectedMac) self.assertEqual(frontdets['handle'], str(FAKE_DEVID)) diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/server/tpmif.py --- a/tools/python/xen/xend/server/tpmif.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/server/tpmif.py Thu Jan 18 18:25:04 2007 +0000 @@ -20,7 +20,7 
@@ """Support for virtual TPM interfaces.""" -from xen.xend import XendRoot +from xen.xend import XendOptions from xen.xend.XendLogging import log from xen.xend.XendError import XendError from xen.xend.XendConstants import DEV_MIGRATE_TEST, VTPM_DELETE_SCRIPT @@ -29,7 +29,7 @@ import os import os import re -xroot = XendRoot.instance() +xoptions = XendOptions.instance() def destroy_vtpmstate(name): if os.path.exists(VTPM_DELETE_SCRIPT): @@ -88,7 +88,7 @@ class TPMifController(DevController): def migrate(self, deviceConfig, network, dst, step, domName): """@see DevContoller.migrate""" if network: - tool = xroot.get_external_migration_tool() + tool = xoptions.get_external_migration_tool() if tool != '': log.info("Request to network-migrate device to %s. step=%d.", dst, step) @@ -116,7 +116,7 @@ class TPMifController(DevController): def recover_migrate(self, deviceConfig, network, dst, step, domName): """@see DevContoller.recover_migrate""" if network: - tool = xroot.get_external_migration_tool() + tool = xoptions.get_external_migration_tool() if tool != '': log.info("Request to recover network-migrated device. last good step=%d.", step) diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xend/server/vfbif.py --- a/tools/python/xen/xend/server/vfbif.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xend/server/vfbif.py Thu Jan 18 18:25:04 2007 +0000 @@ -52,7 +52,7 @@ class VfbifController(DevController): if config.has_key("vncpasswd"): passwd = config["vncpasswd"] else: - passwd = xen.xend.XendRoot.instance().get_vncpasswd_default() + passwd = xen.xend.XendOptions.instance().get_vncpasswd_default() if passwd: self.vm.storeVm("vncpasswd", passwd) log.debug("Stored a VNC password for vfb access") @@ -66,7 +66,7 @@ class VfbifController(DevController): elif config.has_key("vncdisplay"): args += ["--vncport", "%d" % (5900 + int(config["vncdisplay"]))] vnclisten = config.get("vnclisten", - xen.xend.XendRoot.instance().get_vnclisten_address()) + xen.xend.XendOptions.instance().get_vnclisten_address()) args += [ "--listen", vnclisten ] spawn_detached(args[0], args + std_args, os.environ) elif t == "sdl": diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xm/create.py Thu Jan 18 18:25:04 2007 +0000 @@ -189,6 +189,10 @@ gopts.var('vcpus', val='VCPUS', gopts.var('vcpus', val='VCPUS', fn=set_int, default=1, use="# of Virtual CPUS in domain.") + +gopts.var('vcpu_avail', val='VCPUS', + fn=set_long, default=None, + use="Bitmask for virtual CPUs to make available immediately.") gopts.var('cpu_cap', val='CAP', fn=set_int, default=None, @@ -740,7 +744,7 @@ def make_config(vals): map(add_conf, ['name', 'memory', 'maxmem', 'shadow_memory', 'restart', 'on_poweroff', - 'on_reboot', 'on_crash', 'vcpus', 'features', + 'on_reboot', 'on_crash', 'vcpus', 'vcpu_avail', 'features', 'on_xend_start', 'on_xend_stop']) if vals.uuid is not None: diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xm/main.py Thu Jan 18 18:25:04 2007 +0000 @@ -693,12 +693,15 @@ def parse_doms_info(info): up_time = time.time() - start_time return { - 'domid' : get_info('domid', str, ''), - 'name' : get_info('name', str, '??'), + 'domid' : get_info('domid', str, ''), + 'name' : get_info('name', str, '??'), 'mem' : get_info('memory_dynamic_min', int, 0), - 'vcpus' : get_info('online_vcpus', int, 0), - 'state' : get_info('state', str, 
''), - 'cpu_time' : get_info('cpu_time', float, 0), + 'state' : get_info('state', str, ''), + 'cpu_time' : get_info('cpu_time', float, 0.0), + # VCPUs is the number online when the VM is up, or the number + # configured otherwise. + 'vcpus' : get_info('online_vcpus', int, + get_info('vcpus', int, 0)), 'up_time' : up_time, 'seclabel' : security.get_security_printlabel(info), } diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xm/opts.py --- a/tools/python/xen/xm/opts.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xm/opts.py Thu Jan 18 18:25:04 2007 +0000 @@ -571,6 +571,14 @@ def set_int(opt, k, v): opt.opts.err('Invalid value: ' + str(v)) opt.set(v) +def set_long(opt, k, v): + """Set an option to a long integer value.""" + try: + v = long(v) + except: + opt.opts.err('Invalid value: ' + str(v)) + opt.set(v) + def set_float(opt, k, v): """Set an option to a float value.""" try: diff -r 58637a0a7c7e -r e2ca6bd16046 tools/python/xen/xm/tests/test_create.py --- a/tools/python/xen/xm/tests/test_create.py Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/python/xen/xm/tests/test_create.py Thu Jan 18 18:25:04 2007 +0000 @@ -3,9 +3,9 @@ import tempfile import tempfile import unittest -import xen.xend.XendRoot - -xen.xend.XendRoot.XendRoot.config_default = '/dev/null' +import xen.xend.XendOptions + +xen.xend.XendOptions.XendOptions.config_default = '/dev/null' import xen.xm.create diff -r 58637a0a7c7e -r e2ca6bd16046 tools/xcutils/xc_restore.c --- a/tools/xcutils/xc_restore.c Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/xcutils/xc_restore.c Thu Jan 18 18:25:04 2007 +0000 @@ -19,12 +19,13 @@ main(int argc, char **argv) main(int argc, char **argv) { unsigned int xc_fd, io_fd, domid, nr_pfns, store_evtchn, console_evtchn; + unsigned int hvm, pae, apic; int ret; unsigned long store_mfn, console_mfn; - if (argc != 6) + if (argc != 9) errx(1, - "usage: %s iofd domid nr_pfns store_evtchn console_evtchn", + "usage: %s iofd domid nr_pfns store_evtchn console_evtchn hvm pae apic", argv[0]); xc_fd = xc_interface_open(); @@ -36,9 +37,19 @@ main(int argc, char **argv) nr_pfns = atoi(argv[3]); store_evtchn = atoi(argv[4]); console_evtchn = atoi(argv[5]); + hvm = atoi(argv[6]); + pae = atoi(argv[7]); + apic = atoi(argv[8]); - ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn, - &store_mfn, console_evtchn, &console_mfn); + if (hvm) { + /* pass the memsize to xc_hvm_restore to find the store_mfn */ + store_mfn = hvm; + ret = xc_hvm_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn, + &store_mfn, console_evtchn, &console_mfn, pae, apic); + } else + ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn, + &store_mfn, console_evtchn, &console_mfn); + if (ret == 0) { printf("store-mfn %li\n", store_mfn); printf("console-mfn %li\n", console_mfn); diff -r 58637a0a7c7e -r e2ca6bd16046 tools/xcutils/xc_save.c --- a/tools/xcutils/xc_save.c Wed Jan 17 21:45:34 2007 -0700 +++ b/tools/xcutils/xc_save.c Thu Jan 18 18:25:04 2007 +0000 @@ -51,7 +51,10 @@ main(int argc, char **argv) max_f = atoi(argv[4]); flags = atoi(argv[5]); - ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend); + if (flags & XCFLAGS_HVM) + ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend); + else + ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend); xc_interface_close(xc_fd); diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/domain.c Thu Jan 18 18:25:04 2007 +0000 @@ -573,6 +573,7 @@ 
int arch_set_info_guest( else { hvm_load_cpu_guest_regs(v, &v->arch.guest_context.user_regs); + hvm_load_cpu_context(v, &v->arch.guest_context.hvmcpu_ctxt); } if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/domctl.c Thu Jan 18 18:25:04 2007 +0000 @@ -322,8 +322,10 @@ void arch_get_info_guest(struct vcpu *v, if ( is_hvm_vcpu(v) ) { - if ( !IS_COMPAT(v->domain) ) + if ( !IS_COMPAT(v->domain) ) { hvm_store_cpu_guest_regs(v, &c.nat->user_regs, c.nat->ctrlreg); + hvm_save_cpu_context(v, &c.nat->hvmcpu_ctxt); + } #ifdef CONFIG_COMPAT else { diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/hvm/hvm.c Thu Jan 18 18:25:04 2007 +0000 @@ -149,11 +149,19 @@ int hvm_domain_initialise(struct domain void hvm_domain_destroy(struct domain *d) { + HVMStateEntry *se, *dse; pit_deinit(d); rtc_deinit(d); pmtimer_deinit(d); hpet_deinit(d); + se = d->arch.hvm_domain.first_se; + while (se) { + dse = se; + se = se->next; + xfree(dse); + } + if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( (void *)d->arch.hvm_domain.shared_page_va); @@ -189,6 +197,9 @@ int hvm_vcpu_initialise(struct vcpu *v) rtc_init(v, RTC_PORT(0), RTC_IRQ); pmtimer_init(v, ACPI_PM_TMR_BLK_ADDRESS); hpet_init(v); + + /* init hvm sharepage */ + shpage_init(v->domain, get_sp(v->domain)); /* Init guest TSC to start from zero. */ hvm_set_guest_time(v, 0); diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/hvm/i8254.c --- a/xen/arch/x86/hvm/i8254.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/hvm/i8254.c Thu Jan 18 18:25:04 2007 +0000 @@ -207,11 +207,11 @@ static inline void pit_load_count(PITCha switch (s->mode) { case 2: /* create periodic time */ - create_periodic_time(&s->pt, period, 0, 0, pit_time_fired, s); + create_periodic_time(current, &s->pt, period, 0, 0, pit_time_fired, s); break; case 1: /* create one shot time */ - create_periodic_time(&s->pt, period, 0, 1, pit_time_fired, s); + create_periodic_time(current, &s->pt, period, 0, 1, pit_time_fired, s); #ifdef DEBUG_PIT printk("HVM_PIT: create one shot time.\n"); #endif @@ -356,6 +356,154 @@ void pit_stop_channel0_irq(PITState * pi destroy_periodic_time(&s->pt); } +#ifdef HVM_DEBUG_SUSPEND +static void pit_info(PITState *pit) +{ + PITChannelState *s; + int i; + + for(i = 0; i < 3; i++) { + printk("*****pit channel %d's state:*****\n", i); + s = &pit->channels[i]; + printk("pit 0x%x.\n", s->count); + printk("pit 0x%x.\n", s->latched_count); + printk("pit 0x%x.\n", s->count_latched); + printk("pit 0x%x.\n", s->status_latched); + printk("pit 0x%x.\n", s->status); + printk("pit 0x%x.\n", s->read_state); + printk("pit 0x%x.\n", s->write_state); + printk("pit 0x%x.\n", s->write_latch); + printk("pit 0x%x.\n", s->rw_mode); + printk("pit 0x%x.\n", s->mode); + printk("pit 0x%x.\n", s->bcd); + printk("pit 0x%x.\n", s->gate); + printk("pit %"PRId64"\n", s->count_load_time); + + if (s->pt) { + struct periodic_time *pt = s->pt; + printk("pit channel %d has a periodic timer:\n", i); + printk("pt %d.\n", pt->enabled); + printk("pt %d.\n", pt->one_shot); + printk("pt %d.\n", pt->irq); + printk("pt %d.\n", pt->first_injected); + + printk("pt %d.\n", pt->pending_intr_nr); + printk("pt %d.\n", pt->period); + printk("pt %"PRId64"\n", pt->period_cycles); + printk("pt %"PRId64"\n", pt->last_plt_gtime); + } + } + +} +#else +static void pit_info(PITState *pit) +{ +} 
+#endif + +static void pit_save(hvm_domain_context_t *h, void *opaque) +{ + struct domain *d = opaque; + PITState *pit = &d->arch.hvm_domain.pl_time.vpit; + PITChannelState *s; + struct periodic_time *pt; + int i, pti = -1; + + pit_info(pit); + + for(i = 0; i < 3; i++) { + s = &pit->channels[i]; + hvm_put_32u(h, s->count); + hvm_put_16u(h, s->latched_count); + hvm_put_8u(h, s->count_latched); + hvm_put_8u(h, s->status_latched); + hvm_put_8u(h, s->status); + hvm_put_8u(h, s->read_state); + hvm_put_8u(h, s->write_state); + hvm_put_8u(h, s->write_latch); + hvm_put_8u(h, s->rw_mode); + hvm_put_8u(h, s->mode); + hvm_put_8u(h, s->bcd); + hvm_put_8u(h, s->gate); + hvm_put_64u(h, s->count_load_time); + + if (s->pt.enabled && pti == -1) + pti = i; + } + + pt = &pit->channels[pti].pt; + + /* save the vcpu for pt */ + hvm_put_32u(h, pt->vcpu->vcpu_id); + + /* save guest time */ + hvm_put_8u(h, pti); + hvm_put_32u(h, pt->pending_intr_nr); + hvm_put_64u(h, pt->last_plt_gtime); + +} + +static int pit_load(hvm_domain_context_t *h, void *opaque, int version_id) +{ + struct domain *d = opaque; + PITState *pit = &d->arch.hvm_domain.pl_time.vpit; + PITChannelState *s; + int i, pti, vcpu_id; + u32 period; + + if (version_id != 1) + return -EINVAL; + + for(i = 0; i < 3; i++) { + s = &pit->channels[i]; + s->count = hvm_get_32u(h); + s->latched_count = hvm_get_16u(h); + s->count_latched = hvm_get_8u(h); + s->status_latched = hvm_get_8u(h); + s->status = hvm_get_8u(h); + s->read_state = hvm_get_8u(h); + s->write_state = hvm_get_8u(h); + s->write_latch = hvm_get_8u(h); + s->rw_mode = hvm_get_8u(h); + s->mode = hvm_get_8u(h); + s->bcd = hvm_get_8u(h); + s->gate = hvm_get_8u(h); + s->count_load_time = hvm_get_64u(h); + } + + vcpu_id = hvm_get_32u(h); + + pti = hvm_get_8u(h); + if ( pti < 0 || pti > 2) { + printk("pit load get a wrong channel %d when HVM resume.\n", pti); + return -EINVAL; + } + + s = &pit->channels[pti]; + period = DIV_ROUND((s->count * 1000000000ULL), PIT_FREQ); + + printk("recreate periodic timer %d in mode %d, freq=%d.\n", pti, s->mode, period); + switch (s->mode) { + case 2: + /* create periodic time */ + create_periodic_time(d->vcpu[vcpu_id], &s->pt, period, 0, 0, pit_time_fired, s); + break; + case 1: + /* create one shot time */ + create_periodic_time(d->vcpu[vcpu_id], &s->pt, period, 0, 1, pit_time_fired, s); + break; + default: + printk("pit mode %"PRId8" should not use periodic timer!\n", s->mode); + return -EINVAL; + } + s->pt.pending_intr_nr = hvm_get_32u(h); + s->pt.last_plt_gtime = hvm_get_64u(h); + + pit_info(pit); + + return 0; +} + static void pit_reset(void *opaque) { PITState *pit = opaque; @@ -383,6 +531,7 @@ void pit_init(struct vcpu *v, unsigned l s++; s->pt.vcpu = v; s++; s->pt.vcpu = v; + hvm_register_savevm(v->domain, "xen_hvm_i8254", PIT_BASE, 1, pit_save, pit_load, v->domain); register_portio_handler(v->domain, PIT_BASE, 4, handle_pit_io); /* register the speaker port */ register_portio_handler(v->domain, 0x61, 1, handle_speaker_io); diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/hvm/intercept.c Thu Jan 18 18:25:04 2007 +0000 @@ -29,6 +29,8 @@ #include <asm/current.h> #include <io_ports.h> #include <xen/event.h> +#include <xen/compile.h> +#include <public/version.h> extern struct hvm_mmio_handler hpet_mmio_handler; @@ -155,6 +157,307 @@ static inline void hvm_mmio_access(struc } } +/* save/restore support */ +#define HVM_FILE_MAGIC 0x54381286 +#define HVM_FILE_VERSION 
0x00000001 + +int hvm_register_savevm(struct domain *d, + const char *idstr, + int instance_id, + int version_id, + SaveStateHandler *save_state, + LoadStateHandler *load_state, + void *opaque) +{ + HVMStateEntry *se, **pse; + + if ( (se = xmalloc(struct HVMStateEntry)) == NULL ){ + printk("allocat hvmstate entry fail.\n"); + return -1; + } + + strncpy(se->idstr, idstr, HVM_SE_IDSTR_LEN); + + se->instance_id = instance_id; + se->version_id = version_id; + se->save_state = save_state; + se->load_state = load_state; + se->opaque = opaque; + se->next = NULL; + + /* add at the end of list */ + pse = &d->arch.hvm_domain.first_se; + while (*pse != NULL) + pse = &(*pse)->next; + *pse = se; + return 0; +} + +int hvm_save(struct vcpu *v, hvm_domain_context_t *h) +{ + uint32_t len, len_pos, cur_pos; + uint32_t eax, ebx, ecx, edx; + HVMStateEntry *se; + char *chgset; + + if (!is_hvm_vcpu(v)) { + printk("hvm_save only for hvm guest!\n"); + return -1; + } + + memset(h, 0, sizeof(hvm_domain_context_t)); + hvm_put_32u(h, HVM_FILE_MAGIC); + hvm_put_32u(h, HVM_FILE_VERSION); + + /* save xen changeset */ + chgset = strrchr(XEN_CHANGESET, ' '); + if ( chgset ) + chgset++; + else + chgset = XEN_CHANGESET; + + len = strlen(chgset); + hvm_put_8u(h, len); + hvm_put_buffer(h, chgset, len); + + /* save cpuid */ + cpuid(1, &eax, &ebx, &ecx, &edx); + hvm_put_32u(h, eax); + + for(se = v->domain->arch.hvm_domain.first_se; se != NULL; se = se->next) { + /* ID string */ + len = strnlen(se->idstr, HVM_SE_IDSTR_LEN); + hvm_put_8u(h, len); + hvm_put_buffer(h, se->idstr, len); + + hvm_put_32u(h, se->instance_id); + hvm_put_32u(h, se->version_id); + + /* record size */ + len_pos = hvm_ctxt_tell(h); + hvm_put_32u(h, 0); + + se->save_state(h, se->opaque); + + cur_pos = hvm_ctxt_tell(h); + len = cur_pos - len_pos - 4; + hvm_ctxt_seek(h, len_pos); + hvm_put_32u(h, len); + hvm_ctxt_seek(h, cur_pos); + + } + + h->size = hvm_ctxt_tell(h); + hvm_ctxt_seek(h, 0); + + if (h->size >= HVM_CTXT_SIZE) { + printk("hvm_domain_context overflow when hvm_save! need %"PRId32" bytes for use.\n", h->size); + return -1; + } + + return 0; + +} + +static HVMStateEntry *find_se(struct domain *d, const char *idstr, int instance_id) +{ + HVMStateEntry *se; + + for(se = d->arch.hvm_domain.first_se; se != NULL; se = se->next) { + if (!strncmp(se->idstr, idstr, HVM_SE_IDSTR_LEN) && + instance_id == se->instance_id){ + return se; + } + } + return NULL; +} + +int hvm_load(struct vcpu *v, hvm_domain_context_t *h) +{ + uint32_t len, rec_len, rec_pos, magic, instance_id, version_id; + uint32_t eax, ebx, ecx, edx; + HVMStateEntry *se; + char idstr[HVM_SE_IDSTR_LEN]; + xen_changeset_info_t chgset; + char *cur_chgset; + int ret; + + if (!is_hvm_vcpu(v)) { + printk("hvm_load only for hvm guest!\n"); + return -1; + } + + if (h->size >= HVM_CTXT_SIZE) { + printk("hvm_load fail! seems hvm_domain_context overflow when hvm_save! 
need %"PRId32" bytes.\n", h->size); + return -1; + } + + hvm_ctxt_seek(h, 0); + + magic = hvm_get_32u(h); + if (magic != HVM_FILE_MAGIC) { + printk("HVM restore magic dismatch!\n"); + return -1; + } + + magic = hvm_get_32u(h); + if (magic != HVM_FILE_VERSION) { + printk("HVM restore version dismatch!\n"); + return -1; + } + + /* check xen change set */ + cur_chgset = strrchr(XEN_CHANGESET, ' '); + if ( cur_chgset ) + cur_chgset++; + else + cur_chgset = XEN_CHANGESET; + + len = hvm_get_8u(h); + if (len > 20) { /*typical length is 18 -- "revision number:changeset id" */ + printk("wrong change set length %d when hvm restore!\n", len); + return -1; + } + + hvm_get_buffer(h, chgset, len); + chgset[len] = '\0'; + if (strncmp(cur_chgset, chgset, len + 1)) + printk("warnings: try to restore hvm guest(%s) on a different changeset %s.\n", + chgset, cur_chgset); + + + if ( !strcmp(cur_chgset, "unavailable") ) + printk("warnings: try to restore hvm guest when changeset is unavailable.\n"); + + + /* check cpuid */ + cpuid(1, &eax, &ebx, &ecx, &edx); + ebx = hvm_get_32u(h); + /*TODO: need difine how big difference is acceptable */ + if (ebx != eax) + printk("warnings: try to restore hvm guest(0x%"PRIx32") " + "on a different type processor(0x%"PRIx32").\n", + ebx, + eax); + + while(1) { + if (hvm_ctxt_end(h)) { + break; + } + + /* ID string */ + len = hvm_get_8u(h); + if (len > HVM_SE_IDSTR_LEN) { + printk("wrong HVM save entry idstr len %d!", len); + return -1; + } + + hvm_get_buffer(h, idstr, len); + idstr[len] = '\0'; + + instance_id = hvm_get_32u(h); + version_id = hvm_get_32u(h); + + rec_len = hvm_get_32u(h); + rec_pos = hvm_ctxt_tell(h); + + se = find_se(v->domain, idstr, instance_id); + if (se == NULL) { + printk("warnings: hvm load can't find device %s's instance %d!\n", + idstr, instance_id); + } else { + ret = se->load_state(h, se->opaque, version_id); + if (ret < 0) + printk("warnings: loading state fail for device %s instance %d!\n", + idstr, instance_id); + } + + + /* make sure to jump end of record */ + if ( hvm_ctxt_tell(h) - rec_pos != rec_len) { + printk("wrong hvm record size, maybe some dismatch between save&restore handler!\n"); + } + hvm_ctxt_seek(h, rec_pos + rec_len); + } + + return 0; +} + +int arch_gethvm_ctxt( + struct vcpu *v, struct hvm_domain_context *c) +{ + if ( !is_hvm_vcpu(v) ) + return -1; + + return hvm_save(v, c); + +} + +int arch_sethvm_ctxt( + struct vcpu *v, struct hvm_domain_context *c) +{ + return hvm_load(v, c); +} + +#ifdef HVM_DEBUG_SUSPEND +static void shpage_info(shared_iopage_t *sh) +{ + + vcpu_iodata_t *p = &sh->vcpu_iodata[0]; + ioreq_t *req = &p->vp_ioreq; + printk("*****sharepage_info******!\n"); + printk("vp_eport=%d\n", p->vp_eport); + printk("io packet: " + "state:%x, pvalid: %x, dir:%x, port: %"PRIx64", " + "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n", + req->state, req->data_is_ptr, req->dir, req->addr, + req->data, req->count, req->size); +} +#else +static void shpage_info(shared_iopage_t *sh) +{ +} +#endif + +static void shpage_save(hvm_domain_context_t *h, void *opaque) +{ + /* XXX:no action required for shpage save/restore, since it's in guest memory + * keep it for debug purpose only */ + +#if 0 + struct shared_iopage *s = opaque; + /* XXX:smp */ + struct ioreq *req = &s->vcpu_iodata[0].vp_ioreq; + + shpage_info(s); + + hvm_put_buffer(h, (char*)req, sizeof(struct ioreq)); +#endif +} + +static int shpage_load(hvm_domain_context_t *h, void *opaque, int version_id) +{ + struct shared_iopage *s = opaque; +#if 0 + /* XXX:smp */ + struct 
ioreq *req = &s->vcpu_iodata[0].vp_ioreq; + + if (version_id != 1) + return -EINVAL; + + hvm_get_buffer(h, (char*)req, sizeof(struct ioreq)); + + +#endif + shpage_info(s); + return 0; +} + +void shpage_init(struct domain *d, shared_iopage_t *sp) +{ + hvm_register_savevm(d, "xen_hvm_shpage", 0x10, 1, shpage_save, shpage_load, sp); +} + int hvm_buffered_io_intercept(ioreq_t *p) { struct vcpu *v = current; diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/hvm/rtc.c --- a/xen/arch/x86/hvm/rtc.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/hvm/rtc.c Thu Jan 18 18:25:04 2007 +0000 @@ -62,7 +62,7 @@ static void rtc_timer_update(RTCState *s #ifdef DEBUG_RTC printk("HVM_RTC: period = %uns\n", period); #endif - create_periodic_time(&s->pt, period, RTC_IRQ, 0, rtc_periodic_cb, s); + create_periodic_time(current, &s->pt, period, RTC_IRQ, 0, rtc_periodic_cb, s); } else destroy_periodic_time(&s->pt); diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/hvm/vioapic.c --- a/xen/arch/x86/hvm/vioapic.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/hvm/vioapic.c Thu Jan 18 18:25:04 2007 +0000 @@ -473,10 +473,142 @@ void vioapic_update_EOI(struct domain *d spin_unlock(&hvm_irq->lock); } +#ifdef HVM_DEBUG_SUSPEND +static void ioapic_info(struct vioapic *s) +{ + int i; + printk("*****ioapic state:*****\n"); + printk("ioapic 0x%x.\n", s->ioregsel); + printk("ioapic 0x%x.\n", s->id); + printk("ioapic 0x%lx.\n", s->base_address); + for (i = 0; i < VIOAPIC_NUM_PINS; i++) { + printk("ioapic redirtbl[%d]:0x%"PRIx64"\n", i, s->redirtbl[i].bits); + } + +} +static void hvmirq_info(struct hvm_irq *hvm_irq) +{ + int i; + printk("*****hvmirq state:*****\n"); + for (i = 0; i < BITS_TO_LONGS(32*4); i++) + printk("hvmirq pci_intx[%d]:0x%lx.\n", i, hvm_irq->pci_intx[i]); + + for (i = 0; i < BITS_TO_LONGS(16); i++) + printk("hvmirq isa_irq[%d]:0x%lx.\n", i, hvm_irq->isa_irq[i]); + + for (i = 0; i < BITS_TO_LONGS(1); i++) + printk("hvmirq callback_irq_wire[%d]:0x%lx.\n", i, hvm_irq->callback_irq_wire[i]); + + printk("hvmirq callback_via_type:0x%x.\n", hvm_irq->callback_via_type); + printk("hvmirq callback_via:0x%x.\n", hvm_irq->callback_via.gsi); + + + for (i = 0; i < 4; i++) + printk("hvmirq pci_link_route[%d]:0x%"PRIx8".\n", i, hvm_irq->pci_link_route[i]); + + for (i = 0; i < 4; i++) + printk("hvmirq pci_link_assert_count[%d]:0x%"PRIx8".\n", i, hvm_irq->pci_link_assert_count[i]); + + for (i = 0; i < VIOAPIC_NUM_PINS; i++) + printk("hvmirq gsi_assert_count[%d]:0x%"PRIx8".\n", i, hvm_irq->gsi_assert_count[i]); + + printk("hvmirq round_robin_prev_vcpu:0x%"PRIx8".\n", hvm_irq->round_robin_prev_vcpu); +} +#else +static void ioapic_info(struct vioapic *s) +{ +} +static void hvmirq_info(struct hvm_irq *hvm_irq) +{ +} +#endif + +static void ioapic_save(hvm_domain_context_t *h, void *opaque) +{ + int i; + struct domain *d = opaque; + struct vioapic *s = domain_vioapic(d); + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + + ioapic_info(s); + hvmirq_info(hvm_irq); + + /* save iopaic state*/ + hvm_put_32u(h, s->ioregsel); + hvm_put_32u(h, s->id); + hvm_put_64u(h, s->base_address); + for (i = 0; i < VIOAPIC_NUM_PINS; i++) { + hvm_put_64u(h, s->redirtbl[i].bits); + } + + /* save hvm irq state */ + hvm_put_buffer(h, (char*)hvm_irq->pci_intx, 16); + hvm_put_buffer(h, (char*)hvm_irq->isa_irq, 2); + hvm_put_32u(h, hvm_irq->callback_via_asserted); + hvm_put_32u(h, hvm_irq->callback_via_type); + hvm_put_32u(h, hvm_irq->callback_via.gsi); + + for (i = 0; i < 4; i++) + hvm_put_8u(h, hvm_irq->pci_link_route[i]); + + for (i = 0; i < 4; 
i++) + hvm_put_8u(h, hvm_irq->pci_link_assert_count[i]); + + for (i = 0; i < VIOAPIC_NUM_PINS; i++) + hvm_put_8u(h, hvm_irq->gsi_assert_count[i]); + + hvm_put_8u(h, hvm_irq->round_robin_prev_vcpu); + +} + +static int ioapic_load(hvm_domain_context_t *h, void *opaque, int version_id) +{ + int i; + struct domain *d = opaque; + struct vioapic *s = domain_vioapic(d); + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + + if (version_id != 1) + return -EINVAL; + + /* restore ioapic state */ + s->ioregsel = hvm_get_32u(h); + s->id = hvm_get_32u(h); + s->base_address = hvm_get_64u(h); + for (i = 0; i < VIOAPIC_NUM_PINS; i++) { + s->redirtbl[i].bits = hvm_get_64u(h); + } + + /* restore irq state */ + hvm_get_buffer(h, (char*)hvm_irq->pci_intx, 16); + hvm_get_buffer(h, (char*)hvm_irq->isa_irq, 2); + hvm_irq->callback_via_asserted = hvm_get_32u(h); + hvm_irq->callback_via_type = hvm_get_32u(h); + hvm_irq->callback_via.gsi = hvm_get_32u(h); + + for (i = 0; i < 4; i++) + hvm_irq->pci_link_route[i] = hvm_get_8u(h); + + for (i = 0; i < 4; i++) + hvm_irq->pci_link_assert_count[i] = hvm_get_8u(h); + + for (i = 0; i < VIOAPIC_NUM_PINS; i++) + hvm_irq->gsi_assert_count[i] = hvm_get_8u(h); + + hvm_irq->round_robin_prev_vcpu = hvm_get_8u(h); + + ioapic_info(s); + hvmirq_info(hvm_irq); + + return 0; +} + void vioapic_init(struct domain *d) { struct vioapic *vioapic = domain_vioapic(d); int i; + + hvm_register_savevm(d, "xen_hvm_ioapic", 0, 1, ioapic_save, ioapic_load, d); memset(vioapic, 0, sizeof(*vioapic)); for ( i = 0; i < VIOAPIC_NUM_PINS; i++ ) diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/hvm/vlapic.c Thu Jan 18 18:25:04 2007 +0000 @@ -659,7 +659,7 @@ static void vlapic_write(struct vcpu *v, uint64_t period = APIC_BUS_CYCLE_NS * (uint32_t)val * vlapic->timer_divisor; vlapic_set_reg(vlapic, APIC_TMICT, val); - create_periodic_time(&vlapic->pt, period, vlapic->pt.irq, + create_periodic_time(current, &vlapic->pt, period, vlapic->pt.irq, vlapic_lvtt_period(vlapic), NULL, vlapic); HVM_DBG_LOG(DBG_LEVEL_VLAPIC, @@ -795,6 +795,77 @@ static int vlapic_reset(struct vlapic *v return 1; } +#ifdef HVM_DEBUG_SUSPEND +static void lapic_info(struct vlapic *s) +{ + printk("*****lapic state:*****\n"); + printk("lapic 0x%"PRIx64".\n", s->apic_base_msr); + printk("lapic 0x%x.\n", s->disabled); + printk("lapic 0x%x.\n", s->timer_divisor); + printk("lapic 0x%x.\n", s->timer_pending_count); +} +#else +static void lapic_info(struct vlapic *s) +{ +} +#endif + +static void lapic_save(hvm_domain_context_t *h, void *opaque) +{ + struct vlapic *s = opaque; + + lapic_info(s); + + hvm_put_64u(h, s->apic_base_msr); + hvm_put_32u(h, s->disabled); + hvm_put_32u(h, s->timer_divisor); + + /*XXX: need this?*/ + hvm_put_32u(h, s->timer_pending_count); + + hvm_put_buffer(h, (char*)s->regs, 0x3f0); + +} + +static int lapic_load(hvm_domain_context_t *h, void *opaque, int version_id) +{ + struct vlapic *s = opaque; + struct vcpu *v = vlapic_vcpu(s); + unsigned long tmict; + + if (version_id != 1) + return -EINVAL; + + s->apic_base_msr = hvm_get_64u(h); + s->disabled = hvm_get_32u(h); + s->timer_divisor = hvm_get_32u(h); + + /*XXX: need this?*/ + s->timer_pending_count = hvm_get_32u(h); + + hvm_get_buffer(h, (char*)s->regs, 0x3f0); + + /* rearm the actiemr if needed */ + tmict = vlapic_get_reg(s, APIC_TMICT); + if (tmict > 0) { + uint64_t period = APIC_BUS_CYCLE_NS * (uint32_t)tmict * s->timer_divisor; + + create_periodic_time(v, &s->pt, period, 
s->pt.irq, + vlapic_lvtt_period(s), NULL, s); + + printk("lapic_load to rearm the actimer:" + "bus cycle is %uns, " + "saved tmict count %lu, period %"PRIu64"ns\n", + APIC_BUS_CYCLE_NS, tmict, period); + + } + + + lapic_info(s); + + return 0; +} + int vlapic_init(struct vcpu *v) { struct vlapic *vlapic = vcpu_vlapic(v); @@ -813,6 +884,7 @@ int vlapic_init(struct vcpu *v) vlapic->regs = map_domain_page_global(page_to_mfn(vlapic->regs_page)); memset(vlapic->regs, 0, PAGE_SIZE); + hvm_register_savevm(v->domain, "xen_hvm_lapic", v->vcpu_id, 1, lapic_save, lapic_load, vlapic); vlapic_reset(vlapic); vlapic->apic_base_msr = MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Jan 18 18:25:04 2007 +0000 @@ -363,6 +363,299 @@ static inline void __restore_debug_regis /* DR7 is loaded from the VMCS. */ } +static int __get_instruction_length(void); +int vmx_vmcs_save(struct vcpu *v, struct vmcs_data *c) +{ + unsigned long inst_len; + + inst_len = __get_instruction_length(); + c->eip = __vmread(GUEST_RIP); + +#ifdef HVM_DEBUG_SUSPEND + printk("vmx_vmcs_save: inst_len=0x%lx, eip=0x%"PRIx64".\n", + inst_len, c->eip); +#endif + + c->esp = __vmread(GUEST_RSP); + c->eflags = __vmread(GUEST_RFLAGS); + + c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0; + c->cr3 = v->arch.hvm_vmx.cpu_cr3; + c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4; + +#ifdef HVM_DEBUG_SUSPEND + printk("vmx_vmcs_save: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n", + c->cr3, + c->cr0, + c->cr4); +#endif + + c->idtr_limit = __vmread(GUEST_IDTR_LIMIT); + c->idtr_base = __vmread(GUEST_IDTR_BASE); + + c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT); + c->gdtr_base = __vmread(GUEST_GDTR_BASE); + + c->cs_sel = __vmread(GUEST_CS_SELECTOR); + c->cs_limit = __vmread(GUEST_CS_LIMIT); + c->cs_base = __vmread(GUEST_CS_BASE); + c->cs_arbytes = __vmread(GUEST_CS_AR_BYTES); + + c->ds_sel = __vmread(GUEST_DS_SELECTOR); + c->ds_limit = __vmread(GUEST_DS_LIMIT); + c->ds_base = __vmread(GUEST_DS_BASE); + c->ds_arbytes = __vmread(GUEST_DS_AR_BYTES); + + c->es_sel = __vmread(GUEST_ES_SELECTOR); + c->es_limit = __vmread(GUEST_ES_LIMIT); + c->es_base = __vmread(GUEST_ES_BASE); + c->es_arbytes = __vmread(GUEST_ES_AR_BYTES); + + c->ss_sel = __vmread(GUEST_SS_SELECTOR); + c->ss_limit = __vmread(GUEST_SS_LIMIT); + c->ss_base = __vmread(GUEST_SS_BASE); + c->ss_arbytes = __vmread(GUEST_SS_AR_BYTES); + + c->fs_sel = __vmread(GUEST_FS_SELECTOR); + c->fs_limit = __vmread(GUEST_FS_LIMIT); + c->fs_base = __vmread(GUEST_FS_BASE); + c->fs_arbytes = __vmread(GUEST_FS_AR_BYTES); + + c->gs_sel = __vmread(GUEST_GS_SELECTOR); + c->gs_limit = __vmread(GUEST_GS_LIMIT); + c->gs_base = __vmread(GUEST_GS_BASE); + c->gs_arbytes = __vmread(GUEST_GS_AR_BYTES); + + c->tr_sel = __vmread(GUEST_TR_SELECTOR); + c->tr_limit = __vmread(GUEST_TR_LIMIT); + c->tr_base = __vmread(GUEST_TR_BASE); + c->tr_arbytes = __vmread(GUEST_TR_AR_BYTES); + + c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR); + c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT); + c->ldtr_base = __vmread(GUEST_LDTR_BASE); + c->ldtr_arbytes = __vmread(GUEST_LDTR_AR_BYTES); + + c->sysenter_cs = __vmread(GUEST_SYSENTER_CS); + c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP); + c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP); + + return 1; +} + +int vmx_vmcs_restore(struct vcpu *v, struct vmcs_data *c) +{ + unsigned long mfn, old_base_mfn; + + vmx_vmcs_enter(v); + + __vmwrite(GUEST_RIP, c->eip); + 
__vmwrite(GUEST_RSP, c->esp); + __vmwrite(GUEST_RFLAGS, c->eflags); + + v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0; + __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0); + +#ifdef HVM_DEBUG_SUSPEND + printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n", + c->cr3, + c->cr0, + c->cr4); +#endif + + if (!vmx_paging_enabled(v)) { + printk("vmx_vmcs_restore: paging not enabled."); + goto skip_cr3; + } + + if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) { + /* + * This is simple TLB flush, implying the guest has + * removed some translation or changed page attributes. + * We simply invalidate the shadow. + */ + mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); + if (mfn != pagetable_get_pfn(v->arch.guest_table)) { + goto bad_cr3; + } + } else { + /* + * If different, make a shadow. Check if the PDBR is valid + * first. + */ + HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3); + /* current!=vcpu as not called by arch_vmx_do_launch */ + mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); + if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) { + goto bad_cr3; + } + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); + v->arch.guest_table = pagetable_from_pfn(mfn); + if (old_base_mfn) + put_page(mfn_to_page(old_base_mfn)); + /* + * arch.shadow_table should now hold the next CR3 for shadow + */ + v->arch.hvm_vmx.cpu_cr3 = c->cr3; + } + + skip_cr3: +#if defined(__x86_64__) + if (vmx_long_mode_enabled(v)) { + unsigned long vm_entry_value; + vm_entry_value = __vmread(VM_ENTRY_CONTROLS); + vm_entry_value |= VM_ENTRY_IA32E_MODE; + __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); + } +#endif + + __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK)); + v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4; + __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4); + + __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit); + __vmwrite(GUEST_IDTR_BASE, c->idtr_base); + + __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit); + __vmwrite(GUEST_GDTR_BASE, c->gdtr_base); + + __vmwrite(GUEST_CS_SELECTOR, c->cs_sel); + __vmwrite(GUEST_CS_LIMIT, c->cs_limit); + __vmwrite(GUEST_CS_BASE, c->cs_base); + __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes); + + __vmwrite(GUEST_DS_SELECTOR, c->ds_sel); + __vmwrite(GUEST_DS_LIMIT, c->ds_limit); + __vmwrite(GUEST_DS_BASE, c->ds_base); + __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes); + + __vmwrite(GUEST_ES_SELECTOR, c->es_sel); + __vmwrite(GUEST_ES_LIMIT, c->es_limit); + __vmwrite(GUEST_ES_BASE, c->es_base); + __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes); + + __vmwrite(GUEST_SS_SELECTOR, c->ss_sel); + __vmwrite(GUEST_SS_LIMIT, c->ss_limit); + __vmwrite(GUEST_SS_BASE, c->ss_base); + __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes); + + __vmwrite(GUEST_FS_SELECTOR, c->fs_sel); + __vmwrite(GUEST_FS_LIMIT, c->fs_limit); + __vmwrite(GUEST_FS_BASE, c->fs_base); + __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes); + + __vmwrite(GUEST_GS_SELECTOR, c->gs_sel); + __vmwrite(GUEST_GS_LIMIT, c->gs_limit); + __vmwrite(GUEST_GS_BASE, c->gs_base); + __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes); + + __vmwrite(GUEST_TR_SELECTOR, c->tr_sel); + __vmwrite(GUEST_TR_LIMIT, c->tr_limit); + __vmwrite(GUEST_TR_BASE, c->tr_base); + __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes); + + __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel); + __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit); + __vmwrite(GUEST_LDTR_BASE, c->ldtr_base); + __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes); + + __vmwrite(GUEST_SYSENTER_CS, c->sysenter_cs); + __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp); + __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip); + 
+ vmx_vmcs_exit(v); + + shadow_update_paging_modes(v); + return 0; + + bad_cr3: + gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"", c->cr3); + vmx_vmcs_exit(v); + return -EINVAL; +} + +#ifdef HVM_DEBUG_SUSPEND +static void dump_msr_state(struct vmx_msr_state *m) +{ + int i = 0; + printk("**** msr state ****\n"); + printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags); + for (i = 0; i < VMX_MSR_COUNT; i++) + printk("0x%lx,", m->msrs[i]); + printk("\n"); +} +#else +static void dump_msr_state(struct vmx_msr_state *m) +{ +} +#endif + +void vmx_save_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt) +{ + struct vmcs_data *data = &ctxt->data; + struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state; + unsigned long guest_flags = guest_state->flags; + int i = 0; + + data->shadow_gs = guest_state->shadow_gs; + data->vmxassist_enabled = v->arch.hvm_vmx.vmxassist_enabled; + /* save msrs */ + data->flags = guest_flags; + for (i = 0; i < VMX_MSR_COUNT; i++) + data->msr_items[i] = guest_state->msrs[i]; + + dump_msr_state(guest_state); +} + +void vmx_load_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt) +{ + int i = 0; + struct vmcs_data *data = &ctxt->data; + struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state; + + /* restore msrs */ + guest_state->flags = data->flags; + for (i = 0; i < VMX_MSR_COUNT; i++) + guest_state->msrs[i] = data->msr_items[i]; + + guest_state->shadow_gs = data->shadow_gs; + + /*XXX:no need to restore msrs, current!=vcpu as not called by arch_vmx_do_launch */ +/* vmx_restore_guest_msrs(v);*/ + + v->arch.hvm_vmx.vmxassist_enabled = data->vmxassist_enabled; + + dump_msr_state(guest_state); +} + +void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt) +{ + struct vmcs_data *data = &ctxt->data; + + vmx_save_cpu_state(v, ctxt); + + vmx_vmcs_enter(v); + + vmx_vmcs_save(v, data); + + vmx_vmcs_exit(v); + +} + +void vmx_load_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt) +{ + vmx_load_cpu_state(v, ctxt); + + if (vmx_vmcs_restore(v, &ctxt->data)) { + printk("vmx_vmcs restore failed!\n"); + domain_crash(v->domain); + } + + /* only load vmcs once */ + ctxt->valid = 0; + +} + /* * DR7 is saved and restored on every vmexit. 
Other debug registers only * need to be restored if their value is going to affect execution -- i.e., @@ -720,6 +1013,9 @@ static void vmx_setup_hvm_funcs(void) hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs; hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs; + + hvm_funcs.save_cpu_ctxt = vmx_save_vmcs_ctxt; + hvm_funcs.load_cpu_ctxt = vmx_load_vmcs_ctxt; hvm_funcs.paging_enabled = vmx_paging_enabled; hvm_funcs.long_mode_enabled = vmx_long_mode_enabled; diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/hvm/vpic.c --- a/xen/arch/x86/hvm/vpic.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/hvm/vpic.c Thu Jan 18 18:25:04 2007 +0000 @@ -378,6 +378,87 @@ static int vpic_intercept_elcr_io(ioreq_ return 1; } +#ifdef HVM_DEBUG_SUSPEND +static void vpic_info(struct vpic *s) +{ + printk("*****pic state:*****\n"); + printk("pic 0x%x.\n", s->irr); + printk("pic 0x%x.\n", s->imr); + printk("pic 0x%x.\n", s->isr); + printk("pic 0x%x.\n", s->irq_base); + printk("pic 0x%x.\n", s->init_state); + printk("pic 0x%x.\n", s->priority_add); + printk("pic 0x%x.\n", s->readsel_isr); + printk("pic 0x%x.\n", s->poll); + printk("pic 0x%x.\n", s->auto_eoi); + printk("pic 0x%x.\n", s->rotate_on_auto_eoi); + printk("pic 0x%x.\n", s->special_fully_nested_mode); + printk("pic 0x%x.\n", s->special_mask_mode); + printk("pic 0x%x.\n", s->elcr); + printk("pic 0x%x.\n", s->int_output); + printk("pic 0x%x.\n", s->is_master); +} +#else +static void vpic_info(struct vpic *s) +{ +} +#endif + +static void vpic_save(hvm_domain_context_t *h, void *opaque) +{ + struct vpic *s = opaque; + + vpic_info(s); + + hvm_put_8u(h, s->irr); + hvm_put_8u(h, s->imr); + hvm_put_8u(h, s->isr); + hvm_put_8u(h, s->irq_base); + hvm_put_8u(h, s->init_state); + hvm_put_8u(h, s->priority_add); + hvm_put_8u(h, s->readsel_isr); + + hvm_put_8u(h, s->poll); + hvm_put_8u(h, s->auto_eoi); + + hvm_put_8u(h, s->rotate_on_auto_eoi); + hvm_put_8u(h, s->special_fully_nested_mode); + hvm_put_8u(h, s->special_mask_mode); + + hvm_put_8u(h, s->elcr); + hvm_put_8u(h, s->int_output); +} + +static int vpic_load(hvm_domain_context_t *h, void *opaque, int version_id) +{ + struct vpic *s = opaque; + + if (version_id != 1) + return -EINVAL; + + s->irr = hvm_get_8u(h); + s->imr = hvm_get_8u(h); + s->isr = hvm_get_8u(h); + s->irq_base = hvm_get_8u(h); + s->init_state = hvm_get_8u(h); + s->priority_add = hvm_get_8u(h); + s->readsel_isr = hvm_get_8u(h); + + s->poll = hvm_get_8u(h); + s->auto_eoi = hvm_get_8u(h); + + s->rotate_on_auto_eoi = hvm_get_8u(h); + s->special_fully_nested_mode = hvm_get_8u(h); + s->special_mask_mode = hvm_get_8u(h); + + s->elcr = hvm_get_8u(h); + s->int_output = hvm_get_8u(h); + + vpic_info(s); + + return 0; +} + void vpic_init(struct domain *d) { struct vpic *vpic; @@ -387,12 +468,14 @@ void vpic_init(struct domain *d) memset(vpic, 0, sizeof(*vpic)); vpic->is_master = 1; vpic->elcr = 1 << 2; + hvm_register_savevm(d, "xen_hvm_i8259", 0x20, 1, vpic_save, vpic_load, vpic); register_portio_handler(d, 0x20, 2, vpic_intercept_pic_io); register_portio_handler(d, 0x4d0, 1, vpic_intercept_elcr_io); /* Slave PIC. 
*/ vpic++; memset(vpic, 0, sizeof(*vpic)); + hvm_register_savevm(d, "xen_hvm_i8259", 0xa0, 1, vpic_save, vpic_load, vpic); register_portio_handler(d, 0xa0, 2, vpic_intercept_pic_io); register_portio_handler(d, 0x4d1, 1, vpic_intercept_elcr_io); } diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/hvm/vpt.c --- a/xen/arch/x86/hvm/vpt.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/hvm/vpt.c Thu Jan 18 18:25:04 2007 +0000 @@ -195,7 +195,7 @@ void pt_reset(struct vcpu *v) } } -void create_periodic_time(struct periodic_time *pt, uint64_t period, +void create_periodic_time(struct vcpu *v, struct periodic_time *pt, uint64_t period, uint8_t irq, char one_shot, time_cb *cb, void *data) { destroy_periodic_time(pt); @@ -209,7 +209,7 @@ void create_periodic_time(struct periodi period = 900000; /* force to 0.9ms */ } pt->period = period; - pt->vcpu = current; + pt->vcpu = v; pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu); pt->irq = irq; pt->period_cycles = (u64)period * cpu_khz / 1000000L; @@ -218,7 +218,7 @@ void create_periodic_time(struct periodi pt->cb = cb; pt->priv = data; - list_add(&pt->list, ¤t->arch.hvm_vcpu.tm_list); + list_add(&pt->list, &v->arch.hvm_vcpu.tm_list); set_timer(&pt->timer, pt->scheduled); } diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/mm.c Thu Jan 18 18:25:04 2007 +0000 @@ -154,6 +154,15 @@ l2_pgentry_t *compat_idle_pg_table_l2 = #define l3_disallow_mask(d) L3_DISALLOW_MASK #endif +static void queue_deferred_ops(struct domain *d, unsigned int ops) +{ + if ( d == current->domain ) + this_cpu(percpu_mm_info).deferred_ops |= ops; + else + BUG_ON(!test_bit(_DOMF_paused, &d->domain_flags) || + !cpus_empty(d->domain_dirty_cpumask)); +} + void __init init_frametable(void) { unsigned long nr_pages, page_step, i, mfn; @@ -416,8 +425,7 @@ void invalidate_shadow_ldt(struct vcpu * } /* Dispose of the (now possibly invalid) mappings from the TLB. */ - ASSERT(v->processor == smp_processor_id()); - this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT; + queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT); } @@ -945,7 +953,8 @@ static int create_pae_xen_mappings(struc } #else memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], - &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], + &compat_idle_pg_table_l2[ + l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e)); #endif unmap_domain_page(pl2e); @@ -1561,7 +1570,7 @@ void free_page_type(struct page_info *pa * (e.g., update_va_mapping()) or we could end up modifying a page * that is no longer a page table (and hence screw up ref counts). */ - this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS; + queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS); if ( unlikely(shadow_mode_enabled(owner)) ) { @@ -1759,24 +1768,14 @@ int new_guest_cr3(unsigned long mfn) int okay; unsigned long old_base_mfn; - if ( is_hvm_domain(d) && !hvm_paging_enabled(v) ) - return 0; - #ifdef CONFIG_COMPAT if ( IS_COMPAT(d) ) { - l4_pgentry_t l4e = l4e_from_pfn(mfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED); - - if ( shadow_mode_refcounts(d) ) - { - okay = get_page_from_pagenr(mfn, d); - old_base_mfn = l4e_get_pfn(l4e); - if ( okay && old_base_mfn ) - put_page(mfn_to_page(old_base_mfn)); - } - else - okay = mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)), - l4e, 0); + okay = shadow_mode_refcounts(d) + ? 0 /* Old code was broken, but what should it be? 
*/ + : mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)), + l4e_from_pfn(mfn, (_PAGE_PRESENT|_PAGE_RW| + _PAGE_USER|_PAGE_ACCESSED)), 0); if ( unlikely(!okay) ) { MEM_LOG("Error while installing new compat baseptr %lx", mfn); @@ -1789,41 +1788,13 @@ int new_guest_cr3(unsigned long mfn) return 1; } #endif - if ( shadow_mode_refcounts(d) ) - { - okay = get_page_from_pagenr(mfn, d); - if ( unlikely(!okay) ) - { - MEM_LOG("Error while installing new baseptr %lx", mfn); - return 0; - } - } - else - { - okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); - if ( unlikely(!okay) ) - { - /* Switch to idle pagetable: this VCPU has no active p.t. now. */ - MEM_LOG("New baseptr %lx: slow path via idle pagetables", mfn); - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = pagetable_null(); - update_cr3(v); - write_cr3(__pa(idle_pg_table)); - if ( old_base_mfn != 0 ) - put_page_and_type(mfn_to_page(old_base_mfn)); - - /* Retry the validation with no active p.t. for this VCPU. */ - okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); - if ( !okay ) - { - /* Failure here is unrecoverable: the VCPU has no pagetable! */ - MEM_LOG("Fatal error while installing new baseptr %lx", mfn); - domain_crash(d); - ASSERT(v->processor == smp_processor_id()); - this_cpu(percpu_mm_info).deferred_ops = 0; - return 0; - } - } + okay = shadow_mode_refcounts(d) + ? get_page_from_pagenr(mfn, d) + : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); + if ( unlikely(!okay) ) + { + MEM_LOG("Error while installing new baseptr %lx", mfn); + return 0; } invalidate_shadow_ldt(v); @@ -1831,7 +1802,7 @@ int new_guest_cr3(unsigned long mfn) old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_from_pfn(mfn); - update_cr3(v); /* update shadow_table and cr3 fields of vcpu struct */ + update_cr3(v); write_ptbase(v); diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/mm/shadow/common.c Thu Jan 18 18:25:04 2007 +0000 @@ -2184,10 +2184,11 @@ int sh_remove_all_mappings(struct vcpu * expected_count = (page->count_info & PGC_allocated) ? 1 : 0; if ( (page->count_info & PGC_count_mask) != expected_count ) { - /* Don't complain if we're in HVM and there's one extra mapping: - * The qemu helper process has an untyped mapping of this dom's RAM */ + /* Don't complain if we're in HVM and there are some extra mappings: + * The qemu helper process has an untyped mapping of this dom's RAM + * and the HVM restore program takes another. 
*/ if ( !(shadow_mode_external(v->domain) - && (page->count_info & PGC_count_mask) <= 2 + && (page->count_info & PGC_count_mask) <= 3 && (page->u.inuse.type_info & PGT_count_mask) == 0) ) { SHADOW_ERROR("can't find all mappings of mfn %lx: " diff -r 58637a0a7c7e -r e2ca6bd16046 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/arch/x86/mm/shadow/multi.c Thu Jan 18 18:25:04 2007 +0000 @@ -1627,7 +1627,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf default: /* Do nothing */ break; } } - + shadow_promote(v, gmfn, shadow_type); set_shadow_status(v, gmfn, shadow_type, smfn); diff -r 58637a0a7c7e -r e2ca6bd16046 xen/common/domain.c --- a/xen/common/domain.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/common/domain.c Thu Jan 18 18:25:04 2007 +0000 @@ -24,6 +24,7 @@ #include <xen/percpu.h> #include <xen/multicall.h> #include <asm/debugger.h> +#include <asm/hvm/support.h> #include <public/sched.h> #include <public/vcpu.h> #ifdef CONFIG_COMPAT @@ -505,6 +506,14 @@ int set_info_guest(struct domain *d, if ( rc == 0 ) rc = arch_set_info_guest(v, c); + /*XXX: hvm smp guest restore support */ + if ( rc == 0 && + v->vcpu_id != 0 && + is_hvm_vcpu(v) && + test_and_clear_bit(_VCPUF_down, &v->vcpu_flags) ) { + vcpu_wake(v); + } + domain_unpause(d); xfree(c.nat); diff -r 58637a0a7c7e -r e2ca6bd16046 xen/common/domctl.c --- a/xen/common/domctl.c Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/common/domctl.c Thu Jan 18 18:25:04 2007 +0000 @@ -215,6 +215,39 @@ ret_t do_domctl(XEN_GUEST_HANDLE(xen_dom } break; + case XEN_DOMCTL_sethvmcontext: + { + struct hvm_domain_context *c; + struct domain *d; + struct vcpu *v; + + ret = -ESRCH; + if ( (d = find_domain_by_id(op->domain)) == NULL ) + break; + + ret = -ENOMEM; + if ( (c = xmalloc(struct hvm_domain_context)) == NULL ) + goto sethvmcontext_out; + + v = d->vcpu[0]; + + ret = -EFAULT; + +#ifndef CONFIG_COMPAT + if ( copy_from_guest(c, op->u.hvmcontext.ctxt, 1) != 0 ) + goto sethvmcontext_out; + + ret = arch_sethvm_ctxt(v, c); +#endif + + xfree(c); + + sethvmcontext_out: + put_domain(d); + + } + break; + case XEN_DOMCTL_pausedomain: { struct domain *d = find_domain_by_id(op->domain); @@ -552,6 +585,46 @@ ret_t do_domctl(XEN_GUEST_HANDLE(xen_dom } break; + case XEN_DOMCTL_gethvmcontext: + { + struct hvm_domain_context *c; + struct domain *d; + struct vcpu *v; + + ret = -ESRCH; + if ( (d = find_domain_by_id(op->domain)) == NULL ) + break; + + ret = -ENOMEM; + if ( (c = xmalloc(struct hvm_domain_context)) == NULL ) + goto gethvmcontext_out; + + v = d->vcpu[0]; + + ret = -ENODATA; + if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) + goto gethvmcontext_out; + + ret = 0; + if (arch_gethvm_ctxt(v, c) == -1) + ret = -EFAULT; + +#ifndef CONFIG_COMPAT + if ( copy_to_guest(op->u.hvmcontext.ctxt, c, 1) ) + ret = -EFAULT; + + xfree(c); +#endif + + if ( copy_to_guest(u_domctl, op, 1) ) + ret = -EFAULT; + + gethvmcontext_out: + put_domain(d); + + } + break; + case XEN_DOMCTL_getvcpuinfo: { struct domain *d; diff -r 58637a0a7c7e -r e2ca6bd16046 xen/include/asm-x86/hvm/domain.h --- a/xen/include/asm-x86/hvm/domain.h Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/include/asm-x86/hvm/domain.h Thu Jan 18 18:25:04 2007 +0000 @@ -27,6 +27,20 @@ #include <asm/hvm/io.h> #include <public/hvm/params.h> +typedef void SaveStateHandler(hvm_domain_context_t *h, void *opaque); +typedef int LoadStateHandler(hvm_domain_context_t *h, void *opaque, int version_id); + +#define HVM_SE_IDSTR_LEN 32 +typedef struct HVMStateEntry { + char 
idstr[HVM_SE_IDSTR_LEN]; + int instance_id; + int version_id; + SaveStateHandler *save_state; + LoadStateHandler *load_state; + void *opaque; + struct HVMStateEntry *next; +} HVMStateEntry; + struct hvm_domain { unsigned long shared_page_va; unsigned long buffered_io_va; @@ -44,6 +58,9 @@ struct hvm_domain { spinlock_t pbuf_lock; uint64_t params[HVM_NR_PARAMS]; + + struct hvm_domain_context *hvm_ctxt; + HVMStateEntry *first_se; }; #endif /* __ASM_X86_HVM_DOMAIN_H__ */ diff -r 58637a0a7c7e -r e2ca6bd16046 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/include/asm-x86/hvm/hvm.h Thu Jan 18 18:25:04 2007 +0000 @@ -79,6 +79,13 @@ struct hvm_function_table { struct vcpu *v, struct cpu_user_regs *r, unsigned long *crs); void (*load_cpu_guest_regs)( struct vcpu *v, struct cpu_user_regs *r); + + /* save and load hvm guest cpu context for save/restore */ + void (*save_cpu_ctxt)( + struct vcpu *v, struct hvmcpu_context *ctxt); + void (*load_cpu_ctxt)( + struct vcpu *v, struct hvmcpu_context *ctxt); + /* * Examine specifics of the guest state: * 1) determine whether paging is enabled, @@ -157,6 +164,35 @@ hvm_load_cpu_guest_regs(struct vcpu *v, hvm_funcs.load_cpu_guest_regs(v, r); } +void hvm_set_guest_time(struct vcpu *v, u64 gtime); +u64 hvm_get_guest_time(struct vcpu *v); + +static inline void +hvm_save_cpu_context( + struct vcpu *v, struct hvmcpu_context *ctxt) +{ + hvm_funcs.save_cpu_ctxt(v, ctxt); + + /* save guest time */ + ctxt->gtime = hvm_get_guest_time(v); + + /* set valid flag to recover whole vmcs when restore */ + ctxt->valid = 0x55885588; +} + +static inline void +hvm_load_cpu_context( + struct vcpu *v, struct hvmcpu_context *ctxt) +{ + if ( ctxt->valid != 0x55885588) + return; + + hvm_funcs.load_cpu_ctxt(v, ctxt); + + /* restore guest time*/ + hvm_set_guest_time(v, ctxt->gtime); +} + static inline int hvm_paging_enabled(struct vcpu *v) { @@ -222,8 +258,6 @@ void hvm_cpuid(unsigned int input, unsig void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); void hvm_stts(struct vcpu *v); -void hvm_set_guest_time(struct vcpu *v, u64 gtime); -u64 hvm_get_guest_time(struct vcpu *v); void hvm_migrate_timers(struct vcpu *v); void hvm_do_resume(struct vcpu *v); diff -r 58637a0a7c7e -r e2ca6bd16046 xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/include/asm-x86/hvm/support.h Thu Jan 18 18:25:04 2007 +0000 @@ -121,6 +121,133 @@ extern unsigned int opt_hvm_debug_level; #define TRACE_VMEXIT(index, value) \ current->arch.hvm_vcpu.hvm_trace_values[index] = (value) +/* save/restore support */ + +//#define HVM_DEBUG_SUSPEND + +extern int hvm_register_savevm(struct domain *d, + const char *idstr, + int instance_id, + int version_id, + SaveStateHandler *save_state, + LoadStateHandler *load_state, + void *opaque); + +static inline void hvm_ctxt_seek(hvm_domain_context_t *h, unsigned int pos) +{ + h->cur = pos; +} + +static inline uint32_t hvm_ctxt_tell(hvm_domain_context_t *h) +{ + return h->cur; +} + +static inline int hvm_ctxt_end(hvm_domain_context_t *h) +{ + return (h->cur >= h->size || h->cur >= HVM_CTXT_SIZE); +} + +static inline void hvm_put_byte(hvm_domain_context_t *h, unsigned int i) +{ + if (h->cur >= HVM_CTXT_SIZE) { + h->cur++; + return; + } + h->data[h->cur++] = (char)i; +} + +static inline void hvm_put_8u(hvm_domain_context_t *h, uint8_t b) +{ + hvm_put_byte(h, b); +} + +static inline void 
hvm_put_16u(hvm_domain_context_t *h, uint16_t b) +{ + hvm_put_8u(h, b >> 8); + hvm_put_8u(h, b); +} + +static inline void hvm_put_32u(hvm_domain_context_t *h, uint32_t b) +{ + hvm_put_16u(h, b >> 16); + hvm_put_16u(h, b); +} + +static inline void hvm_put_64u(hvm_domain_context_t *h, uint64_t b) +{ + hvm_put_32u(h, b >> 32); + hvm_put_32u(h, b); +} + +static inline void hvm_put_buffer(hvm_domain_context_t *h, const char *buf, int len) +{ + memcpy(&h->data[h->cur], buf, len); + h->cur += len; +} + + +static inline char hvm_get_byte(hvm_domain_context_t *h) +{ + if (h->cur >= HVM_CTXT_SIZE) { + printk("hvm_get_byte overflow.\n"); + return -1; + } + + if (h->cur >= h->size) { + printk("hvm_get_byte exceed data area.\n"); + return -1; + } + + return h->data[h->cur++]; +} + +static inline uint8_t hvm_get_8u(hvm_domain_context_t *h) +{ + return hvm_get_byte(h); +} + +static inline uint16_t hvm_get_16u(hvm_domain_context_t *h) +{ + uint16_t v; + v = hvm_get_8u(h) << 8; + v |= hvm_get_8u(h); + + return v; +} + +static inline uint32_t hvm_get_32u(hvm_domain_context_t *h) +{ + uint32_t v; + v = hvm_get_16u(h) << 16; + v |= hvm_get_16u(h); + + return v; +} + +static inline uint64_t hvm_get_64u(hvm_domain_context_t *h) +{ + uint64_t v; + v = (uint64_t)hvm_get_32u(h) << 32; + v |= hvm_get_32u(h); + + return v; +} + +static inline void hvm_get_buffer(hvm_domain_context_t *h, char *buf, int len) +{ + memcpy(buf, &h->data[h->cur], len); + h->cur += len; +} + +extern int hvm_save(struct vcpu*, hvm_domain_context_t *h); +extern int hvm_load(struct vcpu*, hvm_domain_context_t *h); + +extern int arch_sethvm_ctxt(struct vcpu *v, struct hvm_domain_context *c); +extern int arch_gethvm_ctxt(struct vcpu *v, struct hvm_domain_context *c); + +extern void shpage_init(struct domain *d, shared_iopage_t *sp); + extern int hvm_enabled; int hvm_copy_to_guest_phys(paddr_t paddr, void *buf, int size); diff -r 58637a0a7c7e -r e2ca6bd16046 xen/include/asm-x86/hvm/vpt.h --- a/xen/include/asm-x86/hvm/vpt.h Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/include/asm-x86/hvm/vpt.h Thu Jan 18 18:25:04 2007 +0000 @@ -152,7 +152,7 @@ struct periodic_time *is_pt_irq(struct v struct periodic_time *is_pt_irq(struct vcpu *v, int vector, int type); void pt_intr_post(struct vcpu *v, int vector, int type); void pt_reset(struct vcpu *v); -void create_periodic_time(struct periodic_time *pt, uint64_t period, +void create_periodic_time(struct vcpu *v, struct periodic_time *pt, uint64_t period, uint8_t irq, char one_shot, time_cb *cb, void *data); void destroy_periodic_time(struct periodic_time *pt); diff -r 58637a0a7c7e -r e2ca6bd16046 xen/include/public/arch-x86/xen.h --- a/xen/include/public/arch-x86/xen.h Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/include/public/arch-x86/xen.h Thu Jan 18 18:25:04 2007 +0000 @@ -107,6 +107,70 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t); DEFINE_XEN_GUEST_HANDLE(trap_info_t); typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ + +/* + * World vmcs state + */ +struct vmcs_data { + uint64_t eip; /* execution pointer */ + uint64_t esp; /* stack pointer */ + uint64_t eflags; /* flags register */ + uint64_t cr0; + uint64_t cr3; /* page table directory */ + uint64_t cr4; + uint32_t idtr_limit; /* idt */ + uint64_t idtr_base; + uint32_t gdtr_limit; /* gdt */ + uint64_t gdtr_base; + uint32_t cs_sel; /* cs selector */ + uint32_t cs_limit; + uint64_t cs_base; + uint32_t cs_arbytes; + uint32_t ds_sel; /* ds selector */ + uint32_t ds_limit; + uint64_t ds_base; + uint32_t ds_arbytes; + uint32_t es_sel; /* es selector */ + uint32_t 
es_limit; + uint64_t es_base; + uint32_t es_arbytes; + uint32_t ss_sel; /* ss selector */ + uint32_t ss_limit; + uint64_t ss_base; + uint32_t ss_arbytes; + uint32_t fs_sel; /* fs selector */ + uint32_t fs_limit; + uint64_t fs_base; + uint32_t fs_arbytes; + uint32_t gs_sel; /* gs selector */ + uint32_t gs_limit; + uint64_t gs_base; + uint32_t gs_arbytes; + uint32_t tr_sel; /* task selector */ + uint32_t tr_limit; + uint64_t tr_base; + uint32_t tr_arbytes; + uint32_t ldtr_sel; /* ldtr selector */ + uint32_t ldtr_limit; + uint64_t ldtr_base; + uint32_t ldtr_arbytes; + uint32_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + /* msr for em64t */ + uint64_t shadow_gs; + uint64_t flags; + /* same size as VMX_MSR_COUNT */ + uint64_t msr_items[6]; + uint64_t vmxassist_enabled; +}; +typedef struct vmcs_data vmcs_data_t; + +struct hvmcpu_context { + uint32_t valid; + struct vmcs_data data; + uint64_t gtime; +}; /* * The following is all CPU context. Note that the fpu_ctxt block is filled @@ -154,6 +218,7 @@ struct vcpu_guest_context { #endif #endif unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ + struct hvmcpu_context hvmcpu_ctxt; /* whole vmcs region */ #ifdef __x86_64__ /* Segment base addresses. */ uint64_t fs_base; diff -r 58637a0a7c7e -r e2ca6bd16046 xen/include/public/domctl.h --- a/xen/include/public/domctl.h Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/include/public/domctl.h Thu Jan 18 18:25:04 2007 +0000 @@ -386,6 +386,21 @@ struct xen_domctl_settimeoffset { }; typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); + +#define HVM_CTXT_SIZE 6144 +typedef struct hvm_domain_context { + uint32_t cur; + uint32_t size; + uint8_t data[HVM_CTXT_SIZE]; +} hvm_domain_context_t; +DEFINE_XEN_GUEST_HANDLE(hvm_domain_context_t); + +#define XEN_DOMCTL_gethvmcontext 33 +#define XEN_DOMCTL_sethvmcontext 34 +typedef struct xen_domctl_hvmcontext { + XEN_GUEST_HANDLE(hvm_domain_context_t) ctxt; /* IN/OUT */ +} xen_domctl_hvmcontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t); #define XEN_DOMCTL_real_mode_area 26 struct xen_domctl_real_mode_area { @@ -423,6 +438,7 @@ struct xen_domctl { struct xen_domctl_arch_setup arch_setup; struct xen_domctl_settimeoffset settimeoffset; struct xen_domctl_real_mode_area real_mode_area; + struct xen_domctl_hvmcontext hvmcontext; uint8_t pad[128]; } u; }; diff -r 58637a0a7c7e -r e2ca6bd16046 xen/include/xlat.lst --- a/xen/include/xlat.lst Wed Jan 17 21:45:34 2007 -0700 +++ b/xen/include/xlat.lst Thu Jan 18 18:25:04 2007 +0000 @@ -8,6 +8,8 @@ ? vcpu_time_info xen.h ! cpu_user_regs arch-x86/xen-@arch@.h ! trap_info arch-x86/xen.h +! hvmcpu_context arch-x86/xen.h +! vmcs_data arch-x86/xen.h ! vcpu_guest_context arch-x86/xen.h ? acm_getdecision acm_ops.h ! ctl_cpumap domctl.h _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog