Because recent changes in unstable make rebasing hard, we are submitting this
patch now, in the hope that it can be checked in first and the remaining bugs
fixed over time where possible.
thanks,
--
best rgds,
edwin
------------------------------------------------------------------------
Signed-off-by: Zhai Edwin <edwin.zhai@xxxxxxxxx>
Signed-off-by: Nakajima Jun <jun.nakajima@xxxxxxxxx>
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/ioemu/hw/cirrus_vga.c Wed Dec 13 22:52:02 2006 +0800
@@ -3010,11 +3010,44 @@ static CPUWriteMemoryFunc *cirrus_mmio_w
cirrus_mmio_writel,
};
+/*
+ * Tear down the direct VRAM mapping, if one is active (s->map_addr != 0).
+ *
+ * Clears s->map_addr, asks unset_vram_mapping() to undo the mapping for
+ * [cirrus_lfb_addr, cirrus_lfb_end] and munmap()s s->vram_ptr.  The result
+ * of both calls is now reported on stderr instead of being dropped.
+ *
+ * NOTE(review): s->vram_ptr is left pointing at the unmapped region, as in
+ * the original code; callers must not touch it afterwards.
+ */
+void cirrus_stop_acc(CirrusVGAState *s)
+{
+    int error;
+
+    if (!s->map_addr)
+        return;
+
+    s->map_addr = 0;
+    error = unset_vram_mapping(s->cirrus_lfb_addr, s->cirrus_lfb_end);
+    fprintf(stderr, "cirrus_stop_acc:unset_vram_mapping returned %d.\n",
+            error);
+
+    if (munmap(s->vram_ptr, VGA_RAM_SIZE) == -1)
+        fprintf(stderr, "cirrus_stop_acc:munmap failed.\n");
+}
+
+/*
+ * Re-create the direct VRAM mapping after a restore.
+ *
+ * Does nothing unless both s->cirrus_lfb_addr and s->cirrus_lfb_end are
+ * non-zero.  On success the buffer returned by vga_update_vram() is freed
+ * and map_addr/map_end are set to the LFB bounds; if set_vram_mapping()
+ * returns NULL only a message is printed and the state is left untouched.
+ */
+void cirrus_restart_acc(CirrusVGAState *s)
+{
+    void *vram_pointer, *old_vram;
+
+    if (!s->cirrus_lfb_addr || !s->cirrus_lfb_end)
+        return;
+
+    /* The format string is split with literal concatenation so that it
+     * survives line wrapping; the emitted text is unchanged. */
+    fprintf(stderr, "cirrus_vga_load:re-enable vga acc.lfb_addr=0x%lx, "
+            "lfb_end=0x%lx.\n",
+            s->cirrus_lfb_addr, s->cirrus_lfb_end);
+
+    vram_pointer = set_vram_mapping(s->cirrus_lfb_addr, s->cirrus_lfb_end);
+    if (!vram_pointer) {
+        fprintf(stderr, "cirrus_vga_load:NULL vram_pointer\n");
+        return;
+    }
+
+    old_vram = vga_update_vram((VGAState *)s, vram_pointer, VGA_RAM_SIZE);
+    qemu_free(old_vram);
+    s->map_addr = s->cirrus_lfb_addr;
+    s->map_end = s->cirrus_lfb_end;
+}
+
/* load/save state */
static void cirrus_vga_save(QEMUFile *f, void *opaque)
{
CirrusVGAState *s = opaque;
+ uint8_t vga_acc;
qemu_put_be32s(f, &s->latch);
qemu_put_8s(f, &s->sr_index);
@@ -3049,11 +3082,20 @@ static void cirrus_vga_save(QEMUFile *f,
qemu_put_be32s(f, &s->hw_cursor_y);
/* XXX: we do not save the bitblt state - we assume we do not save
the state when the blitter is active */
+
+ vga_acc = (!!s->map_addr);
+ qemu_put_8s(f, &vga_acc);
+ qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
+ qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+ qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE);
+ if (vga_acc)
+ cirrus_stop_acc(s);
}
static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
{
CirrusVGAState *s = opaque;
+ uint8_t vga_acc = 0;
if (version_id != 1)
return -EINVAL;
@@ -3091,6 +3133,14 @@ static int cirrus_vga_load(QEMUFile *f,
qemu_get_be32s(f, &s->hw_cursor_x);
qemu_get_be32s(f, &s->hw_cursor_y);
+
+ qemu_get_8s(f, &vga_acc);
+ qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
+ qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+ qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE);
+ if (vga_acc){
+ cirrus_restart_acc(s);
+ }
/* force refresh */
s->graphic_mode = -1;
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/ioemu/target-i386-dm/helper2.c Wed Dec 13 22:52:02 2006 +0800
@@ -525,6 +525,7 @@ int main_loop(void)
{
extern int vm_running;
extern int shutdown_requested;
+ extern int suspend_requested;
CPUState *env = cpu_single_env;
int evtchn_fd = xc_evtchn_fd(xce_handle);
@@ -542,12 +543,24 @@ int main_loop(void)
qemu_system_reset();
reset_requested = 0;
}
+ if (suspend_requested) {
+ fprintf(logfile, "device model received suspend signal!\n");
+ break;
+ }
}
/* Wait up to 10 msec. */
main_loop_wait(10);
}
- destroy_hvm_domain();
+    if (!suspend_requested)
+        destroy_hvm_domain();
+    else {
+        /* On suspend, dump the device-model state to a per-domain file. */
+        char qemu_file[32]; /* 17-char prefix + up to 11 chars of %d + NUL; 20 overflowed for domid >= 100 */
+        snprintf(qemu_file, sizeof(qemu_file), "/tmp/xen.qemu-dm.%d", domid);
+        if (qemu_savevm(qemu_file) < 0)
+            fprintf(stderr, "qemu save fail.\n");
+    }
+
return 0;
}
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/ioemu/target-i386-dm/piix_pci-dm.c
--- a/tools/ioemu/target-i386-dm/piix_pci-dm.c Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/ioemu/target-i386-dm/piix_pci-dm.c Wed Dec 13 22:52:02 2006 +0800
@@ -83,6 +83,11 @@ PCIBus *i440fx_init(void)
/* PIIX3 PCI to ISA bridge */
static PCIDevice *piix3_dev;
+/*
+ * Translate a device interrupt pin index (irq_num) into one of four PIRQ
+ * indices (0..3) by offsetting it with devfn >> 3.
+ */
+static int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
+{
+    /* This is the barber's pole mapping used by Xen. */
+    int rotated = irq_num + (pci_dev->devfn >> 3);
+
+    return rotated & 3;
+}
static void piix3_write_config(PCIDevice *d,
uint32_t address, uint32_t val, int len)
@@ -150,3 +155,227 @@ int piix3_init(PCIBus *bus)
}
void pci_bios_init(void) {}
+
+/***********************************************************/
+/* XXX: the following should be moved to the PC BIOS */
+
+static __attribute__((unused)) uint32_t isa_inb(uint32_t addr)
+{ /* forwards to cpu_inb() with a NULL first argument and returns its result */
+ return cpu_inb(NULL, addr);
+}
+
+static void isa_outb(uint32_t val, uint32_t addr)
+{ /* forwards to cpu_outb() with a NULL first argument; note the (val, addr) order is swapped to (addr, val) */
+ cpu_outb(NULL, addr, val);
+}
+
+static __attribute__((unused)) uint32_t isa_inw(uint32_t addr)
+{ /* forwards to cpu_inw() with a NULL first argument and returns its result */
+ return cpu_inw(NULL, addr);
+}
+
+static __attribute__((unused)) void isa_outw(uint32_t val, uint32_t addr)
+{ /* forwards to cpu_outw() with a NULL first argument; argument order swapped to (addr, val) */
+ cpu_outw(NULL, addr, val);
+}
+
+static __attribute__((unused)) uint32_t isa_inl(uint32_t addr)
+{ /* forwards to cpu_inl() with a NULL first argument and returns its result */
+ return cpu_inl(NULL, addr);
+}
+
+static __attribute__((unused)) void isa_outl(uint32_t val, uint32_t addr)
+{ /* forwards to cpu_outl() with a NULL first argument; argument order swapped to (addr, val) */
+ cpu_outl(NULL, addr, val);
+}
+
+static uint32_t pci_bios_io_addr;
+static uint32_t pci_bios_mem_addr;
+/* host irqs corresponding to PCI irqs A-D */
+static uint8_t pci_irqs[4] = { 5, 6, 10, 11 };
+
+/*
+ * Issue a 4-byte pci_data_write() for device d.  The offset addr is OR-ed
+ * with the bus number shifted left by 16 and d->devfn shifted left by 8.
+ */
+static void pci_config_writel(PCIDevice *d, uint32_t addr, uint32_t val)
+{
+    PCIBus *bus = d->bus;
+    uint32_t bdf = (pci_bus_num(bus) << 16) | (d->devfn << 8);
+
+    pci_data_write(bus, addr | bdf, val, 4);
+}
+
+/*
+ * Issue a 2-byte pci_data_write() for device d.  The offset addr is OR-ed
+ * with the bus number shifted left by 16 and d->devfn shifted left by 8.
+ */
+static void pci_config_writew(PCIDevice *d, uint32_t addr, uint32_t val)
+{
+    PCIBus *bus = d->bus;
+    uint32_t bdf = (pci_bus_num(bus) << 16) | (d->devfn << 8);
+
+    pci_data_write(bus, addr | bdf, val, 2);
+}
+
+/*
+ * Issue a 1-byte pci_data_write() for device d.  The offset addr is OR-ed
+ * with the bus number shifted left by 16 and d->devfn shifted left by 8.
+ */
+static void pci_config_writeb(PCIDevice *d, uint32_t addr, uint32_t val)
+{
+    PCIBus *bus = d->bus;
+    uint32_t bdf = (pci_bus_num(bus) << 16) | (d->devfn << 8);
+
+    pci_data_write(bus, addr | bdf, val, 1);
+}
+
+/*
+ * Issue a 4-byte pci_data_read() for device d and return its result.  The
+ * offset addr is OR-ed with the bus number shifted left by 16 and d->devfn
+ * shifted left by 8.
+ */
+static __attribute__((unused)) uint32_t pci_config_readl(PCIDevice *d, uint32_t addr)
+{
+    PCIBus *bus = d->bus;
+    uint32_t bdf = (pci_bus_num(bus) << 16) | (d->devfn << 8);
+
+    return pci_data_read(bus, addr | bdf, 4);
+}
+
+/*
+ * Issue a 2-byte pci_data_read() for device d and return its result.  The
+ * offset addr is OR-ed with the bus number shifted left by 16 and d->devfn
+ * shifted left by 8.
+ */
+static uint32_t pci_config_readw(PCIDevice *d, uint32_t addr)
+{
+    PCIBus *bus = d->bus;
+    uint32_t bdf = (pci_bus_num(bus) << 16) | (d->devfn << 8);
+
+    return pci_data_read(bus, addr | bdf, 2);
+}
+
+/*
+ * Issue a 1-byte pci_data_read() for device d and return its result.  The
+ * offset addr is OR-ed with the bus number shifted left by 16 and d->devfn
+ * shifted left by 8.
+ */
+static uint32_t pci_config_readb(PCIDevice *d, uint32_t addr)
+{
+    PCIBus *bus = d->bus;
+    uint32_t bdf = (pci_bus_num(bus) << 16) | (d->devfn << 8);
+
+    return pci_data_read(bus, addr | bdf, 1);
+}
+
+/*
+ * Program the base address of one region of device d and turn on the
+ * matching bit in PCI_COMMAND.
+ *
+ * The address is written at offset 0x30 for PCI_ROM_SLOT and at
+ * 0x10 + 4 * region_num otherwise.  Bit 0 of the command word is set for
+ * a non-ROM region whose type has PCI_ADDRESS_SPACE_IO; bit 1 is set in
+ * every other case.
+ */
+static void pci_set_io_region_addr(PCIDevice *d, int region_num, uint32_t addr)
+{
+    const int is_rom = (region_num == PCI_ROM_SLOT);
+    uint32_t bar_ofs = is_rom ? 0x30 : 0x10 + region_num * 4;
+    PCIIORegion *region;
+    uint16_t command;
+
+    pci_config_writel(d, bar_ofs, addr);
+    region = &d->io_regions[region_num];
+
+    /* enable memory mappings */
+    command = pci_config_readw(d, PCI_COMMAND);
+    if (!is_rom && (region->type & PCI_ADDRESS_SPACE_IO))
+        command |= 1;
+    else
+        command |= 2;
+    pci_config_writew(d, PCI_COMMAND, command);
+}
+
+static void pci_bios_init_device(PCIDevice *d) /* per-device setup: assign region addresses by class/vendor/device, then set the interrupt line; passed to pci_for_each_device() by pci_setup() */
+{
+ int class;
+ PCIIORegion *r;
+ uint32_t *paddr; /* points at whichever allocation cursor (I/O or memory) the current region uses */
+ int i, pin, pic_irq, vendor_id, device_id;
+
+ class = pci_config_readw(d, PCI_CLASS_DEVICE);
+ vendor_id = pci_config_readw(d, PCI_VENDOR_ID);
+ device_id = pci_config_readw(d, PCI_DEVICE_ID);
+ switch(class) { /* dispatch on the 16-bit value read from PCI_CLASS_DEVICE */
+ case 0x0101:
+ if (vendor_id == 0x8086 && device_id == 0x7010) {
+ /* PIIX3 IDE */
+ pci_config_writew(d, 0x40, 0x8000); // enable IDE0
+ pci_config_writew(d, 0x42, 0x8000); // enable IDE1
+ goto default_map; /* after the enables, regions are placed by the generic allocator below */
+ } else {
+ /* IDE: we map it as in ISA mode */
+ pci_set_io_region_addr(d, 0, 0x1f0);
+ pci_set_io_region_addr(d, 1, 0x3f4);
+ pci_set_io_region_addr(d, 2, 0x170);
+ pci_set_io_region_addr(d, 3, 0x374);
+ }
+ break;
+ case 0x0680:
+ if (vendor_id == 0x8086 && device_id == 0x7113) {
+ /*
+ * PIIX4 ACPI PM.
+ * Special device with special PCI config space. No ordinary BARs.
+ */
+ pci_config_writew(d, 0x20, 0x0000); // No smb bus IO enable
+ pci_config_writew(d, 0x22, 0x0000);
+ pci_config_writew(d, 0x3c, 0x0009); // Hardcoded IRQ9
+ pci_config_writew(d, 0x3d, 0x0001); /* NOTE(review): 2-byte write at 0x3d overlaps the previous 2-byte write at 0x3c and also covers 0x3e - confirm intended */
+ }
+ break;
+ case 0x0300:
+ if (vendor_id != 0x1234) /* every vendor other than 0x1234 gets the generic mapping */
+ goto default_map;
+ /* VGA: map frame buffer to default Bochs VBE address */
+ pci_set_io_region_addr(d, 0, 0xE0000000);
+ break;
+ case 0x0800:
+ /* PIC */
+ vendor_id = pci_config_readw(d, PCI_VENDOR_ID); /* re-reads the IDs already fetched before the switch */
+ device_id = pci_config_readw(d, PCI_DEVICE_ID);
+ if (vendor_id == 0x1014) {
+ /* IBM */
+ if (device_id == 0x0046 || device_id == 0xFFFF) {
+ /* MPIC & MPIC2 */
+ pci_set_io_region_addr(d, 0, 0x80800000 + 0x00040000);
+ }
+ }
+ break;
+ case 0xff00:
+ if (vendor_id == 0x0106b &&
+ (device_id == 0x0017 || device_id == 0x0022)) {
+ /* macio bridge */
+ pci_set_io_region_addr(d, 0, 0x80800000);
+ }
+ break;
+ default:
+ default_map: /* also reached by goto from the 0x0101 and 0x0300 cases */
+ /* default memory mappings */
+ for(i = 0; i < PCI_NUM_REGIONS; i++) {
+ r = &d->io_regions[i];
+ if (r->size) { /* regions with size 0 are skipped */
+ if (r->type & PCI_ADDRESS_SPACE_IO)
+ paddr = &pci_bios_io_addr;
+ else
+ paddr = &pci_bios_mem_addr;
+ *paddr = (*paddr + r->size - 1) & ~(r->size - 1); /* round the cursor up to a multiple of r->size; the mask trick is only exact when r->size is a power of two */
+ pci_set_io_region_addr(d, i, *paddr);
+ *paddr += r->size; /* advance the cursor past the region just placed */
+ }
+ }
+ break;
+ }
+
+ /* map the interrupt */
+ pin = pci_config_readb(d, PCI_INTERRUPT_PIN);
+ if (pin != 0) { /* a value of 0 leaves PCI_INTERRUPT_LINE untouched */
+ pin = pci_slot_get_pirq(d, pin - 1); /* pin is reused to hold the index (0..3) into pci_irqs[] */
+ pic_irq = pci_irqs[pin];
+ pci_config_writeb(d, PCI_INTERRUPT_LINE, pic_irq);
+ }
+}
+
+/*
+ * This function initializes the PCI devices as a normal PCI BIOS
+ * would do. It is provided just in case the BIOS has no support for
+ * PCI.
+ */
+void pci_setup(void)
+{
+    uint8_t elcr[2] = { 0x00, 0x00 };
+    int pirq;
+
+    /* starting points for the I/O and memory allocation cursors */
+    pci_bios_io_addr = 0xc000;
+    pci_bios_mem_addr = HVM_BELOW_4G_MMIO_START;
+
+    /* activate IRQ mappings */
+    for (pirq = 0; pirq < 4; pirq++) {
+        int host_irq = pci_irqs[pirq];
+
+        /* set to trigger level */
+        elcr[host_irq >> 3] |= (1 << (host_irq & 7));
+        /* activate irq remapping in PIIX */
+        pci_config_writeb(piix3_dev, 0x60 + pirq, host_irq);
+    }
+    isa_outb(elcr[0], 0x4d0);
+    isa_outb(elcr[1], 0x4d1);
+
+    /* run the per-device initialisation on every PCI device */
+    pci_for_each_device(pci_bios_init_device);
+}
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/ioemu/vl.c Wed Dec 13 22:52:02 2006 +0800
@@ -4441,6 +4441,11 @@ int qemu_loadvm(const char *filename)
qemu_fseek(f, cur_pos + record_len, SEEK_SET);
}
fclose(f);
+
+ /* del tmp file */
+ if (unlink(filename) == -1)
+ fprintf(stderr, "delete tmp qemu state file failed.\n");
+
ret = 0;
the_end:
if (saved_vm_running)
@@ -5027,6 +5032,7 @@ static QEMUResetEntry *first_reset_entry
static QEMUResetEntry *first_reset_entry;
int reset_requested;
int shutdown_requested;
+int suspend_requested;
static int powerdown_requested;
void qemu_register_reset(QEMUResetHandler *func, void *opaque)
@@ -5806,6 +5812,14 @@ int set_mm_mapping(int xc_handle, uint32
}
return 0;
+}
+
+void suspend(int sig)
+{
+    /*
+     * Signal handler installed for SIGUSR1: it only raises the
+     * suspend_requested flag that main_loop() polls (and logs).  The
+     * fprintf() calls were removed because stdio is not
+     * async-signal-safe; a signal arriving while the interrupted code is
+     * inside stdio on the same stream could deadlock or corrupt it.
+     * NOTE(review): the flag itself should ideally be declared
+     * volatile sig_atomic_t.
+     */
+    (void)sig;
+    suspend_requested = 1;
}
#if defined(__i386__) || defined(__x86_64__)
@@ -6709,8 +6723,12 @@ int main(int argc, char **argv)
}
} else
#endif
- if (loadvm)
+ if (loadvm) {
+ /*XXX: ugly, since pci_bios_init are moved to hvmloader*/
+ extern void pci_setup(void);
+ pci_setup();
qemu_loadvm(loadvm);
+ }
{
/* XXX: simplify init */
@@ -6719,6 +6737,26 @@ int main(int argc, char **argv)
vm_start();
}
}
+
+ /* register signal for the suspend request when save */
+ {
+ struct sigaction act;
+ sigset_t set;
+ act.sa_handler = suspend;
+ act.sa_flags = SA_RESTART;
+ sigemptyset(&act.sa_mask);
+
+ sigaction(SIGUSR1, &act, NULL);
+
+ /* control panel mask some signals when spawn qemu, need unmask here*/
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ sigaddset(&set, SIGTERM);
+ if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1)
+ fprintf(stderr, "unblock signal fail, possible issue for HVM
save!\n");
+
+ }
+
main_loop();
quit_timers();
return 0;
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/libxc/Makefile
--- a/tools/libxc/Makefile Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/libxc/Makefile Wed Dec 13 22:52:02 2006 +0800
@@ -27,7 +27,7 @@ GUEST_SRCS-$(CONFIG_X86) += xc_linux_bui
GUEST_SRCS-$(CONFIG_X86) += xc_linux_build.c
GUEST_SRCS-$(CONFIG_IA64) += xc_linux_build.c
GUEST_SRCS-$(CONFIG_MIGRATE) += xc_linux_restore.c xc_linux_save.c
-GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
+GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_restore.c xc_hvm_save.c
-include $(XEN_TARGET_ARCH)/Makefile
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/libxc/xc_domain.c Wed Dec 13 22:52:02 2006 +0800
@@ -233,6 +233,50 @@ int xc_domain_getinfolist(int xc_handle,
unlock_pages(info, max_domains*sizeof(xc_domaininfo_t));
return ret;
+}
+
+/*
+ * Fetch the HVM context of domain domid into *hvm_ctxt (used for save).
+ *
+ * The buffer is mlock()ed for the duration of the XEN_DOMCTL_gethvmcontext
+ * call.  Returns the mlock() result if locking fails, otherwise the
+ * do_domctl() result.
+ */
+int xc_domain_hvm_getcontext(int xc_handle,
+                             uint32_t domid,
+                             hvm_domain_context_t *hvm_ctxt)
+{
+    int ret;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_gethvmcontext;
+    domctl.domain = (domid_t)domid;
+    set_xen_guest_handle(domctl.u.hvmcontext.ctxt, hvm_ctxt);
+
+    ret = mlock(hvm_ctxt, sizeof(*hvm_ctxt));
+    if ( ret == 0 )
+    {
+        ret = do_domctl(xc_handle, &domctl);
+        safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt));
+    }
+
+    return ret;
+}
+
+/*
+ * Push the HVM context in *hvm_ctxt into domain domid (used for restore).
+ *
+ * The buffer is mlock()ed for the duration of the XEN_DOMCTL_sethvmcontext
+ * call.  Returns the mlock() result if locking fails, otherwise the
+ * do_domctl() result.
+ */
+int xc_domain_hvm_setcontext(int xc_handle,
+                             uint32_t domid,
+                             hvm_domain_context_t *hvm_ctxt)
+{
+    int ret;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_sethvmcontext;
+    domctl.domain = domid;
+    set_xen_guest_handle(domctl.u.hvmcontext.ctxt, hvm_ctxt);
+
+    ret = mlock(hvm_ctxt, sizeof(*hvm_ctxt));
+    if ( ret == 0 )
+    {
+        ret = do_domctl(xc_handle, &domctl);
+        safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt));
+    }
+
+    return ret;
}
int xc_vcpu_getcontext(int xc_handle,
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/libxc/xc_hvm_build.c Wed Dec 13 22:52:02 2006 +0800
@@ -86,7 +86,7 @@ static void build_e820map(void *e820_pag
/* 0x0-0x9F000: Ordinary RAM. */
e820entry[nr_map].addr = 0x0;
- e820entry[nr_map].size = 0x9F000;
+ e820entry[nr_map].size = 0x90000;
e820entry[nr_map].type = E820_RAM;
nr_map++;
@@ -96,7 +96,7 @@ static void build_e820map(void *e820_pag
* TODO: SMBIOS tables should be moved higher (>=0xE0000).
* They are unusually low in our memory map: could cause problems?
*/
- e820entry[nr_map].addr = 0x9F000;
+ e820entry[nr_map].addr = 0x90000;
e820entry[nr_map].size = 0x1000;
e820entry[nr_map].type = E820_RESERVED;
nr_map++;
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/libxc/xc_linux_save.c Wed Dec 13 22:52:02 2006 +0800
@@ -261,15 +261,6 @@ static int ratewrite(int io_fd, void *bu
#endif
-static inline ssize_t write_exact(int fd, void *buf, size_t count)
-{
- if(write(fd, buf, count) != count)
- return 0;
- return 1;
-}
-
-
-
static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
xc_shadow_op_stats_t *stats, int print)
{
@@ -356,7 +347,7 @@ static int analysis_phase(int xc_handle,
}
-static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
int dom, xc_dominfo_t *info,
vcpu_guest_context_t *ctxt)
{
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/libxc/xenctrl.h Wed Dec 13 22:52:02 2006 +0800
@@ -313,6 +313,30 @@ int xc_domain_getinfolist(int xc_handle,
xc_domaininfo_t *info);
/**
+ * This function returns information about the context of a hvm domain
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to get information from
+ * @parm hvm_ctxt a pointer to a structure to store the execution context of
+ * the hvm domain
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_getcontext(int xc_handle,
+ uint32_t domid,
+ hvm_domain_context_t *hvm_ctxt);
+
+/**
+ * This function will set the context for hvm domain
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to set the hvm domain context for
+ * @parm hvm_ctxt pointer to the hvm context with the values to set
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_setcontext(int xc_handle,
+ uint32_t domid,
+ hvm_domain_context_t *hvm_ctxt);
+
+/**
* This function returns information about the execution context of a
* particular vcpu of a domain.
*
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/libxc/xenguest.h Wed Dec 13 22:52:02 2006 +0800
@@ -11,6 +11,7 @@
#define XCFLAGS_LIVE 1
#define XCFLAGS_DEBUG 2
+#define XCFLAGS_HVM 4
/**
@@ -25,6 +26,13 @@ int xc_linux_save(int xc_handle, int io_
uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
int (*suspend)(int domid));
+/**
+ * This function will save a hvm domain running unmodified guest.
+ * @return 0 on success, -1 on failure
+ */
+int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+ uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
+ int (*suspend)(int domid));
/**
* This function will restore a saved domain running Linux.
@@ -41,6 +49,18 @@ int xc_linux_restore(int xc_handle, int
unsigned long nr_pfns, unsigned int store_evtchn,
unsigned long *store_mfn, unsigned int console_evtchn,
unsigned long *console_mfn);
+
+/**
+ * This function will restore a saved hvm domain running unmodified guest.
+ *
+ * @parm store_mfn pass mem size & returned with the mfn of the store page
+ * @return 0 on success, -1 on failure
+ */
+int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
+ unsigned long nr_pfns, unsigned int store_evtchn,
+ unsigned long *store_mfn, unsigned int console_evtchn,
+ unsigned long *console_mfn,
+ unsigned int pae, unsigned int apic);
/**
* This function will create a domain for a paravirtualized Linux
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/libxc/xg_save_restore.h Wed Dec 13 22:52:02 2006 +0800
@@ -65,6 +65,16 @@ static int get_platform_info(int xc_hand
return 1;
}
+/*
+ * Write exactly count bytes from buf to fd.
+ *
+ * write() may legitimately transfer fewer bytes than requested (pipes,
+ * sockets), so keep writing the remainder instead of treating a short
+ * write as a failure.  A write() result of -1, or of 0 while bytes are
+ * still outstanding, aborts the transfer.
+ *
+ * Returns 1 when everything was written, 0 on failure (same convention as
+ * before).  NOTE(review): EINTR is not retried here.
+ */
+static inline ssize_t write_exact(int fd, void *buf, size_t count)
+{
+    const char *p = buf;
+
+    while ( count > 0 )
+    {
+        ssize_t n = write(fd, p, count);
+
+        if ( n <= 0 )
+            return 0;
+        p += n;
+        count -= (size_t)n;
+    }
+    return 1;
+}
+
+extern int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+ int dom, xc_dominfo_t *info,
+ vcpu_guest_context_t *ctxt);
/*
** Save/restore deal with the mfn_to_pfn (M2P) and pfn_to_mfn (P2M) tables.
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Dec 13 22:52:02 2006 +0800
@@ -158,6 +158,20 @@ static PyObject *pyxc_domain_destroy(XcO
static PyObject *pyxc_domain_destroy(XcObject *self, PyObject *args)
{
return dom_op(self, args, xc_domain_destroy);
+}
+
+/*
+ * Python binding: domain_shutdown(dom, reason).
+ * Parses two integers, calls xc_domain_shutdown() and returns the shared
+ * `zero` object on success; a non-zero result is turned into an exception
+ * via pyxc_error_to_exception().
+ */
+static PyObject *pyxc_domain_shutdown(XcObject *self, PyObject *args)
+{
+    uint32_t dom, reason;
+    int rc;
+
+    if (!PyArg_ParseTuple(args, "ii", &dom, &reason))
+        return NULL;
+
+    rc = xc_domain_shutdown(self->xc_handle, dom, reason);
+    if (rc != 0)
+        return pyxc_error_to_exception();
+
+    Py_INCREF(zero);
+    return zero;
}
@@ -969,6 +983,14 @@ static PyMethodDef pyxc_methods[] = {
METH_VARARGS, "\n"
"Destroy a domain.\n"
" dom [int]: Identifier of domain to be destroyed.\n\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
+ { "domain_shutdown",
+ (PyCFunction)pyxc_domain_shutdown,
+ METH_VARARGS, "\n"
+ "Shutdown a domain.\n"
+ " dom [int, 0]: Domain identifier to use.\n"
+ " reason [int, 0]: Reason for shutdown.\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "vcpu_setaffinity",
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/python/xen/xend/XendCheckpoint.py Wed Dec 13 22:52:02 2006 +0800
@@ -22,11 +22,14 @@ from xen.xend.XendConstants import *
from xen.xend.XendConstants import *
SIGNATURE = "LinuxGuestRecord"
+QEMU_SIGNATURE = "QemuDeviceModelRecord"
+dm_batch = 512
XC_SAVE = "xc_save"
XC_RESTORE = "xc_restore"
sizeof_int = calcsize("i")
+sizeof_unsigned_int = calcsize("I")
sizeof_unsigned_long = calcsize("L")
@@ -69,6 +72,11 @@ def save(fd, dominfo, network, live, dst
"could not write guest state file: config len")
write_exact(fd, config, "could not write guest state file: config")
+ image_cfg = dominfo.info.get('image', {})
+ hvm = image_cfg.has_key('hvm')
+
+ if hvm:
+ log.info("save hvm domain")
# xc_save takes three customization parameters: maxit, max_f, and
# flags the last controls whether or not save is 'live', while the
# first two further customize behaviour when 'live' save is
@@ -76,7 +84,7 @@ def save(fd, dominfo, network, live, dst
# libxenguest; see the comments and/or code in xc_linux_save() for
# more information.
cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
- str(dominfo.getDomid()), "0", "0", str(int(live)) ]
+ str(dominfo.getDomid()), "0", "0", str(int(live) | (int(hvm) <<
2)) ]
log.debug("[xc_save]: %s", string.join(cmd))
def saveInputHandler(line, tochild):
@@ -90,11 +98,28 @@ def save(fd, dominfo, network, live, dst
log.info("Domain %d suspended.", dominfo.getDomid())
dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
domain_name)
+ #send signal to device model for save
+ if hvm:
+ log.info("release_devices for hvm domain")
+ dominfo._releaseDevices(True)
tochild.write("done\n")
tochild.flush()
log.debug('Written done')
forkHelper(cmd, fd, saveInputHandler, False)
+
+ # put qemu device model state
+ if hvm:
+ write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature")
+ qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(),
os.O_RDONLY)
+ while True:
+ buf = os.read(qemu_fd, dm_batch)
+ if len(buf):
+ write_exact(fd, buf, "could not write device model state")
+ else:
+ break
+ os.close(qemu_fd)
+ os.remove("/tmp/xen.qemu-dm.%d" % dominfo.getDomid())
dominfo.destroyDomain()
try:
@@ -147,19 +172,38 @@ def restore(xd, fd, dominfo = None, paus
assert store_port
assert console_port
+ #if hvm, pass mem size to calculate the store_mfn
+ hvm = 0
+ apic = 0
+ pae = 0
+ image_cfg = dominfo.info.get('image', {})
+ hvm = image_cfg.has_key('hvm')
+ if hvm:
+ #the 'memory' in config has been removed
+ hvm = dominfo.info['memory_static_min']
+ apic = dominfo.info['image']['hvm'].get('apic', 0)
+ pae = dominfo.info['image']['hvm'].get('pae', 0)
+ log.info("restore hvm domain %d, mem=%d, apic=%d, pae=%d",
dominfo.domid, hvm, apic, pae)
+
try:
- l = read_exact(fd, sizeof_unsigned_long,
- "not a valid guest state file: pfn count read")
- nr_pfns = unpack("L", l)[0] # native sizeof long
+ if hvm:
+ l = read_exact(fd, sizeof_unsigned_int,
+ "not a valid hvm guest state file: pfn count read")
+ nr_pfns = unpack("I", l)[0] # native sizeof int
+ else:
+ l = read_exact(fd, sizeof_unsigned_long,
+ "not a valid guest state file: pfn count read")
+ nr_pfns = unpack("L", l)[0] # native sizeof long
if nr_pfns > 16*1024*1024: # XXX
raise XendError(
"not a valid guest state file: pfn count out of range")
balloon.free(xc.pages_to_kib(nr_pfns))
+ log.info("HVM restore:balloon free 0x%x pages.", nr_pfns)
cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
fd, dominfo.getDomid(), nr_pfns,
- store_port, console_port])
+ store_port, console_port, hvm, pae, apic])
log.debug("[xc_restore]: %s", string.join(cmd))
handler = RestoreInputHandler()
@@ -169,10 +213,29 @@ def restore(xd, fd, dominfo = None, paus
if handler.store_mfn is None or handler.console_mfn is None:
raise XendError('Could not read store/console MFN')
- os.read(fd, 1) # Wait for source to close connection
dominfo.waitForDevices() # Wait for backends to set up
if not paused:
dominfo.unpause()
+
+ # get qemu state and create a tmp file for dm restore
+ if hvm:
+ qemu_signature = read_exact(fd, len(QEMU_SIGNATURE),
+ "not a valid device model state: signature read")
+ if qemu_signature != QEMU_SIGNATURE:
+ raise XendError("not a valid device model state: found '%s'" %
+ qemu_signature)
+ qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(),
+ os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
+ while True:
+ buf = os.read(fd, dm_batch)
+ if len(buf):
+ write_exact(qemu_fd, buf, "could not write dm state to tmp
file")
+ else:
+ break
+ os.close(qemu_fd)
+
+ os.read(fd, 1) # Wait for source to close connection
+
dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/python/xen/xend/XendDomainInfo.py Wed Dec 13 22:52:02 2006 +0800
@@ -488,6 +488,16 @@ class XendDomainInfo:
self._removeVm('xend/previous_restart_time')
self.storeDom("control/shutdown", reason)
+ ## also issue the shutdown hypercall for hvm domains, besides the xenstore write
+ image_cfg = self.info.get('image', {})
+ hvm = image_cfg.has_key('hvm')
+ if hvm:
+ for code in DOMAIN_SHUTDOWN_REASONS.keys():
+ if DOMAIN_SHUTDOWN_REASONS[code] == reason:
+ break
+ xc.domain_shutdown(self.domid, code)
+
+
def pause(self):
"""Pause domain
@@ -1203,8 +1213,11 @@ class XendDomainInfo:
if self.image:
self.image.createDeviceModel()
- def _releaseDevices(self):
+ def _releaseDevices(self, suspend = False):
"""Release all domain's devices. Nothrow guarantee."""
+ if suspend and self.image:
+ self.image.destroy(suspend)
+ return
while True:
t = xstransact("%s/device" % self.dompath)
@@ -1473,6 +1486,16 @@ class XendDomainInfo:
self.console_mfn = console_mfn
self._introduceDomain()
+ image_cfg = self.info.get('image', {})
+ hvm = image_cfg.has_key('hvm')
+ if hvm:
+ self.image = image.create(self,
+ self.info,
+ self.info['image'],
+ self.info['devices'])
+ if self.image:
+ self.image.createDeviceModel(True)
+ self.image.register_shutdown_watch()
self._storeDomDetails()
self._registerWatches()
self.refreshShutdown()
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/python/xen/xend/image.py Wed Dec 13 22:52:02 2006 +0800
@@ -157,7 +157,7 @@ class ImageHandler:
"""Build the domain. Define in subclass."""
raise NotImplementedError()
- def createDeviceModel(self):
+ def createDeviceModel(self, restore = False):
"""Create device model for the domain (define in subclass if
needed)."""
pass
@@ -405,7 +405,7 @@ class HVMImageHandler(ImageHandler):
return ret
- def createDeviceModel(self):
+ def createDeviceModel(self, restore = False):
if self.pid:
return
# Execute device model.
@@ -414,6 +414,8 @@ class HVMImageHandler(ImageHandler):
args = args + ([ "-d", "%d" % self.vm.getDomid(),
"-m", "%s" % (self.getRequiredInitialReservation() / 1024)])
args = args + self.dmargs
+ if restore:
+ args = args + ([ "-loadvm", "/tmp/xen.qemu-dm.%d" %
self.vm.getDomid() ])
env = dict(os.environ)
if self.display:
env['DISPLAY'] = self.display
@@ -432,12 +434,16 @@ class HVMImageHandler(ImageHandler):
self.register_reboot_feature_watch()
self.pid = self.vm.gatherDom(('image/device-model-pid', int))
- def destroy(self):
+ def destroy(self, suspend = False):
self.unregister_shutdown_watch()
self.unregister_reboot_feature_watch();
if self.pid:
try:
- os.kill(self.pid, signal.SIGKILL)
+ sig = signal.SIGKILL
+ if suspend:
+ log.info("use sigusr1 to signal qemu %d", self.pid)
+ sig = signal.SIGUSR1
+ os.kill(self.pid, sig)
except OSError, exn:
log.exception(exn)
try:
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/xcutils/xc_restore.c Wed Dec 13 22:52:02 2006 +0800
@@ -19,12 +19,13 @@ main(int argc, char **argv)
main(int argc, char **argv)
{
unsigned int xc_fd, io_fd, domid, nr_pfns, store_evtchn, console_evtchn;
+ unsigned int hvm, pae, apic;
int ret;
unsigned long store_mfn, console_mfn;
- if (argc != 6)
+ if (argc != 9)
errx(1,
- "usage: %s iofd domid nr_pfns store_evtchn console_evtchn",
+ "usage: %s iofd domid nr_pfns store_evtchn console_evtchn hvm pae
apic",
argv[0]);
xc_fd = xc_interface_open();
@@ -36,9 +37,19 @@ main(int argc, char **argv)
nr_pfns = atoi(argv[3]);
store_evtchn = atoi(argv[4]);
console_evtchn = atoi(argv[5]);
+ hvm = atoi(argv[6]);
+ pae = atoi(argv[7]);
+ apic = atoi(argv[8]);
- ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
- &store_mfn, console_evtchn, &console_mfn);
+ if (hvm) {
+ /* pass the memsize to xc_hvm_restore to find the store_mfn */
+ store_mfn = hvm;
+ ret = xc_hvm_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
+ &store_mfn, console_evtchn, &console_mfn, pae, apic);
+ } else
+ ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
+ &store_mfn, console_evtchn, &console_mfn);
+
if (ret == 0) {
printf("store-mfn %li\n", store_mfn);
printf("console-mfn %li\n", console_mfn);
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c Fri Sep 15 17:05:38 2006 +0800
+++ b/tools/xcutils/xc_save.c Wed Dec 13 22:52:02 2006 +0800
@@ -51,7 +51,10 @@ main(int argc, char **argv)
max_f = atoi(argv[4]);
flags = atoi(argv[5]);
- ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
+ if (flags & XCFLAGS_HVM)
+ ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
+ else
+ ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags,
&suspend);
xc_interface_close(xc_fd);
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/arch/x86/domain.c Wed Dec 13 22:52:02 2006 +0800
@@ -330,6 +330,7 @@ int arch_set_info_guest(
else
{
hvm_load_cpu_guest_regs(v, &v->arch.guest_context.user_regs);
+ hvm_load_cpu_context(v, &v->arch.guest_context.hvmcpu_ctxt);
}
if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/arch/x86/domctl.c Wed Dec 13 22:52:02 2006 +0800
@@ -297,6 +297,7 @@ void arch_getdomaininfo_ctxt(
if ( is_hvm_vcpu(v) )
{
hvm_store_cpu_guest_regs(v, &c->user_regs, c->ctrlreg);
+ hvm_save_cpu_context(v, &c->hvmcpu_ctxt);
}
else
{
@@ -314,6 +315,22 @@ void arch_getdomaininfo_ctxt(
c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table));
c->vm_assist = v->domain->vm_assist;
+}
+
+/*
+ * Save the HVM context reachable from vcpu v into *c.
+ * Returns -1 when v is not an HVM vcpu, otherwise the hvm_save() result.
+ */
+int arch_gethvm_ctxt(
+    struct vcpu *v, struct hvm_domain_context *c)
+{
+    return is_hvm_vcpu(v) ? hvm_save(v, c) : -1;
+}
+
+/*
+ * Load the HVM context in *c through vcpu v.
+ * Returns -1 when v is not an HVM vcpu (the same guard arch_gethvm_ctxt()
+ * applies, so HVM state is never loaded into a non-HVM guest), otherwise
+ * the hvm_load() result.
+ */
+int arch_sethvm_ctxt(
+    struct vcpu *v, struct hvm_domain_context *c)
+{
+    if ( !is_hvm_vcpu(v) )
+        return -1;
+
+    return hvm_load(v, c);
}
/*
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/arch/x86/hvm/hvm.c Wed Dec 13 22:52:02 2006 +0800
@@ -182,9 +182,18 @@ int hvm_domain_initialise(struct domain
void hvm_domain_destroy(struct domain *d)
{
+ HVMStateEntry *se, *dse;
kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
rtc_deinit(d);
pmtimer_deinit(d);
+
+ se = d->arch.hvm_domain.first_se;
+ while (se) {
+ dse = se;
+ se = se->next;
+ xfree(dse);
+ }
+
if ( d->arch.hvm_domain.shared_page_va )
unmap_domain_page_global(
@@ -225,6 +234,9 @@ int hvm_vcpu_initialise(struct vcpu *v)
pit_init(v, cpu_khz);
rtc_init(v, RTC_PORT(0), RTC_IRQ);
pmtimer_init(v, ACPI_PM_TMR_BLK_ADDRESS);
+
+ /* init hvm sharepage */
+ shpage_init(v->domain, get_sp(v->domain));
/* Init guest TSC to start from zero. */
hvm_set_guest_time(v, 0);
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/arch/x86/hvm/i8254.c Wed Dec 13 22:52:02 2006 +0800
@@ -203,11 +203,11 @@ static inline void pit_load_count(PITCha
switch (s->mode) {
case 2:
/* create periodic time */
- s->pt = create_periodic_time (period, 0, 0, pit_time_fired, s);
+ s->pt = create_periodic_time (current->domain, period, 0, 0,
pit_time_fired, s);
break;
case 1:
/* create one shot time */
- s->pt = create_periodic_time (period, 0, 1, pit_time_fired, s);
+ s->pt = create_periodic_time (current->domain, period, 0, 1,
pit_time_fired, s);
#ifdef DEBUG_PIT
printk("HVM_PIT: create one shot time.\n");
#endif
@@ -345,6 +345,152 @@ static uint32_t pit_ioport_read(void *op
return ret;
}
+#ifdef HVM_DEBUG_SUSPEND
+/*
+ * Debug dump of the virtual PIT, compiled in only with HVM_DEBUG_SUSPEND.
+ * For each of the three channels the fields are printed in this order:
+ * count, latched_count, count_latched, status_latched, status, read_state,
+ * write_state, write_latch, rw_mode, mode, bcd, gate, count_load_time.
+ * The output lines carry no field names, so the order above is the only
+ * way to tell them apart.
+ */
+static void pit_info(PITState *pit)
+{
+ PITChannelState *s;
+ int i;
+
+ for(i = 0; i < 3; i++) {
+ printk("*****pit channel %d's state:*****\n", i);
+ s = &pit->channels[i];
+ printk("pit 0x%x.\n", s->count);
+ printk("pit 0x%x.\n", s->latched_count);
+ printk("pit 0x%x.\n", s->count_latched);
+ printk("pit 0x%x.\n", s->status_latched);
+ printk("pit 0x%x.\n", s->status);
+ printk("pit 0x%x.\n", s->read_state);
+ printk("pit 0x%x.\n", s->write_state);
+ printk("pit 0x%x.\n", s->write_latch);
+ printk("pit 0x%x.\n", s->rw_mode);
+ printk("pit 0x%x.\n", s->mode);
+ printk("pit 0x%x.\n", s->bcd);
+ printk("pit 0x%x.\n", s->gate);
+ printk("pit %"PRId64"\n", s->count_load_time);
+
+ /* Timer fields, in order: enabled, one_shot, irq, first_injected,
+  * pending_intr_nr, period, period_cycles, last_plt_gtime. */
+ if (s->pt) {
+ struct periodic_time *pt = s->pt;
+ printk("pit channel %d has a periodic timer:\n", i);
+ printk("pt %d.\n", pt->enabled);
+ printk("pt %d.\n", pt->one_shot);
+ printk("pt %d.\n", pt->irq);
+ printk("pt %d.\n", pt->first_injected);
+
+ printk("pt %d.\n", pt->pending_intr_nr);
+ printk("pt %d.\n", pt->period);
+ printk("pt %"PRId64"\n", pt->period_cycles);
+ printk("pt %"PRId64"\n", pt->last_plt_gtime);
+ }
+ }
+
+}
+#else
+/* Non-debug builds: the dump is a no-op. */
+static void pit_info(PITState *pit)
+{
+}
+#endif
+
+/*
+ * Serialise the virtual PIT of the domain passed in 'opaque' into 'h'.
+ *
+ * Record layout: the 13 per-channel fields for channels 0..2, followed by
+ * one timer trailer: channel index (8 bit), first_injected (8 bit),
+ * pending_intr_nr (32 bit), last_plt_gtime (64 bit).
+ *
+ * The trailer describes the first channel that has a periodic timer
+ * attached.  When no channel has one, the index is written as 0xff and the
+ * three timer fields as zero, so the record keeps a fixed size.
+ */
+static void pit_save(hvm_domain_context_t *h, void *opaque)
+{
+    struct domain *d = opaque;
+    PITState *pit = &d->arch.hvm_domain.pl_time.vpit;
+    PITChannelState *s;
+    struct periodic_time *pt = NULL;
+    int i, pti = -1;
+
+    pit_info(pit);
+
+    for (i = 0; i < 3; i++) {
+        s = &pit->channels[i];
+        hvm_put_32u(h, s->count);
+        hvm_put_16u(h, s->latched_count);
+        hvm_put_8u(h, s->count_latched);
+        hvm_put_8u(h, s->status_latched);
+        hvm_put_8u(h, s->status);
+        hvm_put_8u(h, s->read_state);
+        hvm_put_8u(h, s->write_state);
+        hvm_put_8u(h, s->write_latch);
+        hvm_put_8u(h, s->rw_mode);
+        hvm_put_8u(h, s->mode);
+        hvm_put_8u(h, s->bcd);
+        hvm_put_8u(h, s->gate);
+        hvm_put_64u(h, s->count_load_time);
+
+        /* remember the first channel that owns a periodic timer */
+        if (s->pt && pti == -1)
+            pti = i;
+    }
+
+    /*
+     * save guest time
+     *
+     * Only index channels[] when a timer was actually found: with pti
+     * still -1 the lookup would read before the start of the array.
+     */
+    if (pti != -1)
+        pt = pit->channels[pti].pt;
+
+    hvm_put_8u(h, (pti == -1) ? 0xff : pti);
+    hvm_put_8u(h, pt ? pt->first_injected : 0);
+    hvm_put_32u(h, pt ? pt->pending_intr_nr : 0);
+    hvm_put_64u(h, pt ? pt->last_plt_gtime : 0);
+}
+
+/*
+ * Restore the virtual PIT of the domain passed in 'opaque' from 'h'.
+ *
+ * Reads the 13 per-channel fields for channels 0..2, then the timer
+ * trailer (channel index, first_injected, pending_intr_nr,
+ * last_plt_gtime).  The whole trailer is consumed before any decision is
+ * taken, so the stream position is at the end of the record on every
+ * return path.
+ *
+ * A channel index of 0xff (-1 truncated to 8 bits) means no channel had a
+ * timer when the state was saved; nothing is re-armed in that case.
+ *
+ * Returns 0 on success, -EINVAL for an unknown version, a bad channel
+ * index, or a channel mode that does not use the periodic timer.
+ */
+static int pit_load(hvm_domain_context_t *h, void *opaque, int version_id)
+{
+    struct domain *d = opaque;
+    PITState *pit = &d->arch.hvm_domain.pl_time.vpit;
+    PITChannelState *s;
+    int i, pti;
+    u32 period;
+    uint8_t first_injected;
+    uint32_t pending_intr_nr;
+    uint64_t last_plt_gtime;
+
+    if (version_id != 1)
+        return -EINVAL;
+
+    for (i = 0; i < 3; i++) {
+        s = &pit->channels[i];
+        s->count = hvm_get_32u(h);
+        s->latched_count = hvm_get_16u(h);
+        s->count_latched = hvm_get_8u(h);
+        s->status_latched = hvm_get_8u(h);
+        s->status = hvm_get_8u(h);
+        s->read_state = hvm_get_8u(h);
+        s->write_state = hvm_get_8u(h);
+        s->write_latch = hvm_get_8u(h);
+        s->rw_mode = hvm_get_8u(h);
+        s->mode = hvm_get_8u(h);
+        s->bcd = hvm_get_8u(h);
+        s->gate = hvm_get_8u(h);
+        s->count_load_time = hvm_get_64u(h);
+    }
+
+    /* Always read the full trailer, in the order it was written. */
+    pti = hvm_get_8u(h);
+    first_injected = hvm_get_8u(h);
+    pending_intr_nr = hvm_get_32u(h);
+    last_plt_gtime = hvm_get_64u(h);
+
+    if (pti == 0xff) {
+        /* no channel owned a timer at save time: nothing to re-arm */
+        pit_info(pit);
+        return 0;
+    }
+
+    if ( pti < 0 || pti > 2) {
+        printk("pit load get a wrong channel %d when HVM resume.\n", pti);
+        return -EINVAL;
+    }
+
+    s = &pit->channels[pti];
+    period = DIV_ROUND((s->count * 1000000000ULL), PIT_FREQ);
+
+    printk("recreate periodic timer %d in mode %d, freq=%u.\n", pti, s->mode,
+           period);
+    switch (s->mode) {
+    case 2:
+        /* create periodic time */
+        s->pt = create_periodic_time (d, period, 0, 0, pit_time_fired, s);
+        s->pt->first_injected = first_injected;
+        s->pt->pending_intr_nr = pending_intr_nr;
+        s->pt->last_plt_gtime = last_plt_gtime;
+        break;
+    case 1:
+        /* create one shot time; the saved injection state is not applied */
+        s->pt = create_periodic_time (d, period, 0, 1, pit_time_fired, s);
+        break;
+    default:
+        printk("pit mode %"PRId8" should not use periodic timer!\n",
+               s->mode);
+        return -EINVAL;
+    }
+
+    /*XXX: need set_guest_time here or do this when post_inject? */
+
+    pit_info(pit);
+
+    return 0;
+}
+
static void pit_reset(void *opaque)
{
PITState *pit = opaque;
@@ -373,6 +519,8 @@ void pit_init(struct vcpu *v, unsigned l
s->vcpu = v;
s++; s->vcpu = v;
s++; s->vcpu = v;
+
+ hvm_register_savevm(v->domain, "xen_hvm_i8254", PIT_BASE, 1, pit_save,
pit_load, v->domain);
register_portio_handler(v->domain, PIT_BASE, 4, handle_pit_io);
/* register the speaker port */
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/arch/x86/hvm/intercept.c Wed Dec 13 22:52:02 2006 +0800
@@ -29,6 +29,8 @@
#include <asm/current.h>
#include <io_ports.h>
#include <xen/event.h>
+#include <xen/compile.h>
+#include <public/version.h>
extern struct hvm_mmio_handler vlapic_mmio_handler;
@@ -314,13 +316,14 @@ void pickup_deactive_ticks(struct period
* period: fire frequency in ns.
*/
struct periodic_time * create_periodic_time(
+ struct domain *d,
u32 period,
char irq,
char one_shot,
time_cb *cb,
void *data)
{
- struct periodic_time *pt =
&(current->domain->arch.hvm_domain.pl_time.periodic_tm);
+ struct periodic_time *pt = &(d->arch.hvm_domain.pl_time.periodic_tm);
if ( pt->enabled ) {
stop_timer (&pt->timer);
pt->enabled = 0;
@@ -353,6 +356,278 @@ void destroy_periodic_time(struct period
stop_timer(&pt->timer);
pt->enabled = 0;
}
+}
+
+/* save/restore support */
+#define HVM_FILE_MAGIC 0x54381286
+#define HVM_FILE_VERSION 0x00000001
+
+/*
+ * Register a save/load handler pair for one device instance of domain 'd'.
+ *
+ * A new HVMStateEntry is allocated, filled from the arguments and appended
+ * to the tail of d's entry list, so entries keep registration order.
+ * At most HVM_SE_IDSTR_LEN bytes of 'idstr' are copied; a name of exactly
+ * that length is stored without a terminating NUL.
+ *
+ * Returns 0 on success, -1 if the entry cannot be allocated.
+ */
+int hvm_register_savevm(struct domain *d,
+                        const char *idstr,
+                        int instance_id,
+                        int version_id,
+                        SaveStateHandler *save_state,
+                        LoadStateHandler *load_state,
+                        void *opaque)
+{
+    HVMStateEntry *entry = xmalloc(struct HVMStateEntry);
+    HVMStateEntry **link;
+
+    if ( entry == NULL ) {
+        printk("allocat hvmstate entry fail.\n");
+        return -1;
+    }
+
+    strncpy(entry->idstr, idstr, HVM_SE_IDSTR_LEN);
+    entry->instance_id = instance_id;
+    entry->version_id = version_id;
+    entry->save_state = save_state;
+    entry->load_state = load_state;
+    entry->opaque = opaque;
+    entry->next = NULL;
+
+    /* walk to the terminating NULL link and hook the new entry in there */
+    for ( link = &d->arch.hvm_domain.first_se;
+          *link != NULL;
+          link = &(*link)->next )
+        continue;
+    *link = entry;
+
+    return 0;
+}
+
+/*
+ * Serialise the HVM device state of v's domain into context buffer 'h'.
+ *
+ * Stream layout:
+ *   magic (32) | file version (32) | changeset length (8) | changeset |
+ *   cpuid(1).eax (32) | one record per registered entry
+ * Each record is:
+ *   idstr length (8) | idstr | instance_id (32) | version_id (32) |
+ *   payload length (32) | payload written by the entry's save handler
+ * The payload length is written as 0 first and patched once the handler
+ * has run.
+ *
+ * Returns 0 on success, -1 for a non-HVM vcpu or when the resulting size
+ * reaches HVM_CTXT_SIZE.
+ */
+int hvm_save(struct vcpu *v, hvm_domain_context_t *h)
+{
+    uint32_t len, len_pos, cur_pos;
+    uint32_t eax, ebx, ecx, edx;
+    HVMStateEntry *se;
+    char *chgset;
+
+    if (!is_hvm_vcpu(v)) {
+        printk("hvm_save only for hvm guest!\n");
+        return -1;
+    }
+
+    memset(h, 0, sizeof(hvm_domain_context_t));
+    hvm_put_32u(h, HVM_FILE_MAGIC);
+    hvm_put_32u(h, HVM_FILE_VERSION);
+
+    /*
+     * save xen changeset
+     *
+     * Only the part after the last space is stored.  strrchr() returns
+     * NULL when the string has no space at all; fall back to the whole
+     * string then instead of computing NULL + 1.
+     */
+    chgset = strrchr(XEN_CHANGESET, ' ');
+    chgset = (chgset != NULL) ? chgset + 1 : (char *)XEN_CHANGESET;
+
+    len = strlen(chgset);
+    hvm_put_8u(h, len);
+    hvm_put_buffer(h, chgset, len);
+
+    /* save cpuid */
+    cpuid(1, &eax, &ebx, &ecx, &edx);
+    hvm_put_32u(h, eax);
+
+    for (se = v->domain->arch.hvm_domain.first_se; se != NULL; se = se->next) {
+        /* ID string (may be stored without a terminating NUL) */
+        len = strnlen(se->idstr, HVM_SE_IDSTR_LEN);
+        hvm_put_8u(h, len);
+        hvm_put_buffer(h, se->idstr, len);
+
+        hvm_put_32u(h, se->instance_id);
+        hvm_put_32u(h, se->version_id);
+
+        /* record size: placeholder now, patched after the handler ran */
+        len_pos = hvm_ctxt_tell(h);
+        hvm_put_32u(h, 0);
+
+        se->save_state(h, se->opaque);
+
+        cur_pos = hvm_ctxt_tell(h);
+        len = cur_pos - len_pos - 4;
+        hvm_ctxt_seek(h, len_pos);
+        hvm_put_32u(h, len);
+        hvm_ctxt_seek(h, cur_pos);
+    }
+
+    h->size = hvm_ctxt_tell(h);
+    hvm_ctxt_seek(h, 0);
+
+    if (h->size >= HVM_CTXT_SIZE) {
+        printk("hvm_domain_context overflow when hvm_save! need %"PRId32" bytes for use.\n", h->size);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Look up the registered state entry of domain 'd' whose id string matches
+ * 'idstr' (compared over at most HVM_SE_IDSTR_LEN bytes) and whose
+ * instance id equals 'instance_id'.  Returns the first match or NULL.
+ */
+static HVMStateEntry *find_se(struct domain *d, const char *idstr,
+                              int instance_id)
+{
+    HVMStateEntry *cur = d->arch.hvm_domain.first_se;
+
+    while (cur != NULL) {
+        if (strncmp(cur->idstr, idstr, HVM_SE_IDSTR_LEN) == 0 &&
+            cur->instance_id == instance_id)
+            break;
+        cur = cur->next;
+    }
+
+    return cur;
+}
+
+/*
+ * Restore the HVM device state of v's domain from context buffer 'h'.
+ *
+ * The header (magic, file version, changeset, cpuid(1).eax) is checked
+ * first: wrong magic or version is fatal, a different changeset or CPU
+ * only produces a warning.  Records are then read until the end of the
+ * context; each is dispatched to the handler registered under the same id
+ * string and instance id.  An unknown record or a failing handler is
+ * reported and skipped.  After each record the position is forced to the
+ * recorded end, whatever the handler consumed.
+ *
+ * Returns 0 on success, -1 for a non-HVM vcpu or a malformed image.
+ */
+int hvm_load(struct vcpu *v, hvm_domain_context_t *h)
+{
+    uint32_t len, rec_len, rec_pos, magic, instance_id, version_id;
+    uint32_t eax, ebx, ecx, edx;
+    HVMStateEntry *se;
+    /* +1: an id string of the maximum length still needs its terminator */
+    char idstr[HVM_SE_IDSTR_LEN + 1];
+    xen_changeset_info_t chgset;
+    char *cur_chgset;
+    int ret;
+
+    if (!is_hvm_vcpu(v)) {
+        printk("hvm_load only for hvm guest!\n");
+        return -1;
+    }
+
+    if (h->size >= HVM_CTXT_SIZE) {
+        printk("hvm_load fail! seems hvm_domain_context overflow when hvm_save! need %"PRId32" bytes.\n", h->size);
+        return -1;
+    }
+
+    hvm_ctxt_seek(h, 0);
+
+    magic = hvm_get_32u(h);
+    if (magic != HVM_FILE_MAGIC) {
+        printk("HVM restore magic dismatch!\n");
+        return -1;
+    }
+
+    magic = hvm_get_32u(h);
+    if (magic != HVM_FILE_VERSION) {
+        printk("HVM restore version dismatch!\n");
+        return -1;
+    }
+
+    /*
+     * check xen change set
+     *
+     * Compare against the part of XEN_CHANGESET after its last space, or
+     * the whole string when it has no space (strrchr() returns NULL then).
+     */
+    cur_chgset = strrchr(XEN_CHANGESET, ' ');
+    cur_chgset = (cur_chgset != NULL) ? cur_chgset + 1 : (char *)XEN_CHANGESET;
+
+    len = hvm_get_8u(h);
+    if (len > 20) { /*typical length is 18 -- "revision number:changeset id" */
+        printk("wrong change set length %d when hvm restore!\n", len);
+        return -1;
+    }
+
+    hvm_get_buffer(h, chgset, len);
+    chgset[len] = '\0';
+    if (strncmp(cur_chgset, chgset, len + 1))
+        printk("warnings: try to restore hvm guest(%s) on a different changeset %s.\n",
+               chgset, cur_chgset);
+
+    /* check cpuid: ebx is reused to hold the saved eax value */
+    cpuid(1, &eax, &ebx, &ecx, &edx);
+    ebx = hvm_get_32u(h);
+    /*TODO: need to define how big a difference is acceptable */
+    if (ebx != eax)
+        printk("warnings: try to restore hvm guest(0x%"PRIx32") "
+               "on a different type processor(0x%"PRIx32").\n",
+               ebx,
+               eax);
+
+    while (1) {
+        if (hvm_ctxt_end(h)) {
+            break;
+        }
+
+        /* ID string */
+        len = hvm_get_8u(h);
+        if (len > HVM_SE_IDSTR_LEN) {
+            printk("wrong HVM save entry idstr len %d!", len);
+            return -1;
+        }
+
+        hvm_get_buffer(h, idstr, len);
+        idstr[len] = '\0';
+
+        instance_id = hvm_get_32u(h);
+        version_id = hvm_get_32u(h);
+
+        rec_len = hvm_get_32u(h);
+        rec_pos = hvm_ctxt_tell(h);
+
+        /*
+         * rec_len comes straight from the image.  Refuse a record that
+         * claims to extend past the end of the context before a handler
+         * reads from it and before seeking to its end.
+         */
+        if (rec_pos > h->size || rec_len > h->size - rec_pos) {
+            printk("hvm load: record %s overruns the context buffer!\n",
+                   idstr);
+            return -1;
+        }
+
+        se = find_se(v->domain, idstr, instance_id);
+        if (se == NULL) {
+            printk("warnings: hvm load can't find device %s's instance %d!\n",
+                   idstr, instance_id);
+        } else {
+            ret = se->load_state(h, se->opaque, version_id);
+            if (ret < 0)
+                printk("warnings: loading state fail for device %s instance %d!\n",
+                       idstr, instance_id);
+        }
+
+        /* make sure to jump end of record */
+        if ( hvm_ctxt_tell(h) - rec_pos != rec_len) {
+            printk("wrong hvm record size, maybe some dismatch between save&restore handler!\n");
+        }
+        hvm_ctxt_seek(h, rec_pos + rec_len);
+    }
+
+    return 0;
+}
+
+#ifdef HVM_DEBUG_SUSPEND
+/*
+ * Debug dump of the shared I/O page, compiled in only with
+ * HVM_DEBUG_SUSPEND.  Only the first vcpu_iodata slot is printed: its
+ * event port and the fields of its pending I/O request.
+ */
+static void shpage_info(shared_iopage_t *sh)
+{
+
+ vcpu_iodata_t *p = &sh->vcpu_iodata[0];
+ ioreq_t *req = &p->vp_ioreq;
+ printk("*****sharepage_info******!\n");
+ printk("vp_eport=%d\n", p->vp_eport);
+ printk("io packet: "
+ "state:%x, pvalid: %x, dir:%x, port: %"PRIx64", "
+ "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n",
+ req->state, req->data_is_ptr, req->dir, req->addr,
+ req->data, req->count, req->size);
+}
+#else
+/* Non-debug builds: the dump is a no-op. */
+static void shpage_info(shared_iopage_t *sh)
+{
+}
+#endif
+
+/*
+ * Save handler for the shared I/O page.  It writes nothing: the body is
+ * compiled out with "#if 0", so the record payload is empty.
+ */
+static void shpage_save(hvm_domain_context_t *h, void *opaque)
+{
+ /* XXX: no action required for shpage save/restore, since it's in guest
+ * memory; the disabled code is kept for debug purposes only. */
+
+#if 0
+ struct shared_iopage *s = opaque;
+ /* XXX:smp */
+ struct ioreq *req = &s->vcpu_iodata[0].vp_ioreq;
+
+ shpage_info(s);
+
+ hvm_put_buffer(h, (char*)req, sizeof(struct ioreq));
+#endif
+}
+
+/*
+ * Load handler for the shared I/O page.  The restoring code is compiled
+ * out with "#if 0", so nothing is read from 'h' and 'version_id' is not
+ * checked; the function only runs the debug dump and returns 0.
+ */
+static int shpage_load(hvm_domain_context_t *h, void *opaque, int version_id)
+{
+ struct shared_iopage *s = opaque;
+#if 0
+ /* XXX:smp */
+ struct ioreq *req = &s->vcpu_iodata[0].vp_ioreq;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ hvm_get_buffer(h, (char*)req, sizeof(struct ioreq));
+
+
+#endif
+ shpage_info(s);
+ return 0;
+}
+
+/*
+ * Register the shared I/O page 'sp' of domain 'd' with the save/restore
+ * machinery as "xen_hvm_shpage", instance 0x10, version 1.
+ * NOTE(review): the result of hvm_register_savevm() is ignored, and the
+ * visible caller sits in hvm_vcpu_initialise() -- confirm it cannot
+ * register the same entry once per vcpu.
+ */
+void shpage_init(struct domain *d, shared_iopage_t *sp)
+{
+ hvm_register_savevm(d, "xen_hvm_shpage", 0x10, 1, shpage_save,
shpage_load, sp);
}
/*
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/arch/x86/hvm/vioapic.c
--- a/xen/arch/x86/hvm/vioapic.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/arch/x86/hvm/vioapic.c Wed Dec 13 22:52:02 2006 +0800
@@ -466,10 +466,138 @@ void vioapic_update_EOI(struct domain *d
spin_unlock(&hvm_irq->lock);
}
+#ifdef HVM_DEBUG_SUSPEND
+/*
+ * Debug dump of the virtual IOAPIC (HVM_DEBUG_SUSPEND builds only):
+ * ioregsel, id, base_address, then every redirection table entry.
+ */
+static void ioapic_info(struct vioapic *s)
+{
+ int i;
+ printk("*****ioapic state:*****\n");
+ printk("ioapic 0x%x.\n", s->ioregsel);
+ printk("ioapic 0x%x.\n", s->id);
+ printk("ioapic 0x%lx.\n", s->base_address);
+ for (i = 0; i < VIOAPIC_NUM_PINS; i++) {
+ printk("ioapic redirtbl[%d]:0x%"PRIx64"\n", i, s->redirtbl[i].bits);
+ }
+
+}
+/*
+ * Debug dump of the domain's interrupt routing state (HVM_DEBUG_SUSPEND
+ * builds only).
+ * NOTE(review): gsi_assert_count is printed for 4 entries only, while
+ * ioapic_save()/ioapic_load() transfer VIOAPIC_NUM_PINS entries.
+ */
+static void hvmirq_info(struct hvm_irq *hvm_irq)
+{
+ int i;
+ printk("*****hvmirq state:*****\n");
+ for (i = 0; i < BITS_TO_LONGS(32*4); i++)
+ printk("hvmirq pci_intx[%d]:0x%lx.\n", i, hvm_irq->pci_intx[i]);
+
+ for (i = 0; i < BITS_TO_LONGS(16); i++)
+ printk("hvmirq isa_irq[%d]:0x%lx.\n", i, hvm_irq->isa_irq[i]);
+
+ for (i = 0; i < BITS_TO_LONGS(1); i++)
+ printk("hvmirq callback_irq_wire[%d]:0x%lx.\n", i,
hvm_irq->callback_irq_wire[i]);
+
+ printk("hvmirq callback_gsi:0x%x.\n", hvm_irq->callback_gsi);
+
+ for (i = 0; i < 4; i++)
+ printk("hvmirq pci_link_route[%d]:0x%"PRIx8".\n", i,
hvm_irq->pci_link_route[i]);
+
+ for (i = 0; i < 4; i++)
+ printk("hvmirq pci_link_assert_count[%d]:0x%"PRIx8".\n", i,
hvm_irq->pci_link_assert_count[i]);
+
+ for (i = 0; i < 4; i++)
+ printk("hvmirq gsi_assert_count[%d]:0x%"PRIx8".\n", i,
hvm_irq->gsi_assert_count[i]);
+
+ printk("hvmirq round_robin_prev_vcpu:0x%"PRIx8".\n",
hvm_irq->round_robin_prev_vcpu);
+}
+#else
+/* Non-debug builds: both dumps are no-ops. */
+static void ioapic_info(struct vioapic *s)
+{
+}
+static void hvmirq_info(struct hvm_irq *hvm_irq)
+{
+}
+#endif
+
+/*
+ * Save handler for the virtual IOAPIC and the interrupt routing state of
+ * the domain passed in 'opaque'.  The order of the writes below defines
+ * the record layout and must stay in step with ioapic_load().
+ */
+static void ioapic_save(hvm_domain_context_t *h, void *opaque)
+{
+ int i;
+ struct domain *d = opaque;
+ struct vioapic *s = domain_vioapic(d);
+ struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+
+ ioapic_info(s);
+ hvmirq_info(hvm_irq);
+
+ /* save ioapic state */
+ hvm_put_32u(h, s->ioregsel);
+ hvm_put_32u(h, s->id);
+ hvm_put_64u(h, s->base_address);
+ for (i = 0; i < VIOAPIC_NUM_PINS; i++) {
+ hvm_put_64u(h, s->redirtbl[i].bits);
+ }
+
+ /* save hvm irq state */
+ /* The three bitmaps are copied as raw bytes: 16, 2 and 1.  Presumably
+  * these are the byte sizes of the 32*4-, 16- and 1-bit maps that
+  * hvmirq_info() walks -- TODO confirm against struct hvm_irq. */
+ hvm_put_buffer(h, (char*)hvm_irq->pci_intx, 16);
+ hvm_put_buffer(h, (char*)hvm_irq->isa_irq, 2);
+ hvm_put_buffer(h, (char*)hvm_irq->callback_irq_wire, 1);
+ hvm_put_32u(h, hvm_irq->callback_gsi);
+
+ for (i = 0; i < 4; i++)
+ hvm_put_8u(h, hvm_irq->pci_link_route[i]);
+
+ for (i = 0; i < 4; i++)
+ hvm_put_8u(h, hvm_irq->pci_link_assert_count[i]);
+
+ for (i = 0; i < VIOAPIC_NUM_PINS; i++)
+ hvm_put_8u(h, hvm_irq->gsi_assert_count[i]);
+
+ hvm_put_8u(h, hvm_irq->round_robin_prev_vcpu);
+
+}
+
+/*
+ * Load handler for the virtual IOAPIC and the interrupt routing state of
+ * the domain passed in 'opaque'.  Fields are read in exactly the order
+ * ioapic_save() wrote them.
+ * Returns 0 on success, -EINVAL when version_id is not 1.
+ */
+static int ioapic_load(hvm_domain_context_t *h, void *opaque, int version_id)
+{
+ int i;
+ struct domain *d = opaque;
+ struct vioapic *s = domain_vioapic(d);
+ struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ /* restore ioapic state */
+ s->ioregsel = hvm_get_32u(h);
+ s->id = hvm_get_32u(h);
+ s->base_address = hvm_get_64u(h);
+ for (i = 0; i < VIOAPIC_NUM_PINS; i++) {
+ s->redirtbl[i].bits = hvm_get_64u(h);
+ }
+
+ /* restore irq state */
+ /* raw byte copies of the three bitmaps: 16, 2 and 1 bytes */
+ hvm_get_buffer(h, (char*)hvm_irq->pci_intx, 16);
+ hvm_get_buffer(h, (char*)hvm_irq->isa_irq, 2);
+ hvm_get_buffer(h, (char*)hvm_irq->callback_irq_wire, 1);
+ hvm_irq->callback_gsi = hvm_get_32u(h);
+
+ for (i = 0; i < 4; i++)
+ hvm_irq->pci_link_route[i] = hvm_get_8u(h);
+
+ for (i = 0; i < 4; i++)
+ hvm_irq->pci_link_assert_count[i] = hvm_get_8u(h);
+
+ for (i = 0; i < VIOAPIC_NUM_PINS; i++)
+ hvm_irq->gsi_assert_count[i] = hvm_get_8u(h);
+
+ hvm_irq->round_robin_prev_vcpu = hvm_get_8u(h);
+
+ ioapic_info(s);
+ hvmirq_info(hvm_irq);
+
+ return 0;
+}
+
void vioapic_init(struct domain *d)
{
struct vioapic *vioapic = domain_vioapic(d);
int i;
+
+ hvm_register_savevm(d, "xen_hvm_ioapic", 0, 1, ioapic_save, ioapic_load,
d);
memset(vioapic, 0, sizeof(*vioapic));
for ( i = 0; i < VIOAPIC_NUM_PINS; i++ )
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/arch/x86/hvm/vlapic.c Wed Dec 13 22:52:02 2006 +0800
@@ -921,6 +921,82 @@ static int vlapic_reset(struct vlapic *v
return 1;
}
+#ifdef HVM_DEBUG_SUSPEND
+/*
+ * Debug dump of a virtual local APIC (HVM_DEBUG_SUSPEND builds only).
+ * Prints, in order: apic_base_msr, disabled, timer_divisor,
+ * timer_pending_count.  The register page itself is not dumped.
+ */
+static void lapic_info(struct vlapic *s)
+{
+ printk("*****lapic state:*****\n");
+ printk("lapic 0x%"PRIx64".\n", s->apic_base_msr);
+ printk("lapic 0x%x.\n", s->disabled);
+ printk("lapic 0x%x.\n", s->timer_divisor);
+ printk("lapic 0x%x.\n", s->timer_pending_count);
+}
+#else
+/* Non-debug builds: the dump is a no-op. */
+static void lapic_info(struct vlapic *s)
+{
+}
+#endif
+
+/*
+ * Save handler for one virtual local APIC (passed in 'opaque').
+ * Writes apic_base_msr (64), disabled (32), timer_divisor (32),
+ * timer_pending_count (32) and then the first 0x3f0 bytes of the register
+ * area s->regs.  lapic_load() reads the same layout.
+ */
+static void lapic_save(hvm_domain_context_t *h, void *opaque)
+{
+ struct vlapic *s = opaque;
+
+ lapic_info(s);
+
+ hvm_put_64u(h, s->apic_base_msr);
+ hvm_put_32u(h, s->disabled);
+ hvm_put_32u(h, s->timer_divisor);
+
+ /*XXX: need this?*/
+ hvm_put_32u(h, s->timer_pending_count);
+
+ hvm_put_buffer(h, (char*)s->regs, 0x3f0);
+
+}
+
+/*
+ * Load handler for one virtual local APIC (passed in 'opaque').
+ * Reads the fields written by lapic_save(), then re-arms the APIC timer
+ * when the restored initial-count register (APIC_TMICT) is non-zero.
+ * Returns 0 on success, -EINVAL when version_id is not 1.
+ */
+static int lapic_load(hvm_domain_context_t *h, void *opaque, int version_id)
+{
+ struct vlapic *s = opaque;
+ uint32_t tmict;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ s->apic_base_msr = hvm_get_64u(h);
+ s->disabled = hvm_get_32u(h);
+ s->timer_divisor = hvm_get_32u(h);
+
+ /*XXX: need this?*/
+ s->timer_pending_count = hvm_get_32u(h);
+
+ hvm_get_buffer(h, (char*)s->regs, 0x3f0);
+
+ /* rearm the timer if needed */
+ tmict = vlapic_get_reg(s, APIC_TMICT);
+ if (tmict > 0) {
+ s_time_t now = NOW(), offset;
+ stop_timer(&s->vlapic_timer);
+ /* The current count restarts from the full initial count: the time
+  * that had already elapsed before the save is not accounted for. */
+ vlapic_set_reg(s, APIC_TMCCT, tmict);
+ s->timer_last_update = now;
+
+ offset = APIC_BUS_CYCLE_NS * s->timer_divisor * tmict;
+
+ set_timer(&s->vlapic_timer, now + offset);
+
+ /* NOTE(review): this message has no trailing newline. */
+ printk("lapic_load to rearm the actimer:"
+ "bus cycle is %"PRId64"ns, now 0x%016"PRIx64", "
+ "timer initial count 0x%x, offset 0x%016"PRIx64", "
+ "expire @ 0x%016"PRIx64".",
+ APIC_BUS_CYCLE_NS, now,
+ vlapic_get_reg(s, APIC_TMICT),
+ offset, now + offset);
+ }
+
+
+ lapic_info(s);
+
+ return 0;
+}
+
int vlapic_init(struct vcpu *v)
{
struct vlapic *vlapic = vcpu_vlapic(v);
@@ -939,6 +1015,7 @@ int vlapic_init(struct vcpu *v)
vlapic->regs = map_domain_page_global(page_to_mfn(vlapic->regs_page));
memset(vlapic->regs, 0, PAGE_SIZE);
+ hvm_register_savevm(v->domain, "xen_hvm_lapic", v->vcpu_id, 1, lapic_save,
lapic_load, vlapic);
vlapic_reset(vlapic);
vlapic->apic_base_msr = MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Dec 13 22:52:02 2006 +0800
@@ -426,6 +426,319 @@ static void vmx_store_cpu_guest_regs(
vmx_vmcs_exit(v);
}
+static int __get_instruction_length(void);
+/*
+ * Copy guest state into 'c': RIP/RSP/RFLAGS, the descriptor tables, all
+ * segment registers, TR, LDTR and the SYSENTER fields are read with
+ * __vmread(); CR0, CR3 and CR4 come from the values cached in
+ * v->arch.hvm_vmx.  The visible caller, vmx_save_vmcs_ctxt(), brackets the
+ * call with vmx_vmcs_enter()/vmx_vmcs_exit().
+ * Always returns 1.
+ */
+int vmx_vmcs_save(struct vcpu *v, struct vmcs_data *c)
+{
+ unsigned long inst_len;
+
+ /* NOTE(review): inst_len is only used by the debug printk below. */
+ inst_len = __get_instruction_length();
+ c->eip = __vmread(GUEST_RIP);
+
+#ifdef HVM_DEBUG_SUSPEND
+ printk("vmx_vmcs_save: inst_len=0x%lx, eip=0x%"PRIx64".\n",
+ inst_len, c->eip);
+#endif
+
+ c->esp = __vmread(GUEST_RSP);
+ c->eflags = __vmread(GUEST_RFLAGS);
+
+ /* control registers: taken from the cached copies, not from the VMCS */
+ c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
+ c->cr3 = v->arch.hvm_vmx.cpu_cr3;
+ c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
+
+#ifdef HVM_DEBUG_SUSPEND
+ printk("vmx_vmcs_save: cr3=0x%"PRIx64", cr0=0x%"PRIx64",
cr4=0x%"PRIx64".\n",
+ c->cr3,
+ c->cr0,
+ c->cr4);
+#endif
+
+ /* descriptor tables */
+ c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
+ c->idtr_base = __vmread(GUEST_IDTR_BASE);
+
+ c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT);
+ c->gdtr_base = __vmread(GUEST_GDTR_BASE);
+
+ /* segment registers: selector, limit, base and access rights each */
+ c->cs_sel = __vmread(GUEST_CS_SELECTOR);
+ c->cs_limit = __vmread(GUEST_CS_LIMIT);
+ c->cs_base = __vmread(GUEST_CS_BASE);
+ c->cs_arbytes = __vmread(GUEST_CS_AR_BYTES);
+
+ c->ds_sel = __vmread(GUEST_DS_SELECTOR);
+ c->ds_limit = __vmread(GUEST_DS_LIMIT);
+ c->ds_base = __vmread(GUEST_DS_BASE);
+ c->ds_arbytes = __vmread(GUEST_DS_AR_BYTES);
+
+ c->es_sel = __vmread(GUEST_ES_SELECTOR);
+ c->es_limit = __vmread(GUEST_ES_LIMIT);
+ c->es_base = __vmread(GUEST_ES_BASE);
+ c->es_arbytes = __vmread(GUEST_ES_AR_BYTES);
+
+ c->ss_sel = __vmread(GUEST_SS_SELECTOR);
+ c->ss_limit = __vmread(GUEST_SS_LIMIT);
+ c->ss_base = __vmread(GUEST_SS_BASE);
+ c->ss_arbytes = __vmread(GUEST_SS_AR_BYTES);
+
+ c->fs_sel = __vmread(GUEST_FS_SELECTOR);
+ c->fs_limit = __vmread(GUEST_FS_LIMIT);
+ c->fs_base = __vmread(GUEST_FS_BASE);
+ c->fs_arbytes = __vmread(GUEST_FS_AR_BYTES);
+
+ c->gs_sel = __vmread(GUEST_GS_SELECTOR);
+ c->gs_limit = __vmread(GUEST_GS_LIMIT);
+ c->gs_base = __vmread(GUEST_GS_BASE);
+ c->gs_arbytes = __vmread(GUEST_GS_AR_BYTES);
+
+ c->tr_sel = __vmread(GUEST_TR_SELECTOR);
+ c->tr_limit = __vmread(GUEST_TR_LIMIT);
+ c->tr_base = __vmread(GUEST_TR_BASE);
+ c->tr_arbytes = __vmread(GUEST_TR_AR_BYTES);
+
+ c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR);
+ c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT);
+ c->ldtr_base = __vmread(GUEST_LDTR_BASE);
+ c->ldtr_arbytes = __vmread(GUEST_LDTR_AR_BYTES);
+
+ /* SYSENTER fields */
+ c->sysenter_cs = __vmread(GUEST_SYSENTER_CS);
+ c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP);
+ c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP);
+
+ return 1;
+}
+
+/*
+ * Write the guest state held in 'c' back with __vmwrite() and rebuild the
+ * paging state around the restored CR3.  The visible caller,
+ * vmx_load_vmcs_ctxt(), brackets the call with
+ * vmx_vmcs_enter()/vmx_vmcs_exit().
+ *
+ * Returns 1 on success and 0 on failure (invalid CR3, or no reference
+ * could be taken on the new top-level page table).
+ * NOTE(review): 'error' is never set after initialisation, so the final
+ * "return !error" always yields 1; 'old_cr4' is read but never used.
+ */
+int vmx_vmcs_restore(struct vcpu *v, struct vmcs_data *c)
+{
+ unsigned long mfn, old_cr4, old_base_mfn;
+ int error = 0;
+
+ __vmwrite(GUEST_RIP, c->eip);
+ __vmwrite(GUEST_RSP, c->esp);
+ __vmwrite(GUEST_RFLAGS, c->eflags);
+
+ v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
+ __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
+
+ old_cr4 = __vmread(CR4_READ_SHADOW);
+ __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+
+ v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
+ __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
+
+#ifdef HVM_DEBUG_SUSPEND
+ printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64",
cr4=0x%"PRIx64".\n",
+ c->cr3,
+ c->cr0,
+ c->cr4);
+#endif
+
+ /* Guest paging off: point GUEST_CR3 at the domain's phys_table and skip
+  * the guest page-table handling.  GUEST_CR3 is written again after
+  * skip_cr3 with hw_cr3. */
+ if (!vmx_paging_enabled(v)) {
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
+ __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
+ goto skip_cr3;
+ }
+
+ if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
+ /*
+ * This is simple TLB flush, implying the guest has
+ * removed some translation or changed page attributes.
+ * We simply invalidate the shadow.
+ */
+ mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+ if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
+ printk("Invalid CR3 value=%"PRIx64"", c->cr3);
+ domain_crash(v->domain);
+ return 0;
+ }
+ } else {
+ /*
+ * If different, make a shadow. Check if the PDBR is valid
+ * first.
+ */
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3);
+ if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) {
+ printk("Invalid CR3 value=%"PRIx64"", c->cr3);
+ domain_crash(v->domain);
+ return 0;
+ }
+
+ /* current!=vcpu as not called by arch_vmx_do_launch */
+ mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+ /* Take a reference on the new top-level table before installing it. */
+ if(!get_page(mfn_to_page(mfn), v->domain)) {
+ struct page_info *page = mfn_to_page(mfn);
+ printk("get_page for mfn failed. CR3 value=%"PRIx64",
count_info=0x%"PRIx32", type_info=0x%lx, owner=%d.\n", c->cr3,
+ page->count_info,
+ page->u.inuse.type_info,
+ page->u.inuse._domain);
+ return 0;
+ }
+ /* Swap in the new table, then drop the reference on the old one. */
+ old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+ v->arch.guest_table = pagetable_from_pfn(mfn);
+ if (old_base_mfn)
+ put_page(mfn_to_page(old_base_mfn));
+ /*
+ * arch.shadow_table should now hold the next CR3 for shadow
+ */
+ v->arch.hvm_vmx.cpu_cr3 = c->cr3;
+ }
+
+ skip_cr3:
+#if defined(__x86_64__)
+ /* Long-mode guest: set VM_ENTRY_IA32E_MODE in the VM-entry controls. */
+ if (vmx_long_mode_enabled(v)) {
+ unsigned long vm_entry_value;
+ vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
+ vm_entry_value |= VM_ENTRY_IA32E_MODE;
+ __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
+ }
+#endif
+
+ /* Recompute the shadow paging mode, then load the resulting hw_cr3. */
+ shadow_update_paging_modes(v);
+ __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
+
+ /* descriptor tables */
+ __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
+ __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
+
+ __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
+ __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
+
+ /* segment registers: selector, limit, base and access rights each */
+ __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
+ __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
+ __vmwrite(GUEST_CS_BASE, c->cs_base);
+ __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes);
+
+ __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
+ __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
+ __vmwrite(GUEST_DS_BASE, c->ds_base);
+ __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes);
+
+ __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
+ __vmwrite(GUEST_ES_LIMIT, c->es_limit);
+ __vmwrite(GUEST_ES_BASE, c->es_base);
+ __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes);
+
+ __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
+ __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
+ __vmwrite(GUEST_SS_BASE, c->ss_base);
+ __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes);
+
+ __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
+ __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
+ __vmwrite(GUEST_FS_BASE, c->fs_base);
+ __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes);
+
+ __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
+ __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
+ __vmwrite(GUEST_GS_BASE, c->gs_base);
+ __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes);
+
+ __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
+ __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
+ __vmwrite(GUEST_TR_BASE, c->tr_base);
+ __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes);
+
+ __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
+ __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
+ __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
+ __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes);
+
+ /* SYSENTER fields */
+ __vmwrite(GUEST_SYSENTER_CS, c->sysenter_cs);
+ __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
+ __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
+
+ return !error;
+}
+
+#ifdef HVM_DEBUG_SUSPEND
+/*
+ * Debug dump of a vcpu's saved MSR state (HVM_DEBUG_SUSPEND builds only):
+ * shadow_gs, flags, then all VMX_MSR_COUNT entries of msrs[] on one line.
+ */
+static void dump_msr_state(struct vmx_msr_state *m)
+{
+ int i = 0;
+ printk("**** msr state ****\n");
+ printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
+ for (i = 0; i < VMX_MSR_COUNT; i++)
+ printk("0x%lx,", m->msrs[i]);
+ printk("\n");
+}
+#else
+/* Non-debug builds: the dump is a no-op. */
+static void dump_msr_state(struct vmx_msr_state *m)
+{
+}
+#endif
+
+/*
+ * Copy the parts of v's state that live outside the VMCS into ctxt->data:
+ * shadow_gs, the vmxassist_enabled flag, the MSR flags word and all
+ * VMX_MSR_COUNT MSR values.
+ */
+void vmx_save_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    struct vmx_msr_state *msr = &v->arch.hvm_vmx.msr_state;
+    struct vmcs_data *out = &ctxt->data;
+    int idx;
+
+    out->shadow_gs = msr->shadow_gs;
+    out->vmxassist_enabled = v->arch.hvm_vmx.vmxassist_enabled;
+
+    /* save msrs */
+    out->flags = msr->flags;
+    idx = 0;
+    while (idx < VMX_MSR_COUNT) {
+        out->msr_items[idx] = msr->msrs[idx];
+        idx++;
+    }
+
+    dump_msr_state(msr);
+}
+
+/*
+ * Copy the non-VMCS state held in ctxt->data back into v: the MSR flags
+ * word, all VMX_MSR_COUNT MSR values, shadow_gs and the vmxassist_enabled
+ * flag.  Only the software copies are updated; the MSRs are not written
+ * to hardware here.
+ */
+void vmx_load_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    struct vmx_msr_state *msr = &v->arch.hvm_vmx.msr_state;
+    struct vmcs_data *in = &ctxt->data;
+    int idx;
+
+    /* restore msrs */
+    msr->flags = in->flags;
+    idx = 0;
+    while (idx < VMX_MSR_COUNT) {
+        msr->msrs[idx] = in->msr_items[idx];
+        idx++;
+    }
+
+    msr->shadow_gs = in->shadow_gs;
+
+    /* XXX: no call to vmx_restore_guest_msrs(v) here: current != vcpu, as
+     * this is not called by arch_vmx_do_launch. */
+
+    v->arch.hvm_vmx.vmxassist_enabled = in->vmxassist_enabled;
+
+    dump_msr_state(msr);
+}
+
+/*
+ * Capture v's complete CPU context into 'ctxt': the non-VMCS state first,
+ * then the VMCS fields, read while v's VMCS is entered.  ctxt->valid is
+ * set so that vmx_load_vmcs_ctxt() will restore the whole context.
+ */
+void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    /* set valid flag to recover whole vmcs when restore */
+    ctxt->valid = 1;
+
+    vmx_save_cpu_state(v, ctxt);
+
+    vmx_vmcs_enter(v);
+    vmx_vmcs_save(v, &ctxt->data);
+    vmx_vmcs_exit(v);
+}
+
+/*
+ * Restore v's CPU context from 'ctxt' if it is marked valid; otherwise do
+ * nothing.  The non-VMCS state is loaded first, then the VMCS fields while
+ * v's VMCS is entered.  A failed VMCS restore crashes the domain.  The
+ * valid flag is cleared afterwards so the context is applied only once.
+ */
+void vmx_load_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    if (ctxt->valid) {
+        vmx_load_cpu_state(v, ctxt);
+
+        vmx_vmcs_enter(v);
+
+        if (vmx_vmcs_restore(v, &ctxt->data) == 0) {
+            printk("vmx_vmcs restore failed!\n");
+            domain_crash(v->domain);
+        }
+
+        /* only load vmcs once */
+        ctxt->valid = 0;
+
+        vmx_vmcs_exit(v);
+    }
+}
+
/*
* The VMX spec (section 4.3.1.2, Checks on Guest Segment
* Registers) says that virtual-8086 mode guests' segment
@@ -737,6 +1050,9 @@ static void vmx_setup_hvm_funcs(void)
hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
+
+ hvm_funcs.save_cpu_ctxt = vmx_save_vmcs_ctxt;
+ hvm_funcs.load_cpu_ctxt = vmx_load_vmcs_ctxt;
hvm_funcs.paging_enabled = vmx_paging_enabled;
hvm_funcs.long_mode_enabled = vmx_long_mode_enabled;
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/arch/x86/hvm/vpic.c
--- a/xen/arch/x86/hvm/vpic.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/arch/x86/hvm/vpic.c Wed Dec 13 22:52:02 2006 +0800
@@ -378,6 +378,87 @@ static int vpic_intercept_elcr_io(ioreq_
return 1;
}
+#ifdef HVM_DEBUG_SUSPEND
+/*
+ * Debug dump of one virtual PIC (HVM_DEBUG_SUSPEND builds only).
+ * Fields are printed, unlabelled, in this order: irr, imr, isr, irq_base,
+ * init_state, priority_add, readsel_isr, poll, auto_eoi,
+ * rotate_on_auto_eoi, special_fully_nested_mode, special_mask_mode, elcr,
+ * int_output, is_master.
+ */
+static void vpic_info(struct vpic *s)
+{
+ printk("*****pic state:*****\n");
+ printk("pic 0x%x.\n", s->irr);
+ printk("pic 0x%x.\n", s->imr);
+ printk("pic 0x%x.\n", s->isr);
+ printk("pic 0x%x.\n", s->irq_base);
+ printk("pic 0x%x.\n", s->init_state);
+ printk("pic 0x%x.\n", s->priority_add);
+ printk("pic 0x%x.\n", s->readsel_isr);
+ printk("pic 0x%x.\n", s->poll);
+ printk("pic 0x%x.\n", s->auto_eoi);
+ printk("pic 0x%x.\n", s->rotate_on_auto_eoi);
+ printk("pic 0x%x.\n", s->special_fully_nested_mode);
+ printk("pic 0x%x.\n", s->special_mask_mode);
+ printk("pic 0x%x.\n", s->elcr);
+ printk("pic 0x%x.\n", s->int_output);
+ printk("pic 0x%x.\n", s->is_master);
+}
+#else
+/* Non-debug builds: the dump is a no-op. */
+static void vpic_info(struct vpic *s)
+{
+}
+#endif
+
+/*
+ * Save handler for one virtual PIC (passed in 'opaque').  Fourteen 8-bit
+ * values are written in the order below; vpic_load() reads them back in
+ * the same order.  is_master is not part of the record.
+ */
+static void vpic_save(hvm_domain_context_t *h, void *opaque)
+{
+ struct vpic *s = opaque;
+
+ vpic_info(s);
+
+ hvm_put_8u(h, s->irr);
+ hvm_put_8u(h, s->imr);
+ hvm_put_8u(h, s->isr);
+ hvm_put_8u(h, s->irq_base);
+ hvm_put_8u(h, s->init_state);
+ hvm_put_8u(h, s->priority_add);
+ hvm_put_8u(h, s->readsel_isr);
+
+ hvm_put_8u(h, s->poll);
+ hvm_put_8u(h, s->auto_eoi);
+
+ hvm_put_8u(h, s->rotate_on_auto_eoi);
+ hvm_put_8u(h, s->special_fully_nested_mode);
+ hvm_put_8u(h, s->special_mask_mode);
+
+ hvm_put_8u(h, s->elcr);
+ hvm_put_8u(h, s->int_output);
+}
+
+/*
+ * Load handler for one virtual PIC (passed in 'opaque').  Reads the
+ * fourteen 8-bit values written by vpic_save(), in the same order.
+ * is_master is not touched.
+ * Returns 0 on success, -EINVAL when version_id is not 1.
+ */
+static int vpic_load(hvm_domain_context_t *h, void *opaque, int version_id)
+{
+ struct vpic *s = opaque;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ s->irr = hvm_get_8u(h);
+ s->imr = hvm_get_8u(h);
+ s->isr = hvm_get_8u(h);
+ s->irq_base = hvm_get_8u(h);
+ s->init_state = hvm_get_8u(h);
+ s->priority_add = hvm_get_8u(h);
+ s->readsel_isr = hvm_get_8u(h);
+
+ s->poll = hvm_get_8u(h);
+ s->auto_eoi = hvm_get_8u(h);
+
+ s->rotate_on_auto_eoi = hvm_get_8u(h);
+ s->special_fully_nested_mode = hvm_get_8u(h);
+ s->special_mask_mode = hvm_get_8u(h);
+
+ s->elcr = hvm_get_8u(h);
+ s->int_output = hvm_get_8u(h);
+
+ vpic_info(s);
+
+ return 0;
+}
+
void vpic_init(struct domain *d)
{
struct vpic *vpic;
@@ -387,12 +468,14 @@ void vpic_init(struct domain *d)
memset(vpic, 0, sizeof(*vpic));
vpic->is_master = 1;
vpic->elcr = 1 << 2;
+ hvm_register_savevm(d, "xen_hvm_i8259", 0x20, 1, vpic_save, vpic_load,
vpic);
register_portio_handler(d, 0x20, 2, vpic_intercept_pic_io);
register_portio_handler(d, 0x4d0, 1, vpic_intercept_elcr_io);
/* Slave PIC. */
vpic++;
memset(vpic, 0, sizeof(*vpic));
+ hvm_register_savevm(d, "xen_hvm_i8259", 0xa0, 1, vpic_save, vpic_load,
vpic);
register_portio_handler(d, 0xa0, 2, vpic_intercept_pic_io);
register_portio_handler(d, 0x4d1, 1, vpic_intercept_elcr_io);
}
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/arch/x86/mm/shadow/common.c Wed Dec 13 22:52:02 2006 +0800
@@ -2145,7 +2145,7 @@ int shadow_remove_all_mappings(struct vc
/* Don't complain if we're in HVM and there's one extra mapping:
* The qemu helper process has an untyped mapping of this dom's RAM */
if ( !(shadow_mode_external(v->domain)
- && (page->count_info & PGC_count_mask) <= 2
+ && (page->count_info & PGC_count_mask) <= 3 /* vmx restore add
one extra mapping*/
&& (page->u.inuse.type_info & PGT_count_mask) == 0) )
{
SHADOW_ERROR("can't find all mappings of mfn %lx: "
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/arch/x86/mm/shadow/multi.c Wed Dec 13 22:52:02 2006 +0800
@@ -1613,6 +1613,14 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
}
}
+ {
+ struct page_info *page = mfn_to_page(gmfn);
+ /* XXX: add it to emulate a touched page */
+ if ((page->u.inuse.type_info & PGT_type_mask) == PGT_none){
+ page->u.inuse.type_info |= (PGT_writable_page | PGT_validated);
+ }
+ }
+
shadow_promote(v, gmfn, shadow_type);
set_shadow_status(v, gmfn, shadow_type, smfn);
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/common/domain.c
--- a/xen/common/domain.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/common/domain.c Wed Dec 13 22:52:02 2006 +0800
@@ -24,6 +24,7 @@
#include <xen/percpu.h>
#include <xen/multicall.h>
#include <asm/debugger.h>
+#include <asm/hvm/support.h>
#include <public/sched.h>
#include <public/vcpu.h>
@@ -454,8 +455,14 @@ int set_info_guest(struct domain *d,
domain_pause(d);
rc = -EFAULT;
- if ( copy_from_guest(c, vcpucontext->ctxt, 1) == 0 )
+ if ( copy_from_guest(c, vcpucontext->ctxt, 1) == 0 ) {
rc = arch_set_info_guest(v, c);
+ if ( v->vcpu_id != 0 &&
+ is_hvm_vcpu(v) &&
+ test_and_clear_bit(_VCPUF_down, &v->vcpu_flags) ) {
+ vcpu_wake(v);
+ }
+ }
domain_unpause(d);
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/common/domctl.c
--- a/xen/common/domctl.c Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/common/domctl.c Wed Dec 13 22:52:02 2006 +0800
@@ -26,6 +26,10 @@ extern long arch_do_domctl(
struct xen_domctl *op, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
extern void arch_getdomaininfo_ctxt(
struct vcpu *, struct vcpu_guest_context *);
+/*
+ * Arch-provided helpers used by the XEN_DOMCTL_gethvmcontext and
+ * XEN_DOMCTL_sethvmcontext handlers in this file.  Both take a vcpu and a
+ * struct hvm_domain_context and return an int status; the get handler
+ * treats a return of -1 as failure.
+ */
+extern int arch_gethvm_ctxt(
+ struct vcpu *, struct hvm_domain_context *);
+extern int arch_sethvm_ctxt(
+ struct vcpu *, struct hvm_domain_context *);
void cpumask_to_xenctl_cpumap(
struct xenctl_cpumap *xenctl_cpumap, cpumask_t *cpumask)
@@ -205,6 +209,37 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
}
break;
+    case XEN_DOMCTL_sethvmcontext:
+    {
+        /*
+         * Copy a struct hvm_domain_context from the caller's handle
+         * (op->u.hvmcontext.ctxt) and hand it to arch_sethvm_ctxt() for
+         * vcpu 0 of the target domain.
+         *
+         * ret: -ESRCH  no such domain
+         *      -ENOMEM bounce buffer could not be allocated
+         *      -EINVAL the domain has no vcpu 0
+         *      -EFAULT the context could not be copied in
+         *      otherwise the value returned by arch_sethvm_ctxt().
+         */
+        struct hvm_domain_context *c;
+        struct domain *d;
+        struct vcpu *v;
+
+        ret = -ESRCH;
+        if ( (d = find_domain_by_id(op->domain)) == NULL )
+            break;
+
+        ret = -ENOMEM;
+        if ( (c = xmalloc(struct hvm_domain_context)) == NULL )
+            goto sethvmcontext_out;
+
+        /*XXX: need check input vcpu when smp */
+        ret = -EINVAL;
+        if ( (v = d->vcpu[0]) == NULL )
+            goto sethvmcontext_free;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(c, op->u.hvmcontext.ctxt, 1) != 0 )
+            goto sethvmcontext_free;
+
+        ret = arch_sethvm_ctxt(v, c);
+
+    sethvmcontext_free:
+        /* Reached on success and on every failure after the allocation,
+         * so the bounce buffer is never leaked. */
+        xfree(c);
+
+    sethvmcontext_out:
+        put_domain(d);
+
+    }
+    break;
+
case XEN_DOMCTL_pausedomain:
{
struct domain *d = find_domain_by_id(op->domain);
@@ -489,6 +524,44 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
getvcpucontext_out:
put_domain(d);
+ }
+ break;
+
+    case XEN_DOMCTL_gethvmcontext:
+    {
+        /*
+         * Ask arch_gethvm_ctxt() to fill a struct hvm_domain_context for
+         * vcpu 0 of the target domain and copy it out through
+         * op->u.hvmcontext.ctxt.
+         *
+         * ret: -ESRCH   no such domain
+         *      -ENOMEM  bounce buffer could not be allocated
+         *      -ENODATA vcpu 0 is missing or not yet initialised
+         *      -EFAULT  arch_gethvm_ctxt() failed or a copy-out failed
+         *      0        success.
+         */
+        struct hvm_domain_context *c;
+        struct domain *d;
+        struct vcpu *v;
+
+        ret = -ESRCH;
+        if ( (d = find_domain_by_id(op->domain)) == NULL )
+            break;
+
+        ret = -ENOMEM;
+        if ( (c = xmalloc(struct hvm_domain_context)) == NULL )
+            goto gethvmcontext_out;
+
+        ret = -ENODATA;
+        if ( (v = d->vcpu[0]) == NULL ||
+             !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+            goto gethvmcontext_free;
+
+        /* Only copy the buffer out once it has actually been filled:
+         * xmalloc() memory is not cleared, so copying after a failure
+         * would hand stale hypervisor heap contents to the caller. */
+        if ( arch_gethvm_ctxt(v, c) == -1 )
+            ret = -EFAULT;
+        else if ( copy_to_guest(op->u.hvmcontext.ctxt, c, 1) )
+            ret = -EFAULT;
+        else
+            ret = 0;
+
+        if ( copy_to_guest(u_domctl, op, 1) )
+            ret = -EFAULT;
+
+    gethvmcontext_free:
+        /* Also reached from the -ENODATA path, which used to leak c. */
+        xfree(c);
+
+    gethvmcontext_out:
+        put_domain(d);
+
}
break;
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/include/asm-x86/hvm/domain.h Wed Dec 13 22:52:02 2006 +0800
@@ -27,6 +27,20 @@
#include <asm/hvm/io.h>
#include <public/hvm/params.h>
+/*
+ * Callback types stored in an HVMStateEntry.  A save handler receives the
+ * context buffer and the opaque pointer it was registered with; a load
+ * handler additionally receives a version_id and returns an int status.
+ */
+typedef void SaveStateHandler(hvm_domain_context_t *h, void *opaque);
+typedef int LoadStateHandler(hvm_domain_context_t *h, void *opaque, int
version_id);
+
+/* Size in bytes of the idstr[] array in HVMStateEntry. */
+#define HVM_SE_IDSTR_LEN 32
+/*
+ * One registered save/load handler pair.  Entries are chained through
+ * 'next'; struct hvm_domain keeps the head pointer in first_se.
+ * NOTE(review): the fields presumably mirror the arguments of
+ * hvm_register_savevm() (idstr, instance_id, version_id, handlers,
+ * opaque) - confirm against its implementation.
+ */
+typedef struct HVMStateEntry {
+ char idstr[HVM_SE_IDSTR_LEN];
+ int instance_id;
+ int version_id;
+ SaveStateHandler *save_state;
+ LoadStateHandler *load_state;
+ void *opaque;
+ struct HVMStateEntry *next;
+} HVMStateEntry;
+
struct hvm_domain {
unsigned long shared_page_va;
unsigned long buffered_io_va;
@@ -44,6 +58,9 @@ struct hvm_domain {
spinlock_t pbuf_lock;
uint64_t params[HVM_NR_PARAMS];
+
+ struct hvm_domain_context *hvm_ctxt;
+ HVMStateEntry *first_se;
};
#endif /* __ASM_X86_HVM_DOMAIN_H__ */
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/include/asm-x86/hvm/hvm.h Wed Dec 13 22:52:02 2006 +0800
@@ -79,6 +79,13 @@ struct hvm_function_table {
struct vcpu *v, struct cpu_user_regs *r, unsigned long *crs);
void (*load_cpu_guest_regs)(
struct vcpu *v, struct cpu_user_regs *r);
+
+ /* save and load hvm guest cpu context for save/restore */
+ void (*save_cpu_ctxt)(
+ struct vcpu *v, struct hvmcpu_context *ctxt);
+ void (*load_cpu_ctxt)(
+ struct vcpu *v, struct hvmcpu_context *ctxt);
+
/*
* Examine specifics of the guest state:
* 1) determine whether paging is enabled,
@@ -152,6 +159,20 @@ hvm_load_cpu_guest_regs(struct vcpu *v,
hvm_funcs.load_cpu_guest_regs(v, r);
}
+/*
+ * Save the CPU context of vcpu @v into @ctxt by dispatching to the
+ * save_cpu_ctxt hook of hvm_funcs.
+ * NOTE(review): the hook is called without a NULL check - confirm every
+ * hvm_function_table that gets installed sets save_cpu_ctxt.
+ */
+static inline void
+hvm_save_cpu_context(
+ struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ hvm_funcs.save_cpu_ctxt(v, ctxt);
+}
+
+/*
+ * Load the CPU context in @ctxt into vcpu @v by dispatching to the
+ * load_cpu_ctxt hook of hvm_funcs.
+ * NOTE(review): the hook is called without a NULL check - confirm every
+ * hvm_function_table that gets installed sets load_cpu_ctxt.
+ */
+static inline void
+hvm_load_cpu_context(
+ struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ hvm_funcs.load_cpu_ctxt(v, ctxt);
+}
+
static inline int
hvm_paging_enabled(struct vcpu *v)
{
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/include/asm-x86/hvm/support.h Wed Dec 13 22:52:02 2006 +0800
@@ -121,6 +121,130 @@ extern unsigned int opt_hvm_debug_level;
#define TRACE_VMEXIT(index, value) \
current->arch.hvm_vcpu.hvm_trace_values[index] = (value)
+/* save/restore support */
+
+//#define HVM_DEBUG_SUSPEND
+
+/*
+ * Register a save/load handler pair for domain @d.  The vpic code, for
+ * example, registers "xen_hvm_i8259" twice with its I/O port base (0x20,
+ * 0xa0) as instance_id and 1 as version_id.
+ * NOTE(review): the meaning of the int return value is not visible in
+ * this header - check the implementation before relying on it.
+ */
+extern int hvm_register_savevm(struct domain *d,
+ const char *idstr,
+ int instance_id,
+ int version_id,
+ SaveStateHandler *save_state,
+ LoadStateHandler *load_state,
+ void *opaque);
+
+/* Move the context cursor to byte offset @pos.  No range check is made. */
+static inline void hvm_ctxt_seek(hvm_domain_context_t *h, unsigned int pos)
+{
+ h->cur = pos;
+}
+
+/* Return the current byte offset of the context cursor. */
+static inline uint32_t hvm_ctxt_tell(hvm_domain_context_t *h)
+{
+ return h->cur;
+}
+
+/*
+ * Return 1 when the cursor has reached or passed either the recorded data
+ * size or the capacity of the buffer, 0 while a byte can still be read.
+ */
+static inline int hvm_ctxt_end(hvm_domain_context_t *h)
+{
+    if (h->cur < h->size && h->cur < HVM_CTXT_SIZE)
+        return 0;
+
+    return 1;
+}
+
+/*
+ * Append the low byte of @i at the cursor.  When the buffer is already
+ * full nothing is stored, but the cursor is still advanced.
+ */
+static inline void hvm_put_byte(hvm_domain_context_t *h, unsigned int i)
+{
+    if (h->cur < HVM_CTXT_SIZE)
+        h->data[h->cur] = (char)i;
+
+    h->cur++;
+}
+
+/* Append one unsigned byte; thin wrapper around hvm_put_byte(). */
+static inline void hvm_put_8u(hvm_domain_context_t *h, uint8_t b)
+{
+ hvm_put_byte(h, b);
+}
+
+/* Append a 16-bit value, most significant byte first. */
+static inline void hvm_put_16u(hvm_domain_context_t *h, uint16_t b)
+{
+    uint8_t upper = b >> 8;
+    uint8_t lower = b;
+
+    hvm_put_8u(h, upper);
+    hvm_put_8u(h, lower);
+}
+
+/* Append a 32-bit value, most significant half first. */
+static inline void hvm_put_32u(hvm_domain_context_t *h, uint32_t b)
+{
+    uint16_t upper = b >> 16;
+    uint16_t lower = b;
+
+    hvm_put_16u(h, upper);
+    hvm_put_16u(h, lower);
+}
+
+/* Append a 64-bit value, most significant half first. */
+static inline void hvm_put_64u(hvm_domain_context_t *h, uint64_t b)
+{
+    uint32_t upper = b >> 32;
+    uint32_t lower = b;
+
+    hvm_put_32u(h, upper);
+    hvm_put_32u(h, lower);
+}
+
+/*
+ * Append @len bytes from @buf at the cursor.
+ *
+ * The copy is only performed when the whole range fits inside data[].
+ * When it does not fit, nothing is stored but the cursor is still
+ * advanced by @len, matching what hvm_put_byte() does on a full buffer.
+ * A non-positive @len is ignored.
+ */
+static inline void hvm_put_buffer(hvm_domain_context_t *h, const char *buf, int len)
+{
+    if (len <= 0)
+        return;
+
+    /* cur < HVM_CTXT_SIZE is tested first so the subtraction cannot wrap. */
+    if (h->cur >= HVM_CTXT_SIZE ||
+        (unsigned int)len > HVM_CTXT_SIZE - h->cur) {
+        h->cur += len;
+        return;
+    }
+
+    memcpy(&h->data[h->cur], buf, len);
+    h->cur += len;
+}
+
+
+/*
+ * Read one byte at the cursor and advance it.  When the cursor is at or
+ * beyond the buffer capacity or the recorded data size, a message is
+ * printed, -1 is returned and the cursor is left where it was.
+ */
+static inline char hvm_get_byte(hvm_domain_context_t *h)
+{
+    uint32_t pos = h->cur;
+
+    if (pos < HVM_CTXT_SIZE && pos < h->size) {
+        h->cur = pos + 1;
+        return h->data[pos];
+    }
+
+    /* The capacity check takes precedence over the data-size check. */
+    if (pos >= HVM_CTXT_SIZE)
+        printk("hvm_get_byte overflow.\n");
+    else
+        printk("hvm_get_byte exceed data area.\n");
+
+    return -1;
+}
+
+/*
+ * Read one unsigned byte via hvm_get_byte().  hvm_get_byte() reports an
+ * error by returning -1, which is converted to uint8_t here, so an error
+ * cannot be told apart from a stored 0xff byte.
+ */
+static inline uint8_t hvm_get_8u(hvm_domain_context_t *h)
+{
+ return hvm_get_byte(h);
+}
+
+/* Read a 16-bit value stored most significant byte first. */
+static inline uint16_t hvm_get_16u(hvm_domain_context_t *h)
+{
+    /* Two separate statements keep the read order well defined. */
+    uint16_t upper = hvm_get_8u(h);
+    uint16_t lower = hvm_get_8u(h);
+
+    return (upper << 8) | lower;
+}
+
+/* Read a 32-bit value stored most significant half first. */
+static inline uint32_t hvm_get_32u(hvm_domain_context_t *h)
+{
+    uint32_t v;
+
+    /* Widen before shifting: a uint16_t promotes to (signed) int, and
+     * shifting a value with bit 15 set left by 16 would shift into the
+     * sign bit.  hvm_get_64u() widens its operand the same way. */
+    v = (uint32_t)hvm_get_16u(h) << 16;
+    v |= hvm_get_16u(h);
+
+    return v;
+}
+
+/* Read a 64-bit value stored most significant half first. */
+static inline uint64_t hvm_get_64u(hvm_domain_context_t *h)
+{
+    /* Two separate statements keep the read order well defined. */
+    uint64_t upper = hvm_get_32u(h);
+    uint64_t lower = hvm_get_32u(h);
+
+    return (upper << 32) | lower;
+}
+
+/*
+ * Read @len bytes at the cursor into @buf and advance the cursor.
+ *
+ * The same limits as hvm_get_byte() apply: the whole range must lie below
+ * both the recorded data size and the buffer capacity.  When it does not,
+ * a message is printed, @buf is filled with 0xff (the byte value
+ * hvm_get_byte() yields on error) and the cursor is left unchanged.
+ * A non-positive @len is ignored.
+ */
+static inline void hvm_get_buffer(hvm_domain_context_t *h, char *buf, int len)
+{
+    uint32_t limit = (h->size < HVM_CTXT_SIZE) ? h->size : HVM_CTXT_SIZE;
+
+    if (len <= 0)
+        return;
+
+    /* cur < limit is tested first so the subtraction cannot wrap. */
+    if (h->cur >= limit || (uint32_t)len > limit - h->cur) {
+        printk("hvm_get_buffer exceed data area.\n");
+        memset(buf, 0xff, len);
+        return;
+    }
+
+    memcpy(buf, &h->data[h->cur], len);
+    h->cur += len;
+}
+
+/*
+ * Entry points that write/read a whole hvm_domain_context_t.
+ * NOTE(review): presumably these walk the handlers registered through
+ * hvm_register_savevm(); their return-value convention is not visible
+ * here - confirm against the implementation.
+ */
+extern int hvm_save(struct vcpu*, hvm_domain_context_t *h);
+extern int hvm_load(struct vcpu*, hvm_domain_context_t *h);
+
+/* NOTE(review): judging by the name, initialises the shared I/O request
+ * page @sp for domain @d - confirm against the implementation. */
+extern void shpage_init(struct domain *d, shared_iopage_t *sp);
+
extern int hvm_enabled;
int hvm_copy_to_guest_phys(paddr_t paddr, void *buf, int size);
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/include/asm-x86/hvm/vpt.h Wed Dec 13 22:52:02 2006 +0800
@@ -123,7 +123,7 @@ extern void hvm_hooks_assist(struct vcpu
extern void hvm_hooks_assist(struct vcpu *v);
extern void pickup_deactive_ticks(struct periodic_time *vpit);
extern struct periodic_time *create_periodic_time(
- u32 period, char irq, char one_shot, time_cb *cb, void *data);
+ struct domain* d, u32 period, char irq, char one_shot, time_cb *cb, void
*data);
extern void destroy_periodic_time(struct periodic_time *pt);
void pit_init(struct vcpu *v, unsigned long cpu_khz);
void rtc_init(struct vcpu *v, int base, int irq);
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/include/public/arch-x86_32.h Wed Dec 13 22:52:02 2006 +0800
@@ -181,6 +181,13 @@ DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t)
DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+#include "vmcs_data.h"
+
+/*
+ * HVM CPU state carried inside struct vcpu_guest_context (hvmcpu_ctxt).
+ * NOTE(review): 'valid' presumably flags whether 'data' has been filled
+ * in - confirm with the save_cpu_ctxt/load_cpu_ctxt implementations.
+ */
+struct hvmcpu_context {
+ uint32_t valid;
+ struct vmcs_data data;
+};
/*
* The following is all CPU context. Note that the fpu_ctxt block is filled
@@ -210,6 +217,7 @@ struct vcpu_guest_context {
unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
unsigned long failsafe_callback_eip;
unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
+ struct hvmcpu_context hvmcpu_ctxt; /* whole vmcs region */
};
typedef struct vcpu_guest_context vcpu_guest_context_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/include/public/arch-x86_64.h Wed Dec 13 22:52:02 2006 +0800
@@ -255,6 +255,13 @@ DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t)
typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+#include "vmcs_data.h"
+
+/*
+ * HVM CPU state carried inside struct vcpu_guest_context (hvmcpu_ctxt).
+ * NOTE(review): 'valid' presumably flags whether 'data' has been filled
+ * in - confirm with the save_cpu_ctxt/load_cpu_ctxt implementations.
+ */
+struct hvmcpu_context {
+ uint32_t valid;
+ struct vmcs_data data;
+};
+
/*
* The following is all CPU context. Note that the fpu_ctxt block is filled
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
@@ -288,6 +295,7 @@ struct vcpu_guest_context {
uint64_t fs_base;
uint64_t gs_base_kernel;
uint64_t gs_base_user;
+ struct hvmcpu_context hvmcpu_ctxt; /* whole vmcs region */
};
typedef struct vcpu_guest_context vcpu_guest_context_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h Fri Sep 15 17:05:38 2006 +0800
+++ b/xen/include/public/domctl.h Wed Dec 13 22:52:02 2006 +0800
@@ -384,6 +384,21 @@ struct xen_domctl_settimeoffset {
};
typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
+
+/* Capacity in bytes of hvm_domain_context.data[]. */
+#define HVM_CTXT_SIZE 6144
+/*
+ * Flat byte buffer exchanged by XEN_DOMCTL_gethvmcontext/sethvmcontext.
+ *   cur  - cursor used by the hvm_put_*/hvm_get_* helpers
+ *   size - reads at or beyond this offset are refused by hvm_get_byte()
+ *   data - the payload bytes
+ */
+typedef struct hvm_domain_context {
+ uint32_t cur;
+ uint32_t size;
+ uint8_t data[HVM_CTXT_SIZE];
+} hvm_domain_context_t;
+DEFINE_XEN_GUEST_HANDLE(hvm_domain_context_t);
+
+/* Domctl commands that fetch / load a domain's hvm_domain_context. */
+#define XEN_DOMCTL_gethvmcontext 33
+#define XEN_DOMCTL_sethvmcontext 34
+/* Argument for both commands: a handle to the caller's context buffer
+ * (written by the get command, read by the set command). */
+typedef struct xen_domctl_hvmcontext {
+ XEN_GUEST_HANDLE(hvm_domain_context_t) ctxt; /* IN/OUT */
+} xen_domctl_hvmcontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t);
struct xen_domctl {
uint32_t cmd;
@@ -410,6 +425,7 @@ struct xen_domctl {
struct xen_domctl_hypercall_init hypercall_init;
struct xen_domctl_arch_setup arch_setup;
struct xen_domctl_settimeoffset settimeoffset;
+ struct xen_domctl_hvmcontext hvmcontext;
uint8_t pad[128];
} u;
};
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/libxc/xc_hvm_restore.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_hvm_restore.c Wed Dec 13 22:52:02 2006 +0800
@@ -0,0 +1,280 @@
+/******************************************************************************
+ * xc_hvm_restore.c
+ *
+ * Restore the state of a HVM guest.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Copyright (c) 2006 Intel Corporation
+ * rewritten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "xg_private.h"
+#include "xg_save_restore.h"
+
+#include <xen/hvm/ioreq.h>
+#include <xen/hvm/params.h>
+#include <xen/hvm/e820.h>
+
+/* max mfn of the whole machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the current guest */
+static unsigned int pt_levels;
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+/* A table mapping each PFN to its new MFN. */
+static xen_pfn_t *p2m = NULL;
+
+/*
+ * Read exactly @count bytes from @fd into @buf, retrying after EINTR and
+ * after short reads.
+ *
+ * Returns 1 when all @count bytes were read, 0 on end-of-file or on any
+ * read error other than EINTR.
+ */
+static ssize_t
+read_exact(int fd, void *buf, size_t count)
+{
+    size_t done = 0;
+    unsigned char *b = buf;
+
+    while (done < count) {
+        /* read() returns ssize_t; keeping it in an int would truncate
+         * large results and mix signedness with the size_t count. */
+        ssize_t s = read(fd, &b[done], count - done);
+
+        if ((s == -1) && (errno == EINTR))
+            continue;
+        if (s <= 0)
+            break;
+        done += (size_t)s;
+    }
+
+    return (done == count) ? 1 : 0;
+}
+
+/*
+ * Restore an HVM guest from the image read on @io_fd into domain @dom.
+ *
+ * The stream is consumed in this order: the guest pages (pfns 0xa0-0xbf
+ * are skipped), a length-prefixed hvm_domain_context_t, the vcpu count,
+ * then one length-prefixed vcpu_guest_context_t per vcpu.
+ *
+ * On entry *store_mfn carries the guest memory size in MB; on success it
+ * is overwritten with the frame number of the store page.
+ *
+ * Returns 0 on success and non-zero on failure.  Once the p2m table has
+ * been allocated, every failure goes through 'out', which destroys the
+ * domain and releases the table and any live mapping.
+ */
+int xc_hvm_restore(int xc_handle, int io_fd,
+                   uint32_t dom, unsigned long nr_pfns,
+                   unsigned int store_evtchn, unsigned long *store_mfn,
+                   unsigned int console_evtchn, unsigned long *console_mfn,
+                   unsigned int pae, unsigned int apic)
+{
+    DECLARE_DOMCTL;
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    /* Mapping of the guest's pages; NULL whenever nothing is mapped. */
+    char *region_base = NULL;
+
+    xc_mmu_t *mmu = NULL;
+
+    xc_dominfo_t info;
+
+    /* rc is the function result and stays non-zero until everything has
+     * succeeded.  Results of individual calls go into ret, so that a
+     * successful call can no longer turn a later failure into "success". */
+    unsigned int rc = 1, i;
+    int ret;
+    uint32_t rec_len, nr_vcpus;
+    hvm_domain_context_t hvm_ctxt;
+    unsigned long long v_end, memsize;
+    unsigned long shared_page_nr;
+
+    /* hvm guest mem size (Mb) */
+    memsize = (unsigned long long)*store_mfn;
+    v_end = memsize << 20;
+
+    DPRINTF("xc_hvm_restore:dom=%d, nr_pfns=0x%lx, store_evtchn=%d, *store_mfn=%ld, "
+            "console_evtchn=%d, *console_mfn=%ld, pae=%u, apic=%u.\n",
+            dom, nr_pfns, store_evtchn, *store_mfn, console_evtchn,
+            *console_mfn, pae, apic);
+
+    /*XXX: calculate the VGA hole, it's better derived from memsize*/
+    max_pfn = nr_pfns + 0x20;
+
+    if(!get_platform_info(xc_handle, dom,
+                          &max_mfn, &hvirt_start, &pt_levels)) {
+        ERROR("Unable to get platform info.");
+        return 1;
+    }
+
+    DPRINTF("xc_hvm_restore start: max_pfn = %lx, max_mfn = %lx, hvirt_start=%lx, "
+            "pt_levels=%d\n",
+            max_pfn,
+            max_mfn,
+            hvirt_start,
+            pt_levels);
+
+    if (mlock(&ctxt, sizeof(ctxt))) {
+        /* needed for build dom0 op, but might as well do early */
+        ERROR("Unable to mlock ctxt");
+        return 1;
+    }
+
+    p2m = malloc(max_pfn * sizeof(xen_pfn_t));
+
+    if (p2m == NULL) {
+        ERROR("memory alloc failed");
+        errno = ENOMEM;
+        goto out;
+    }
+
+    /* Get the domain's shared-info frame. */
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = (domid_t)dom;
+    if (xc_domctl(xc_handle, &domctl) < 0) {
+        ERROR("Could not get information on new domain");
+        goto out;
+    }
+    shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
+
+    if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+        errno = ENOMEM;
+        goto out;
+    }
+
+    for ( i = 0; i < max_pfn; i++ )
+        p2m[i] = i;
+    for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < max_pfn; i++ )
+        p2m[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+
+    /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
+    ret = xc_domain_memory_populate_physmap(
+        xc_handle, dom, (max_pfn > 0xa0) ? 0xa0 : max_pfn,
+        0, 0, &p2m[0x00]);
+    if ( (ret == 0) && (max_pfn > 0xc0) )
+        ret = xc_domain_memory_populate_physmap(
+            xc_handle, dom, max_pfn - 0xc0, 0, 0, &p2m[0xc0]);
+    if ( ret != 0 )
+    {
+        PERROR("Could not allocate memory for HVM guest.\n");
+        goto out;
+    }
+
+    /**********XXXXXXXXXXXXXXXX******************/
+    if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+        ERROR("Could not get domain info");
+        /* Leave through 'out' so p2m is released like on every other
+         * failure after its allocation. */
+        goto out;
+    }
+
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = (domid_t)dom;
+    if (xc_domctl(xc_handle, &domctl) < 0) {
+        ERROR("Could not get information on new domain");
+        goto out;
+    }
+
+    for ( i = 0; i < max_pfn; i++)
+        p2m[i] = i;
+
+    /* restore memory */
+    if ( (region_base = xc_map_foreign_batch(xc_handle, dom,
+                                             PROT_READ | PROT_WRITE,
+                                             p2m, max_pfn) ) == 0) {
+        ERROR("HVM:map page_array failed!\n");
+        goto out;
+    }
+
+    for (i = 0; i < max_pfn; i++) {
+        void *zpage = region_base + i * PAGE_SIZE;
+        if ( p2m[i] == (~0UL)) { /*invalid mfn*/
+            continue;
+        }
+        if (i >= 0xa0 && i < 0xc0) {
+            continue;
+        }
+
+        if (!read_exact(io_fd, zpage, PAGE_SIZE)) {
+            ERROR("HVM:read page %d failed!\n", i);
+            goto out;
+        }
+    }
+
+    (void)munmap(region_base, max_pfn*PAGE_SIZE);
+    region_base = NULL;
+
+    /* The special pages below are addressed relative to the end of guest
+     * memory; refuse sizes for which those offsets would underflow or
+     * index past the end of p2m[]. */
+    if ( (v_end >> PAGE_SHIFT) < 3 ||
+         (v_end >> PAGE_SHIFT) - 2 >= max_pfn ) {
+        ERROR("HVM:store page is outside the restored pfn range!\n");
+        goto out;
+    }
+
+/* xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);*/
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
+
+    if ( v_end > HVM_BELOW_4G_RAM_END )
+        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
+    else
+        shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
+
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
+
+    /* calculate the store_mfn , wrong val cause hang when introduceDomain */
+    *store_mfn = p2m[(v_end >> PAGE_SHIFT) - 2];
+    DPRINTF("hvm restore:calculate new store_mfn=0x%lx,v_end=0x%llx..\n",
+            *store_mfn, v_end);
+
+    /* restore hvm context including pic/pit/shpage */
+    if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) {
+        ERROR("error read hvm context size!\n");
+        goto out;
+    }
+    if (rec_len != sizeof(hvm_ctxt)) {
+        ERROR("hvm context size dismatch!\n");
+        goto out;
+    }
+
+    if (!read_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt))) {
+        ERROR("error read hvm context!\n");
+        goto out;
+    }
+
+    if (( ret = xc_domain_hvm_setcontext(xc_handle, dom, &hvm_ctxt))) {
+        ERROR("error set hvm context!\n");
+        goto out;
+    }
+
+    if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) {
+        ERROR("error read nr vcpu !\n");
+        goto out;
+    }
+    DPRINTF("hvm restore:get nr_vcpus=%d.\n", nr_vcpus);
+
+    for (i =0; i < nr_vcpus; i++) {
+        if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) {
+            ERROR("error read vcpu context size!\n");
+            goto out;
+        }
+        if (rec_len != sizeof(ctxt)) {
+            ERROR("vcpu context size dismatch!\n");
+            goto out;
+        }
+
+        if (!read_exact(io_fd, &(ctxt), sizeof(ctxt))) {
+            ERROR("error read vcpu context.\n");
+            goto out;
+        }
+
+        if ( (ret = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) {
+            ERROR("Could not set vcpu context, rc=%d", ret);
+            goto out;
+        }
+    }
+
+    /* Only now is the restore known to have succeeded. */
+    rc = 0;
+
+ out:
+    /* Drop the page mapping if a failure happened while it was live. */
+    if ( region_base != NULL )
+        (void)munmap(region_base, max_pfn*PAGE_SIZE);
+    if ( (rc != 0) && (dom != 0) )
+        xc_domain_destroy(xc_handle, dom);
+    free(mmu);
+    free(p2m);
+    p2m = NULL; /* file-scope pointer: do not leave it dangling */
+
+    DPRINTF("Restore exit with rc=%d\n", rc);
+
+    return rc;
+}
diff -r 7c0030214af1 -r 3c0bd8907fd9 tools/libxc/xc_hvm_save.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_hvm_save.c Wed Dec 13 22:52:02 2006 +0800
@@ -0,0 +1,248 @@
+/******************************************************************************
+ * xc_hvm_save.c
+ *
+ * Save the state of a running HVM guest.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Copyright (c) 2006 Intel Corporation
+ * rewritten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include "xc_private.h"
+#include "xg_private.h"
+#include "xg_save_restore.h"
+
+#define DEF_MAX_ITERS (4 - 1) /* limit us to 4 times round loop */
+#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */
+
+/* max mfn of the whole machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the current guest */
+static unsigned int pt_levels;
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
+
+/*
+ * Save HVM domain @dom to @io_fd.  Live saving (XCFLAGS_LIVE) is refused.
+ *
+ * After suspending the domain through @suspend, the stream is written in
+ * this order: the page count, the guest pages (pfns 0xa0-0xbf are
+ * skipped), a length-prefixed hvm_domain_context_t, the vcpu count, then
+ * one length-prefixed vcpu_guest_context_t per vcpu.
+ *
+ * Returns 0 on success and 1 on failure.  Every failure after the first
+ * resource has been acquired leaves through 'out', which releases the
+ * pfn array and both foreign mappings.
+ */
+int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+                uint32_t max_factor, uint32_t flags, int (*suspend)(int))
+{
+    xc_dominfo_t info;
+
+    int rc = 1, i;
+    int live  = (flags & XCFLAGS_LIVE);
+    int debug = (flags & XCFLAGS_DEBUG);
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    /* A copy of hvm domain context */
+    hvm_domain_context_t hvm_ctxt;
+
+    /* Live mapping of shared info structure */
+    shared_info_t *live_shinfo = NULL;
+
+    /* base of the region in which domain memory is mapped */
+    unsigned char *region_base = NULL;
+
+    uint32_t nr_pfns, max_pfns = 0, rec_size, nr_vcpus;
+
+    /* Identity list of guest pfns to map; NULL until allocated so the
+     * cleanup path can free it unconditionally. */
+    unsigned long *page_array = NULL;
+
+    DPRINTF("xc_hvm_save:dom=%d, max_iters=%d, max_factor=%d, flags=0x%x.\n",
+            dom, max_iters, max_factor, flags);
+
+    /* If no explicit control parameters given, use defaults */
+    if(!max_iters)
+        max_iters = DEF_MAX_ITERS;
+    if(!max_factor)
+        max_factor = DEF_MAX_FACTOR;
+
+/* initialize_mbit_rate();*/
+
+    if(!get_platform_info(xc_handle, dom,
+                          &max_mfn, &hvirt_start, &pt_levels)) {
+        ERROR("HVM:Unable to get platform info.");
+        return 1;
+    }
+
+    if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+        ERROR("HVM:Could not get domain info");
+        return 1;
+    }
+    nr_vcpus = info.nr_online_vcpus;
+
+    if (mlock(&ctxt, sizeof(ctxt))) {
+        ERROR("HVM:Unable to mlock ctxt");
+        return 1;
+    }
+
+    /* Only have to worry about vcpu 0 even for SMP */
+    if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
+        ERROR("HVM:Could not get vcpu context");
+        goto out;
+    }
+    shared_info_frame = info.shared_info_frame;
+
+    /* A cheesy test to see whether the domain contains valid state. */
+    if (ctxt.ctrlreg[3] == 0)
+    {
+        ERROR("Domain is not in a valid HVM guest state");
+        goto out;
+    }
+
+    /* cheesy sanity check */
+    if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) {
+        ERROR("Invalid HVM state record -- pfn count out of range: %lu",
+              (info.max_memkb >> (PAGE_SHIFT - 10)));
+        goto out;
+    }
+
+    /* Map the shared info frame */
+    if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                            PROT_READ, shared_info_frame))) {
+        ERROR("HVM:Couldn't map live_shinfo");
+        goto out;
+    }
+
+    max_pfn = live_shinfo->arch.max_pfn;
+
+    DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, "
+            "nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages);
+
+    if (live) {
+        ERROR("hvm domain doesn't support live migration now.\n");
+        if (debug)
+            ERROR("hvm domain debug on.\n");
+        goto out;
+    }
+
+    /* suspend hvm domain */
+    if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) {
+        ERROR("HVM Domain appears not to have suspended");
+        goto out;
+    }
+
+    nr_pfns = info.nr_pages;
+    DPRINTF("after suspend hvm domain nr_pages=0x%x, max_memkb=0x%lx.\n",
+            nr_pfns, info.max_memkb);
+
+    /*XXX: calculate the VGA hole*/
+    max_pfns = nr_pfns + 0x20;
+
+    /* get all the HVM domain pfns */
+    if ( (page_array = malloc(max_pfns * sizeof(*page_array))) == NULL) {
+        ERROR("HVM:malloc fail!\n");
+        goto out;
+    }
+
+    for ( i = 0; i < max_pfns; i++)
+        page_array[i] = i;
+
+    if ( (region_base = xc_map_foreign_batch(xc_handle, dom,
+                                             PROT_READ | PROT_WRITE,
+                                             page_array, max_pfns) ) == 0) {
+        ERROR("HVM domain map pages failed!\n");
+        goto out;
+    }
+
+    /* Start writing out the saved-domain record. begin with mem */
+    if (!write_exact(io_fd, &nr_pfns, sizeof(unsigned int))) {
+        ERROR("write: nr_pfns");
+        goto out;
+    }
+
+    for (i = 0; i < max_pfns; i++) {
+        void *zpage = region_base + i * PAGE_SIZE;
+        if ( page_array[i] == (~0UL)) {
+            continue;
+        }
+        if (i >= 0xa0 && i < 0xc0) {
+            continue;
+        }
+
+        /* write_exact() rather than a bare write(): a short write would
+         * otherwise abort the save or corrupt the image. */
+        if (!write_exact(io_fd, zpage, PAGE_SIZE)) {
+            ERROR("HVM:write page %d failed, mfn=0x%lx.\n", i, page_array[i]);
+            goto out;
+        }
+    }
+
+    /* save hvm hypervisor state including pic/pit/shpage */
+    if (mlock(&hvm_ctxt, sizeof(hvm_ctxt))) {
+        ERROR("Unable to mlock ctxt");
+        goto out;
+    }
+
+    if (xc_domain_hvm_getcontext(xc_handle, dom, &hvm_ctxt)){
+        ERROR("HVM:Could not get hvm context");
+        goto out;
+    }
+
+    rec_size = sizeof(hvm_ctxt);
+    if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) {
+        ERROR("error write hvm ctxt size");
+        goto out;
+    }
+
+    if ( !write_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt)) ) {
+        ERROR("write HVM info failed!\n");
+        /* Without this the save carried on and could report success
+         * for a truncated image. */
+        goto out;
+    }
+
+    /* save vcpu/vmcs context */
+    if (!write_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) {
+        ERROR("error write nr vcpus");
+        goto out;
+    }
+
+    /*XXX: need a online map to exclude down cpu */
+    for (i = 0; i < nr_vcpus; i++) {
+
+        if (xc_vcpu_getcontext(xc_handle, dom, i, &ctxt)) {
+            ERROR("HVM:Could not get vcpu context");
+            goto out;
+        }
+
+        rec_size = sizeof(ctxt);
+        DPRINTF("write %d vcpucontext of total %d.\n", i, nr_vcpus);
+        if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) {
+            ERROR("error write vcpu ctxt size");
+            goto out;
+        }
+
+        if (!write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) {
+            ERROR("write vmcs failed!\n");
+            goto out;
+        }
+    }
+
+    /* Success! */
+    rc = 0;
+
+ out:
+    /* Release whatever was acquired, on success and on failure alike. */
+    if (region_base != NULL)
+        munmap(region_base, (size_t)max_pfns * PAGE_SIZE);
+    if (live_shinfo != NULL)
+        munmap(live_shinfo, PAGE_SIZE);
+    free(page_array);
+
+    return !!rc;
+}
diff -r 7c0030214af1 -r 3c0bd8907fd9 xen/include/public/vmcs_data.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/public/vmcs_data.h Wed Dec 13 22:52:02 2006 +0800
@@ -0,0 +1,68 @@
+/******************************************************************************
+ * vmcs_data.h
+ *
+ * Copyright (c) 2006 Intel Corporation
+ *
+ */
+
+#ifndef __XEN_PUBLIC_VMCS_DATA_H__
+#define __XEN_PUBLIC_VMCS_DATA_H__
+
+/*
+ * World vmcs state
+ *
+ * Register, descriptor-table, segment and MSR values of one HVM vcpu,
+ * embedded in struct hvmcpu_context.  Each segment is described by a
+ * selector, limit, base and access-rights ("arbytes") field.
+ *
+ * NOTE(review): several 32-bit fields are directly followed by 64-bit
+ * fields with no explicit padding (e.g. idtr_limit/idtr_base,
+ * sysenter_cs/sysenter_esp), so the layout depends on how the compiler
+ * aligns uint64_t.  Confirm that 32-bit and 64-bit builds sharing this
+ * public structure agree on it.
+ */
+struct vmcs_data {
+ uint64_t eip; /* execution pointer */
+ uint64_t esp; /* stack pointer */
+ uint64_t eflags; /* flags register */
+ uint64_t cr0;
+ uint64_t cr3; /* page table directory */
+ uint64_t cr4;
+ uint32_t idtr_limit; /* idt */
+ uint64_t idtr_base;
+ uint32_t gdtr_limit; /* gdt */
+ uint64_t gdtr_base;
+ uint32_t cs_sel; /* cs selector */
+ uint32_t cs_limit;
+ uint64_t cs_base;
+ uint32_t cs_arbytes;
+ uint32_t ds_sel; /* ds selector */
+ uint32_t ds_limit;
+ uint64_t ds_base;
+ uint32_t ds_arbytes;
+ uint32_t es_sel; /* es selector */
+ uint32_t es_limit;
+ uint64_t es_base;
+ uint32_t es_arbytes;
+ uint32_t ss_sel; /* ss selector */
+ uint32_t ss_limit;
+ uint64_t ss_base;
+ uint32_t ss_arbytes;
+ uint32_t fs_sel; /* fs selector */
+ uint32_t fs_limit;
+ uint64_t fs_base;
+ uint32_t fs_arbytes;
+ uint32_t gs_sel; /* gs selector */
+ uint32_t gs_limit;
+ uint64_t gs_base;
+ uint32_t gs_arbytes;
+ uint32_t tr_sel; /* task selector */
+ uint32_t tr_limit;
+ uint64_t tr_base;
+ uint32_t tr_arbytes;
+ uint32_t ldtr_sel; /* ldtr selector */
+ uint32_t ldtr_limit;
+ uint64_t ldtr_base;
+ uint32_t ldtr_arbytes;
+ uint32_t sysenter_cs;
+ uint64_t sysenter_esp;
+ uint64_t sysenter_eip;
+ /* msr for em64t */
+ uint64_t shadow_gs;
+ uint64_t flags;
+ /* same size as VMX_MSR_COUNT */
+ uint64_t msr_items[6];
+ uint64_t vmxassist_enabled;
+};
+typedef struct vmcs_data vmcs_data_t;
+#endif
------------------------------------------------------------------------
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel