[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [XenPPC] [PATCH] xm save / restore
Xen and Linux patches for 'nonlive' save & restore. Also includes improved code for searching page_array[] while saving the htab (xc_linux_save.c). A modified version of htab.h is placed in a new directory, tools/libxc/xen/asm; this probably needs a better resolution. The htab is mapped by 'decorating' the pfn (see xen/arch/powerpc/mm.c). However, no range / validation checking is done at this time. ................................................... Xen diffs: diff -r 7669fca80bfc config/powerpc64.mk --- a/config/powerpc64.mk Mon Dec 04 11:46:53 2006 -0500 +++ b/config/powerpc64.mk Wed Dec 13 15:39:32 2006 -0500 @@ -3,3 +3,4 @@ CONFIG_POWERPC_$(XEN_OS) := y CFLAGS += -DELFSIZE=64 LIBDIR := lib +CONFIG_XCUTILS :=y diff -r 7669fca80bfc tools/libxc/powerpc64/Makefile --- a/tools/libxc/powerpc64/Makefile Mon Dec 04 11:46:53 2006 -0500 +++ b/tools/libxc/powerpc64/Makefile Wed Dec 13 15:39:32 2006 -0500 @@ -2,5 +2,7 @@ GUEST_SRCS-y += powerpc64/xc_linux_build GUEST_SRCS-y += powerpc64/xc_linux_build.c GUEST_SRCS-y += powerpc64/xc_prose_build.c GUEST_SRCS-y += powerpc64/utils.c +GUEST_SRCS-y += powerpc64/xc_linux_save.c +GUEST_SRCS-y += powerpc64/xc_linux_restore.c CTRL_SRCS-y += powerpc64/xc_memory.c diff -r 7669fca80bfc tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Mon Dec 04 11:46:53 2006 -0500 +++ b/tools/libxc/xc_private.c Wed Dec 13 15:39:32 2006 -0500 @@ -306,6 +306,23 @@ int xc_get_pfn_list(int xc_handle, return (ret < 0) ? -1 : domctl.u.getmemlist.num_pfns; } + +int xc_get_shadow_list( int xc_handle, + uint32_t domid, + uint64_t *htab_raddr) +{ + DECLARE_DOMCTL; + int ret; + + domctl.cmd = XEN_DOMCTL_getshadowlist; + domctl.domain = (domid_t)domid; + + ret = do_domctl(xc_handle, &domctl); + *htab_raddr = domctl.u.getshadowlist.htab_map; + + return (ret < 0) ?
-1 : domctl.u.getshadowlist.htab_num_ptes; +} + #endif long xc_get_tot_pages(int xc_handle, uint32_t domid) diff -r 7669fca80bfc tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Mon Dec 04 11:46:53 2006 -0500 +++ b/tools/libxc/xenctrl.h Wed Dec 13 15:39:32 2006 -0500 @@ -518,6 +518,8 @@ int xc_get_pfn_list(int xc_handle, uint3 int xc_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf, unsigned long max_pfns); +int xc_get_shadow_list(int xc_handle, uint32_t domid, uint64_t *mfn_htab_map); + unsigned long xc_ia64_fpsr_default(void); int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid, diff -r 7669fca80bfc tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Mon Dec 04 11:46:53 2006 -0500 +++ b/tools/python/xen/xend/XendDomainInfo.py Wed Dec 13 15:39:32 2006 -0500 @@ -461,6 +461,7 @@ class Common_XendDomainInfo: if self.state in (DOM_STATE_HALTED, DOM_STATE_SUSPENDED): try: self._constructDomain() + self._allocMem2() self._storeVmDetails() self._createDevices() self._createChannels() @@ -1237,7 +1238,67 @@ class Common_XendDomainInfo: # Set maximum number of vcpus in domain xc.domain_max_vcpus(self.domid, int(self.info['vcpus'])) - + # Use the saved architecture- and image-specific calculations + # the various headrooms necessary, given the raw configured + # values. maxmem, memory, and shadow are all in KiB. + + def _allocMem2(self): + log.debug("allocMem2"); + + maxmem = self.info['maxmem'] * 1024 + memory = self.info['memory'] * 1024 + shadow = self.info['shadow_memory'] * 1024 + + # Round shadow up to a multiple of a MiB, as shadow_mem_control + # takes MiB and we must not round down and end up under-providing. + shadow = ((shadow + 1023) / 1024) * 1024 + + # set memory limit + xc.domain_setmaxmem(self.domid, maxmem) + + # Make sure there's enough RAM available for the domain + balloon.free(memory + shadow) + + # Set up the shadow memory, i.e. 
the PowerPC hash table + shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024) + self.info['shadow_memory'] = shadow_cur + + rma_log = 26 ### self.info['powerpc_rma_log'] + if rma_log == 0: + # use smallest RMA size available + rma_log = self.getRealModeLogs()[0] + + if rma_log not in self.getRealModeLogs(): + raise ValueError("rma_log(%d) must be one of" % rma_log, + self.getRealModeLogs()) + + # store info for FlatDeviceTree + ### self.info['powerpc_rma_log'] = rma_log + + rma_kb = (1 << rma_log) / 1024 + if memory < rma_kb: + raise ValueError("Domain memory must be at least %d KB" % rma_kb) + + if memory % (16 << 10): + raise ValueError("Domain memory %dKB must be a multiple of 16MB" + % memory) + + # allocate the RMA + log.debug("alloc_real_mode_area(%d, %d)", self.domid, rma_log) + xc.alloc_real_mode_area(self.domid, rma_log) + + # now allocate the remaining memory as large-order allocations + memory -= rma_kb + extent_log = 24 # 16 MB + page_log = 12 # 4 KB + extent_order = extent_log - page_log + log.debug("increase_reservation(%d, 0x%x, %d)", self.domid, + memory, extent_order) + xc.domain_memory_increase_reservation(self.domid, + memory, + extent_order) + + def _introduceDomain(self): assert self.domid is not None assert self.store_mfn is not None diff -r 7669fca80bfc xen/arch/powerpc/domain.c --- a/xen/arch/powerpc/domain.c Mon Dec 04 11:46:53 2006 -0500 +++ b/xen/arch/powerpc/domain.c Wed Dec 13 15:39:32 2006 -0500 @@ -152,7 +152,32 @@ void vcpu_destroy(struct vcpu *v) int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c) { + int i; + memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs)); + + for ( i = 0; i < NUM_SLB_ENTRIES; i++) { + memcpy(&v->arch.slb_entries[i], &c->slb_entries[i], sizeof(struct slb_entry)); + } + + for ( i = 0; i< 4; i++) v->arch.sprg[i] = c->sprg[i]; + + v->arch.timebase = c->timebase; + v->arch.dar = c->dar; + v->arch.dsisr = c->dsisr; + + memcpy( &v->arch.cpu, &c->cpu, sizeof(struct cpu_vcpu)); + 
v->arch.dec = c->dec; + +#ifdef HAS_FLOAT + memcpy( v->arch.fprs, c->fprs, sizeof(double)*NUM_FPRS); +#endif /* HAS_FLOAT */ + +#ifdef HAS_VMX + memcpy( &v->arch.vrs, &c->vrs, sizeof(vector128)*32); + memcpy( &v->arch.vscr, &c->vscr, sizeof(vector128)); + v->arch.vrsave = c->vrsave; +#endif /* HAS_VMX */ printk("Domain[%d].%d: initializing\n", v->domain->domain_id, v->vcpu_id); diff -r 7669fca80bfc xen/arch/powerpc/domctl.c --- a/xen/arch/powerpc/domctl.c Mon Dec 04 11:46:53 2006 -0500 +++ b/xen/arch/powerpc/domctl.c Wed Dec 13 15:39:32 2006 -0500 @@ -29,10 +29,37 @@ #include <public/sysctl.h> #include <asm/processor.h> +#define DECOR 0x80000000 // indicates htab address + + void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *); void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c) { + int i; + memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs)); + for (i = 0; i < NUM_SLB_ENTRIES; i++) { + memcpy(&c->slb_entries[i],&v->arch.slb_entries[i],sizeof(struct slb_entry)); + } + + for (i = 0; i < 4; i++) c->sprg[i] = v->arch.sprg[i] ; + c->timebase = v->arch.timebase; + c->dar = v->arch.dar; + c->dsisr = v->arch.dsisr; + memcpy(&c->cpu,&v->arch.cpu,sizeof(struct cpu_vcpu)); + c->dec = v->arch.dec; + +#ifdef HAS_FLOAT + memcpy(c->fprs,v->arch.fprs,sizeof(double)*NUM_FPRS); +#endif /* HAS_FLOAT */ + +#ifdef HAS_VMX + memcpy(c->vrs, v->arch.vrs, sizeof(vector128)*32); + memcpy(&c->vscr, &v->arch.vscr, sizeof(vector128)); + c->vrsave = v->arch.vrsave; +#endif /* HAS_VMX */ + + /* XXX fill in rest of vcpu_guest_context_t */ } @@ -108,6 +135,27 @@ long arch_do_domctl(struct xen_domctl *d } } break; + case XEN_DOMCTL_getshadowlist: + { + struct domain *d = find_domain_by_id(domctl->domain); + uint num_ptes; + + ret = -EINVAL; + if ( d != NULL) + { + ret = 0; + + domctl->u.getshadowlist.htab_map = (uint64_t)(d->arch.htab.map); + + num_ptes = 1UL << d->arch.htab.log_num_ptes; + domctl->u.getshadowlist.htab_num_ptes = num_ptes; + + 
copy_to_guest(u_domctl, domctl, 1); + put_domain(d); + } + } + break; + default: ret = -ENOSYS; diff -r 7669fca80bfc xen/arch/powerpc/mm.c --- a/xen/arch/powerpc/mm.c Mon Dec 04 11:46:53 2006 -0500 +++ b/xen/arch/powerpc/mm.c Wed Dec 13 15:39:32 2006 -0500 @@ -37,6 +37,8 @@ #define MEM_LOG(_f, _a...) ((void)0) #endif +#define DECOR 0x80000000UL + /* Frame table and its size in pages. */ struct page_info *frame_table; unsigned long max_page; @@ -408,6 +410,11 @@ ulong pfn2mfn(struct domain *d, ulong pf ulong foreign_map_pfn = 1UL << cpu_foreign_map_order(); /* quick tests first */ + if (pfn & DECOR) + { + mfn = pfn & ~DECOR; //*** TBD Check for valid htab range? + } + else if (pfn & foreign_map_pfn) { t = PFN_TYPE_FOREIGN; mfn = foreign_to_mfn(d, pfn); diff -r 7669fca80bfc xen/include/asm-powerpc/domain.h --- a/xen/include/asm-powerpc/domain.h Mon Dec 04 11:46:53 2006 -0500 +++ b/xen/include/asm-powerpc/domain.h Wed Dec 13 15:39:32 2006 -0500 @@ -51,10 +51,6 @@ struct arch_domain { uint large_page_order[4]; } __cacheline_aligned; -struct slb_entry { - ulong slb_vsid; - ulong slb_esid; -}; #define SLB_ESID_VALID (1ULL << (63 - 36)) #define SLB_ESID_CLASS (1ULL << (63 - 56)) #define SLB_ESID_MASK (~0ULL << (63 - 35)) @@ -63,9 +59,9 @@ struct slb_entry { struct xencomm; -typedef struct { - u32 u[4]; -} __attribute__((aligned(16))) vector128; +#ifdef HAS_VMX +typedef _vector128 vector128; +#endif /* HAS_VMX */ struct arch_vcpu { cpu_user_regs_t ctxt; /* User-level CPU registers */ diff -r 7669fca80bfc xen/include/asm-powerpc/htab.h --- a/xen/include/asm-powerpc/htab.h Mon Dec 04 11:46:53 2006 -0500 +++ b/xen/include/asm-powerpc/htab.h Wed Dec 13 15:39:32 2006 -0500 @@ -69,68 +69,68 @@ union pte { struct pte_words { - ulong vsid; - ulong rpn; + uint64_t vsid; + uint64_t rpn; } words; struct pte_bits { /* *INDENT-OFF* */ /* high word */ - ulong avpn: 57; /* [0-56] abbreviated virtual page number */ - ulong lock: 1; /* [57] hypervisor lock bit */ - ulong res: 1; /* [58] 
reserved for hypervisor */ - ulong bolted: 1; /* [59] XXX software-reserved; temp hack */ - ulong sw: 1; /* [60] reserved for software */ - ulong l: 1; /* [61] Large Page */ - ulong h: 1; /* [62] hash function id */ - ulong v: 1; /* [63] valid */ + uint64_t avpn: 57; /* [0-56] abbreviated virtual page number */ + uint64_t lock: 1; /* [57] hypervisor lock bit */ + uint64_t res: 1; /* [58] reserved for hypervisor */ + uint64_t bolted: 1; /* [59] XXX software-reserved; temp hack */ + uint64_t sw: 1; /* [60] reserved for software */ + uint64_t l: 1; /* [61] Large Page */ + uint64_t h: 1; /* [62] hash function id */ + uint64_t v: 1; /* [63] valid */ /* low word */ - ulong pp0: 1; /* [0] page protection bit 0 (current PowerPC + uint64_t pp0: 1; /* [0] page protection bit 0 (current PowerPC * specification says it can always be 0) */ - ulong ts: 1; /* [1] tag select */ - ulong rpn: 50; /* [2-51] real page number */ - ulong res2: 2; /* [52,53] reserved */ - ulong ac: 1; /* [54] address compare */ - ulong r: 1; /* [55] referenced */ - ulong c: 1; /* [56] changed */ - ulong w: 1; /* [57] write through */ - ulong i: 1; /* [58] cache inhibited */ - ulong m: 1; /* [59] memory coherent */ - ulong g: 1; /* [60] guarded */ - ulong n: 1; /* [61] no-execute */ - ulong pp1: 2; /* [62,63] page protection bits 1:2 */ + uint64_t ts: 1; /* [1] tag select */ + uint64_t rpn: 50; /* [2-51] real page number */ + uint64_t res2: 2; /* [52,53] reserved */ + uint64_t ac: 1; /* [54] address compare */ + uint64_t r: 1; /* [55] referenced */ + uint64_t c: 1; /* [56] changed */ + uint64_t w: 1; /* [57] write through */ + uint64_t i: 1; /* [58] cache inhibited */ + uint64_t m: 1; /* [59] memory coherent */ + uint64_t g: 1; /* [60] guarded */ + uint64_t n: 1; /* [61] no-execute */ + uint64_t pp1: 2; /* [62,63] page protection bits 1:2 */ /* *INDENT-ON* */ } bits; }; union ptel { - ulong word; + uint64_t word; struct ptel_bits { /* *INDENT-OFF* */ - ulong pp0: 1; /* page protection bit 0 (current PPC + 
uint64_t pp0: 1; /* page protection bit 0 (current PPC * AS says it can always be 0) */ - ulong ts: 1; /* tag select */ - ulong rpn: 50; /* real page number */ - ulong res2: 2; /* reserved */ - ulong ac: 1; /* address compare */ - ulong r: 1; /* referenced */ - ulong c: 1; /* changed */ - ulong w: 1; /* write through */ - ulong i: 1; /* cache inhibited */ - ulong m: 1; /* memory coherent */ - ulong g: 1; /* guarded */ - ulong n: 1; /* no-execute */ - ulong pp1: 2; /* page protection bits 1:2 */ + uint64_t ts: 1; /* tag select */ + uint64_t rpn: 50; /* real page number */ + uint64_t res2: 2; /* reserved */ + uint64_t ac: 1; /* address compare */ + uint64_t r: 1; /* referenced */ + uint64_t c: 1; /* changed */ + uint64_t w: 1; /* write through */ + uint64_t i: 1; /* cache inhibited */ + uint64_t m: 1; /* memory coherent */ + uint64_t g: 1; /* guarded */ + uint64_t n: 1; /* no-execute */ + uint64_t pp1: 2; /* page protection bits 1:2 */ /* *INDENT-ON* */ } bits; }; struct domain_htab { - ulong sdr1; + uint64_t sdr1; uint log_num_ptes; /* log number of PTEs in HTAB. */ uint order; /* order for freeing. 
*/ union pte *map; /* access the htab like an array */ - ulong *shadow; /* idx -> logical translation array */ + uint64_t *shadow; /* idx -> logical translation array */ }; #endif diff -r 7669fca80bfc xen/include/public/arch-powerpc.h --- a/xen/include/public/arch-powerpc.h Mon Dec 04 11:46:53 2006 -0500 +++ b/xen/include/public/arch-powerpc.h Wed Dec 13 15:39:32 2006 -0500 @@ -98,11 +98,66 @@ typedef struct cpu_user_regs cpu_user_re typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */ +#define NUM_SLB_ENTRIES 64 +struct slb_entry { + uint64_t slb_vsid; + uint64_t slb_esid; +}; +typedef struct slb_entry slb_entry_t; + +#ifndef HAS_VMX +#define HAS_VMX 1 +#endif + +#ifndef HAS_FLOAT +#define HAS_FLOAT 1 +#endif + +#ifdef HAS_VMX +typedef struct { + uint32_t u[4]; +} __attribute__((aligned(16))) _vector128; +#endif /* HAS_VMX */ + + /* ONLY used to communicate with dom0! See also struct exec_domain. */ struct vcpu_guest_context { cpu_user_regs_t user_regs; /* User-level CPU registers */ + slb_entry_t slb_entries[NUM_SLB_ENTRIES]; /* Segment Lookaside Buffer */ + + /* Special-Purpose Registers */ + uint64_t sprg[4]; + uint64_t timebase; + uint64_t dar; + uint64_t dsisr; + + struct cpu_vcpu_tag { + uint64_t hid4; + } cpu; /* CPU-specific bits */ + + uint32_t dec; + + /* XXX etc */ +#ifdef HAS_FLOAT +#define NUM_FPRS 32 + double fprs[NUM_FPRS]; +#endif +#ifdef HAS_VMX + _vector128 vrs[32]; + _vector128 vscr; + uint32_t vrsave; +#endif + +#if 0 + struct xencomm *xencomm; + + /* I/O-port access bitmap. */ + u8 *iobmp; /* Guest kernel virtual address of the bitmap. */ + int iobmp_limit; /* Number of ports represented in the bitmap. */ + int iopl; /* Current IOPL for this VCPU. 
*/ +#endif + uint64_t sdr1; /* Pagetable base */ - /* XXX etc */ }; typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); diff -r 7669fca80bfc xen/include/public/domctl.h --- a/xen/include/public/domctl.h Mon Dec 04 11:46:53 2006 -0500 +++ b/xen/include/public/domctl.h Wed Dec 13 15:39:32 2006 -0500 @@ -392,6 +392,18 @@ typedef struct xen_domctl_real_mode_area typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); +#define XEN_DOMCTL_getshadowlist 29 +struct xen_domctl_getshadowlist { + /* OUT variables */ + /* Start of htab array */ + uint64_t htab_map; + /* Number of ptes within htab */ + uint32_t htab_num_ptes; +}; + +typedef struct xen_domctl_getshadowlist xen_domctl_getshadowlist_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getshadowlist_t); + struct xen_domctl { uint32_t cmd; uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ @@ -418,6 +430,7 @@ struct xen_domctl { struct xen_domctl_arch_setup arch_setup; struct xen_domctl_settimeoffset settimeoffset; struct xen_domctl_real_mode_area real_mode_area; + struct xen_domctl_getshadowlist getshadowlist; uint8_t pad[128]; } u; }; diff -r 7669fca80bfc tools/libxc/powerpc64/xc_linux_restore.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/powerpc64/xc_linux_restore.c Wed Dec 13 15:39:32 2006 -0500 @@ -0,0 +1,312 @@ +/****************************************************************************** + * xc_linux_restore.c + * + * Restore the state of a Linux session. + * + * Copyright (c) 2003, K A Fraser. 
+ * Rewritten for PPC: Dan Poff <poff@xxxxxxxxxx>, Yi Ge <geyi@xxxxxxxxxx> + */ + +#include <inttypes.h> +#include <stdlib.h> +#include <unistd.h> +#include <xen/asm/htab.h> + +#include "xg_private.h" + +#define DECOR 0x80000000 // indicates htab address +#define LOG_PTE_SIZE 4 + +#define INVALID_MFN (~0ULL) + +#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10)) + +/* total number of pages used by the current guest */ +static unsigned long max_pfn; + +static ssize_t +read_exact(int fd, void *buf, size_t count) +{ + int r = 0, s; + unsigned char *b = buf; + + while (r < count) { + s = read(fd, &b[r], count - r); + if ((s == -1) && (errno == EINTR)) + continue; + if (s <= 0) { + break; + } + r += s; + } + + return (r == count) ? 1 : 0; +} + +static int +read_page(int xc_handle, int io_fd, uint32_t dom, xen_pfn_t mfn) +{ + void *mem; + + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, mfn); + if (mem == NULL) { + ERROR("cannot map page"); + return -1; + } + if (!read_exact(io_fd, mem, PAGE_SIZE)) { + ERROR("Error when reading from state file (5)"); + return -1; + } + + munmap(mem, PAGE_SIZE); + return 0; +} + +int +xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, + unsigned long nr_pfns, unsigned int store_evtchn, + unsigned long *store_mfn, unsigned int console_evtchn, + unsigned long *console_mfn) +{ + DECLARE_DOMCTL; + int rc = 1, i; + xen_pfn_t pfn; + xen_pfn_t mfn = INVALID_MFN; + unsigned long ver; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ + shared_info_t *shared_info = (shared_info_t *)shared_info_page; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + xen_pfn_t shared_info_pfn, *page_array = NULL; + + /* A temporary mapping of the guest's start_info page. 
*/ + start_info_t *start_info; + + max_pfn = nr_pfns; + + DPRINTF("xc_linux_restore start: max_pfn = %ld\n", max_pfn); + + if (!read_exact(io_fd, &ver, sizeof(unsigned long))) { + ERROR("Error when reading version"); + goto out; + } + if (ver != 1) { + ERROR("version of save doesn't match"); + goto out; + } + + if (mlock(&ctxt, sizeof(ctxt))) { + /* needed for build domctl, but might as well do early */ + ERROR("Unable to mlock ctxt"); + return 1; + } + + /* Get the domain's shared-info frame. */ + domctl.cmd = XEN_DOMCTL_getdomaininfo; + domctl.domain = (domid_t)dom; + if (xc_domctl(xc_handle, &domctl) < 0) { + ERROR("Could not get information on new domain"); + goto out; + } + shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; + + if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) { + errno = ENOMEM; + goto out; + } + + /* Get pages. */ + page_array = malloc(max_pfn * sizeof(xen_pfn_t)); + if (page_array == NULL ) { + ERROR("Could not allocate memory"); + goto out; + } + + if (xc_get_pfn_list(xc_handle, dom, + page_array, max_pfn) != max_pfn) { + ERROR("Could not get the page frame list"); + goto out; + } + + DPRINTF("Reloading memory pages: 0%%\n"); + + while (1) { + if (!read_exact(io_fd, &pfn, sizeof(xen_pfn_t))) { + ERROR("Error when reading batch size"); + goto out; + } + + if (pfn == INVALID_MFN) + break; + + if (pfn > max_pfn){ + DPRINTF("pfn: 0x%016llx\n", pfn); + continue; + } + + mfn = page_array[pfn]; + + if (read_page(xc_handle, io_fd, dom, mfn) < 0) + goto out; + } + + DPRINTF("Received all pages\n"); + + /* Read and uncanonicalise htab, page-at-a-time */ + { + int N, total_sent = 0; + int num_ptes, htab_ptes, htab_pages; + unsigned long htab_mfn; + uint64_t htab_raddr; + xen_pfn_t htab_rpn; + union pte *ppte; + char *mem, *temp, *copy; + + htab_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr); + if (htab_ptes == -1){ + ERROR("Could not get the shadow list"); + goto out; + } + + if (!read_exact(io_fd, &num_ptes, 
sizeof(num_ptes))) { + ERROR("Error when reading num_ptes"); + goto out; + } + + if (num_ptes != htab_ptes){ + ERROR("num_ptes != htab_ptes: %d %d htab_raddr: 0x%016llx", + num_ptes, htab_ptes, htab_raddr); + goto out; + } + + temp = malloc(PAGE_SIZE * 2); + if (temp == NULL){ + ERROR("Could not allocate temp memory"); + goto out; + } + + copy = (char *)(((ulong)temp + (PAGE_SIZE - 1)) & (~(PAGE_SIZE-1))); + + htab_mfn = htab_raddr >> PAGE_SHIFT; + htab_pages = htab_ptes / (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); + + /* Replace guest pfn with rfn, then copy to htab, by page */ + for (N = 0; N < htab_pages; N++, htab_mfn++) { + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, htab_mfn | DECOR); + if (mem == NULL){ + ERROR("Cannot map htab_mfn 0x%08lx: %s\n", + htab_mfn, strerror (errno)); + goto out; + } + + if (!read_exact(io_fd, copy, PAGE_SIZE)) { + ERROR("Error when reading htab page"); + goto out; + } + + ppte = (union pte *)copy; + for (i = 0; i < (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); i++, ppte++){ + if (ppte->bits.v == 1){ // valid htab entry + htab_rpn = ppte->bits.rpn; // guest's pfn + + if (htab_rpn >= max_pfn){ + ERROR("htab_rpn: 0x%016llx not found in page_array[]", + htab_rpn); + goto out; + } + + ppte->bits.rpn = page_array[htab_rpn]; // guest's rpn + + } else { // invalid htab entry + ppte->words.rpn = 0; + } + } + + memcpy(mem, copy, PAGE_SIZE); + munmap(mem, PAGE_SIZE); + total_sent++; + } + + free(temp); + } + + /* Read vcpu context and set */ + if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERROR("Error when reading ctxt"); + goto out; + } + + domctl.cmd = XEN_DOMCTL_setvcpucontext; + domctl.domain = (domid_t)dom; + domctl.u.vcpucontext.vcpu = 0; + set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); + + if (xc_domctl(xc_handle, &domctl) != 0) { + ERROR("Couldn't set vcpu context"); + goto out; + } + + /* Read shared info. 
*/ + shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, shared_info_frame); + if (shared_info == NULL) { + ERROR("cannot map page"); + goto out; + } + if (!read_exact(io_fd, shared_info, PAGE_SIZE)) { + ERROR("Error when reading shared_info page"); + goto out; + } + + /* clear any pending events */ + memset(&(shared_info->evtchn_pending[0]), 0, + sizeof (shared_info->evtchn_pending)); + for (i = 0; i < MAX_VIRT_CPUS; i++) + shared_info->vcpu_info[i].evtchn_pending_sel = 0; + + mfn = shared_info_frame - 3 ; + + munmap (shared_info, PAGE_SIZE); + + for (i = 0; i < max_pfn; i++) // find pfn of shared_info_frame + if (page_array[i] == shared_info_frame) break; + if ( i >= max_pfn) { + ERROR("Cannot find pfn of shared_info_frame"); + goto out; + } + shared_info_pfn = (unsigned long) i; + + /* Setup start_info page */ + start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, mfn); + start_info->nr_pages = max_pfn; + start_info->shared_info = shared_info_pfn << PAGE_SHIFT; + start_info->flags = 0; + *store_mfn = page_array[start_info->store_mfn]; + start_info->store_evtchn = store_evtchn; + *console_mfn = page_array[start_info->console.domU.mfn]; + start_info->console.domU.evtchn = console_evtchn; + munmap(start_info, PAGE_SIZE); + + DPRINTF("Domain ready to be built.\n"); + + rc = 0; + + out: + if ((rc != 0) && (dom != 0)) + xc_domain_destroy(xc_handle, dom); + + free (page_array); + safe_munlock(&ctxt, sizeof(ctxt)); + + DPRINTF("Restore exit with rc=%d\n", rc); + + return rc; +} diff -r 7669fca80bfc tools/libxc/powerpc64/xc_linux_save.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/powerpc64/xc_linux_save.c Wed Dec 13 15:39:32 2006 -0500 @@ -0,0 +1,417 @@ +/****************************************************************************** + * xc_linux_save.c + * + * Save the state of a running Linux session. + * + * Copyright (c) 2003, K A Fraser. 
+ * Rewritten for PPC: Dan Poff <poff@xxxxxxxxxx>, Yi Ge <geyi@xxxxxxxxxx> + */ + +#include <inttypes.h> +#include <time.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/time.h> +#include <xen/asm/htab.h> + +#include "xg_private.h" + +/* +** Default values for important tuning parameters. Can override by passing +** non-zero replacement values to xc_linux_save(). +** +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. +** +*/ +#define DEF_MAX_ITERS (4 - 1) /* limit us to 4 times round loop */ +#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ + +/* +** During (live) save/migrate, we maintain a number of bitmaps to track +** which pages we have to send, and to skip. +*/ + +#define DECOR 0x80000000 // indicates htab address +#define LOG_PTE_SIZE 4 + +#define INVALID_MFN (~0ULL) + +/* total number of pages used by the current guest */ +static unsigned long max_pfn; + +static int +suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, + int dom, xc_dominfo_t *info) +{ + int i = 0; + + if (!(*suspend)(dom)) { + ERROR("Suspend request failed"); + return -1; + } + +retry: + if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) { + ERROR("Could not get domain info"); + return -1; + } + + if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend) + return 0; // success + + if (info->paused) { + // try unpausing domain, wait, and retest + xc_domain_unpause(xc_handle, dom); + + ERROR("Domain was paused. 
Wait and re-test."); + usleep(10000); // 10ms + + goto retry; + } + + + if(++i < 100) { + ERROR("Retry suspend domain."); + usleep(10000); // 10ms + goto retry; + } + + ERROR("Unable to suspend domain."); + + return -1; +} + +static inline ssize_t +write_exact(int fd, void *buf, size_t count) +{ + if (write(fd, buf, count) != count) + return 0; + return 1; +} + +int +xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags, int (*suspend)(int)) +{ + xc_dominfo_t info; + + int rc = 1; + int debug = 0; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + xen_pfn_t *page_array = NULL; + + /* Live mapping of shared info structure */ + shared_info_t *live_shinfo = NULL; + + if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { + ERROR("Could not get domain info"); + return 1; + } + + shared_info_frame = info.shared_info_frame; + + /* Map the shared info frame */ + live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, shared_info_frame); + if (!live_shinfo) { + ERROR("Couldn't map live_shinfo"); + goto out; + } + + max_pfn = info.max_memkb >> (PAGE_SHIFT - 10); + + page_array = malloc(max_pfn * sizeof(xen_pfn_t)); + if (page_array == NULL) { + ERROR("Could not allocate memory"); + goto out; + } + + /* This is expected by xm restore. */ + if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { + ERROR("write: max_pfn"); + goto out; + } + + /* xc_linux_restore starts to read here. */ + /* Write a version number. This can avoid searching for a stupid bug + if the format change. + The version is hard-coded, don't forget to change the restore code + too! */ + { + unsigned long version = 1; + + if (!write_exact(io_fd, &version, sizeof(unsigned long))) { + ERROR("write: version"); + goto out; + } + } + + /* This is a non-live suspend. 
Issue the call back to get the + domain suspended */ + + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) { + ERROR("Domain appears not to have suspended"); + goto out; + } + + + { + char *mem; + xen_pfn_t pfn; + unsigned int total_sent = 0; + + if (xc_get_pfn_list(xc_handle, dom, + page_array, max_pfn) != max_pfn) { + ERROR("Could not get the page frame list"); + goto out; + } + + /* Start writing out the saved-domain record. */ + for (pfn = 0; pfn < max_pfn; pfn++){ + if (page_array[pfn] == INVALID_MFN) + continue; + + if (debug) + fprintf(stderr, "xc_linux_save: page %llx (%llu/%lu)\n", + page_array[pfn], pfn, max_pfn); + + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, page_array[pfn]); + if (mem == NULL) { + ERROR("cannot map page %llx: %s", + page_array[pfn], strerror (errno)); + goto out; + } + + if (!write_exact(io_fd, &pfn, sizeof(pfn))) { + ERROR("Error when writing to state file (4)"); + goto out; + } + + if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { + ERROR("Error when writing to state file (5)"); + goto out; + } + munmap(mem, PAGE_SIZE); + total_sent++; + } + } + + DPRINTF("All memory is saved\n"); + + /* terminate memory dump */ + { + xen_pfn_t pfn = INVALID_MFN; + if (!write_exact(io_fd, &pfn, sizeof(pfn))) { + ERROR("Error when writing to state file (6)"); + goto out; + } + } + + /* Canonicalize htab and save */ + { + int i, k, n; + int N, total_sent = 0; + int htab_ptes, htab_pages, n_chunks; + unsigned long htab_mfn; + uint64_t htab_raddr; + xen_pfn_t htab_rpn, pfn = 0; + union pte *ppte; + char *mem, *temp, *copy; + unsigned long long sizes; + struct chunk_array {xen_pfn_t mfn; unsigned long long size;} *p_chunk; + + n_chunks = 1; + for (k = 0; k < max_pfn - 1; k++){ // find number of chunks + if (page_array[k] + 1 != page_array[k+1]){ + n_chunks += 1; + } + } + + p_chunk = malloc(n_chunks * sizeof(struct chunk_array)); + if (p_chunk == NULL) { + ERROR("Could not allocate memory for chunk_array"); + 
goto out; + } + + k = 0; n = 0; + p_chunk[n].mfn = page_array[k]; + p_chunk[n].size = 1; + + for (k = 0; k < max_pfn - 1; k++){ // record mfn for start of each chunk, size + if (page_array[k] + 1 != page_array[k+1]){ + p_chunk[n+1].mfn = page_array[k+1]; + p_chunk[n+1].size = 1; + n += 1; + } else { + p_chunk[n].size += 1; + } + } +#if 0 + DPRINTF("n_chunks: %d\n", n_chunks); + for (i = 0; i < n_chunks; i++){ + DPRINTF("0x%016llx 0x%016llx\n", p_chunk[i].mfn, p_chunk[i].size); + } +#endif + htab_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr); + if (htab_ptes == -1){ + ERROR("Could not get the shadow list"); + goto out; + } + + temp = malloc(PAGE_SIZE * 2); + if (temp == NULL){ + ERROR("Could not allocate temp memory"); + goto out; + } + + copy = (char *)(((ulong)temp + (PAGE_SIZE - 1)) & (~(PAGE_SIZE-1))); + + htab_mfn = htab_raddr >> PAGE_SHIFT; + htab_pages = htab_ptes / (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); + // DPRINTF("htab_pages: 0x%08lx htab_addr: %llx htab_mfn %lx\n", + // htab_pages,htab_raddr, htab_mfn); + + if (!write_exact(io_fd, &htab_ptes, sizeof(htab_ptes))) { + ERROR("Error when writing to state file (6)"); + goto out; + } + + /* Replace rpn with guest pfn, then write out htab, by page */ + for (N = 0; N < htab_pages; N++, htab_mfn++) { + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, htab_mfn | DECOR); + if (mem == NULL){ + ERROR("Cannot map htab_mfn 0x%08lx: %s\n", + htab_mfn, strerror (errno)); + goto out; + } + + memcpy(copy, mem, PAGE_SIZE); + + /* Improved search of page_array[] for htab_rpn - by chunks*/ + ppte = (union pte *)copy; + for (i = 0; i < (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); i++, ppte++){ + if (ppte->bits.v == 1){ // valid htab entry + sizes = 0; + htab_rpn = ppte->bits.rpn; + for (n = 0; n < n_chunks; n++){ // search by memory chunk + if ((htab_rpn >= p_chunk[n].mfn) && + (htab_rpn < (p_chunk[n].mfn) + p_chunk[n].size)){ + pfn = (htab_rpn - p_chunk[n].mfn) + sizes; + break; + } else { + sizes 
+= p_chunk[n].size; + } + } + + if (n >= n_chunks){ + ERROR("htab_rpn: 0x%016llx not found in page_array[]", + htab_rpn); + goto out; + } + + if (pfn >= max_pfn){ + ERROR("pfn >= max_pfn: 0x%08llx 0x%08lx", pfn, max_pfn); + goto out; + } + +//*** validation + for (k = 0; k < max_pfn; k++){ // linear search + if (htab_rpn == page_array[k]) + break; + } + + if (k != pfn){ + ERROR("k != pfn: 0x%08x 0x%08llx", k, pfn); + ERROR("htab_rpn: 0x%016llx", htab_rpn); + goto out; + } + + if (k >= max_pfn){ + ERROR("htab_rpn: 0x%016llx not found in page_array[] %d", + htab_rpn, i); + goto out; + } +//*** + + ppte->bits.rpn = pfn; + } else { // invalid htab entry + ppte->words.rpn = 0; + } + } + + if (write(io_fd, copy, PAGE_SIZE) != PAGE_SIZE) { + ERROR("Error when writing to state file (7)"); + goto out; + } + + munmap(mem, PAGE_SIZE); + total_sent++; + } + + free(temp); + free(p_chunk); + } + + /* save vcpu context only for vcpu 0; */ + /* linux already suspended other vcpus via smp_suspend() */ + if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { + ERROR("Could not get vcpu context"); + goto out; + } + + if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERROR("Error when writing to state file (1)"); + goto out; + } + + if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) { + ERROR("Error when writing to state file (1)"); + goto out; + } + + /* Success! 
*/ + rc = 0; + +#if 0 + DPRINTF("Domain ready to be built.\n"); + + domctl.cmd = XEN_DOMCTL_setvcpucontext; + domctl.domain = (domid_t)dom; + domctl.u.vcpucontext.vcpu = 0; + set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); + rc = xc_domctl(xc_handle, &domctl); + + if (rc != 0) { + ERROR("Couldn't build the domain"); + goto out; + } +#endif + + out: + + free(page_array); + if (live_shinfo) + munmap(live_shinfo, PAGE_SIZE); + + DPRINTF("Save exit rc=%d\n",rc); + + return !!rc; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 7669fca80bfc tools/libxc/xen/asm/htab.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xen/asm/htab.h Wed Dec 13 15:39:32 2006 -0500 @@ -0,0 +1,134 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) IBM Corp. 
2005 + * + * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + */ + +#ifndef _ASM_HTAB_H_ +#define _ASM_HTAB_H_ + + +/***** general PowerPC architecture limits ******/ + +/* 256KB, from PowerPC Architecture specification */ +#define HTAB_MIN_LOG_SIZE 18 + +#define LOG_NUM_PTES_IN_PTEG 3 +#define NUM_PTES_IN_PTEG (1 << LOG_NUM_PTES_IN_PTEG) +#define LOG_PTE_SIZE 4 +#define LOG_PTEG_SIZE (LOG_NUM_PTES_IN_PTEG + LOG_PTE_SIZE) +#define LOG_HTAB_HASH (LOG_HTAB_SIZE - LOG_PTEG_SIZE) + +/* real page number shift to create the rpn field of the pte */ +#define RPN_SHIFT 12 + +/* page protection bits in pp1 (name format: MSR:PR=0 | MSR:PR=1) */ +#define PP_RWxx 0x0UL +#define PP_RWRW 0x2UL +#define PP_RWRx 0x4UL +#define PP_RxRx 0x6UL + +/***** 64-bit PowerPC architecture limits ******/ + +#define SDR1_HTABORG_MASK 0xfffffffffff80000ULL +#define SDR1_HTABSIZE_MASK 0x1fUL +#define SDR1_HTABSIZE_MAX 46 +#define SDR1_HTABSIZE_BASEBITS 11 + +/* used to turn a vsid into a number usable in the hash function */ +#define VSID_HASH_MASK 0x0000007fffffffffUL + +/* used to turn a vaddr into an api for a pte */ +#define VADDR_TO_API(vaddr) (((vaddr) & API_MASK) >> API_SHIFT) +#define API_VEC 0x1fUL +#define API_SHIFT 23 +#define API_MASK (API_VEC << API_SHIFT) + +/***** hypervisor internals ******/ + +/* 64M: reasonable hypervisor limit? 
*/ +#define HTAB_MAX_LOG_SIZE 26 + +#define GET_HTAB(domain) ((domain)->arch.htab.sdr1 & SDR1_HTABORG_MASK) + +union pte { + struct pte_words { + uint64_t vsid; + uint64_t rpn; + } words; + struct pte_bits { + /* *INDENT-OFF* */ + /* high word */ + uint64_t avpn: 57; /* [0-56] abbreviated virtual page number */ + uint64_t lock: 1; /* [57] hypervisor lock bit */ + uint64_t res: 1; /* [58] reserved for hypervisor */ + uint64_t bolted: 1; /* [59] XXX software-reserved; temp hack */ + uint64_t sw: 1; /* [60] reserved for software */ + uint64_t l: 1; /* [61] Large Page */ + uint64_t h: 1; /* [62] hash function id */ + uint64_t v: 1; /* [63] valid */ + + /* low word */ + uint64_t pp0: 1; /* [0] page protection bit 0 (current PowerPC + * specification says it can always be 0) */ + uint64_t ts: 1; /* [1] tag select */ + uint64_t rpn: 50; /* [2-51] real page number */ + uint64_t res2: 2; /* [52,53] reserved */ + uint64_t ac: 1; /* [54] address compare */ + uint64_t r: 1; /* [55] referenced */ + uint64_t c: 1; /* [56] changed */ + uint64_t w: 1; /* [57] write through */ + uint64_t i: 1; /* [58] cache inhibited */ + uint64_t m: 1; /* [59] memory coherent */ + uint64_t g: 1; /* [60] guarded */ + uint64_t n: 1; /* [61] no-execute */ + uint64_t pp1: 2; /* [62,63] page protection bits 1:2 */ + /* *INDENT-ON* */ + } bits; +}; + +union ptel { + uint64_t word; + struct ptel_bits { + /* *INDENT-OFF* */ + + uint64_t pp0: 1; /* page protection bit 0 (current PPC + * AS says it can always be 0) */ + uint64_t ts: 1; /* tag select */ + uint64_t rpn: 50; /* real page number */ + uint64_t res2: 2; /* reserved */ + uint64_t ac: 1; /* address compare */ + uint64_t r: 1; /* referenced */ + uint64_t c: 1; /* changed */ + uint64_t w: 1; /* write through */ + uint64_t i: 1; /* cache inhibited */ + uint64_t m: 1; /* memory coherent */ + uint64_t g: 1; /* guarded */ + uint64_t n: 1; /* no-execute */ + uint64_t pp1: 2; /* page protection bits 1:2 */ + /* *INDENT-ON* */ + } bits; +}; + +struct 
domain_htab { + uint64_t sdr1; + uint log_num_ptes; /* log number of PTEs in HTAB. */ + uint order; /* order for freeing. */ + union pte *map; /* access the htab like an array */ + uint64_t *shadow; /* idx -> logical translation array */ +}; +#endif Linux diffs: diff -r c8d1f32fd7de arch/powerpc/platforms/xen/hcall.c --- a/arch/powerpc/platforms/xen/hcall.c Wed Nov 22 14:51:54 2006 -0500 +++ b/arch/powerpc/platforms/xen/hcall.c Wed Dec 13 15:54:20 2006 -0500 @@ -256,6 +256,7 @@ static int xenppc_privcmd_domctl(privcmd case XEN_DOMCTL_pausedomain: case XEN_DOMCTL_unpausedomain: case XEN_DOMCTL_getdomaininfo: + case XEN_DOMCTL_getshadowlist: break; case XEN_DOMCTL_getmemlist: ret = xencomm_create( diff -r c8d1f32fd7de arch/powerpc/platforms/xen/reboot.c --- a/arch/powerpc/platforms/xen/reboot.c Wed Nov 22 14:51:54 2006 -0500 +++ b/arch/powerpc/platforms/xen/reboot.c Wed Dec 13 15:54:20 2006 -0500 @@ -1,10 +1,20 @@ #include <linux/module.h> +#include <linux/kernel.h> #include <xen/interface/xen.h> #include <xen/interface/io/console.h> #include <xen/xencons.h> +#include <xen/cpu_hotplug.h> +#include <xen/xenbus.h> +#include <xen/gnttab.h> +#include <xen/evtchn.h> #include <asm/hypervisor.h> #include <asm/machdep.h> +#include <asm/mmu_context.h> +#define SHUTDOWN_INVALID -1 + +extern int shutting_down ; + static void domain_machine_restart(char * __unused) { /* We really want to get pending console data out before we die. 
*/ @@ -31,3 +41,86 @@ void xen_reboot_init(struct machdep_call ppc_md.halt = domain_machine_power_off; } } + +static void switch_idle_mm(void) +{ + struct mm_struct *mm = current->active_mm; + + if (mm == &init_mm) + return; + + atomic_inc(&init_mm.mm_count); + switch_mm(mm, &init_mm, current); + current->active_mm = &init_mm; + mmdrop(mm); +} + +int ppc_do_suspend(void *ignore) +{ + int err; + enum system_states temp_state; + + BUG_ON(smp_processor_id() != 0); + BUG_ON(in_interrupt()); + +#ifndef CONFIG_PPC_XEN + if (xen_feature(XENFEAT_auto_translated_physmap)) { + printk(KERN_WARNING "Cannot suspend in " + "auto_translated_physmap mode.\n"); + return -EOPNOTSUPP; + } +#endif + err = smp_suspend(); + if (err) + return err; + + xenbus_suspend(); + + preempt_disable(); + + local_irq_disable(); + + temp_state = system_state; + system_state = SYSTEM_SUSPEND_DISK; + + preempt_enable(); + + gnttab_suspend(); + + HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; + + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); + xen_start_info->console_mfn = mfn_to_pfn(xen_start_info->console_mfn); + + /* + * We'll stop somewhere inside this hypercall. When it returns, + * we'll start resuming after the restore. 
+ */ + HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); + + shutting_down = SHUTDOWN_INVALID; + + HYPERVISOR_shared_info = (shared_info_t *)__va(xen_start_info->shared_info); + memset(empty_zero_page, 0, PAGE_SIZE); + + gnttab_resume(); + + irq_resume(); + + switch_idle_mm(); + + system_state = temp_state; + + per_cpu(last_jiffy, smp_processor_id()) = get_tbl(); /* update time base */ + + local_irq_enable(); + + xencons_resume(); + + xenbus_resume(); + + smp_resume(); + + return err; +} + diff -r c8d1f32fd7de drivers/xen/core/reboot.c --- a/drivers/xen/core/reboot.c Wed Nov 22 14:51:54 2006 -0500 +++ b/drivers/xen/core/reboot.c Wed Dec 13 15:54:20 2006 -0500 @@ -7,16 +7,16 @@ #include <linux/reboot.h> #include <linux/sysrq.h> #include <linux/stringify.h> -#include <asm/irq.h> -#include <asm/mmu_context.h> -#include <xen/evtchn.h> -#include <asm/hypervisor.h> -#include <xen/xenbus.h> #include <linux/cpu.h> #include <linux/kthread.h> +#include <xen/evtchn.h> +#include <xen/xenbus.h> #include <xen/gnttab.h> #include <xen/xencons.h> #include <xen/cpu_hotplug.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/hypervisor.h> #if defined(__i386__) || defined(__x86_64__) /* @@ -79,7 +79,7 @@ EXPORT_SYMBOL(machine_power_off); */ /* Ignore multiple shutdown requests. 
*/ -static int shutting_down = SHUTDOWN_INVALID; +int shutting_down = SHUTDOWN_INVALID; static void __shutdown_handler(void *unused); static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); @@ -192,13 +192,14 @@ static int __do_suspend(void *ignore) return err; } -#else /* CONFIG_PPC_XEN */ + +#else +int ppc_do_suspend(void *ignore); static int __do_suspend(void *ignore) { - printk("SUSPEND!!??\n"); - return 0; -} -#endif /* CONFIG_PPC_XEN */ + return ppc_do_suspend(ignore); +} +#endif static int shutdown_process(void *__unused) { diff -r c8d1f32fd7de include/asm-powerpc/xen/asm/hypercall.h --- a/include/asm-powerpc/xen/asm/hypercall.h Wed Nov 22 14:51:54 2006 -0500 +++ b/include/asm-powerpc/xen/asm/hypercall.h Wed Dec 13 15:54:20 2006 -0500 @@ -60,6 +60,16 @@ static inline int HYPERVISOR_shutdown(un return HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); } + +static inline int HYPERVISOR_suspend(unsigned long srec) +{ + struct sched_shutdown sched_shutdown = { + .reason = SHUTDOWN_suspend + }; + + return HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); +} + static inline int HYPERVISOR_set_timer_op(unsigned long arg) { return plpar_hcall_norets(XEN_MARK(__HYPERVISOR_set_timer_op), arg); diff -r c8d1f32fd7de include/xen/interface/arch-powerpc.h --- a/include/xen/interface/arch-powerpc.h Wed Nov 22 14:51:54 2006 -0500 +++ b/include/xen/interface/arch-powerpc.h Wed Dec 13 15:54:20 2006 -0500 @@ -29,7 +29,6 @@ #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define XEN_GUEST_HANDLE_64(name) __guest_handle_ ## name #define set_xen_guest_handle(hnd, val) \ do { \ if (sizeof ((hnd).__pad)) \ @@ -42,9 +41,6 @@ #endif #ifndef __ASSEMBLY__ - -typedef uint64_t uint64_aligned_t; - /* Guest handles for primitive C types. 
*/ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); __DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); @@ -98,9 +94,65 @@ typedef struct cpu_user_regs cpu_user_re typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */ +#define NUM_SLB_ENTRIES 64 +struct slb_entry { + uint64_t slb_vsid; + uint64_t slb_esid; +}; +typedef struct slb_entry slb_entry_t; + +#ifndef HAS_VMX +#define HAS_VMX 1 +#endif + +#ifndef HAS_FLOAT +#define HAS_FLOAT 1 +#endif + +#ifdef HAS_VMX +typedef struct { + uint32_t u[4]; +} __attribute__((aligned(16))) _vector128; +#endif /* HAS_VMX */ + + /* ONLY used to communicate with dom0! See also struct exec_domain. */ struct vcpu_guest_context { cpu_user_regs_t user_regs; /* User-level CPU registers */ + slb_entry_t slb_entries[NUM_SLB_ENTRIES]; /* Segment Lookaside Buffer */ + + /* Special-Purpose Registers */ + uint64_t sprg[4]; + uint64_t timebase; + uint64_t dar; + uint64_t dsisr; + + struct cpu_vcpu_tag { + uint64_t hid4; + } cpu; /* CPU-specific bits */ + + uint32_t dec; + + /* XXX etc */ +#ifdef HAS_FLOAT +#define NUM_FPRS 32 + double fprs[NUM_FPRS]; +#endif +#ifdef HAS_VMX + _vector128 vrs[32]; + _vector128 vscr; + uint32_t vrsave; +#endif + +#if 0 + struct xencomm *xencomm; + + /* I/O-port access bitmap. */ + u8 *iobmp; /* Guest kernel virtual address of the bitmap. */ + int iobmp_limit; /* Number of ports represented in the bitmap. */ + int iopl; /* Current IOPL for this VCPU. */ +#endif + uint64_t sdr1; /* Pagetable base */ /* XXX etc */ }; diff -r c8d1f32fd7de include/xen/interface/domctl.h --- a/include/xen/interface/domctl.h Wed Nov 22 14:51:54 2006 -0500 +++ b/include/xen/interface/domctl.h Wed Dec 13 15:54:20 2006 -0500 @@ -354,6 +354,17 @@ struct xen_domctl_real_mode_area { }; typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); + +#define XEN_DOMCTL_getshadowlist 29 +struct xen_domctl_getshadowlist { + /* OUT variables. 
*/ + /* Start of htab array */ + uint64_t htab_map; + /* Number of ptes within htab */ + uint htab_num_ptes; +}; +typedef struct xen_domctl_getshadowlist xen_domctl_getshadowlist_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getshadowlist_t); struct xen_domctl { uint32_t cmd; @@ -381,6 +392,7 @@ struct xen_domctl { struct xen_domctl_arch_setup arch_setup; struct xen_domctl_settimeoffset settimeoffset; struct xen_domctl_real_mode_area real_mode_area; + struct xen_domctl_getshadowlist getshadowlist; uint8_t pad[128]; } u; }; _______________________________________________ Xen-ppc-devel mailing list Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ppc-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |