[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC/patch] domain builder rewrite
Hi, I've started cleaning up the domU domain builder, which basically ended up being a nearly complete rewrite of the code. The patch below is the very first cut of the new code. I've tested it on x86_32 (both with and without shadow_translated) and with linux kernels only. In theory PAE mode and x86_64 should work too; I haven't even compiled on x86_64 though. Design goals: - General cleanup of the code, move lots of state information which currently is held in local variables into a struct. - separate out arch-specific bits into functions and source files (as far as possible), so there are not tons of #ifdefs all over the place ;) - Don't have xen hypercalls all over the place, so most of the code runs just fine without xen. Why I'm doing this: - The current code is a mess ... - I want to reuse the domain builder code for other purposes than directly booting xen domains. domU kexec is the first item on my todo list. Directly writing a suspend image, then booting the virtual machines via "xm restore" should be easy too. It isn't very useful on a single host, but booting xen virtual machines on _another_ machine that way (using the migration protocol) probably is. - I'm sure other people have fancy ideas too ;) Current TODO list: - discuss design, fixup if needed ;) - more testing. - re-add bsd symtab loading. - re-add other architectures. - re-add loader bits (plan9, ...) 
cheers, Gerd diff -r 4142bfd01e02 tools/libxc/Makefile --- a/tools/libxc/Makefile Thu Jun 1 10:25:02 2006 +++ b/tools/libxc/Makefile Thu Jun 1 15:23:55 2006 @@ -26,7 +26,7 @@ CTRL_SRCS-$(CONFIG_Linux) += xc_linux.c GUEST_SRCS-y := -GUEST_SRCS-y += xc_linux_build.c +#GUEST_SRCS-y += xc_linux_build.c GUEST_SRCS-y += xc_load_bin.c GUEST_SRCS-y += xc_load_elf.c GUEST_SRCS-y += xg_private.c @@ -35,7 +35,12 @@ GUEST_SRCS-$(CONFIG_MIGRATE) += xc_linux_restore.c xc_linux_save.c GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c -CFLAGS += -Werror +# new domain builder +GUEST_SRCS-y += xc_dom_compat.c xc_dom_core.c xc_dom_elf.c xc_dom_boot.c +GUEST_SRCS-$(CONFIG_X86) += xc_dom_x86.c +#GUEST_SRCS-$(CONFIG_IA64) += xc_dom_ia86.c + +CFLAGS += -g -Werror -Wmissing-prototypes CFLAGS += -fno-strict-aliasing CFLAGS += $(INCLUDES) -I. diff -r 4142bfd01e02 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Thu Jun 1 10:25:02 2006 +++ b/tools/libxc/xc_linux_build.c Thu Jun 1 15:23:55 2006 @@ -133,10 +133,10 @@ return 0; } -int load_initrd(int xc_handle, domid_t dom, - struct initrd_info *initrd, - unsigned long physbase, - unsigned long *phys_to_mach) +static int load_initrd(int xc_handle, domid_t dom, + struct initrd_info *initrd, + unsigned long physbase, + unsigned long *phys_to_mach) { char page[PAGE_SIZE]; unsigned long pfn_start, pfn, nr_pages; diff -r 4142bfd01e02 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Thu Jun 1 10:25:02 2006 +++ b/tools/libxc/xc_linux_restore.c Thu Jun 1 15:23:55 2006 @@ -57,7 +57,7 @@ ** This function inverts that operation, replacing the pfn values with ** the (now known) appropriate mfn values. 
*/ -int uncanonicalize_pagetable(unsigned long type, void *page) +static int uncanonicalize_pagetable(unsigned long type, void *page) { int i, pte_last; unsigned long pfn; diff -r 4142bfd01e02 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Thu Jun 1 10:25:02 2006 +++ b/tools/libxc/xc_linux_save.c Thu Jun 1 15:23:55 2006 @@ -415,7 +415,7 @@ ** which entries do not require canonicalization (in particular, those ** entries which map the virtual address reserved for the hypervisor). */ -void canonicalize_pagetable(unsigned long type, unsigned long pfn, +static void canonicalize_pagetable(unsigned long type, unsigned long pfn, const void *spage, void *dpage) { diff -r 4142bfd01e02 tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Thu Jun 1 10:25:02 2006 +++ b/tools/libxc/xc_private.c Thu Jun 1 15:23:55 2006 @@ -5,6 +5,7 @@ */ #include "xc_private.h" +#include "xg_private.h" /* NB: arr must be mlock'ed */ int xc_get_pfn_type_batch(int xc_handle, @@ -18,10 +19,11 @@ return do_dom0_op(xc_handle, &op); } +#if 0 /* dead code ??? 
*/ #define GETPFN_ERR (~0U) -unsigned int get_pfn_type(int xc_handle, - unsigned long mfn, - uint32_t dom) +static unsigned int get_pfn_type(int xc_handle, + unsigned long mfn, + uint32_t dom) { DECLARE_DOM0_OP; op.cmd = DOM0_GETPAGEFRAMEINFO; @@ -34,6 +36,7 @@ } return op.u.getpageframeinfo.type; } +#endif int xc_mmuext_op( int xc_handle, diff -r 4142bfd01e02 tools/libxc/xc_dom.h --- /dev/null Thu Jun 1 10:25:02 2006 +++ b/tools/libxc/xc_dom.h Thu Jun 1 15:23:55 2006 @@ -0,0 +1,179 @@ +/* --- typedefs and structs ---------------------------------------- */ + +typedef unsigned long xen_vaddr_t; +typedef unsigned long xen_paddr_t; +typedef unsigned long xen_pfn_t; + +struct xc_dom_seg { + xen_vaddr_t vstart; + xen_vaddr_t vend; + xen_pfn_t pfn; +}; + +struct xc_dom_mem { + struct xc_dom_mem *next; + unsigned char memory[0]; +}; + +struct xc_dom_image { + /* files */ + void *kernel_blob; + size_t kernel_size; + void *ramdisk_blob; + size_t ramdisk_size; + + /* arguments and parameters */ + char *cmdline; + uint32_t f_requested[XENFEAT_NR_SUBMAPS]; + + /* info from (elf) kernel image */ + char *guestinfo; + char *guest_os; + char *guest_ver; + char *xen_ver; + char *loader; + int pae; + xen_vaddr_t virt_base; + xen_vaddr_t virt_entry; + xen_vaddr_t elf_paddr_offset; + xen_pfn_t hypercall_page; + uint32_t f_supported[XENFEAT_NR_SUBMAPS]; + uint32_t f_required[XENFEAT_NR_SUBMAPS]; + + /* memory layout */ + struct xc_dom_seg kernel_seg; + struct xc_dom_seg ramdisk_seg; + struct xc_dom_seg p2m_seg; + struct xc_dom_seg pgtables_seg; + xen_pfn_t start_info_pfn; + xen_pfn_t console_pfn; + xen_pfn_t xenstore_pfn; + xen_pfn_t shared_info_pfn; + xen_pfn_t bootstack_pfn; + xen_vaddr_t virt_alloc_end; + + /* initial page tables */ + unsigned int pgtables; + unsigned int pg_l4; + unsigned int pg_l3; + unsigned int pg_l2; + unsigned int pg_l1; + xen_vaddr_t virt_pgtab_end; + + /* other state info */ + uint32_t f_active[XENFEAT_NR_SUBMAPS]; + + /* physical memory */ + xen_pfn_t 
total_pages; + xen_pfn_t mapped_pages; + void *pages; + +#if 1 + /* xen state info + * FIXME: place this here or make this private to xc_dom_boot.c ??? */ + unsigned long flags; /* what is this ??? */ + unsigned int console_evtchn; + unsigned int xenstore_evtchn; + xen_pfn_t shared_info_mfn; +#endif + + /* malloc memory pool */ + struct xc_dom_mem *memblocks; +}; + +/* --- pluggable kernel loader ------------------------------------- */ + +struct xc_dom_loader { + char *name; + int (*probe)(struct xc_dom_image *dom); + int (*parser)(struct xc_dom_image *dom); + int (*loader)(struct xc_dom_image *dom); + struct xc_dom_loader *next; +}; + +#define __init __attribute__ ((constructor)) +void xc_dom_register_loader(struct xc_dom_loader *loader); + +/* --- main functions ---------------------------------------------- */ + +struct xc_dom_image* xc_dom_allocate(const char *cmdline, + const char *features, + xen_pfn_t nr_pages); +void xc_dom_release(struct xc_dom_image *dom); + +int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename); +int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename); +int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem, size_t memsize); +int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem, size_t memsize); + +int xc_dom_parse_image(struct xc_dom_image *dom); +int xc_dom_build_image(struct xc_dom_image *dom); +int xc_dom_boot_image(struct xc_dom_image *dom, int xc_handle, uint32_t domid); + +/* --- debugging bits ---------------------------------------------- */ + +extern FILE *xc_dom_logfile; +void xc_dom_loginit(void); + +#define xc_dom_printf(fmt, args...) 
\ + if (xc_dom_logfile) fprintf(xc_dom_logfile, fmt, ## args) + +/* --- simple memory pool ------------------------------------------ */ + +void* xc_dom_malloc(struct xc_dom_image *dom, size_t size); +char* xc_dom_strdup(struct xc_dom_image *dom, const char *str); +int xc_dom_readfile(struct xc_dom_image *dom, const char *filename, + void **blob, size_t *size); +int xc_dom_copyblk(struct xc_dom_image *dom, const void *mem, size_t memsize, + void **blob, size_t *size); + +/* --- xen feature bits -------------------------------------------- */ + +extern const char *xc_dom_feature_names[]; +extern const int xc_dom_features; + +int xc_dom_features_parse(const char *features, uint32_t *opt, uint32_t *req); + +static inline void xc_dom_feature_set(int nr, uint32_t *addr) +{ + addr[nr >> 5] |= 1 << (nr & 31); +} +static inline int xc_dom_feature_get(int nr, uint32_t *addr) +{ + return !! (addr[nr >> 5] & (1 << (nr & 31))); +} + +/* --- arch specific ----------------------------------------------- */ + +int xc_dom_arch_count_pgtables(struct xc_dom_image *dom); +int xc_dom_arch_setup_pgtables(struct xc_dom_image *dom); +int xc_dom_arch_start_info(struct xc_dom_image *dom, int xen_version); +int xc_dom_arch_shared_info(shared_info_t *shared_info); +int xc_dom_arch_vcpu(struct xc_dom_image *dom, vcpu_guest_context_t *ctxt); + +/* --- misc bits --------------------------------------------------- */ + +static inline int xc_dom_feature_translated(struct xc_dom_image *dom) +{ + return xc_dom_feature_get(XENFEAT_auto_translated_physmap, dom->f_active); +} + +static inline void* xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn) +{ + if (pfn > dom->mapped_pages) + return NULL; + return dom->pages + pfn * PAGE_SIZE; +} + +static inline xen_pfn_t xc_dom_p2m_host(struct xc_dom_image *dom, xen_pfn_t pfn) +{ + xen_pfn_t *p2m = xc_dom_pfn_to_ptr(dom, dom->p2m_seg.pfn); + return p2m[pfn]; +} + +static inline xen_pfn_t xc_dom_p2m_guest(struct xc_dom_image *dom, xen_pfn_t pfn) +{ + 
if (xc_dom_feature_translated(dom)) + return pfn; + return xc_dom_p2m_host(dom,pfn); +} diff -r 4142bfd01e02 tools/libxc/xc_dom_boot.c --- /dev/null Thu Jun 1 10:25:02 2006 +++ b/tools/libxc/xc_dom_boot.c Thu Jun 1 15:23:55 2006 @@ -0,0 +1,288 @@ +/* + * Xen domain builder -- xen booter. + * + * This is the code which actually boots a fresh + * prepared domain image as xen guest domain. + * + * ==> this is the only domain bilder code piece + * where xen hypercalls are allowed <== + * + * This code is licenced under the GPL. + * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>. + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <zlib.h> + +#include "xg_private.h" +#include "xc_dom.h" + +/* ------------------------------------------------------------------------ */ + +static int setup_hypercall_page(struct xc_dom_image *dom, + int xc, uint32_t domid) +{ + dom0_op_t op; + int rc; + + if (!dom->hypercall_page) + return 0; + + xc_dom_printf("%s\n", __FUNCTION__); + op.cmd = DOM0_HYPERCALL_INIT; + op.u.hypercall_init.domain = (domid_t)domid; + op.u.hypercall_init.mfn = xc_dom_p2m_host(dom, dom->hypercall_page); + rc = xc_dom0_op(xc, &op); + if (0 != rc) + xc_dom_printf("%s: HYPERCALL_INIT failed (rc=%d)\n", __FUNCTION__, rc); + return rc; +} + +static int launch_vm(int xc, uint32_t domid, vcpu_guest_context_t *ctxt) +{ + dom0_op_t launch_op; + int rc; + + xc_dom_printf("%s\n", __FUNCTION__); + memset(&launch_op, 0, sizeof(launch_op)); + launch_op.cmd = DOM0_SETVCPUCONTEXT; + launch_op.u.setvcpucontext.domain = (domid_t)domid; + launch_op.u.setvcpucontext.vcpu = 0; + set_xen_guest_handle(launch_op.u.setvcpucontext.ctxt, ctxt); + rc = xc_dom0_op(xc, &launch_op); + if (0 != rc) + xc_dom_printf("%s: SETVCPUCONTEXT failed (rc=%d)\n", __FUNCTION__, rc); + return rc; +} + +static int setup_p2m(struct xc_dom_image *dom, + int xc, uint32_t domid) +{ + xen_pfn_t *p2m = xc_dom_pfn_to_ptr(dom, dom->p2m_seg.pfn); + xen_pfn_t i, 
pages; + xc_mmu_t *mmu; + uint64_t entry; + int rc; + + xc_dom_printf("%s\n", __FUNCTION__); + pages = xc_get_pfn_list(xc, domid, p2m, dom->total_pages); + if (pages != dom->total_pages) { + xc_dom_printf("%s: xc_get_pfn_list failed (%ld/%ld)\n", __FUNCTION__, + pages, dom->total_pages); + return -1; + } + + mmu = xc_init_mmu_updates(xc, domid); + if (NULL == mmu) { + xc_dom_printf("%s: xc_init_mmu_updates failed\n", __FUNCTION__); + return -1; + } + + for (i = 0; i < pages; i++) { + entry = ((uint64_t)p2m[i] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; + rc = xc_add_mmu_update(xc, mmu, entry, i); + if (0 != rc) { + xc_dom_printf("%s: xc_add_mmu_update failed (rc=%d)\n", __FUNCTION__, rc); + return rc; + } + } + rc = xc_finish_mmu_updates(xc, mmu); + if (0 != rc) + xc_dom_printf("%s: xc_finish_mmu_updates failed (rc=%d)\n", __FUNCTION__, rc); + return rc; +} + +static int copy_page(struct xc_dom_image *dom, + int xc, uint32_t domid, xen_pfn_t pfn) +{ + xen_pfn_t dst; + void *src; + int rc; + + if (0 == pfn) + return 0; + + src = xc_dom_pfn_to_ptr(dom, pfn); + dst = xc_dom_p2m_host(dom, pfn); + xc_dom_printf("%s: pfn 0x%lx, mfn 0x%lx\n", __FUNCTION__, pfn, dst); + rc = xc_copy_to_domain_page(xc, domid, dst, src); + if (0 != rc) + xc_dom_printf("%s: xc_copy_to_domain_page failed (pfn 0x%lx, rc=%d)\n", + __FUNCTION__, pfn, rc); + return rc; +} + +static int copy_segment(struct xc_dom_image *dom, + int xc, uint32_t domid, + struct xc_dom_seg *seg) +{ + xen_pfn_t pages = (seg->vend - seg->vstart) / PAGE_SIZE; + xen_pfn_t i, dst; + void *src; + int rc; + + if (0 == pages) + return 0; + + xc_dom_printf("%s: pfn 0x%lx, %ld pages\n", __FUNCTION__, seg->pfn, pages); + for (i = 0; i < pages; i++) { + src = xc_dom_pfn_to_ptr(dom, seg->pfn + i); + dst = xc_dom_p2m_host(dom, seg->pfn + i); + rc = xc_copy_to_domain_page(xc, domid, dst, src); + if (0 != rc) { + xc_dom_printf("%s: xc_copy_to_domain_page failed (pfn 0x%lx, rc=%d)\n", + __FUNCTION__, seg->pfn + i, rc); + return rc; + } + } 
+ return 0; +} + +/* ------------------------------------------------------------------------ */ + +#if defined(__i386__) || defined(__x86_64__) + +static int x86_misc_mm_bits(struct xc_dom_image *dom, int xc, uint32_t domid) +{ +#if defined(__i386__) + unsigned long pgd_type = dom->pae ? MMUEXT_PIN_L3_TABLE : MMUEXT_PIN_L2_TABLE; +#endif +#if defined(__x86_64__) + unsigned long pgd_type = MMUEXT_PIN_L4_TABLE; +#endif + shared_info_t *shared_info; + int rc; + + if (!xc_dom_feature_translated(dom)) { + rc = pin_table(xc, pgd_type, xc_dom_p2m_host(dom, dom->pgtables_seg.pfn), domid); + if (0 != rc) { + xc_dom_printf("%s: pin_table failed (pfn 0x%lx, rc=%d)\n", + __FUNCTION__, dom->pgtables_seg.pfn, rc); + return rc; + } + } else { + struct xen_add_to_physmap xatp; + int i; + + /* enable shadow-translated mode */ + rc = xc_shadow_control(xc, domid, + DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE, + NULL, 0, NULL); + if (0 != rc) { + xc_dom_printf("%s: SHADOW_CONTROL_OP_ENABLE_TRANSLATE failed (rc=%d)\n", + __FUNCTION__, rc); + return rc; + } + xc_dom_printf("%s: translated shadow mode enabled\n", __FUNCTION__); + + /* Map shared info frame into guest physmap. */ + xatp.domid = domid; + xatp.space = XENMAPSPACE_shared_info; + xatp.idx = 0; + xatp.gpfn = dom->shared_info_pfn; + rc = xc_memory_op(xc, XENMEM_add_to_physmap, &xatp); + if ( rc != 0 ) { + xc_dom_printf("%s: mapping shared_info failed (pfn=%lx, rc=%d)\n", + __FUNCTION__, xatp.gpfn, rc); + return rc; + } + + /* Map grant table frames into guest physmap. 
*/ + for (i = 0; ; i++) { + xatp.domid = domid; + xatp.space = XENMAPSPACE_grant_table; + xatp.idx = i; + xatp.gpfn = dom->total_pages + i; + rc = xc_memory_op(xc, XENMEM_add_to_physmap, &xatp); + if (rc != 0) { + if (i > 0 && errno == EINVAL) { + xc_dom_printf("%s: %d grant tables mapped\n", __FUNCTION__, i); + break; + } + xc_dom_printf("%s: mapping grant tables failed (pfn=%lx, rc=%d)\n", + __FUNCTION__, xatp.gpfn, rc); + return rc; + } + } + } + + /* setup shared_info page */ + xc_dom_printf("%s: shared_info: pfn 0x%lx, mfn 0x%lx\n", __FUNCTION__, + dom->shared_info_pfn, dom->shared_info_mfn); + shared_info = xc_map_foreign_range(xc, domid, PAGE_SIZE, PROT_READ|PROT_WRITE, + dom->shared_info_mfn); + xc_dom_arch_shared_info(shared_info); + munmap(shared_info, PAGE_SIZE); + + return 0; +} + +#endif /* x86 */ + +/* ------------------------------------------------------------------------ */ + +int xc_dom_boot_image(struct xc_dom_image *dom, int xc, uint32_t domid) +{ + vcpu_guest_context_t ctxt; + dom0_op_t op; + int rc, xen_version; + + xc_dom_printf("%s\n", __FUNCTION__); + + /* collect some info */ + xen_version = xc_version(xc, XENVER_version, NULL); + + op.cmd = DOM0_GETDOMAININFO; + op.u.getdomaininfo.domain = (domid_t)domid; + rc = xc_dom0_op(xc, &op); + if (0 != rc) { + xc_dom_printf("%s: GETDOMAININFO failed (rc=%d)\n", __FUNCTION__, rc); + return rc; + } + if (op.u.getdomaininfo.domain != domid) { + xc_dom_printf("%s: Huh? 
domid mismatch (%d != %d)\n", __FUNCTION__, + op.u.getdomaininfo.domain, domid); + return -1; + } + dom->shared_info_mfn = op.u.getdomaininfo.shared_info_frame; + + /* initial mm setup */ + setup_p2m(dom, xc, domid); + if (0 != (rc = xc_dom_arch_setup_pgtables(dom))) + return rc; + + /* copy stuff */ + if (0 != (rc = copy_segment(dom, xc, domid, &dom->kernel_seg))) + return rc; + if (0 != (rc = copy_segment(dom, xc, domid, &dom->ramdisk_seg))) + return rc; + if (0 != (rc = copy_segment(dom, xc, domid, &dom->p2m_seg))) + return rc; + if (0 != (rc = copy_segment(dom, xc, domid, &dom->pgtables_seg))) + return rc; + if (0 != (rc = copy_page(dom, xc, domid, dom->console_pfn))) + return rc; + if (0 != (rc = copy_page(dom, xc, domid, dom->xenstore_pfn))) + return rc; + + /* start info page */ + xc_dom_arch_start_info(dom, xen_version); + if (0 != (rc = copy_page(dom, xc, domid, dom->start_info_pfn))) + return rc; + + /* misc x86 mm stuff */ + if (0 != (rc = x86_misc_mm_bits(dom, xc, domid))) + return rc; + if (0 != (rc = setup_hypercall_page(dom, xc, domid))) + return rc; + + /* let the vm run */ + if (0 != (rc = xc_dom_arch_vcpu(dom, &ctxt))) + return rc; + rc = launch_vm(xc, domid, &ctxt); + + return rc; +} diff -r 4142bfd01e02 tools/libxc/xc_dom_compat.c --- /dev/null Thu Jun 1 10:25:02 2006 +++ b/tools/libxc/xc_dom_compat.c Thu Jun 1 15:23:55 2006 @@ -0,0 +1,125 @@ +/* + * Xen domain builder -- compatibility code. + * + * Replacements for xc_linux_build & friends, + * as example code and to make the new builder + * usable as drop-in replacement. + * + * This code is licenced under the GPL. + * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>. 
+ * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <zlib.h> + +#include "xenctrl.h" +#include "xg_private.h" +#include "xc_dom.h" + +/* ------------------------------------------------------------------------ */ + +static int xc_linux_build_internal(struct xc_dom_image *dom, + int xc_handle, uint32_t domid, + unsigned long flags, + unsigned int store_evtchn, + unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn) +{ + int rc; + + if (0 != (rc = xc_dom_parse_image(dom))) + goto out; + if (0 != (rc = xc_dom_build_image(dom))) + goto out; + + dom->flags = flags; + dom->console_evtchn = console_evtchn; + dom->xenstore_evtchn = store_evtchn; + rc = xc_dom_boot_image(dom, xc_handle, domid); + if (0 != rc) + goto out; + + *console_mfn = xc_dom_p2m_host(dom, dom->console_pfn); + *store_mfn = xc_dom_p2m_host(dom, dom->xenstore_pfn); + + out: + return rc; +} + +int xc_linux_build_mem(int xc_handle, + uint32_t domid, + const char *image_buffer, + unsigned long image_size, + const char *initrd, + unsigned long initrd_len, + const char *cmdline, + const char *features, + unsigned long flags, + unsigned int store_evtchn, + unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn) +{ + struct xc_dom_image *dom; + unsigned long nr_pages; + int rc; + + nr_pages = xc_get_tot_pages(xc_handle, domid); + + xc_dom_loginit(); + dom = xc_dom_allocate(cmdline, features, nr_pages); + if (0 != (rc = xc_dom_kernel_mem(dom, image_buffer, image_size))) + goto out; + if (initrd) + if (0 != (rc = xc_dom_ramdisk_mem(dom, initrd, initrd_len))) + goto out; + + rc = xc_linux_build_internal(dom, xc_handle, domid, + flags, + store_evtchn, store_mfn, + console_evtchn, console_mfn); + + out: + xc_dom_release(dom); + return rc; +} + +int xc_linux_build(int xc_handle, + uint32_t domid, + const char *image_name, + const char *initrd_name, + const char *cmdline, + const char *features, + 
unsigned long flags, + unsigned int store_evtchn, + unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn) +{ + struct xc_dom_image *dom; + unsigned long nr_pages; + int rc; + + nr_pages = xc_get_tot_pages(xc_handle, domid); + + xc_dom_loginit(); + dom = xc_dom_allocate(cmdline, features, nr_pages); + if (0 != (rc = xc_dom_kernel_file(dom, image_name))) + goto out; + if (initrd_name) + if (0 != (rc = xc_dom_ramdisk_file(dom, initrd_name))) + goto out; + + rc = xc_linux_build_internal(dom, xc_handle, domid, + flags, + store_evtchn, store_mfn, + console_evtchn, console_mfn); + + out: + xc_dom_release(dom); + return rc; +} diff -r 4142bfd01e02 tools/libxc/xc_dom_core.c --- /dev/null Thu Jun 1 10:25:02 2006 +++ b/tools/libxc/xc_dom_core.c Thu Jun 1 15:23:55 2006 @@ -0,0 +1,503 @@ +/* + * Xen domain builder -- core bits. + * + * The core code goes here: + * - allocate and release domain structs. + * - memory management functions. + * - misc helper functions. + * + * This code is licenced under the GPL. + * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>. 
+ * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <zlib.h> + +#include "xg_private.h" +#include "xc_dom.h" + +/* ------------------------------------------------------------------------ */ +/* debugging */ + +FILE *xc_dom_logfile = NULL; + +void xc_dom_loginit(void) +{ + if (xc_dom_logfile) + return; + xc_dom_logfile = fopen("/var/log/xen-kraxel.log", "a"); + setvbuf(xc_dom_logfile, NULL, _IONBF, 0); + xc_dom_printf("### ----- xc domain builder logfile opened -----\n"); +} + +/* ------------------------------------------------------------------------ */ +/* simple memory pool */ + +void* xc_dom_malloc(struct xc_dom_image *dom, size_t size) +{ + struct xc_dom_mem *block; + + block = malloc(sizeof(*block) + size); + if (NULL == block) + return NULL; + memset(block,0,sizeof(*block) + size); + block->next = dom->memblocks; + dom->memblocks = block; + return block->memory; +} + +static void xc_dom_free_all(struct xc_dom_image *dom) +{ + struct xc_dom_mem *block; + + while (NULL != (block = dom->memblocks)) { + dom->memblocks = block->next; + free(block); + } +} + +char* xc_dom_strdup(struct xc_dom_image *dom, const char *str) +{ + size_t len = strlen(str)+1; + char *nstr = xc_dom_malloc(dom, len); + + if (NULL == nstr) + return NULL; + memcpy(nstr, str, len); + return nstr; +} + +int xc_dom_readfile(struct xc_dom_image *dom, const char *filename, + void **blob, size_t *size) +{ + int fd; + gzFile gz; + char magic[2]; + unsigned char gzlen[4]; + size_t len; + + fd = open(filename, O_RDONLY); + if (-1 == fd) + return -1; + if (2 != read(fd, magic, 2)) + goto err; + + if (0 == strncmp(magic, "\037\213", 2)) { + /* gzipped file */ + lseek(fd, -4, SEEK_END); + if (4 != read(fd, gzlen, 4)) + goto err; + *size = gzlen[3] << 24 | gzlen[2] << 16 | gzlen[1] << 8 | gzlen[0]; + + lseek(fd, 0, SEEK_SET); + gz = gzdopen(fd, "rb"); + *blob = xc_dom_malloc(dom, *size); + if (NULL == *blob) + goto err; + len = gzread(gz, *blob, 
*size); + gzclose(gz); + xc_dom_printf("%s: %s: gzipped, %zd bytes\n", + __FUNCTION__, filename, *size); + } else { + /* normal file */ + lseek(fd, 0, SEEK_SET); + *size = lseek(fd, 0, SEEK_END); + + lseek(fd, 0, SEEK_SET); + *blob = xc_dom_malloc(dom, *size); + if (NULL == *blob) + goto err; + len = read(fd, *blob, *size); + close(fd); + xc_dom_printf("%s: %s: normal, %zd bytes\n", + __FUNCTION__, filename, *size); + } + + if (len != *size) + return -1; + return 0; + +err: + close(fd); + return -1; +} + +int xc_dom_copyblk(struct xc_dom_image *dom, const void *mem, size_t memsize, + void **blob, size_t *size) +{ + z_stream zStream; + char magic[2]; + unsigned char gzlen[4]; + int rc; + + memcpy(magic, mem, sizeof(magic)); + if (0 == strncmp(magic, "\037\213", 2)) { + /* gzipped memblk */ + memcpy(gzlen, mem + memsize - sizeof(gzlen), sizeof(gzlen)); + *size = gzlen[3] << 24 | gzlen[2] << 16 | gzlen[1] << 8 | gzlen[0]; + *blob = xc_dom_malloc(dom, (*size) + 16); + if (NULL == *blob) + return -1; + + memset(&zStream, 0, sizeof(zStream)); + zStream.next_in = (void*)mem; + zStream.avail_in = memsize; + zStream.next_out = *blob; + zStream.avail_out = (*size) + 16; + rc = inflateInit2(&zStream, (MAX_WBITS+32)); /* +32 means "handle gzip" */ + if (rc != Z_OK) { + xc_dom_printf("%s: inflateInit2 failed (rc=%d)\n", __FUNCTION__, rc); + return -1; + } + rc = inflate(&zStream, Z_FINISH); + if (rc != Z_STREAM_END) { + xc_dom_printf("%s: inflate failed (rc=%d)\n", __FUNCTION__, rc); + return -1; + } + xc_dom_printf("%s: %zd bytes inflated to %zd bytes\n", + __FUNCTION__, memsize, *size); + } else { + /* normal memblk */ + *size = memsize; + *blob = xc_dom_malloc(dom, *size); + if (NULL == *blob) + return -1; + + memcpy(*blob, mem, memsize); + xc_dom_printf("%s: copied %zd bytes\n", + __FUNCTION__, *size); + } + return 0; +} + +/* ------------------------------------------------------------------------ */ +/* xen feature bits */ + +const char *xc_dom_feature_names[] = { + 
[XENFEAT_writable_page_tables] = "writable_page_tables", + [XENFEAT_writable_descriptor_tables] = "writable_descriptor_tables", + [XENFEAT_auto_translated_physmap] = "auto_translated_physmap", + [XENFEAT_supervisor_mode_kernel] = "supervisor_mode_kernel", + [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb" +}; +const int xc_dom_features = sizeof(xc_dom_feature_names)/sizeof(xc_dom_feature_names[0]); + +int xc_dom_features_parse(const char *features, uint32_t *opt, uint32_t *req) +{ + char feature[64]; + int pos, len, i; + + if (NULL == features) + return 0; + for (pos = 0; features[pos] != '\0'; pos += len) { + if (1 != sscanf(features+pos, "%63[^|]%n", feature, &len)) + break; + if (features[pos+len] == '|') + len++; + + for (i = 0; i < xc_dom_features; i++) { + if (!xc_dom_feature_names[i]) + continue; + if (NULL != req && feature[0] == '!') { + /* required */ + if (0 == strcmp(feature+1, xc_dom_feature_names[i])) { + xc_dom_printf("%s: req: %s\n", __FUNCTION__, feature+1); + xc_dom_feature_set(i, opt); + xc_dom_feature_set(i, req); + break; + } + } else { + /* supported */ + if (0 == strcmp(feature, xc_dom_feature_names[i])) { + xc_dom_printf("%s: opt: %s\n", __FUNCTION__, feature); + xc_dom_feature_set(i, opt); + break; + } + } + } + if (i == xc_dom_features) { + xc_dom_printf("%s: unknown feature: %s\n", __FUNCTION__, feature); + return -EINVAL; + } + } + return 0; +} + +/* ------------------------------------------------------------------------ */ +/* domain memory */ + +static int xc_dom_alloc_pages(struct xc_dom_image *dom) +{ + static const int max_pages = 256 * 1024 * 1024 / PAGE_SIZE; // 256 MB + + dom->mapped_pages = dom->total_pages; + if (dom->mapped_pages > max_pages) + dom->mapped_pages = max_pages; + dom->pages = mmap(NULL, PAGE_SIZE * dom->mapped_pages, + PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + if (MAP_FAILED == dom->pages) { + xc_dom_printf("%s: can't map 0x%lx pages: %s\n", + __FUNCTION__, dom->mapped_pages, 
strerror(errno)); + return -1; + } + xc_dom_printf("%-20s: virtual memory : 0x%lx -> 0x%lx (%ld of %ld pages mapped)\n", + __FUNCTION__, dom->virt_base, + dom->virt_base + dom->mapped_pages * PAGE_SIZE, + dom->mapped_pages, dom->total_pages); + dom->virt_alloc_end = dom->virt_base; + return 0; +} + +void* xc_dom_pfn_to_ptr(struct xc_dom_image *dom, unsigned long pfn) +{ + if (pfn > dom->mapped_pages) + return NULL; + return dom->pages + pfn * PAGE_SIZE; +} + +static int xc_dom_alloc_segment(struct xc_dom_image *dom, + struct xc_dom_seg *seg, + char *name, + unsigned long start, + unsigned long size) +{ + unsigned long pages = (size + PAGE_SIZE-1) / PAGE_SIZE; + + if (0 == start) + start = dom->virt_alloc_end; + + if (start & (PAGE_SIZE-1)) { + xc_dom_printf("%s: segment start isn't page aligned (0x%lx)\n", + __FUNCTION__, start); + return -1; + } + if (start < dom->virt_alloc_end) { + xc_dom_printf("%s: segment start too low (0x%lx < 0x%lx)\n", + __FUNCTION__, start, dom->virt_alloc_end); + return -1; + } + + seg->vstart = start; + seg->vend = start + pages * PAGE_SIZE; + seg->pfn = (seg->vstart - dom->virt_base) / PAGE_SIZE; + dom->virt_alloc_end = seg->vend; + + if (dom->virt_alloc_end > dom->virt_base + dom->mapped_pages * PAGE_SIZE) { + xc_dom_printf("%s: segment too big (0x%lx > 0x%lx)\n", + __FUNCTION__, dom->virt_alloc_end, + dom->virt_base + dom->mapped_pages * PAGE_SIZE); + return -1; + } + + xc_dom_printf("%-20s: %-12s : 0x%lx -> 0x%lx (pfn 0x%lx + %ld pages)\n", + __FUNCTION__, name, seg->vstart, seg->vend, seg->pfn, pages); + return 0; +} + +static int xc_dom_alloc_page(struct xc_dom_image *dom, + char *name) +{ + unsigned long start, pfn; + + start = dom->virt_alloc_end; + dom->virt_alloc_end += PAGE_SIZE; + pfn = (start - dom->virt_base) / PAGE_SIZE; + + if (dom->virt_alloc_end > dom->virt_base + dom->mapped_pages * PAGE_SIZE) { + xc_dom_printf("%s: Oops: out of memory\n", __FUNCTION__); + return -1; + } + + xc_dom_printf("%-20s: %-12s : 0x%lx (pfn 
0x%lx)\n", + __FUNCTION__, name, start, pfn); + return pfn; +} + +/* ------------------------------------------------------------------------ */ +/* pluggable kernel loaders */ + +static struct xc_dom_loader *first_loader = NULL; + +void xc_dom_register_loader(struct xc_dom_loader *loader) +{ + loader->next = first_loader; + first_loader = loader; +} + +static struct xc_dom_loader *xc_dom_find_loader(struct xc_dom_image *dom) +{ + struct xc_dom_loader *loader = first_loader; + + while (NULL != loader) { + xc_dom_printf("%s: trying %s loader ... ", __FUNCTION__, loader->name); + if (0 == loader->probe(dom)) { + xc_dom_printf("OK\n"); + return loader; + } + xc_dom_printf("failed\n"); + } + return NULL; +} + +/* ------------------------------------------------------------------------ */ +/* public interface */ + +void xc_dom_release(struct xc_dom_image *dom) +{ + xc_dom_printf("%s\n", __FUNCTION__); + xc_dom_free_all(dom); + free(dom); +} + +struct xc_dom_image* xc_dom_allocate(const char *cmdline, + const char *features, + unsigned long nr_pages) +{ + struct xc_dom_image *dom; + + xc_dom_printf("%s: cmdline=\"%s\", features=\"%s\"\n", + __FUNCTION__, cmdline, features); + dom = malloc(sizeof(*dom)); + if (!dom) + goto err; + + memset(dom,0,sizeof(*dom)); + if (cmdline) + dom->cmdline = xc_dom_strdup(dom,cmdline); + if (features) + xc_dom_features_parse(features, dom->f_requested, NULL); + dom->total_pages = nr_pages; + return dom; + + err: + if (dom) + xc_dom_release(dom); + return NULL; +} + +int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename) +{ + xc_dom_printf("%s: filename=\"%s\"\n", __FUNCTION__, filename); + return xc_dom_readfile(dom, filename, &dom->kernel_blob, &dom->kernel_size); +} + +int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename) +{ + xc_dom_printf("%s: filename=\"%s\"\n", __FUNCTION__, filename); + return xc_dom_readfile(dom, filename, &dom->ramdisk_blob, &dom->ramdisk_size); +} + +int xc_dom_kernel_mem(struct 
xc_dom_image *dom, const void *mem, size_t memsize)
+{
+    xc_dom_printf("%s\n", __FUNCTION__);
+    return xc_dom_copyblk(dom, mem, memsize,
+                          &dom->kernel_blob, &dom->kernel_size);
+}
+
+/*
+ * Use the memsize bytes at mem as ramdisk image.  The block is handed to
+ * xc_dom_copyblk() together with dom->ramdisk_blob / dom->ramdisk_size;
+ * the return value of xc_dom_copyblk() is passed through unchanged.
+ */
+int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem, size_t memsize)
+{
+    xc_dom_printf("%s\n", __FUNCTION__);
+    return xc_dom_copyblk(dom, mem, memsize,
+                          &dom->ramdisk_blob, &dom->ramdisk_size);
+}
+
+/*
+ * Parse the kernel image and settle the feature set.
+ *
+ * Looks up a loader for the image, runs its parser and then merges the
+ * requested (command line) and required (kernel) feature bits into
+ * dom->f_active.  Every active bit must also be set in dom->f_supported.
+ *
+ * Returns 0 on success, -1 if no loader is found, the parser fails or an
+ * unsupported feature is active.
+ */
+int xc_dom_parse_image(struct xc_dom_image *dom)
+{
+    struct xc_dom_loader *loader;
+    int i;
+
+    xc_dom_printf("%s\n", __FUNCTION__);
+
+    /* parse kernel image */
+    loader = xc_dom_find_loader(dom);
+    if (NULL == loader)
+        goto err;
+    if (0 != loader->parser(dom))
+        goto err;
+
+    /* check features */
+    for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
+        dom->f_active[i] |= dom->f_requested[i]; /* cmd line */
+        dom->f_active[i] |= dom->f_required[i];  /* kernel   */
+        if ((dom->f_active[i] & dom->f_supported[i]) != dom->f_active[i]) {
+            xc_dom_printf("%s: unsupported feature requested\n", __FUNCTION__);
+            goto err;
+        }
+    }
+
+#if 0
+    /* test and debug hacks (dom->pae is an integer flag, see the ELF parser) */
+    dom->pae = 1;
+    dom->virt_base = 0xbf000000;
+#endif
+
+    return 0;
+
+ err:
+    return -1;
+}
+
+/*
+ * Lay out the initial memory image of the domain.
+ *
+ * In this order: allocate the pages, allocate the kernel segment and let
+ * the loader fill it, copy the ramdisk (if any), build the phys2mach
+ * table, allocate the special pages and finally reserve the page table
+ * segment.
+ *
+ * Returns 0 on success, -1 as soon as one of the checked steps fails.
+ */
+int xc_dom_build_image(struct xc_dom_image *dom)
+{
+    struct xc_dom_loader *loader;
+    xen_pfn_t *p2m;
+    unsigned long i;
+
+    xc_dom_printf("%s\n", __FUNCTION__);
+    if (0 != xc_dom_alloc_pages(dom))
+        goto err;
+
+    /* load kernel */
+    if (0 != xc_dom_alloc_segment(dom, &dom->kernel_seg, "kernel",
+                                  dom->kernel_seg.vstart,
+                                  dom->kernel_seg.vend - dom->kernel_seg.vstart))
+        goto err;
+    loader = xc_dom_find_loader(dom);
+    if (NULL == loader)
+        goto err;
+    if (0 != loader->loader(dom))
+        goto err;
+
+    /* load ramdisk */
+    if (dom->ramdisk_blob) {
+        if (0 != xc_dom_alloc_segment(dom, &dom->ramdisk_seg, "ramdisk", 0,
+                                      dom->ramdisk_size))
+            goto err;
+        memcpy(xc_dom_pfn_to_ptr(dom, dom->ramdisk_seg.pfn),
+               dom->ramdisk_blob, dom->ramdisk_size);
+    }
+
+    /*
+     * allocate phys2mach table.  Size it by the element type actually
+     * stored below (xen_pfn_t), not by unsigned long.
+     */
+    if (0 != xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach", 0,
+                                  dom->total_pages * sizeof(*p2m)))
+        goto err;
+
+    /* setup phys2mach table (identity mapping for now ...) */
+    p2m = xc_dom_pfn_to_ptr(dom, dom->p2m_seg.pfn);
+    for (i = 0; i < dom->total_pages; i++)
+        p2m[i] = i;
+
+    /*
+     * allocate special pages
+     * NOTE(review): the results are stored unchecked; whether
+     * xc_dom_alloc_page() can fail is not visible from here -- confirm.
+     */
+    dom->start_info_pfn = xc_dom_alloc_page(dom, "start info");
+    dom->console_pfn = xc_dom_alloc_page(dom, "console");
+    dom->xenstore_pfn = xc_dom_alloc_page(dom, "xenstore");
+    if (xc_dom_feature_translated(dom))
+        dom->shared_info_pfn = xc_dom_alloc_page(dom, "shared info");
+    dom->bootstack_pfn = xc_dom_alloc_page(dom, "boot stack");
+
+    /* setup page tables */
+    if (0 != xc_dom_arch_count_pgtables(dom))
+        goto err;
+    if (0 != xc_dom_alloc_segment(dom, &dom->pgtables_seg, "page tables", 0,
+                                  dom->pgtables * PAGE_SIZE))
+        goto err;
+    xc_dom_printf("%-20s: virt_alloc_end : 0x%lx\n", __FUNCTION__, dom->virt_alloc_end);
+    xc_dom_printf("%-20s: virt_pgtab_end : 0x%lx\n", __FUNCTION__, dom->virt_pgtab_end);
+    return 0;
+
+ err:
+    return -1;
+}
diff -r 4142bfd01e02 tools/libxc/xc_dom_elf.c
--- /dev/null Thu Jun 1 10:25:02 2006
+++ b/tools/libxc/xc_dom_elf.c Thu Jun 1 15:23:55 2006
@@ -0,0 +1,276 @@
+/*
+ * Xen domain builder -- ELF bits.
+ *
+ * Parse and load ELF kernel images.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+#if defined(__i386__)
+# define ELFSIZE 32
+# define MY_NAME "ELF-i386"
+# define MY_CLASS ELFCLASS32
+# define MY_MACHINE EM_386
+#elif defined(__x86_64__)
+# define ELFSIZE 64
+# define MY_NAME "ELF-x86_64"
+# define MY_CLASS ELFCLASS64
+# define MY_MACHINE EM_X86_64
+#elif defined(__ia64__)
+# define ELFSIZE 64
+# define MY_NAME "ELF-ia64"
+#endif
+#include "xc_elf.h"
+
+#define XEN_VER "xen-3.0"
+
+/* ------------------------------------------------------------------------ */
+/* parse elf binary */
+
+/*
+ * True when the byte range [off, off+len) lies completely inside an image
+ * of size bytes.  Written so that off+len is never computed and therefore
+ * cannot wrap around.
+ */
+static inline int range_in_image(uint64_t off, uint64_t len, uint64_t size)
+{
+    return (off <= size) && (len <= size - off);
+}
+
+/* A program header is loaded when it is PT_LOAD and writable or executable. */
+static inline int is_loadable_phdr(Elf_Phdr *phdr)
+{
+    return ((phdr->p_type == PT_LOAD) &&
+            ((phdr->p_flags & (PF_W|PF_X)) != 0));
+}
+
+/*
+ * Check that dom->kernel_blob is an ELF executable this build can handle
+ * (ET_EXEC, little endian, plus class and machine where MY_CLASS and
+ * MY_MACHINE are defined).  With verbose != 0 the reason for a rejection
+ * is printed.
+ *
+ * Returns 0 if the image is acceptable, -EINVAL otherwise.
+ */
+static int check_elf_kernel(struct xc_dom_image *dom, int verbose)
+{
+    Elf_Ehdr *ehdr = (Elf_Ehdr *)dom->kernel_blob;
+
+    if (NULL == ehdr) {
+        if (verbose)
+            xc_dom_printf("%s: no kernel image loaded\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* the header fields are only readable if the blob holds a whole Ehdr */
+    if (dom->kernel_size < sizeof(*ehdr) || !IS_ELF(*ehdr)) {
+        if (verbose)
+            xc_dom_printf("%s: kernel is not an ELF image\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    if ((ehdr->e_type != ET_EXEC) ||
+#ifdef MY_CLASS
+        (ehdr->e_ident[EI_CLASS] != MY_CLASS) ||
+#endif
+#ifdef MY_MACHINE
+        (ehdr->e_machine != MY_MACHINE) ||
+#endif
+        (ehdr->e_ident[EI_DATA] != ELFDATA2LSB)) {
+        if (verbose)
+            xc_dom_printf("%s: incompatible ELF image\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/* Loader probe hook: silent variant of check_elf_kernel(). */
+static int xc_dom_probe_elf_kernel(struct xc_dom_image *dom)
+{
+    return check_elf_kernel(dom, 0);
+}
+
+/*
+ * Loader parser hook.
+ *
+ * Validates the ELF headers, locates the __xen_guest section, parses its
+ * comma separated NAME=VALUE pairs into dom and computes the virtual
+ * address range of the kernel (dom->kernel_seg.vstart/vend) from the
+ * loadable program headers.
+ *
+ * The image comes from outside, so every offset taken from it is checked
+ * against dom->kernel_size before it is dereferenced.
+ *
+ * Returns 0 on success, -EINVAL for anything unparsable or unsupported.
+ */
+static int xc_dom_parse_elf_kernel(struct xc_dom_image *dom)
+{
+    Elf_Ehdr *ehdr = (Elf_Ehdr *)dom->kernel_blob;
+    Elf_Phdr *phdr;
+    Elf_Shdr *shdr;
+    const char *shstrtab, *h;
+    char name[64], value[256];
+    int i, len, rc;
+    uint64_t strtab_size;
+    xen_vaddr_t vaddr, vaddr_offset;
+
+    dom->virt_base = -1;
+    dom->virt_entry = -1;
+    dom->elf_paddr_offset = -1;
+    dom->hypercall_page = -1;
+    dom->kernel_seg.vstart = -1;
+    dom->kernel_seg.vend = 0;
+
+    /* sanity checks */
+    rc = check_elf_kernel(dom, 1);
+    if (0 != rc)
+        return rc;
+
+    /* table sizes are computed in 64 bit, the 16 bit factors could overflow int */
+    if ((ehdr->e_phnum && ehdr->e_phentsize < sizeof(Elf_Phdr)) ||
+        !range_in_image(ehdr->e_phoff,
+                        (uint64_t)ehdr->e_phnum * ehdr->e_phentsize,
+                        dom->kernel_size)) {
+        xc_dom_printf("%s: ELF phdr extend beyond end of image\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    if ((ehdr->e_shnum && ehdr->e_shentsize < sizeof(Elf_Shdr)) ||
+        !range_in_image(ehdr->e_shoff,
+                        (uint64_t)ehdr->e_shnum * ehdr->e_shentsize,
+                        dom->kernel_size)) {
+        xc_dom_printf("%s: ELF shdr extend beyond end of image\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* Find the section-header strings table. */
+    if (ehdr->e_shstrndx == SHN_UNDEF || ehdr->e_shstrndx >= ehdr->e_shnum) {
+        xc_dom_printf("%s: ELF image has no shstrtab\n", __FUNCTION__);
+        return -EINVAL;
+    }
+    shdr = (Elf_Shdr *)(dom->kernel_blob + ehdr->e_shoff +
+                        (ehdr->e_shstrndx*ehdr->e_shentsize));
+    if (0 == shdr->sh_size ||
+        !range_in_image(shdr->sh_offset, shdr->sh_size, dom->kernel_size)) {
+        xc_dom_printf("%s: ELF shstrtab extends beyond end of image\n", __FUNCTION__);
+        return -EINVAL;
+    }
+    shstrtab = dom->kernel_blob + shdr->sh_offset;
+    strtab_size = shdr->sh_size;
+    /* a terminated table bounds every strcmp() on a section name below */
+    if ('\0' != shstrtab[strtab_size - 1]) {
+        xc_dom_printf("%s: ELF shstrtab is not terminated\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* Find the special '__xen_guest' section and check its contents. */
+#if defined(__ia64__)
+    dom->guestinfo = ""; /* __xen_guest section not required */
+#endif
+    for (i = 0; i < ehdr->e_shnum; i++) {
+        shdr = (Elf_Shdr *)(dom->kernel_blob + ehdr->e_shoff + (i*ehdr->e_shentsize));
+        if (shdr->sh_name >= strtab_size)
+            continue; /* name offset outside the string table */
+        if (strcmp(&shstrtab[shdr->sh_name], "__xen_guest") != 0)
+            continue;
+        /* guestinfo is used as a C string: require a NUL inside the image */
+        if (shdr->sh_offset >= dom->kernel_size ||
+            NULL == memchr((char *)dom->kernel_blob + shdr->sh_offset, '\0',
+                           dom->kernel_size - shdr->sh_offset)) {
+            xc_dom_printf("%s: __xen_guest section is not a valid string\n",
+                          __FUNCTION__);
+            return -EINVAL;
+        }
+        dom->guestinfo = (char *)dom->kernel_blob + shdr->sh_offset;
+        break;
+    }
+    if (NULL == dom->guestinfo) {
+        xc_dom_printf("%s: no __xen_guest section found\n", __FUNCTION__);
+        return -EINVAL;
+    }
+    xc_dom_printf("%s: __xen_guest: \"%s\"\n", __FUNCTION__, dom->guestinfo);
+
+    /* walk the NAME=VALUE pairs; len is the number of characters consumed */
+    for (h = dom->guestinfo; *h; h+= len) {
+        rc = sscanf(h, "%63[^=]=%255[^,]%n", name, value, &len);
+        if (2 != rc)
+            break;
+        if (h[len] == ',')
+            len++;
+
+        /* strings */
+        if (0 == strcmp(name, "LOADER"))
+            dom->loader = xc_dom_strdup(dom, value);
+        if (0 == strcmp(name, "GUEST_OS"))
+            dom->guest_os = xc_dom_strdup(dom, value);
+        if (0 == strcmp(name, "GUEST_VER"))
+            dom->guest_ver = xc_dom_strdup(dom, value);
+        if (0 == strcmp(name, "XEN_VER"))
+            dom->xen_ver = xc_dom_strdup(dom, value);
+        if (0 == strcmp(name, "PAE"))
+            dom->pae = (0 == strcasecmp(value, "yes")) ? 1 : 0;
+
+        /* longs */
+        if (0 == strcmp(name, "VIRT_BASE"))
+            dom->virt_base = strtoul(value, NULL, 0);
+        if (0 == strcmp(name, "VIRT_ENTRY"))
+            dom->virt_entry = strtoul(value, NULL, 0);
+        if (0 == strcmp(name, "ELF_PADDR_OFFSET"))
+            dom->elf_paddr_offset = strtoul(value, NULL, 0);
+        if (0 == strcmp(name, "HYPERCALL_PAGE"))
+            dom->hypercall_page = strtoul(value, NULL, 0);
+
+        /* other */
+        if (0 == strcmp(name, "FEATURES"))
+            xc_dom_features_parse(value, dom->f_supported, dom->f_required);
+        /* TODO: bsd_symtab */
+    }
+
+    if (dom->loader) {
+        if (0 != strcmp(dom->loader, "generic")) {
+            xc_dom_printf("%s: unknown loader: %s\n", __FUNCTION__, dom->loader);
+            return -EINVAL;
+        }
+    } else if (dom->guest_os) {
+        if (0 != strcmp(dom->guest_os, "linux")) {
+            xc_dom_printf("%s: unknown guest os: %s\n", __FUNCTION__, dom->guest_os);
+            return -EINVAL;
+        }
+    } else {
+        xc_dom_printf("%s: neither loader nor guest_os specified\n", __FUNCTION__);
+        return -EINVAL;
+    }
+    if (dom->xen_ver && 0 != strcmp(dom->xen_ver, XEN_VER)) {
+        xc_dom_printf("%s: will load only images for %s (seen: %s)\n",
+                      __FUNCTION__, XEN_VER, dom->xen_ver);
+        return -EINVAL;
+    }
+
+    if (-1 != dom->elf_paddr_offset) {
+        if (-1 == dom->virt_base) {
+            xc_dom_printf("%s: elf_paddr_offset defined, but virt_base isn't\n",
+                          __FUNCTION__);
+            return -EINVAL;
+        }
+        vaddr_offset = dom->virt_base - dom->elf_paddr_offset;
+    } else {
+        /* assume paddr has virtual addresses */
+        vaddr_offset = 0;
+    }
+
+    for (i = 0; i < ehdr->e_phnum; i++) {
+        phdr = (Elf_Phdr *)(dom->kernel_blob + ehdr->e_phoff + (i*ehdr->e_phentsize));
+        if (!is_loadable_phdr(phdr))
+            continue;
+        vaddr = phdr->p_paddr + vaddr_offset;
+        /* the fields are 32 or 64 bit wide depending on ELFSIZE: print as 64 bit */
+        xc_dom_printf("%s: phdr: va=0x%" PRIx64 " pa=0x%" PRIx64
+                      " fz=0x%" PRIx64 " mz=0x%" PRIx64 " | vaddr=0x%" PRIx64 "\n",
+                      __FUNCTION__, (uint64_t)phdr->p_vaddr, (uint64_t)phdr->p_paddr,
+                      (uint64_t)phdr->p_filesz, (uint64_t)phdr->p_memsz,
+                      (uint64_t)vaddr);
+        if ((vaddr + phdr->p_memsz) < vaddr) {
+            xc_dom_printf("%s: phdr invalid\n", __FUNCTION__);
+            return -EINVAL;
+        }
+        /* the loader copies p_filesz bytes from the image into p_memsz bytes */
+        if (phdr->p_filesz > phdr->p_memsz ||
+            !range_in_image(phdr->p_offset, phdr->p_filesz, dom->kernel_size)) {
+            xc_dom_printf("%s: phdr invalid\n", __FUNCTION__);
+            return -EINVAL;
+        }
+        if (dom->kernel_seg.vstart > vaddr)
+            dom->kernel_seg.vstart = vaddr;
+        if (dom->kernel_seg.vend < vaddr + phdr->p_memsz)
+            dom->kernel_seg.vend = vaddr + phdr->p_memsz;
+    }
+
+    /*
+     * Legacy compatibility and images with no __xen_guest section: assume
+     * header addresses are virtual addresses, and that guest memory should be
+     * mapped starting at kernel load address.
+     */
+    if (-1 == dom->virt_base)
+        dom->virt_base = dom->kernel_seg.vstart;
+    if (-1 == dom->elf_paddr_offset)
+        dom->elf_paddr_offset = dom->virt_base;
+    if (-1 == dom->virt_entry)
+        dom->virt_entry = ehdr->e_entry;
+
+    if ((dom->kernel_seg.vstart > dom->kernel_seg.vend) ||
+        (dom->kernel_seg.vstart < dom->virt_base) ||
+        (dom->kernel_seg.vstart > dom->virt_entry) ||
+        (dom->kernel_seg.vend < dom->virt_entry)) {
+        xc_dom_printf("%s: virtual address layout broken\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * Loader hook: copy the file contents of every loadable program header
+ * to dom->pages, at p_paddr - dom->elf_paddr_offset.
+ *
+ * Returns 0 on success, -EINVAL if a header points outside the image.
+ *
+ * NOTE(review): the p_memsz - p_filesz tail (bss) is not cleared here;
+ * presumably dom->pages starts out zeroed -- confirm.
+ */
+static int xc_dom_load_elf_kernel(struct xc_dom_image *dom)
+{
+    Elf_Ehdr *ehdr = (Elf_Ehdr *)dom->kernel_blob;
+    Elf_Phdr *phdr;
+    xen_paddr_t pa;
+    int i;
+
+    for (i = 0; i < ehdr->e_phnum; i++) {
+        phdr = (Elf_Phdr *)(dom->kernel_blob + ehdr->e_phoff + (i*ehdr->e_phentsize));
+        if (!is_loadable_phdr(phdr))
+            continue;
+        /* never read past the end of the kernel blob */
+        if (!range_in_image(phdr->p_offset, phdr->p_filesz, dom->kernel_size)) {
+            xc_dom_printf("%s: phdr invalid\n", __FUNCTION__);
+            return -EINVAL;
+        }
+        pa = phdr->p_paddr - dom->elf_paddr_offset;
+        memcpy(dom->pages + pa, dom->kernel_blob + phdr->p_offset, phdr->p_filesz);
+    }
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static struct xc_dom_loader elf_loader = {
+    .name = MY_NAME,
+    .probe = xc_dom_probe_elf_kernel,
+    .parser = xc_dom_parse_elf_kernel,
+    .loader = xc_dom_load_elf_kernel,
+};
+
+/* Hook the ELF loader into the domain builder (runs via __init). */
+static void __init register_loader(void)
+{
+    xc_dom_register_loader(&elf_loader);
+}
diff -r 4142bfd01e02 tools/libxc/xc_dom_x86.c
--- /dev/null Thu Jun 1 10:25:02 2006
+++ b/tools/libxc/xc_dom_x86.c Thu Jun 1 15:23:55 2006
@@ -0,0 +1,340 @@
+/*
+ * Xen domain builder -- i386 and x86_64 bits.
+ *
+ * Most architecture-specific code for x86 goes here.
+ * - prepare page tables.
+ * - fill architecture-specific structs.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Note: round_up() yields the LAST address of the aligned region (all mask
+ * bits set), not the first address of the next one; users add 1 themselves.
+ */
+#define bits_to_mask(bits)       ((1UL << (bits))-1)
+#define round_down(addr, mask)   ((addr) & ~(mask))
+#define round_up(addr, mask)     ((addr) | (mask))
+
+/*
+ * Number of page tables of one level needed to cover [start, end], where
+ * one table of that level spans 2^bits bytes of address space.
+ *
+ * bits == 0 marks an unused level (0 tables); bits equal to the width of
+ * unsigned long marks the top level (exactly 1 table).
+ */
+static inline unsigned long
+nr_page_tables(xen_vaddr_t start, xen_vaddr_t end, unsigned long bits)
+{
+    xen_vaddr_t mask;
+    int tables;
+
+    if (0 == bits)
+        return 0;  /* unused */
+    if (8*sizeof(unsigned long) == bits)
+        return 1;  /* must be pgd, need one */
+
+    /* build the mask only now: shifting by the full word width is undefined */
+    mask = bits_to_mask(bits);
+    start = round_down(start, mask);
+    end = round_up(end, mask);
+    tables = ((end-start) >> bits) +1;
+#if 0
+    xc_dom_printf("%s: 0x%08lx/%ld: 0x%08lx -> 0x%08lx, %d table(s)\n",
+                  __FUNCTION__, mask, bits, start, end, tables);
+#endif
+    return tables;
+}
+
+/*
+ * Work out how many page tables of each level are needed to map everything
+ * from dom->virt_base up to and including the page tables themselves.
+ *
+ * The tables live behind dom->virt_alloc_end and have to be mapped too, so
+ * the end address depends on the result; the loop repeats until the tables
+ * counted fit below the end address that was tried.
+ *
+ * Sets dom->pg_l1..pg_l4, dom->pgtables and dom->virt_pgtab_end.
+ * Always returns 0.
+ *
+ * NOTE(review): dom->pgtables is read before it is first written here, so
+ * it is presumably expected to be 0 on entry -- confirm.
+ */
+int xc_dom_arch_count_pgtables(struct xc_dom_image *dom)
+{
+    int l1_bits = 0, l2_bits = 0, l3_bits = 0, l4_bits = 0;
+    xen_vaddr_t try_virt_end;
+
+#if defined(__i386__)
+    if (dom->pae) {
+        l1_bits = L2_PAGETABLE_SHIFT_PAE;
+        l2_bits = L3_PAGETABLE_SHIFT_PAE;
+        l3_bits = 32;
+    } else {
+        l1_bits = L2_PAGETABLE_SHIFT;
+        l2_bits = 32;
+    }
+#endif
+
+#if defined(__x86_64__)
+    l1_bits = L2_PAGETABLE_SHIFT;
+    l2_bits = L3_PAGETABLE_SHIFT;
+    l3_bits = L4_PAGETABLE_SHIFT;
+    l4_bits = L4_PAGETABLE_SHIFT + 9;
+#endif
+
+    for (;;) {
+        try_virt_end = round_up(dom->virt_alloc_end + dom->pgtables * PAGE_SIZE,
+                                bits_to_mask(l1_bits));
+        dom->pg_l4 = nr_page_tables(dom->virt_base, try_virt_end, l4_bits);
+        dom->pg_l3 = nr_page_tables(dom->virt_base, try_virt_end, l3_bits);
+        dom->pg_l2 = nr_page_tables(dom->virt_base, try_virt_end, l2_bits);
+        dom->pg_l1 = nr_page_tables(dom->virt_base, try_virt_end, l1_bits);
+        dom->pgtables = dom->pg_l4 + dom->pg_l3 + dom->pg_l2 + dom->pg_l1;
+        if (dom->virt_alloc_end + dom->pgtables * PAGE_SIZE <= try_virt_end+1)
+            break;
+    }
+    dom->virt_pgtab_end = try_virt_end+1;
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#if defined(__i386__)
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT)
+
+/*
+ * Fill the two-level (non-PAE) page tables.  The page table segment holds
+ * the L2 table(s) first, followed by the L1 tables.  Maps virt_base up to
+ * virt_pgtab_end linearly onto pfn 0, 1, 2, ...; the pages holding the page
+ * tables themselves are mapped read-only.  Always returns 0.
+ */
+static int setup_pgtables_32(struct xc_dom_image *dom)
+{
+    xen_pfn_t l2pfn = dom->pgtables_seg.pfn;
+    xen_pfn_t l1pfn = dom->pgtables_seg.pfn + dom->pg_l2;
+    l2_pgentry_32_t *l2tab = xc_dom_pfn_to_ptr(dom, l2pfn);
+    l1_pgentry_32_t *l1tab = NULL;
+    unsigned long l2off, l1off;
+    xen_vaddr_t addr;
+    xen_pfn_t pgpfn;
+
+    for (addr = dom->virt_base; addr < dom->virt_pgtab_end; addr += PAGE_SIZE) {
+        if (NULL == l1tab) {
+            /* get L1 tab, make L2 entry */
+            l1tab = xc_dom_pfn_to_ptr(dom, l1pfn);
+            l2off = l2_table_offset(addr);
+            l2tab[l2off] = (xc_dom_p2m_guest(dom,l1pfn) << PAGE_SHIFT) | L2_PROT;
+            l1pfn++;
+        }
+
+        /* make L1 entry */
+        l1off = l1_table_offset(addr);
+        pgpfn = (addr - dom->virt_base) >> PAGE_SHIFT;
+        l1tab[l1off] = (xc_dom_p2m_guest(dom,pgpfn) << PAGE_SHIFT) | L1_PROT;
+        if (addr >= dom->pgtables_seg.vstart && addr < dom->pgtables_seg.vend)
+            l1tab[l1off] &= ~_PAGE_RW; /* page tables are r/o */
+        if (L1_PAGETABLE_ENTRIES-1 == l1off)
+            l1tab = NULL; /* table full, take the next one */
+    }
+    return 0;
+}
+
+/*
+ * Same as setup_pgtables_32() for three-level PAE page tables (segment
+ * order: L3, L2, L1).  The entries are 64 bit wide, so the frame number is
+ * widened before it is shifted; otherwise frames above 4GB would lose
+ * their upper bits in a 32 bit shift.  Always returns 0.
+ */
+static int setup_pgtables_32_pae(struct xc_dom_image *dom)
+{
+    xen_pfn_t l3pfn = dom->pgtables_seg.pfn;
+    xen_pfn_t l2pfn = dom->pgtables_seg.pfn + dom->pg_l3;
+    xen_pfn_t l1pfn = dom->pgtables_seg.pfn + dom->pg_l3 + dom->pg_l2;
+    l3_pgentry_64_t *l3tab = xc_dom_pfn_to_ptr(dom, l3pfn);
+    l2_pgentry_64_t *l2tab = NULL;
+    l1_pgentry_64_t *l1tab = NULL;
+    unsigned long l3off, l2off, l1off;
+    xen_vaddr_t addr;
+    xen_pfn_t pgpfn;
+
+    for (addr = dom->virt_base; addr < dom->virt_pgtab_end; addr += PAGE_SIZE) {
+        if (NULL == l2tab) {
+            /* get L2 tab, make L3 entry */
+            l2tab = xc_dom_pfn_to_ptr(dom, l2pfn);
+            l3off = l3_table_offset_pae(addr);
+            l3tab[l3off] = ((uint64_t)xc_dom_p2m_guest(dom,l2pfn) << PAGE_SHIFT) | L3_PROT;
+            l2pfn++;
+        }
+
+        if (NULL == l1tab) {
+            /* get L1 tab, make L2 entry */
+            l1tab = xc_dom_pfn_to_ptr(dom, l1pfn);
+            l2off = l2_table_offset_pae(addr);
+            l2tab[l2off] = ((uint64_t)xc_dom_p2m_guest(dom,l1pfn) << PAGE_SHIFT) | L2_PROT;
+            if (L2_PAGETABLE_ENTRIES_PAE-1 == l2off)
+                l2tab = NULL;
+            l1pfn++;
+        }
+
+        /* make L1 entry */
+        l1off = l1_table_offset_pae(addr);
+        pgpfn = (addr - dom->virt_base) >> PAGE_SHIFT;
+        l1tab[l1off] = ((uint64_t)xc_dom_p2m_guest(dom,pgpfn) << PAGE_SHIFT) | L1_PROT;
+        if (addr >= dom->pgtables_seg.vstart && addr < dom->pgtables_seg.vend)
+            l1tab[l1off] &= ~_PAGE_RW; /* page tables are r/o */
+        if (L1_PAGETABLE_ENTRIES_PAE-1 == l1off)
+            l1tab = NULL;
+    }
+    return 0;
+}
+
+/* Build the initial page tables, PAE or not depending on dom->pae. */
+int xc_dom_arch_setup_pgtables(struct xc_dom_image *dom)
+{
+    if (dom->pae)
+        return setup_pgtables_32_pae(dom);
+    else
+        return setup_pgtables_32(dom);
+}
+
+#endif /* __i386__ */
+
+/* ------------------------------------------------------------------------ */
+
+#if defined(__x86_64__)
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+
+/*
+ * Fill the four-level x86_64 page tables (segment order: L4, L3, L2, L1).
+ * Maps virt_base up to virt_pgtab_end linearly onto pfn 0, 1, 2, ...; the
+ * pages holding the page tables themselves are mapped read-only.
+ * Always returns 0.
+ *
+ * NOTE(review): this uses the unsuffixed l?_table_offset() and
+ * L?_PAGETABLE_ENTRIES macros, matching the unsuffixed L?_PAGETABLE_SHIFT
+ * values xc_dom_arch_count_pgtables() uses for x86_64.  They are assumed
+ * to be provided by xg_private.h -- confirm.  The *_pae variants used
+ * before describe the i386 PAE layout and have no fourth level.
+ */
+static int setup_pgtables_64(struct xc_dom_image *dom)
+{
+    xen_pfn_t l4pfn = dom->pgtables_seg.pfn;
+    xen_pfn_t l3pfn = dom->pgtables_seg.pfn + dom->pg_l4;
+    xen_pfn_t l2pfn = dom->pgtables_seg.pfn + dom->pg_l4 + dom->pg_l3;
+    xen_pfn_t l1pfn = dom->pgtables_seg.pfn + dom->pg_l4 + dom->pg_l3 + dom->pg_l2;
+    l4_pgentry_t *l4tab = xc_dom_pfn_to_ptr(dom, l4pfn);
+    l3_pgentry_t *l3tab = NULL;
+    l2_pgentry_t *l2tab = NULL;
+    l1_pgentry_t *l1tab = NULL;
+    unsigned long l4off, l3off, l2off, l1off;
+    xen_vaddr_t addr;
+    xen_pfn_t pgpfn;
+
+    for (addr = dom->virt_base; addr < dom->virt_pgtab_end; addr += PAGE_SIZE) {
+        if (NULL == l3tab) {
+            /* get L3 tab, make L4 entry */
+            l3tab = xc_dom_pfn_to_ptr(dom, l3pfn);
+            l4off = l4_table_offset(addr);
+            l4tab[l4off] = (xc_dom_p2m_guest(dom,l3pfn) << PAGE_SHIFT) | L4_PROT;
+            l3pfn++;
+        }
+
+        if (NULL == l2tab) {
+            /* get L2 tab, make L3 entry */
+            l2tab = xc_dom_pfn_to_ptr(dom, l2pfn);
+            l3off = l3_table_offset(addr);
+            l3tab[l3off] = (xc_dom_p2m_guest(dom,l2pfn) << PAGE_SHIFT) | L3_PROT;
+            if (L3_PAGETABLE_ENTRIES-1 == l3off)
+                l3tab = NULL;
+            l2pfn++;
+        }
+
+        if (NULL == l1tab) {
+            /* get L1 tab, make L2 entry */
+            l1tab = xc_dom_pfn_to_ptr(dom, l1pfn);
+            l2off = l2_table_offset(addr);
+            l2tab[l2off] = (xc_dom_p2m_guest(dom,l1pfn) << PAGE_SHIFT) | L2_PROT;
+            if (L2_PAGETABLE_ENTRIES-1 == l2off)
+                l2tab = NULL;
+            l1pfn++;
+        }
+
+        /* make L1 entry */
+        l1off = l1_table_offset(addr);
+        pgpfn = (addr - dom->virt_base) >> PAGE_SHIFT;
+        l1tab[l1off] = (xc_dom_p2m_guest(dom,pgpfn) << PAGE_SHIFT) | L1_PROT;
+        if (addr >= dom->pgtables_seg.vstart && addr < dom->pgtables_seg.vend)
+            l1tab[l1off] &= ~_PAGE_RW; /* page tables are r/o */
+        if (L1_PAGETABLE_ENTRIES-1 == l1off)
+            l1tab = NULL;
+    }
+    return 0;
+}
+
+/* Build the initial page tables (x86_64 has a single layout). */
+int xc_dom_arch_setup_pgtables(struct xc_dom_image *dom)
+{
+    return setup_pgtables_64(dom);
+}
+
+#endif /* __x86_64__ */
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Fill the start_info page of the domain: magic string, page counts,
+ * shared info address, page table and phys2mach locations, xenstore and
+ * console pages/event channels, ramdisk and command line.
+ *
+ * xen_version carries the major number in the upper and the minor number
+ * in the lower 16 bits.  Always returns 0.
+ */
+int xc_dom_arch_start_info(struct xc_dom_image *dom, int xen_version)
+{
+    start_info_t *start_info = xc_dom_pfn_to_ptr(dom, dom->start_info_pfn);
+    xen_pfn_t shinfo = xc_dom_feature_translated(dom) ?
+        dom->shared_info_pfn : dom->shared_info_mfn;
+
+    xc_dom_printf("%s\n", __FUNCTION__);
+
+    sprintf(start_info->magic, "xen-%i.%i-x86_%d%s",
+            xen_version >> 16, xen_version & (0xFFFF),
+            (unsigned int)sizeof(long)*8,
+            dom->pae ? "p" : "");
+
+    start_info->nr_pages = dom->total_pages;
+    start_info->shared_info = shinfo << PAGE_SHIFT;
+    start_info->pt_base = dom->pgtables_seg.vstart;
+    start_info->nr_pt_frames = dom->pgtables;
+    start_info->mfn_list = dom->p2m_seg.vstart;
+
+    start_info->flags = dom->flags;
+    start_info->store_mfn = xc_dom_p2m_guest(dom,dom->xenstore_pfn);
+    start_info->console_mfn = xc_dom_p2m_guest(dom,dom->console_pfn);
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console_evtchn = dom->console_evtchn;
+
+    if (dom->ramdisk_blob) {
+        start_info->mod_start = dom->ramdisk_seg.vstart;
+        start_info->mod_len = dom->ramdisk_size;
+    }
+    if (dom->cmdline) {
+        /* strncpy() does not terminate on truncation, so do it by hand */
+        strncpy((char*)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+        start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
+    }
+    return 0;
+}
+
+/*
+ * Initialise a shared info page: everything zero, with event channel
+ * upcalls masked on every virtual cpu.  Always returns 0.
+ */
+int xc_dom_arch_shared_info(shared_info_t *shared_info)
+{
+    int i;
+
+    xc_dom_printf("%s\n", __FUNCTION__);
+
+    memset(shared_info, 0, sizeof(*shared_info));
+    for (i = 0; i < MAX_VIRT_CPUS; i++)
+        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
+    return 0;
+}
+
+/*
+ * Fill the initial register state of the boot vcpu: flat kernel segments,
+ * entry point in eip, stack pointer at the end of the boot stack page,
+ * esi pointing to the start info page and cr3 to the top-level page table.
+ * Always returns 0.
+ */
+int xc_dom_arch_vcpu(struct xc_dom_image *dom, vcpu_guest_context_t *ctxt)
+{
+    int i;
+
+    xc_dom_printf("%s\n", __FUNCTION__);
+
+    /* clear everything */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    ctxt->user_regs.ds = FLAT_KERNEL_DS;
+    ctxt->user_regs.es = FLAT_KERNEL_DS;
+    ctxt->user_regs.fs = FLAT_KERNEL_DS;
+    ctxt->user_regs.gs = FLAT_KERNEL_DS;
+    ctxt->user_regs.ss = FLAT_KERNEL_SS;
+    ctxt->user_regs.cs = FLAT_KERNEL_CS;
+    ctxt->user_regs.eip = dom->virt_entry;
+    ctxt->user_regs.esp = dom->virt_base + (dom->bootstack_pfn+1) * PAGE_SIZE;
+    ctxt->user_regs.esi = dom->virt_base + (dom->start_info_pfn) * PAGE_SIZE;
+    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */
+
+    ctxt->kernel_ss = FLAT_KERNEL_SS;
+    ctxt->kernel_sp = dom->virt_base + (dom->bootstack_pfn+1) * PAGE_SIZE;
+
+    ctxt->flags = VGCF_IN_KERNEL;
+    ctxt->ctrlreg[3] = xc_dom_p2m_guest(dom,dom->pgtables_seg.pfn) << PAGE_SHIFT;
+    xc_dom_printf("%s: cr3: pfn 0x%lx mfn 0x%lx\n", __FUNCTION__,
+                  dom->pgtables_seg.pfn,
+                  xc_dom_p2m_guest(dom,dom->pgtables_seg.pfn));
+
+    /* Virtual IDT is empty at start-of-day. */
+    for ( i = 0; i < 256; i++ ) {
+        ctxt->trap_ctxt[i].vector = i;
+        ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
+    }
+
+    /* No callback handlers. */
+#if defined(__i386__)
+    ctxt->event_callback_cs = FLAT_KERNEL_CS;
+    ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
+#endif
+
+    return 0;
+}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |