[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 17/18 V2]: PVH xen: PVH dom0 creation...
Finally, the hardest. Mostly modify construct_dom0() to boot PV dom0 in PVH mode. Introduce, opt_dom0pvh, which when specified in the command line, causes dom0 to boot in PVH mode. Change in V2: - Map the entire IO region upfront in the P2M for PVH dom0. Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx> --- xen/arch/x86/domain_build.c | 241 +++++++++++++++++++++++++++++++++---------- xen/arch/x86/mm/hap/hap.c | 17 +++- xen/arch/x86/setup.c | 10 ++- xen/include/asm-x86/hap.h | 1 + 4 files changed, 212 insertions(+), 57 deletions(-) diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c index 8c5b27a..72aa70b 100644 --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -35,6 +35,8 @@ #include <asm/setup.h> #include <asm/bzimage.h> /* for bzimage_parse */ #include <asm/io_apic.h> +#include <asm/hap.h> +#include <asm/debugger.h> #include <public/version.h> @@ -307,6 +309,65 @@ static void __init process_dom0_ioports_disable(void) } } +/* + * Set the 1:1 map for all non-RAM regions for dom 0. Thus, dom0 will have + * the entire io region mapped in the EPT/NPT. + */ +static __init void pvh_map_all_iomem(struct domain *d) +{ + unsigned long start = 0; + const struct e820entry *entry; + int rc, i, nump; + + for (i = 0, entry = e820.map; i < e820.nr_map; i++, entry++) { + unsigned long end = entry->addr + entry->size; + + if (entry->type == E820_RAM || i == e820.nr_map - 1) { + unsigned long start_pfn = PFN_DOWN(start); + unsigned long end_pfn = PFN_UP(end); + + if (entry->type == E820_RAM) + end_pfn = PFN_UP(entry->addr); + + if (start_pfn < end_pfn) { + nump = end_pfn - start_pfn + 1; + rc = domctl_memory_mapping(d, start_pfn, start_pfn, nump, 1); + BUG_ON(rc); + } + start = end; + } + } +} + +static __init void dom0_update_physmap(struct domain *d, unsigned long pfn, + unsigned long mfn, unsigned long vphysmap_s) +{ + if ( is_pvh_domain(d) ) { + int rc = guest_physmap_add_page(d, pfn, mfn, 0); + BUG_ON(rc); + return; + } + if ( !is_pv_32on64_domain(d) ) + ((unsigned long *)vphysmap_s)[pfn] = mfn; + else + ((unsigned int *)vphysmap_s)[pfn] = mfn; + + set_gpfn_from_mfn(mfn, pfn); +} + +static __init void copy_pvh(char *dest, char *src, int bytes) +{ + /* raw_copy_to_guest() -> copy_to_user_hvm -> __hvm_copy needs curr + * to point to the hvm/pvh vcpu. Hence for PVH dom0 we can't use that. + * So we just use dbg_rw_mem(). + */ + int rem = dbg_rw_mem((dbgva_t)dest, (unsigned char *)src, bytes, 0, 1, 0); + if (rem) { + printk("PVH: Failed to copy to dom0. len:%d rem:%d\n", bytes, rem); + BUG(); + } +} + int __init construct_dom0( struct domain *d, const module_t *image, unsigned long image_headroom, @@ -314,6 +375,7 @@ int __init construct_dom0( void *(*bootstrap_map)(const module_t *), char *cmdline) { + char *si_buf=NULL, *tmp_buf=NULL; int i, cpu, rc, compatible, compat32, order, machine; struct cpu_user_regs *regs; unsigned long pfn, mfn; @@ -322,7 +384,7 @@ int __init construct_dom0( unsigned long alloc_spfn; unsigned long alloc_epfn; unsigned long initrd_pfn = -1, initrd_mfn = 0; - unsigned long count; + unsigned long count, shared_info_pfn_addr = 0; struct page_info *page = NULL; start_info_t *si; struct vcpu *v = d->vcpu[0]; @@ -416,6 +478,13 @@ int __init construct_dom0( { printk("Kernel does not support Dom0 operation\n"); return -EINVAL; + + if ( is_pvh_domain(d) && + !test_bit(XENFEAT_hvm_callback_vector, parms.f_supported) ) + { + printk("Kernel does not support PVH mode\n"); + return -EINVAL; + } } if ( compat32 ) @@ -480,6 +549,12 @@ int __init construct_dom0( vstartinfo_end = (vstartinfo_start + sizeof(struct start_info) + sizeof(struct dom0_vga_console_info)); + + if ( is_pvh_domain(d) ) { + shared_info_pfn_addr = round_pgup(vstartinfo_end) - v_start; + vstartinfo_end += PAGE_SIZE; + } + vpt_start = round_pgup(vstartinfo_end); for ( nr_pt_pages = 2; ; nr_pt_pages++ ) { @@ -621,16 +696,26 @@ int __init construct_dom0( maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l3_page_table; l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; } - clear_page(l4tab); - init_guest_l4_table(l4tab, d); - v->arch.guest_table = pagetable_from_paddr(__pa(l4start)); - if ( is_pv_32on64_domain(d) ) - v->arch.guest_table_user = v->arch.guest_table; + if ( is_pvh_domain(d) ) + { + v->arch.guest_table = pagetable_from_paddr(vpt_start - v_start); + pfn = 0; + } else { + clear_page(l4tab); + init_guest_l4_table(l4tab, d); + v->arch.guest_table = pagetable_from_paddr(__pa(l4start)); + if ( is_pv_32on64_domain(d) ) + v->arch.guest_table_user = v->arch.guest_table; + pfn = alloc_spfn; + } l4tab += l4_table_offset(v_start); - pfn = alloc_spfn; for ( count = 0; count < ((v_end-v_start)>>PAGE_SHIFT); count++ ) { + /* initrd chunk's mfns are separate, so we need to adjust for them */ + signed long pvh_adj = is_pvh_domain(d) ? + (PFN_UP(initrd_len) - alloc_spfn)<<PAGE_SHIFT : 0; + if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) { maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table; @@ -657,16 +742,17 @@ int __init construct_dom0( clear_page(l3tab); if ( count == 0 ) l3tab += l3_table_offset(v_start); - *l4tab = l4e_from_paddr(__pa(l3start), L4_PROT); + *l4tab = l4e_from_paddr(__pa(l3start) + pvh_adj, L4_PROT); l4tab++; } - *l3tab = l3e_from_paddr(__pa(l2start), L3_PROT); + *l3tab = l3e_from_paddr(__pa(l2start) + pvh_adj, L3_PROT); l3tab++; } - *l2tab = l2e_from_paddr(__pa(l1start), L2_PROT); + *l2tab = l2e_from_paddr(__pa(l1start) + pvh_adj, L2_PROT); l2tab++; } - if ( count < initrd_pfn || count >= initrd_pfn + PFN_UP(initrd_len) ) + if ( is_pvh_domain(d) || + count < initrd_pfn || count >= initrd_pfn + PFN_UP(initrd_len) ) mfn = pfn++; else mfn = initrd_mfn++; @@ -674,6 +760,9 @@ int __init construct_dom0( L1_PROT : COMPAT_L1_PROT)); l1tab++; + if ( is_pvh_domain(d) ) + continue; + page = mfn_to_page(mfn); if ( (page->u.inuse.type_info == 0) && !get_page_and_type(page, d, PGT_writable_page) ) @@ -702,6 +791,9 @@ int __init construct_dom0( COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*l2tab)); } + if ( is_pvh_domain(d) ) + goto pvh_skip_pt_rdonly; + /* Pages that are part of page tables must be read only. */ l4tab = l4start + l4_table_offset(vpt_start); l3start = l3tab = l4e_to_l3e(*l4tab); @@ -741,6 +833,8 @@ int __init construct_dom0( } } +pvh_skip_pt_rdonly: + /* Mask all upcalls... */ for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ ) shared_info(d, vcpu_info[i].evtchn_upcall_mask) = 1; @@ -754,6 +848,11 @@ int __init construct_dom0( (void)alloc_vcpu(d, i, cpu); } + if ( is_pvh_domain(d) ) + { + v->arch.cr3 = v->arch.hvm_vcpu.guest_cr[3] = + (pagetable_get_pfn(v->arch.guest_table)) << PAGE_SHIFT; + } /* Set up CR3 value for write_ptbase */ if ( paging_mode_enabled(d) ) paging_update_paging_modes(v); @@ -764,35 +863,16 @@ int __init construct_dom0( write_ptbase(v); mapcache_override_current(v); - /* Copy the OS image and free temporary buffer. */ - elf.dest = (void*)vkern_start; - rc = elf_load_binary(&elf, 0); - if ( rc < 0 ) - { - printk("Failed to load the kernel binary\n"); - return rc; - } - bootstrap_map(NULL); - - if ( UNSET_ADDR != parms.virt_hypercall ) - { - if ( (parms.virt_hypercall < v_start) || - (parms.virt_hypercall >= v_end) ) - { - mapcache_override_current(NULL); - write_ptbase(current); - printk("Invalid HYPERCALL_PAGE field in ELF notes.\n"); - return -1; + /* Set up start info area. */ + if ( is_pvh_domain(d) ) { + if ( (si_buf=xmalloc_bytes(PAGE_SIZE)) == NULL) { + printk("PVH: xmalloc failed to alloc %ld bytes.\n", PAGE_SIZE); + return -ENOMEM; } - hypercall_page_initialise( - d, (void *)(unsigned long)parms.virt_hypercall); - } - - /* Free temporary buffers. */ - discard_initial_images(); + si = (start_info_t *)si_buf; + } else + si = (start_info_t *)vstartinfo_start; - /* Set up start info area. */ - si = (start_info_t *)vstartinfo_start; clear_page(si); si->nr_pages = nr_pages; @@ -814,7 +894,7 @@ int __init construct_dom0( l2tab = NULL; l1tab = NULL; /* Set up the phys->machine table if not part of the initial mapping. */ - if ( parms.p2m_base != UNSET_ADDR ) + if ( parms.p2m_base != UNSET_ADDR && !is_pvh_domain(d) ) { unsigned long va = vphysmap_start; @@ -935,6 +1015,9 @@ int __init construct_dom0( unmap_domain_page(l3tab); unmap_domain_page(l4start); + if (is_pvh_domain(d) ) + hap_set_pvh_alloc_for_dom0(d, nr_pages); + /* Write the phys->machine and machine->phys table entries. */ for ( pfn = 0; pfn < count; pfn++ ) { @@ -951,11 +1034,8 @@ int __init construct_dom0( if ( pfn > REVERSE_START && (vinitrd_start || pfn < initrd_pfn) ) mfn = alloc_epfn - (pfn - REVERSE_START); #endif - if ( !is_pv_32on64_domain(d) ) - ((unsigned long *)vphysmap_start)[pfn] = mfn; - else - ((unsigned int *)vphysmap_start)[pfn] = mfn; - set_gpfn_from_mfn(mfn, pfn); + dom0_update_physmap(d, pfn, mfn, vphysmap_start); + if (!(pfn & 0xfffff)) process_pending_softirqs(); } @@ -971,8 +1051,8 @@ int __init construct_dom0( if ( !page->u.inuse.type_info && !get_page_and_type(page, d, PGT_writable_page) ) BUG(); - ((unsigned long *)vphysmap_start)[pfn] = mfn; - set_gpfn_from_mfn(mfn, pfn); + + dom0_update_physmap(d, pfn, mfn, vphysmap_start); ++pfn; if (!(pfn & 0xfffff)) process_pending_softirqs(); @@ -992,11 +1072,7 @@ int __init construct_dom0( #ifndef NDEBUG #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn))) #endif - if ( !is_pv_32on64_domain(d) ) - ((unsigned long *)vphysmap_start)[pfn] = mfn; - else - ((unsigned int *)vphysmap_start)[pfn] = mfn; - set_gpfn_from_mfn(mfn, pfn); + dom0_update_physmap(d, pfn, mfn, vphysmap_start); #undef pfn page++; pfn++; if (!(pfn & 0xfffff)) @@ -1004,6 +1080,47 @@ int __init construct_dom0( } } + /* Copy the OS image and free temporary buffer. */ + elf.dest = (void*)vkern_start; + rc = elf_load_binary(&elf, is_pvh_domain(d) ); + if ( rc < 0 ) + { + printk("Failed to load the kernel binary\n"); + return rc; + } + bootstrap_map(NULL); + + if ( UNSET_ADDR != parms.virt_hypercall ) + { + void *addr; + + if ( is_pvh_domain(d) ) { + if ( (tmp_buf=xzalloc_bytes(PAGE_SIZE)) == NULL ) { + printk("xzalloc failed for tmp_buf. %ld bytes.\n", PAGE_SIZE); + return -ENOMEM; + } + addr = tmp_buf; + } else + addr = (void *)parms.virt_hypercall; + + if ( (parms.virt_hypercall < v_start) || + (parms.virt_hypercall >= v_end) ) + { + write_ptbase(current); + printk("Invalid HYPERCALL_PAGE field in ELF notes.\n"); + return -1; + } + hypercall_page_initialise(d, addr); + + if ( is_pvh_domain(d) ) { + copy_pvh((void *)parms.virt_hypercall, tmp_buf, PAGE_SIZE); + xfree(tmp_buf); + } + } + + /* Free temporary buffers. */ + discard_initial_images(); + if ( initrd_len != 0 ) { si->mod_start = vinitrd_start ?: initrd_pfn; @@ -1019,6 +1136,15 @@ int __init construct_dom0( si->console.dom0.info_off = sizeof(struct start_info); si->console.dom0.info_size = sizeof(struct dom0_vga_console_info); } + if ( is_pvh_domain(d) ) { + unsigned long mfn = virt_to_mfn(d->shared_info); + unsigned long pfn = shared_info_pfn_addr>>PAGE_SHIFT; + si->shared_info = shared_info_pfn_addr; + dom0_update_physmap(d, pfn, mfn, 0); + + copy_pvh((char *)vstartinfo_start, si_buf, PAGE_SIZE); + xfree(si_buf); + } if ( is_pv_32on64_domain(d) ) xlat_start_info(si, XLAT_start_info_console_dom0); @@ -1050,12 +1176,16 @@ int __init construct_dom0( regs->eip = parms.virt_entry; regs->esp = vstack_end; regs->esi = vstartinfo_start; - regs->eflags = X86_EFLAGS_IF; + regs->eflags = X86_EFLAGS_IF | 0x2; - if ( opt_dom0_shadow ) + if ( opt_dom0_shadow ) { + if ( is_pvh_domain(d) ) { + printk("Invalid option dom0_shadow for PVH\n"); + return -EINVAL; + } if ( paging_enable(d, PG_SH_enable) == 0 ) paging_update_paging_modes(v); - + } if ( supervisor_mode_kernel ) { v->arch.pv_vcpu.kernel_ss &= ~3; @@ -1132,6 +1262,9 @@ int __init construct_dom0( BUG_ON(rc != 0); + if ( is_pvh_domain(d) ) + pvh_map_all_iomem(d); + iommu_dom0_init(dom0); return 0; diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c index 055833d..d3d5697 100644 --- a/xen/arch/x86/mm/hap/hap.c +++ b/xen/arch/x86/mm/hap/hap.c @@ -574,6 +574,20 @@ int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, } } +/* Resize hap table. Copied from: libxl_get_required_shadow_memory() */ +void hap_set_pvh_alloc_for_dom0(struct domain *d, unsigned long num_pages) +{ + int rc; + unsigned long memkb = num_pages * (PAGE_SIZE / 1024); + + memkb = 4 * (256 * d->max_vcpus + 2 * (memkb / 1024)); + num_pages = ((memkb+1023)/1024) << (20 - PAGE_SHIFT); + paging_lock(d); + rc = hap_set_allocation(d, num_pages, NULL); + paging_unlock(d); + BUG_ON(rc); +} + static const struct paging_mode hap_paging_real_mode; static const struct paging_mode hap_paging_protected_mode; static const struct paging_mode hap_paging_pae_mode; @@ -633,7 +647,8 @@ static void hap_update_cr3(struct vcpu *v, int do_locking) const struct paging_mode * hap_paging_get_mode(struct vcpu *v) { - return !hvm_paging_enabled(v) ? &hap_paging_real_mode : + return is_pvh_vcpu(v) ? &hap_paging_long_mode : + !hvm_paging_enabled(v) ? &hap_paging_real_mode : hvm_long_mode_enabled(v) ? &hap_paging_long_mode : hvm_pae_enabled(v) ? &hap_paging_pae_mode : &hap_paging_protected_mode; diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index 43301a5..f307f24 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -60,6 +60,10 @@ integer_param("maxcpus", max_cpus); static bool_t __initdata disable_smep; invbool_param("smep", disable_smep); +/* Boot dom0 in PVH mode */ +static bool_t __initdata opt_dom0pvh; +boolean_param("dom0pvh", opt_dom0pvh); + /* **** Linux config option: propagated to domain0. */ /* "acpi=off": Sisables both ACPI table parsing and interpreter. */ /* "acpi=force": Override the disable blacklist. */ @@ -545,7 +549,7 @@ void __init __start_xen(unsigned long mbi_p) { char *memmap_type = NULL; char *cmdline, *kextra, *loader; - unsigned int initrdidx; + unsigned int initrdidx, domcr_flags = 0; multiboot_info_t *mbi = __va(mbi_p); module_t *mod = (module_t *)__va(mbi->mods_addr); unsigned long nr_pages, modules_headroom, *module_map; @@ -1314,7 +1318,9 @@ void __init __start_xen(unsigned long mbi_p) panic("Could not protect TXT memory regions\n"); /* Create initial domain 0. */ - dom0 = domain_create(0, DOMCRF_s3_integrity, 0); + domcr_flags = (opt_dom0pvh ? DOMCRF_pvh | DOMCRF_hap : 0); + domcr_flags |= DOMCRF_s3_integrity; + dom0 = domain_create(0, domcr_flags, 0); if ( IS_ERR(dom0) || (alloc_dom0_vcpu0() == NULL) ) panic("Error creating domain 0\n"); diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h index e03f983..aab8558 100644 --- a/xen/include/asm-x86/hap.h +++ b/xen/include/asm-x86/hap.h @@ -63,6 +63,7 @@ int hap_track_dirty_vram(struct domain *d, XEN_GUEST_HANDLE_64(uint8) dirty_bitmap); extern const struct paging_mode *hap_paging_get_mode(struct vcpu *); +void hap_set_pvh_alloc_for_dom0(struct domain *d, unsigned long num_pages); #endif /* XEN_HAP_H */ -- 1.7.2.3 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |