[Xen-devel] [RFC 0 PATCH 3/3] PVH dom0: construct_dom0 changes
This patch changes construct_dom0 to boot dom0 in PVH mode. Changes
needed to support it are also included here.

Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx>
---
 xen/arch/x86/domain_build.c |  231 +++++++++++++++++++++++++++++++++++++++----
 xen/arch/x86/domctl.c       |    2 +-
 xen/arch/x86/mm/hap/hap.c   |   14 +++
 xen/include/asm-x86/hap.h   |    1 +
 xen/include/xen/domain.h    |    3 +
 5 files changed, 231 insertions(+), 20 deletions(-)

diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index 5125aa2..3fd2b6c 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -35,6 +35,7 @@
 #include <asm/setup.h>
 #include <asm/bzimage.h> /* for bzimage_parse */
 #include <asm/io_apic.h>
+#include <asm/hap.h>
 
 #include <public/version.h>
 
@@ -307,6 +308,136 @@ static void __init process_dom0_ioports_disable(void)
     }
 }
 
+/*
+ * Set the 1:1 map for all non-RAM regions for dom0. Thus, dom0 will have
+ * the entire io region mapped in the EPT/NPT.
+ *
+ * PVH FIXME: the following doesn't map MMIO ranges when they sit above the
+ *            highest E820 covered address.
+ */
+static __init void pvh_map_all_iomem(struct domain *d)
+{
+    unsigned long start_pfn, end_pfn, end = 0, start = 0;
+    const struct e820entry *entry;
+    unsigned int i, nump;
+    int rc;
+
+    for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
+    {
+        end = entry->addr + entry->size;
+
+        if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE ||
+             i == e820.nr_map - 1 )
+        {
+            start_pfn = PFN_DOWN(start);
+            end_pfn = PFN_UP(end);
+
+            if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE )
+                end_pfn = PFN_UP(entry->addr);
+
+            if ( start_pfn < end_pfn )
+            {
+                nump = end_pfn - start_pfn;
+                /* Add pages to the mapping */
+                rc = domctl_memory_mapping(d, start_pfn, start_pfn, nump, 1);
+                BUG_ON(rc);
+            }
+            start = end;
+        }
+    }
+
+    /* If the e820 ended under 4GB, we must map the remaining space up to 4GB */
+    if ( end < GB(4) )
+    {
+        start_pfn = PFN_UP(end);
+        end_pfn = GB(4) >> PAGE_SHIFT;
+        nump = end_pfn - start_pfn;
+        rc = domctl_memory_mapping(d, start_pfn, start_pfn, nump, 1);
+        BUG_ON(rc);
+    }
+}
+
+static __init void dom0_update_physmap(struct domain *d, unsigned long pfn,
+                                       unsigned long mfn,
+                                       unsigned long vphysmap_s)
+{
+    if ( is_pvh_domain(d) )
+    {
+        int rc = guest_physmap_add_page(d, pfn, mfn, 0);
+        BUG_ON(rc);
+        return;
+    }
+    if ( !is_pv_32on64_domain(d) )
+        ((unsigned long *)vphysmap_s)[pfn] = mfn;
+    else
+        ((unsigned int *)vphysmap_s)[pfn] = mfn;
+
+    set_gpfn_from_mfn(mfn, pfn);
+}
+
+static __init void pvh_fixup_page_tables_for_hap(struct vcpu *v,
+                                                 unsigned long v_start)
+{
+    int i, j, k;
+    l4_pgentry_t *l4tab = NULL, *l4start = NULL;
+    l3_pgentry_t *l3tab = NULL;
+    l2_pgentry_t *l2tab = NULL;
+    l1_pgentry_t *l1tab = NULL;
+    l4_pgentry_t sav_guest_l4;
+    unsigned long cr3_pfn;
+
+    ASSERT(paging_mode_enabled(v->domain));
+
+    l4start = map_domain_page(pagetable_get_pfn(v->arch.guest_table));
+    l4tab = l4start + l4_table_offset(v_start);
+    sav_guest_l4 = *l4tab;
+
+    /* Give the guest a clean slate to start with */
+    clear_page(l4start);
+    *l4tab = sav_guest_l4;
+    BUG_ON(!l4e_get_pfn(sav_guest_l4));
+
+    l3tab = map_l3t_from_l4e(*l4tab);
+    for ( i = 0; i < PAGE_SIZE / sizeof(l3_pgentry_t); i++, l3tab++ )
+    {
+        if ( l3e_get_pfn(*l3tab) == 0 )
+            continue;
+
+        l2tab = map_l2t_from_l3e(*l3tab);
+        for ( j = 0; j < PAGE_SIZE / sizeof(l2_pgentry_t); j++, l2tab++ )
+        {
+            if ( l2e_get_pfn(*l2tab) == 0 )
+                continue;
+
+            l1tab = map_l1t_from_l2e(*l2tab);
+
+            for ( k = 0; k < PAGE_SIZE / sizeof(l1_pgentry_t); k++, l1tab++ )
+            {
+                if ( l1e_get_pfn(*l1tab) == 0 )
+                    continue;
+
+                *l1tab = l1e_from_pfn(get_gpfn_from_mfn(l1e_get_pfn(*l1tab)),
+                                      l1e_get_flags(*l1tab));
+            }
+            *l2tab = l2e_from_pfn(get_gpfn_from_mfn(l2e_get_pfn(*l2tab)),
+                                  l2e_get_flags(*l2tab));
+        }
+        *l3tab = l3e_from_pfn(get_gpfn_from_mfn(l3e_get_pfn(*l3tab)),
+                              l3e_get_flags(*l3tab));
+    }
+    *l4tab = l4e_from_pfn(get_gpfn_from_mfn(l4e_get_pfn(*l4tab)),
+                          l4e_get_flags(*l4tab));
+
+    cr3_pfn = get_gpfn_from_mfn(paddr_to_pfn(v->arch.cr3));
+    v->arch.hvm_vcpu.guest_cr[3] = pfn_to_paddr(cr3_pfn);
+
+    /*
+     * Now we update the paging modes (hap_update_paging_modes). This will
+     * create the monitor_table for us, and update v->arch.cr3 and vmcs.cr3.
+     */
+    paging_update_paging_modes(v);
+}
+
 static __init void mark_pv_pt_pages_rdonly(struct domain *d,
                                            l4_pgentry_t *l4start,
                                            unsigned long vpt_start,
@@ -513,7 +644,7 @@ int __init construct_dom0(
     unsigned long alloc_spfn;
     unsigned long alloc_epfn;
     unsigned long initrd_pfn = -1, initrd_mfn = 0;
-    unsigned long count;
+    unsigned long count, shared_info_paddr = 0;
     struct page_info *page = NULL;
     start_info_t *si;
     struct vcpu *v = d->vcpu[0];
@@ -526,6 +657,7 @@ int __init construct_dom0(
     l3_pgentry_t *l3tab = NULL, *l3start = NULL;
     l2_pgentry_t *l2tab = NULL, *l2start = NULL;
     l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+    u32 save_pvh_pg_mode = 0;
 
     /*
      * This fully describes the memory layout of the initial domain. All
@@ -603,12 +735,20 @@ int __init construct_dom0(
         goto out;
     }
 
-    if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE &&
-         !test_bit(XENFEAT_dom0, parms.f_supported) )
+    if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE )
     {
-        printk("Kernel does not support Dom0 operation\n");
-        rc = -EINVAL;
-        goto out;
+        if ( !test_bit(XENFEAT_dom0, parms.f_supported) )
+        {
+            printk("Kernel does not support Dom0 operation\n");
+            rc = -EINVAL;
+            goto out;
+        }
+        if ( is_pvh_domain(d) &&
+             !test_bit(XENFEAT_hvm_callback_vector, parms.f_supported) )
+        {
+            printk("Kernel does not support PVH mode\n");
+            return -EINVAL;
+        }
     }
 
     if ( compat32 )
@@ -673,6 +813,14 @@ int __init construct_dom0(
     vstartinfo_end   = (vstartinfo_start +
                         sizeof(struct start_info) +
                         sizeof(struct dom0_vga_console_info));
+
+    if ( is_pvh_domain(d) )
+    {
+        /* Note: the following is a paddr, not a maddr */
+        shared_info_paddr = round_pgup(vstartinfo_end) - v_start;
+        vstartinfo_end   += PAGE_SIZE;
+    }
+
     vpt_start        = round_pgup(vstartinfo_end);
     for ( nr_pt_pages = 2; ; nr_pt_pages++ )
     {
@@ -868,6 +1016,9 @@ int __init construct_dom0(
                                             L1_PROT : COMPAT_L1_PROT));
         l1tab++;
 
+        if ( is_pvh_domain(d) )
+            continue;
+
         page = mfn_to_page(mfn);
         if ( (page->u.inuse.type_info == 0) &&
              !get_page_and_type(page, d, PGT_writable_page) )
@@ -912,6 +1063,16 @@ int __init construct_dom0(
             (void)alloc_vcpu(d, i, cpu);
     }
 
+    /*
+     * PVH: we temporarily disable the paging mode so that we can build the
+     * cr3 needed to run on dom0's page tables.
+     */
+    if ( is_pvh_domain(d) )
+    {
+        save_pvh_pg_mode = d->arch.paging.mode;
+        d->arch.paging.mode = 0;
+    }
+
     /* Set up CR3 value for write_ptbase */
     if ( paging_mode_enabled(d) )
         paging_update_paging_modes(v);
@@ -974,6 +1135,17 @@ int __init construct_dom0(
     setup_pv_p2m_table(d, v, &parms, v_start, vphysmap_start, vphysmap_end,
                        v_end, nr_pages);
 
+    if ( is_pvh_domain(d) )
+    {
+        hap_set_pvh_alloc_for_dom0(d, nr_pages);
+
+        /*
+         * We enable the paging mode again so that guest_physmap_add_page
+         * will do the right thing for us.
+         */
+        d->arch.paging.mode = save_pvh_pg_mode;
+    }
+
     /* Write the phys->machine and machine->phys table entries. */
     for ( pfn = 0; pfn < count; pfn++ )
     {
@@ -990,11 +1162,7 @@ int __init construct_dom0(
         if ( pfn > REVERSE_START && (vinitrd_start || pfn < initrd_pfn) )
             mfn = alloc_epfn - (pfn - REVERSE_START);
 #endif
-        if ( !is_pv_32on64_domain(d) )
-            ((unsigned long *)vphysmap_start)[pfn] = mfn;
-        else
-            ((unsigned int *)vphysmap_start)[pfn] = mfn;
-        set_gpfn_from_mfn(mfn, pfn);
+        dom0_update_physmap(d, pfn, mfn, vphysmap_start);
         if (!(pfn & 0xfffff))
             process_pending_softirqs();
     }
@@ -1010,8 +1178,8 @@ int __init construct_dom0(
             if ( !page->u.inuse.type_info &&
                  !get_page_and_type(page, d, PGT_writable_page) )
                 BUG();
-            ((unsigned long *)vphysmap_start)[pfn] = mfn;
-            set_gpfn_from_mfn(mfn, pfn);
+
+            dom0_update_physmap(d, pfn, mfn, vphysmap_start);
             ++pfn;
             if (!(pfn & 0xfffff))
                 process_pending_softirqs();
@@ -1031,11 +1199,7 @@ int __init construct_dom0(
 #ifndef NDEBUG
 #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn)))
 #endif
-            if ( !is_pv_32on64_domain(d) )
-                ((unsigned long *)vphysmap_start)[pfn] = mfn;
-            else
-                ((unsigned int *)vphysmap_start)[pfn] = mfn;
-            set_gpfn_from_mfn(mfn, pfn);
+            dom0_update_physmap(d, pfn, mfn, vphysmap_start);
 #undef pfn
             page++; pfn++;
             if (!(pfn & 0xfffff))
@@ -1059,6 +1223,15 @@ int __init construct_dom0(
         si->console.dom0.info_size = sizeof(struct dom0_vga_console_info);
     }
 
+    /*
+     * PVH: we need to update si->shared_info while we are on dom0 page
+     * tables, but we need to defer the p2m update until after we have
+     * fixed up the page tables for PVH, so that the m2p for the si pte
+     * entry returns the correct pfn.
+     */
+    if ( is_pvh_domain(d) )
+        si->shared_info = shared_info_paddr;
+
     if ( is_pv_32on64_domain(d) )
         xlat_start_info(si, XLAT_start_info_console_dom0);
 
@@ -1089,11 +1262,18 @@ int __init construct_dom0(
     regs->eip = parms.virt_entry;
     regs->esp = vstack_end;
     regs->esi = vstartinfo_start;
-    regs->eflags = X86_EFLAGS_IF;
+    regs->eflags = X86_EFLAGS_IF | 0x2;
 
     if ( opt_dom0_shadow )
+    {
+        if ( is_pvh_domain(d) )
+        {
+            printk("Invalid option dom0_shadow for PVH\n");
+            return -EINVAL;
+        }
         if ( paging_enable(d, PG_SH_enable) == 0 )
             paging_update_paging_modes(v);
+    }
 
     if ( supervisor_mode_kernel )
     {
@@ -1183,6 +1363,19 @@ int __init construct_dom0(
         printk(" Xen warning: dom0 kernel broken ELF: %s\n",
                elf_check_broken(&elf));
 
+    if ( is_pvh_domain(d) )
+    {
+        /* Finally, fix up the page tables, replacing mfns with pfns */
+        pvh_fixup_page_tables_for_hap(v, v_start);
+
+        /* The pt now has the correct pfn for si; update the mfn in the p2m */
+        mfn = virt_to_mfn(d->shared_info);
+        pfn = shared_info_paddr >> PAGE_SHIFT;
+        dom0_update_physmap(d, pfn, mfn, 0);
+
+        pvh_map_all_iomem(d);
+    }
+
     iommu_dom0_init(dom0);
     return 0;
 
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index fe7ca00..998827c 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -46,7 +46,7 @@ static int gdbsx_guest_mem_io(
     return (iop->remain ? -EFAULT : 0);
 }
 
-static long domctl_memory_mapping(struct domain *d, unsigned long gfn,
+long domctl_memory_mapping(struct domain *d, unsigned long gfn,
                     unsigned long mfn, unsigned long nr_mfns, bool_t add)
 {
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index bff05d9..2f5a48b 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -580,6 +580,20 @@ int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
     }
 }
 
+void hap_set_pvh_alloc_for_dom0(struct domain *d, unsigned long num_pages)
+{
+    int rc;
+    unsigned long memkb = num_pages * (PAGE_SIZE / 1024);
+
+    /* Copied from: libxl_get_required_shadow_memory() */
+    memkb = 4 * (256 * d->max_vcpus + 2 * (memkb / 1024));
+    num_pages = ((memkb + 1023) / 1024) << (20 - PAGE_SHIFT);
+    paging_lock(d);
+    rc = hap_set_allocation(d, num_pages, NULL);
+    paging_unlock(d);
+    BUG_ON(rc);
+}
+
 static const struct paging_mode hap_paging_real_mode;
 static const struct paging_mode hap_paging_protected_mode;
 static const struct paging_mode hap_paging_pae_mode;
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index e03f983..aab8558 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -63,6 +63,7 @@ int hap_track_dirty_vram(struct domain *d,
                          XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
+void hap_set_pvh_alloc_for_dom0(struct domain *d, unsigned long num_pages);
 
 #endif /* XEN_HAP_H */
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index a057069..6436bab 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -89,4 +89,7 @@ extern unsigned int xen_processor_pmbits;
 
 extern bool_t opt_dom0_vcpus_pin;
 
+extern long domctl_memory_mapping(struct domain *d, unsigned long gfn,
+                    unsigned long mfn, unsigned long nr_mfns, bool_t add_map);
+
 #endif /* __XEN_DOMAIN_H__ */
-- 
1.7.2.3

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel