x86: properly use map_domain_page() during domain creation/destruction

This involves no longer storing virtual addresses of the per-domain
mapping L2 and L3 page tables.

Signed-off-by: Jan Beulich
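[ Editorial note, not part of the patch: every conversion below follows the
  same pattern -- instead of keeping a permanent virtual address obtained
  via page_to_virt()/__va(), only the struct page_info pointer is stored,
  and a transient mapping is established around each access.  A minimal
  sketch of that pattern, using only the hypervisor interfaces the patch
  itself relies on; example_set_perdomain_l3_slot() is a made-up helper
  name used purely for illustration:

      static void example_set_perdomain_l3_slot(struct page_info *l3_pg,
                                                struct page_info *l2_pg)
      {
          /* Map the L3 page just for the duration of the update. */
          l3_pgentry_t *l3tab = __map_domain_page(l3_pg);

          l3tab[l3_table_offset(PERDOMAIN_VIRT_START)] =
              l3e_from_page(l2_pg, __PAGE_HYPERVISOR);

          /* Drop the transient mapping again. */
          unmap_domain_page(l3tab);
      }

  Error paths need the same care: any early return taken while such a
  mapping is live must unmap first, as done in the mapcache_domain_init()
  and mapcache_vcpu_init() changes below. ]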
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -289,9 +289,10 @@ static int setup_compat_l4(struct vcpu *
     /* This page needs to look like a pagetable so that it can be shadowed */
     pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;
 
-    l4tab = page_to_virt(pg);
+    l4tab = __map_domain_page(pg);
     clear_page(l4tab);
     init_guest_l4_table(l4tab, v->domain);
+    unmap_domain_page(l4tab);
 
     v->arch.guest_table = pagetable_from_page(pg);
     v->arch.guest_table_user = v->arch.guest_table;
@@ -383,17 +384,22 @@ int vcpu_initialise(struct vcpu *v)
 
     v->arch.flags = TF_kernel_mode;
 
-    idx = perdomain_pt_pgidx(v);
-    if ( !perdomain_pt_page(d, idx) )
+    idx = perdomain_pt_idx(v);
+    if ( !d->arch.perdomain_pts[idx] )
     {
-        struct page_info *pg;
-        pg = alloc_domheap_page(NULL, MEMF_node(vcpu_to_node(v)));
-        if ( !pg )
+        void *pt;
+        l2_pgentry_t *l2tab;
+
+        pt = alloc_xenheap_pages(0, MEMF_node(vcpu_to_node(v)));
+        if ( !pt )
             return -ENOMEM;
-        clear_page(page_to_virt(pg));
-        perdomain_pt_page(d, idx) = pg;
-        d->arch.mm_perdomain_l2[0][l2_table_offset(PERDOMAIN_VIRT_START)+idx]
-            = l2e_from_page(pg, __PAGE_HYPERVISOR);
+        clear_page(pt);
+        d->arch.perdomain_pts[idx] = pt;
+
+        l2tab = __map_domain_page(d->arch.perdomain_l2_pg[0]);
+        l2tab[l2_table_offset(PERDOMAIN_VIRT_START) + idx]
+            = l2e_from_paddr(__pa(pt), __PAGE_HYPERVISOR);
+        unmap_domain_page(l2tab);
     }
 
     rc = mapcache_vcpu_init(v);
@@ -484,6 +490,7 @@ void vcpu_destroy(struct vcpu *v)
 int arch_domain_create(struct domain *d, unsigned int domcr_flags)
 {
     struct page_info *pg;
+    l3_pgentry_t *l3tab;
     int i, paging_initialised = 0;
     int rc = -ENOMEM;
 
@@ -514,28 +521,29 @@ int arch_domain_create(struct domain *d,
                d->domain_id);
     }
 
-    BUILD_BUG_ON(PDPT_L2_ENTRIES * sizeof(*d->arch.mm_perdomain_pt_pages)
+    BUILD_BUG_ON(PDPT_L2_ENTRIES * sizeof(*d->arch.perdomain_pts)
                  != PAGE_SIZE);
-    pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
-    if ( !pg )
+    d->arch.perdomain_pts =
+        alloc_xenheap_pages(0, MEMF_node(domain_to_node(d)));
+    if ( !d->arch.perdomain_pts )
         goto fail;
-    d->arch.mm_perdomain_pt_pages = page_to_virt(pg);
-    clear_page(d->arch.mm_perdomain_pt_pages);
+    clear_page(d->arch.perdomain_pts);
 
     pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
     if ( pg == NULL )
         goto fail;
-    d->arch.mm_perdomain_l2[0] = page_to_virt(pg);
-    clear_page(d->arch.mm_perdomain_l2[0]);
+    d->arch.perdomain_l2_pg[0] = pg;
+    clear_domain_page(page_to_mfn(pg));
 
     pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
     if ( pg == NULL )
        goto fail;
-    d->arch.mm_perdomain_l3 = page_to_virt(pg);
-    clear_page(d->arch.mm_perdomain_l3);
-    d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
-        l3e_from_pfn(virt_to_mfn(d->arch.mm_perdomain_l2[0]),
-                     __PAGE_HYPERVISOR);
+    d->arch.perdomain_l3_pg = pg;
+    l3tab = __map_domain_page(pg);
+    clear_page(l3tab);
+    l3tab[l3_table_offset(PERDOMAIN_VIRT_START)] =
+        l3e_from_page(d->arch.perdomain_l2_pg[0], __PAGE_HYPERVISOR);
+    unmap_domain_page(l3tab);
 
     mapcache_domain_init(d);
 
@@ -611,12 +619,12 @@ int arch_domain_create(struct domain *d,
     if ( paging_initialised )
         paging_final_teardown(d);
     mapcache_domain_exit(d);
-    if ( d->arch.mm_perdomain_l2[0] )
-        free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2[0]));
-    if ( d->arch.mm_perdomain_l3 )
-        free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
-    if ( d->arch.mm_perdomain_pt_pages )
-        free_domheap_page(virt_to_page(d->arch.mm_perdomain_pt_pages));
+    for ( i = 0; i < PERDOMAIN_SLOTS; ++i)
+        if ( d->arch.perdomain_l2_pg[i] )
+            free_domheap_page(d->arch.perdomain_l2_pg[i]);
+    if ( d->arch.perdomain_l3_pg )
+        free_domheap_page(d->arch.perdomain_l3_pg);
+    free_xenheap_page(d->arch.perdomain_pts);
 
     return rc;
 }
@@ -638,13 +646,12 @@ void arch_domain_destroy(struct domain *
     mapcache_domain_exit(d);
 
     for ( i = 0; i < PDPT_L2_ENTRIES; ++i )
-    {
-        if ( perdomain_pt_page(d, i) )
-            free_domheap_page(perdomain_pt_page(d, i));
-    }
-    free_domheap_page(virt_to_page(d->arch.mm_perdomain_pt_pages));
-    free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2[0]));
-    free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
+        free_xenheap_page(d->arch.perdomain_pts[i]);
+    free_xenheap_page(d->arch.perdomain_pts);
+    for ( i = 0; i < PERDOMAIN_SLOTS; ++i)
+        if ( d->arch.perdomain_l2_pg[i] )
+            free_domheap_page(d->arch.perdomain_l2_pg[i]);
+    free_domheap_page(d->arch.perdomain_l3_pg);
 
     free_xenheap_page(d->shared_info);
     cleanup_domain_irq_mapping(d);
@@ -810,9 +817,10 @@ int arch_set_info_guest(
             fail |= xen_pfn_to_cr3(pfn) != c.nat->ctrlreg[1];
         }
     } else {
-        l4_pgentry_t *l4tab = __va(pfn_to_paddr(pfn));
+        l4_pgentry_t *l4tab = map_domain_page(pfn);
 
         pfn = l4e_get_pfn(*l4tab);
+        unmap_domain_page(l4tab);
         fail = compat_pfn_to_cr3(pfn) != c.cmp->ctrlreg[3];
     }
 
@@ -951,9 +959,10 @@ int arch_set_info_guest(
             return -EINVAL;
         }
 
-        l4tab = __va(pagetable_get_paddr(v->arch.guest_table));
+        l4tab = map_domain_page(pagetable_get_pfn(v->arch.guest_table));
         *l4tab = l4e_from_pfn(page_to_mfn(cr3_page),
             _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED);
+        unmap_domain_page(l4tab);
     }
 
     if ( v->vcpu_id == 0 )
@@ -1971,12 +1980,13 @@ static int relinquish_memory(
 static void vcpu_destroy_pagetables(struct vcpu *v)
 {
     struct domain *d = v->domain;
-    unsigned long pfn;
+    unsigned long pfn = pagetable_get_pfn(v->arch.guest_table);
 
     if ( is_pv_32on64_vcpu(v) )
     {
-        pfn = l4e_get_pfn(*(l4_pgentry_t *)
-                          __va(pagetable_get_paddr(v->arch.guest_table)));
+        l4_pgentry_t *l4tab = map_domain_page(pfn);
+
+        pfn = l4e_get_pfn(*l4tab);
 
         if ( pfn != 0 )
         {
@@ -1986,15 +1996,12 @@ static void vcpu_destroy_pagetables(stru
                 put_page_and_type(mfn_to_page(pfn));
         }
 
-        l4e_write(
-            (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)),
-            l4e_empty());
+        l4e_write(l4tab, l4e_empty());
 
         v->arch.cr3 = 0;
         return;
     }
 
-    pfn = pagetable_get_pfn(v->arch.guest_table);
     if ( pfn != 0 )
     {
         if ( paging_mode_refcounts(d) )
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -241,6 +241,8 @@ void copy_domain_page(unsigned long dmfn
 int mapcache_domain_init(struct domain *d)
 {
     struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+    l3_pgentry_t *l3tab;
+    l2_pgentry_t *l2tab;
     unsigned int i, bitmap_pages, memf = MEMF_node(domain_to_node(d));
     unsigned long *end;
 
@@ -251,14 +253,18 @@ int mapcache_domain_init(struct domain *
         return 0;
 
     dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1);
-    d->arch.mm_perdomain_l2[MAPCACHE_SLOT] = alloc_xenheap_pages(0, memf);
-    if ( !dcache->l1tab || !d->arch.mm_perdomain_l2[MAPCACHE_SLOT] )
+    d->arch.perdomain_l2_pg[MAPCACHE_SLOT] = alloc_domheap_page(NULL, memf);
+    if ( !dcache->l1tab || !d->arch.perdomain_l2_pg[MAPCACHE_SLOT] )
         return -ENOMEM;
 
-    clear_page(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]);
-    d->arch.mm_perdomain_l3[l3_table_offset(MAPCACHE_VIRT_START)] =
-        l3e_from_paddr(__pa(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]),
-                       __PAGE_HYPERVISOR);
+    clear_domain_page(page_to_mfn(d->arch.perdomain_l2_pg[MAPCACHE_SLOT]));
+    l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+    l3tab[l3_table_offset(MAPCACHE_VIRT_START)] =
+        l3e_from_page(d->arch.perdomain_l2_pg[MAPCACHE_SLOT],
+                      __PAGE_HYPERVISOR);
+    unmap_domain_page(l3tab);
+
+    l2tab = __map_domain_page(d->arch.perdomain_l2_pg[MAPCACHE_SLOT]);
 
     BUILD_BUG_ON(MAPCACHE_VIRT_END + 3 +
                  2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long)) >
@@ -275,12 +281,16 @@ int mapcache_domain_init(struct domain *
         ASSERT(i <= MAPCACHE_L2_ENTRIES);
         dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
         if ( !dcache->l1tab[i] )
+        {
+            unmap_domain_page(l2tab);
             return -ENOMEM;
+        }
         clear_page(dcache->l1tab[i]);
-        d->arch.mm_perdomain_l2[MAPCACHE_SLOT][i] =
-            l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR);
+        l2tab[i] = l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR);
     }
 
+    unmap_domain_page(l2tab);
+
     spin_lock_init(&dcache->lock);
 
     return 0;
@@ -315,19 +325,21 @@ void mapcache_domain_exit(struct domain
         xfree(dcache->l1tab);
     }
 
-    free_xenheap_page(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]);
 }
 
 int mapcache_vcpu_init(struct vcpu *v)
 {
     struct domain *d = v->domain;
     struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+    l2_pgentry_t *l2tab;
     unsigned long i;
     unsigned int memf = MEMF_node(vcpu_to_node(v));
 
     if ( is_hvm_vcpu(v) || !dcache->l1tab )
         return 0;
 
+    l2tab = __map_domain_page(d->arch.perdomain_l2_pg[MAPCACHE_SLOT]);
+
     while ( dcache->entries < d->max_vcpus * MAPCACHE_VCPU_ENTRIES )
     {
         unsigned int ents = dcache->entries + MAPCACHE_VCPU_ENTRIES;
@@ -338,10 +350,13 @@ int mapcache_vcpu_init(struct vcpu *v)
         {
             dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
             if ( !dcache->l1tab[i] )
+            {
+                unmap_domain_page(l2tab);
                 return -ENOMEM;
+            }
             clear_page(dcache->l1tab[i]);
-            d->arch.mm_perdomain_l2[MAPCACHE_SLOT][i] =
-                l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR);
+            l2tab[i] = l2e_from_paddr(__pa(dcache->l1tab[i]),
+                                      __PAGE_HYPERVISOR);
         }
 
         /* Populate bit maps. */
@@ -351,18 +366,22 @@ int mapcache_vcpu_init(struct vcpu *v)
         {
             struct page_info *pg = alloc_domheap_page(NULL, memf);
 
+            if ( pg )
+            {
+                clear_domain_page(page_to_mfn(pg));
+                *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
+                pg = alloc_domheap_page(NULL, memf);
+            }
             if ( !pg )
+            {
+                unmap_domain_page(l2tab);
                 return -ENOMEM;
-            clear_domain_page(page_to_mfn(pg));
-            *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
+            }
 
             i = (unsigned long)(dcache->garbage + BITS_TO_LONGS(ents));
             pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
             ASSERT(!l1e_get_flags(*pl1e));
 
-            pg = alloc_domheap_page(NULL, memf);
-            if ( !pg )
-                return -ENOMEM;
             clear_domain_page(page_to_mfn(pg));
             *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
         }
@@ -370,6 +389,8 @@ int mapcache_vcpu_init(struct vcpu *v)
         dcache->entries = ents;
     }
 
+    unmap_domain_page(l2tab);
+
     /* Mark all maphash entries as not in use. */
     BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES);
     for ( i = 0; i < MAPHASH_ENTRIES; i++ )
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1322,9 +1322,9 @@ void init_guest_l4_table(l4_pgentry_t l4
            &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
            ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
     l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
-        l4e_from_pfn(virt_to_mfn(l4tab), __PAGE_HYPERVISOR);
+        l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
-        l4e_from_pfn(virt_to_mfn(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
+        l4e_from_page(d->arch.perdomain_l3_pg, __PAGE_HYPERVISOR);
 }
 
 static int alloc_l4_table(struct page_info *page, int preemptible)
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -369,7 +369,7 @@ static void hap_install_xen_entries_in_l
 
     /* Install the per-domain mappings for this domain */
     l4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
-        l4e_from_pfn(mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3))),
+        l4e_from_pfn(mfn_x(page_to_mfn(d->arch.perdomain_l3_pg)),
                      __PAGE_HYPERVISOR);
 
     /* Install a linear mapping */
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -1449,7 +1449,7 @@ void sh_install_xen_entries_in_l4(struct
 
     /* Install the per-domain mappings for this domain */
     sl4e[shadow_l4_table_offset(PERDOMAIN_VIRT_START)] =
-        shadow_l4e_from_mfn(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3)),
+        shadow_l4e_from_mfn(page_to_mfn(d->arch.perdomain_l3_pg),
                             __PAGE_HYPERVISOR);
 
     /* Shadow linear mapping for 4-level shadows. N.B. for 3-level
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -823,9 +823,8 @@ void __init setup_idle_pagetable(void)
 {
     /* Install per-domain mappings for idle domain. */
     l4e_write(&idle_pg_table[l4_table_offset(PERDOMAIN_VIRT_START)],
-              l4e_from_page(
-                  virt_to_page(idle_vcpu[0]->domain->arch.mm_perdomain_l3),
-                  __PAGE_HYPERVISOR));
+              l4e_from_page(idle_vcpu[0]->domain->arch.perdomain_l3_pg,
+                            __PAGE_HYPERVISOR));
 }
 
 void __init zap_low_mappings(void)
@@ -850,21 +849,18 @@ void *compat_arg_xlat_virt_base(void)
 int setup_compat_arg_xlat(struct vcpu *v)
 {
     unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
-    struct page_info *pg;
 
-    pg = alloc_domheap_pages(NULL, order, 0);
-    if ( pg == NULL )
-        return -ENOMEM;
+    v->arch.compat_arg_xlat = alloc_xenheap_pages(order,
+                                                  MEMF_node(vcpu_to_node(v)));
 
-    v->arch.compat_arg_xlat = page_to_virt(pg);
-    return 0;
+    return v->arch.compat_arg_xlat ? 0 : -ENOMEM;
 }
 
 void free_compat_arg_xlat(struct vcpu *v)
 {
     unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
-    if ( v->arch.compat_arg_xlat != NULL )
-        free_domheap_pages(virt_to_page(v->arch.compat_arg_xlat), order);
+
+    free_xenheap_pages(v->arch.compat_arg_xlat, order);
     v->arch.compat_arg_xlat = NULL;
 }
 
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -241,9 +241,9 @@ struct pv_domain
 
 struct arch_domain
 {
-    struct page_info **mm_perdomain_pt_pages;
-    l2_pgentry_t *mm_perdomain_l2[PERDOMAIN_SLOTS];
-    l3_pgentry_t *mm_perdomain_l3;
+    void **perdomain_pts;
+    struct page_info *perdomain_l2_pg[PERDOMAIN_SLOTS];
+    struct page_info *perdomain_l3_pg;
 
     unsigned int hv_compat_vstart;
 
@@ -318,13 +318,11 @@ struct arch_domain
 #define has_arch_pdevs(d) (!list_empty(&(d)->arch.pdev_list))
 #define has_arch_mmios(d) (!rangeset_is_empty((d)->iomem_caps))
 
-#define perdomain_pt_pgidx(v) \
+#define perdomain_pt_idx(v) \
     ((v)->vcpu_id >> (PAGETABLE_ORDER - GDT_LDT_VCPU_SHIFT))
 #define perdomain_ptes(d, v) \
-    ((l1_pgentry_t *)page_to_virt((d)->arch.mm_perdomain_pt_pages \
-      [perdomain_pt_pgidx(v)]) + (((v)->vcpu_id << GDT_LDT_VCPU_SHIFT) & \
-      (L1_PAGETABLE_ENTRIES - 1)))
-#define perdomain_pt_page(d, n) ((d)->arch.mm_perdomain_pt_pages[n])
+    ((l1_pgentry_t *)(d)->arch.perdomain_pts[perdomain_pt_idx(v)] + \
+     (((v)->vcpu_id << GDT_LDT_VCPU_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)))
 
 struct pv_vcpu
 {