[Xen-devel] [PATCH][3/4] Enable 1GB for Xen HVM host page
This patch changes the P2M code to work with 1GB pages.

Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
Acked-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>

# HG changeset patch
# User huangwei@xxxxxxxxxxxxxxxx
# Date 1266853453 21600
# Node ID 72075d4fc39e8cd11a06bff4eb66521ab8fe952b
# Parent c9b45664b423e11003358944bb8e6e976e735301
fix p2m code to support 1GB pages

diff -r c9b45664b423 -r 72075d4fc39e xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Mon Feb 22 09:44:09 2010 -0600
+++ b/xen/arch/x86/mm/p2m.c	Mon Feb 22 09:44:13 2010 -0600
@@ -187,7 +187,36 @@
     ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));

-    /* split single large page into 4KB page in P2M table */
+    /* split 1GB pages into 2MB pages */
+    if ( type == PGT_l2_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+    {
+        unsigned long flags, pfn;
+        struct page_info *pg = d->arch.p2m->alloc_page(d);
+        if ( pg == NULL )
+            return 0;
+        page_list_add_tail(pg, &d->arch.p2m->pages);
+        pg->u.inuse.type_info = PGT_l2_page_table | 1 | PGT_validated;
+        pg->count_info = 1;
+
+        flags = l1e_get_flags(*p2m_entry);
+        pfn = l1e_get_pfn(*p2m_entry);
+
+        l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
+        for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+        {
+            new_entry = l1e_from_pfn(pfn + (i * L1_PAGETABLE_ENTRIES), flags);
+            paging_write_p2m_entry(d, gfn, l1_entry+i, *table_mfn, new_entry,
+                                   2);
+        }
+        unmap_domain_page(l1_entry);
+        new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
+                                 __PAGE_HYPERVISOR|_PAGE_USER); //disable PSE
+        paging_write_p2m_entry(d, gfn,
+                               p2m_entry, *table_mfn, new_entry, 3);
+    }
+
+
+    /* split single 2MB large page into 4KB page in P2M table */
     if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
     {
         unsigned long flags, pfn;
@@ -1064,6 +1093,19 @@
     if ( unlikely(d->is_dying) )
         goto out_fail;

+    /* Because PoD does not have cache list for 1GB pages, it has to remap
+     * 1GB region to 2MB chunks for a retry. */
+    if ( order == 18 )
+    {
+        gfn_aligned = (gfn >> order) << order;
+        for( i = 0; i < (1 << order); i += (1 << 9) )
+            set_p2m_entry(d, gfn_aligned + i, _mfn(POPULATE_ON_DEMAND_MFN), 9,
+                          p2m_populate_on_demand);
+        audit_p2m(d);
+        p2m_unlock(p2md);
+        return 0;
+    }
+
     /* If we're low, start a sweep */
     if ( order == 9 && page_list_empty(&p2md->pod.super) )
         p2m_pod_emergency_sweep_super(d);
@@ -1196,6 +1238,7 @@
     l1_pgentry_t *p2m_entry;
     l1_pgentry_t entry_content;
     l2_pgentry_t l2e_content;
+    l3_pgentry_t l3e_content;
     int rv=0;

     if ( tb_init_done )
@@ -1222,18 +1265,44 @@
         goto out;
 #endif
     /*
+     * Try to allocate 1GB page table if this feature is supported.
+     *
      * When using PAE Xen, we only allow 33 bits of pseudo-physical
      * address in translated guests (i.e. 8 GBytes).  This restriction
      * comes from wanting to map the P2M table into the 16MB RO_MPT hole
      * in Xen's address space for translated PV guests.
      * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
      */
-    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
-                         L3_PAGETABLE_SHIFT - PAGE_SHIFT,
-                         ((CONFIG_PAGING_LEVELS == 3)
-                          ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
-                          : L3_PAGETABLE_ENTRIES),
-                         PGT_l2_page_table) )
+    if ( page_order == 18 )
+    {
+        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+                                   L3_PAGETABLE_SHIFT - PAGE_SHIFT,
+                                   L3_PAGETABLE_ENTRIES);
+        ASSERT(p2m_entry);
+        if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
+             !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+        {
+            P2M_ERROR("configure P2M table L3 entry with large page\n");
+            domain_crash(d);
+            goto out;
+        }
+
+        if ( mfn_valid(mfn) )
+            l3e_content = l3e_from_pfn(mfn_x(mfn),
+                                       p2m_type_to_flags(p2mt) | _PAGE_PSE);
+        else
+            l3e_content = l3e_empty();
+
+        entry_content.l1 = l3e_content.l3;
+        paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 3);
+
+    }
+    else if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                              L3_PAGETABLE_SHIFT - PAGE_SHIFT,
+                              ((CONFIG_PAGING_LEVELS == 3)
+                               ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
+                               : L3_PAGETABLE_ENTRIES),
+                              PGT_l2_page_table) )
         goto out;

     if ( page_order == 0 )
@@ -1255,7 +1324,7 @@
         /* level 1 entry */
         paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
     }
-    else
+    else if ( page_order == 9 )
     {
         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                    L2_PAGETABLE_SHIFT - PAGE_SHIFT,
@@ -1352,11 +1421,34 @@
 #else
         l3e += l3_table_offset(addr);
 #endif
+pod_retry_l3:
         if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
         {
+            if ( p2m_flags_to_type(l3e_get_flags(*l3e)) == p2m_populate_on_demand )
+            {
+                if ( q != p2m_query )
+                {
+                    if ( !p2m_pod_demand_populate(d, gfn, 18, q) )
+                        goto pod_retry_l3;
+                }
+                else
+                    *t = p2m_populate_on_demand;
+            }
             unmap_domain_page(l3e);
             return _mfn(INVALID_MFN);
         }
+        else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) )
+        {
+            mfn = _mfn(l3e_get_pfn(*l3e) +
+                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
+                       l1_table_offset(addr));
+            *t = p2m_flags_to_type(l3e_get_flags(*l3e));
+            unmap_domain_page(l3e);
+
+            ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+            return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+        }
+
         mfn = _mfn(l3e_get_pfn(*l3e));
         unmap_domain_page(l3e);
     }
@@ -1437,10 +1529,57 @@
 {
     l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
     l2_pgentry_t l2e = l2e_empty();
+    l3_pgentry_t l3e = l3e_empty();
     int ret;

     ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
            / sizeof(l1_pgentry_t));
+
+    /*
+     * Read & process L3
+     */
+    p2m_entry = (l1_pgentry_t *)
+        &__linear_l2_table[l2_linear_offset(RO_MPT_VIRT_START)
+                           + l3_linear_offset(addr)];
+    pod_retry_l3:
+    ret = __copy_from_user(&l3e, p2m_entry, sizeof(l3e));
+
+    if ( ret != 0 || !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+    {
+        if ( (l3e_get_flags(l3e) & _PAGE_PSE) &&
+             (p2m_flags_to_type(l3e_get_flags(l3e)) == p2m_populate_on_demand) )
+        {
+            /* The read has succeeded, so we know that mapping exists */
+            if ( q != p2m_query )
+            {
+                if ( !p2m_pod_demand_populate(current->domain, gfn, 18, q) )
+                    goto pod_retry_l3;
+                p2mt = p2m_invalid;
+                printk("%s: Allocate 1GB failed!\n", __func__);
+                goto out;
+            }
+            else
+            {
+                p2mt = p2m_populate_on_demand;
+                goto out;
+            }
+        }
+        goto pod_retry_l2;
+    }
+
+    if ( l3e_get_flags(l3e) & _PAGE_PSE )
+    {
+        p2mt = p2m_flags_to_type(l3e_get_flags(l3e));
+        ASSERT(l3e_get_pfn(l3e) != INVALID_MFN || !p2m_is_ram(p2mt));
+        if (p2m_is_valid(p2mt) )
+            mfn = _mfn(l3e_get_pfn(l3e) +
+                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
+                       l1_table_offset(addr));
+        else
+            p2mt = p2m_mmio_dm;
+
+        goto out;
+    }

     /*
      * Read & process L2
@@ -1596,8 +1735,10 @@
     while ( todo )
     {
         if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled )
-            order = (((gfn | mfn_x(mfn) | todo) & (SUPERPAGE_PAGES - 1)) == 0) ?
-                9 : 0;
+            order = ( (((gfn | mfn_x(mfn) | todo) & ((1ul << 18) - 1)) == 0) ) ?
+                18 :
+                (((gfn | mfn_x(mfn) | todo) & ((1ul << 9) - 1)) == 0) ? 9 : 0;
+
         else
             order = 0;
         if ( !d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt) )
@@ -1867,6 +2008,31 @@
                     gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                     continue;
                 }
+
+                /* check for 1GB super page */
+                if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE )
+                {
+                    mfn = l3e_get_pfn(l3e[i3]);
+                    ASSERT(mfn_valid(_mfn(mfn)));
+                    /* we have to cover 512x512 4K pages */
+                    for ( i2 = 0;
+                          i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES);
+                          i2++)
+                    {
+                        m2pfn = get_gpfn_from_mfn(mfn+i2);
+                        if ( m2pfn != (gfn + i2) )
+                        {
+                            pmbad++;
+                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+                                       " -> gfn %#lx\n", gfn+i2, mfn+i2,
+                                       m2pfn);
+                            BUG();
+                        }
+                        gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
+                        continue;
+                    }
+                }
+
                 l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
                 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
                 {
@@ -2224,7 +2390,7 @@
     l1_pgentry_t l1e_content;
     l1_pgentry_t *l1e;
     l2_pgentry_t *l2e;
-    mfn_t l1mfn, l2mfn;
+    mfn_t l1mfn, l2mfn, l3mfn;
     unsigned long i1, i2, i3;
     l3_pgentry_t *l3e;
 #if CONFIG_PAGING_LEVELS == 4
@@ -2245,6 +2411,7 @@
 #if CONFIG_PAGING_LEVELS == 4
     l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
 #else /* CONFIG_PAGING_LEVELS == 3 */
+    l3mfn = _mfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
     l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
 #endif

@@ -2255,6 +2422,7 @@
         {
             continue;
         }
+        l3mfn = _mfn(l4e_get_pfn(l4e[i4]));
         l3e = map_domain_page(l4e_get_pfn(l4e[i4]));
 #endif
         for ( i3 = 0;
@@ -2265,6 +2433,20 @@
             {
                 continue;
             }
+            if ( (l3e_get_flags(l3e[i3]) & _PAGE_PSE) )
+            {
+                flags = l3e_get_flags(l3e[i3]);
+                if ( p2m_flags_to_type(flags) != ot )
+                    continue;
+                mfn = l3e_get_pfn(l3e[i3]);
+                gfn = get_gpfn_from_mfn(mfn);
+                flags = p2m_type_to_flags(nt);
+                l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
+                paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l3e[i3],
+                                       l3mfn, l1e_content, 3);
+                continue;
+            }
+
             l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
             l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
             for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
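An aside for readers following the diff (this note is not part of the patch): the code keys its behaviour off the page order, where order 0 is a 4KB page, order 9 is a 2MB superpage (2^9 base pages) and order 18 is a 1GB superpage (2^18 base pages). The standalone C sketch below, with a made-up helper name pick_order(), mirrors the new order-selection expression added in the "while ( todo )" hunk: a larger order is usable only when the gfn, the mfn and the remaining page count are all aligned to it.

#include <stdio.h>

/* Page orders used by the patch: 4KB pages are order 0, 2MB superpages
 * cover 2^9 base pages (order 9), 1GB superpages cover 2^18 (order 18). */
#define ORDER_4K  0u
#define ORDER_2M  9u
#define ORDER_1G  18u

/* Hypothetical helper (not in the patch): return the largest mapping order
 * for which the gfn, the mfn and the remaining page count are all aligned,
 * mirroring the ((gfn | mfn | todo) & ((1ul << order) - 1)) tests in the
 * diff above. */
static unsigned int pick_order(unsigned long gfn, unsigned long mfn,
                               unsigned long todo)
{
    if ( ((gfn | mfn | todo) & ((1ul << ORDER_1G) - 1)) == 0 )
        return ORDER_1G;
    if ( ((gfn | mfn | todo) & ((1ul << ORDER_2M) - 1)) == 0 )
        return ORDER_2M;
    return ORDER_4K;
}

int main(void)
{
    /* 1GB-aligned frame numbers and a 1GB-multiple page count -> order 18. */
    printf("%u\n", pick_order(1ul << 18, 1ul << 18, 1ul << 18));
    /* Only 2MB-aligned -> order 9. */
    printf("%u\n", pick_order(1ul << 9, 1ul << 10, 1ul << 9));
    /* Unaligned -> order 0, i.e. plain 4KB mappings. */
    printf("%u\n", pick_order((1ul << 9) + 1, 1ul << 9, 1));
    return 0;
}

Built with any C compiler, the three calls print 18, 9 and 0, matching the orders the patched loop would choose for those alignments.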