[Xen-devel] [PATCH 1/2] Xen PV support for hugepages
This patch adds support to Xen for hugepages in a PV environment.  The
patch is against the latest xen-unstable tree on xenbits.xensource.com.
It must be enabled via the command-line option "allowhugepage".

It is assumed that the guest has guaranteed that the hugepage is
physically aligned and contiguous.

There is no support yet for save/restore/migrate.

Signed-off-by: Dave McCracken <dave.mccracken@xxxxxxxxxx>

----

--- xen-unstable//./xen/include/asm-x86/x86_32/page.h	2008-07-17 09:49:27.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_32/page.h	2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
  * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
  * Permit the NX bit if the hardware supports it.
  */
-#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFFFFF118U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
 #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/include/asm-x86/x86_64/page.h	2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_64/page.h	2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ typedef l4_pgentry_t root_pgentry_t;
  * Permit the NX bit if the hardware supports it.
  * Note that range [62:52] is available for software use on x86/64.
  */
-#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFF800118U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
 #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/arch/x86/mm.c	2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/arch/x86/mm.c	2008-10-09 09:07:47.000000000 -0500
@@ -160,6 +160,9 @@ unsigned long total_pages;
 
 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
 
+static int opt_allow_hugepage = 0;
+boolean_param("allowhugepage", opt_allow_hugepage);
+
 #define l1_disallow_mask(d)                                     \
     ((d != dom_io) &&                                           \
      (rangeset_is_empty((d)->iomem_caps) &&                     \
@@ -584,6 +587,26 @@ static int get_page_and_type_from_pagenr
     return rc;
 }
 
+static int get_data_page(struct page_info *page, struct domain *d, int writeable)
+{
+    int rc;
+
+    if ( writeable )
+        rc = get_page_and_type(page, d, PGT_writable_page);
+    else
+        rc = get_page(page, d);
+
+    return rc;
+}
+
+static void put_data_page(struct page_info *page, int writeable)
+{
+    if ( writeable )
+        put_page_and_type(page);
+    else
+        put_page(page);
+}
+
 /*
  * We allow root tables to map each other (a.k.a. linear page tables).  It
  * needs some special care with reference counts and access permissions:
@@ -656,6 +679,7 @@ get_page_from_l1e(
     struct vcpu *curr = current;
     struct domain *owner;
     int okay;
+    int writeable;
 
     if ( !(l1f & _PAGE_PRESENT) )
         return 1;
@@ -698,10 +722,9 @@ get_page_from_l1e(
      * contribute to writeable mapping refcounts.  (This allows the
      * qemu-dm helper process in dom0 to map the domain's memory without
      * messing up the count of "real" writable mappings.) */
-    okay = (((l1f & _PAGE_RW) &&
-             !(unlikely(paging_mode_external(d) && (d != curr->domain))))
-            ? get_page_and_type(page, d, PGT_writable_page)
-            : get_page(page, d));
+    writeable = (l1f & _PAGE_RW) &&
+        !( unlikely(paging_mode_external(d) && (d != curr->domain)) );
+    okay = get_data_page(page, d, writeable);
     if ( !okay )
     {
         MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -759,11 +782,43 @@ get_page_from_l2e(
         MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
         return -EINVAL;
     }
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+    {
+        unsigned long mfn = l2e_get_pfn(l2e);
+        unsigned long m, me;
+        struct page_info *page = mfn_to_page(mfn);
+        int writeable;
 
-    rc = get_page_and_type_from_pagenr(
-        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
-    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
-        rc = 0;
+        if ( !opt_allow_hugepage )
+            return -EINVAL;
+
+        writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+        rc = get_data_page(page, d, writeable);
+        if ( unlikely(!rc) )
+            return rc;
+
+        for ( m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m <= me; m++ )
+        {
+            rc = get_data_page(mfn_to_page(m), d, writeable);
+            if ( unlikely(!rc) )
+            {
+                for ( --m; m > mfn; --m )
+                    put_data_page(mfn_to_page(m), writeable);
+                put_data_page(page, writeable);
+                return 0;
+            }
+        }
+#ifdef __x86_64__
+        map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, L1_PAGETABLE_ENTRIES,
+                         PAGE_HYPERVISOR | l2e_get_flags(l2e));
+#endif
+    } else {
+        rc = get_page_and_type_from_pagenr(
+            l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+        if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+            rc = 0;
+    }
 
     return rc;
 }
@@ -955,7 +1010,18 @@ static int put_page_from_l2e(l2_pgentry_
     if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
          (l2e_get_pfn(l2e) != pfn) )
     {
-        put_page_and_type(l2e_get_page(l2e));
+        if ( l2e_get_flags(l2e) & _PAGE_PSE )
+        {
+            unsigned long mfn = l2e_get_pfn(l2e);
+            unsigned long m, me;
+            struct page_info *page = mfn_to_page(mfn);
+            int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+            for ( m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m <= me; m++ )
+                put_data_page(mfn_to_page(m), writeable);
+            put_data_page(page, writeable);
+        } else
+            put_page_and_type(l2e_get_page(l2e));
         return 0;
     }
     return 1;

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
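
For context, the patch adds no new hypercall: a guest requests a 2MB mapping by writing a present L2 entry with _PAGE_PSE set through the existing pagetable-update interface (e.g. HYPERVISOR_mmu_update), which the get_page_from_l2e() changes above now accept when Xen is booted with "allowhugepage". The sketch below is not part of this patch and only illustrates that guest-side step; the helper name map_2mb_hugepage and its arguments are hypothetical, and the header paths assume a Linux-style PV guest, where they vary between kernel trees.

/*
 * Hypothetical guest-side sketch (NOT part of this patch): install one
 * 2MB mapping through an L2 (PMD) slot whose machine address is known,
 * assuming the backing MFNs are physically contiguous and 2MB-aligned,
 * as the patch description requires the guest to guarantee.
 */
#include <linux/types.h>
#include <xen/interface/xen.h>   /* struct mmu_update, MMU_NORMAL_PT_UPDATE, DOMID_SELF */
#include <asm/xen/hypercall.h>   /* HYPERVISOR_mmu_update() wrapper                     */
#include <asm/pgtable.h>         /* _PAGE_* flags                                       */
#include <asm/page.h>            /* PAGE_SHIFT                                          */

static int map_2mb_hugepage(uint64_t l2e_maddr,   /* machine address of the L2 slot  */
                            unsigned long mfn)    /* first MFN of the 2MB extent     */
{
    struct mmu_update u;

    /* Build a present, writable, PSE entry pointing at the start of the extent. */
    u.ptr = l2e_maddr | MMU_NORMAL_PT_UPDATE;
    u.val = ((uint64_t)mfn << PAGE_SHIFT) |
            _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
            _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE;

    /* Xen validates the new entry in get_page_from_l2e() before it is
     * installed; a rejected entry makes the hypercall fail. */
    return HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
}

On the Xen side, the _PAGE_PSE path in get_page_from_l2e() then takes a data-page reference on every frame of the extent (writable references when _PAGE_RW is set), and put_page_from_l2e() drops those references again when the entry is removed.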