[Xen-devel] [RFC PATCH 3/8]: PVH: memory manager and paging related changes
---
 arch/x86/xen/mmu.c              |  179 ++++++++++++++++++++++++++++++++++++---
 arch/x86/xen/mmu.h              |    2 +
 include/xen/interface/memory.h  |   27 ++++++-
 include/xen/interface/physdev.h |   10 ++
 include/xen/xen-ops.h           |    7 ++
 5 files changed, 211 insertions(+), 14 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b65a761..44a6477 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -330,6 +330,38 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval)
         __xen_set_pte(ptep, pteval);
 }
 
+/* This is for a PV guest in an HVM container (PVH). */
+void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+                              int nr_mfns, int add_mapping)
+{
+        int rc;
+        struct physdev_map_iomem iomem;
+
+        iomem.first_gfn = pfn;
+        iomem.first_mfn = mfn;
+        iomem.nr_mfns = nr_mfns;
+        iomem.add_mapping = add_mapping;
+
+        rc = HYPERVISOR_physdev_op(PHYSDEVOP_pvh_map_iomem, &iomem);
+        BUG_ON(rc);
+}
+
+/* This is for a PV guest in an HVM container (PVH).
+ * We need this because during boot the early_ioremap path eventually calls
+ * set_pte to map I/O space. Also, ACPI pages are not mapped into the
+ * EPT during dom0 creation. The pages are mapped initially here from
+ * kernel_physical_mapping_init(), then later the memtype is changed. */
+static void xen_dom0pvh_set_pte(pte_t *ptep, pte_t pteval)
+{
+        native_set_pte(ptep, pteval);
+}
+
+static void xen_dom0pvh_set_pte_at(struct mm_struct *mm, unsigned long addr,
+                                   pte_t *ptep, pte_t pteval)
+{
+        native_set_pte(ptep, pteval);
+}
+
 static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pteval)
 {
@@ -1197,6 +1229,10 @@ static void xen_post_allocator_init(void);
 static void __init xen_pagetable_setup_done(pgd_t *base)
 {
         xen_setup_shared_info();
+
+        if (xen_pvh_domain())
+                return;
+
         xen_post_allocator_init();
 }
 
@@ -1652,6 +1688,10 @@ static void set_page_prot(void *addr, pgprot_t prot)
         unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
         pte_t pte = pfn_pte(pfn, prot);
 
+        /* for PVH, page tables are native. */
+        if (xen_pvh_domain())
+                return;
+
         if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
                 BUG();
 }
@@ -1745,6 +1785,7 @@ static void convert_pfn_mfn(void *v)
  * but that's enough to get __va working.  We need to fill in the rest
  * of the physical mapping once some sort of allocator has been set
  * up.
+ * NOTE: for PVH, the page tables are native, with HAP required.
  */
 pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
                                           unsigned long max_pfn)
@@ -1761,10 +1802,12 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
         /* Zap identity mapping */
         init_level4_pgt[0] = __pgd(0);
 
-        /* Pre-constructed entries are in pfn, so convert to mfn */
-        convert_pfn_mfn(init_level4_pgt);
-        convert_pfn_mfn(level3_ident_pgt);
-        convert_pfn_mfn(level3_kernel_pgt);
+        if (!xen_pvh_domain()) {
+                /* Pre-constructed entries are in pfn, so convert to mfn */
+                convert_pfn_mfn(init_level4_pgt);
+                convert_pfn_mfn(level3_ident_pgt);
+                convert_pfn_mfn(level3_kernel_pgt);
+        }
 
         l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
         l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
@@ -1787,12 +1830,14 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
         set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
         set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
 
-        /* Pin down new L4 */
-        pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
-                          PFN_DOWN(__pa_symbol(init_level4_pgt)));
+        if (!xen_pvh_domain()) {
+                /* Pin down new L4 */
+                pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+                                  PFN_DOWN(__pa_symbol(init_level4_pgt)));
 
-        /* Unpin Xen-provided one */
-        pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+                /* Unpin Xen-provided one */
+                pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+        }
 
         /* Switch over */
         pgd = init_level4_pgt;
@@ -1802,9 +1847,13 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
          * structure to attach it to, so make sure we just set kernel
          * pgd.
          */
-        xen_mc_batch();
-        __xen_write_cr3(true, __pa(pgd));
-        xen_mc_issue(PARAVIRT_LAZY_CPU);
+        if (xen_pvh_domain()) {
+                native_write_cr3(__pa(pgd));
+        } else {
+                xen_mc_batch();
+                __xen_write_cr3(true, __pa(pgd));
+                xen_mc_issue(PARAVIRT_LAZY_CPU);
+        }
 
         memblock_reserve(__pa(xen_start_info->pt_base),
                          xen_start_info->nr_pt_frames * PAGE_SIZE);
@@ -2067,9 +2116,21 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 
 void __init xen_init_mmu_ops(void)
 {
+        x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
+
+        if (xen_pvh_domain()) {
+                pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
+
+                /* set_pte* for PCI devices to map iomem. */
+                if (xen_initial_domain()) {
+                        pv_mmu_ops.set_pte = xen_dom0pvh_set_pte;
+                        pv_mmu_ops.set_pte_at = xen_dom0pvh_set_pte_at;
+                }
+                return;
+        }
+
         x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
         x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
-        x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
         pv_mmu_ops = xen_mmu_ops;
 
         memset(dummy_mapping, 0xff, PAGE_SIZE);
@@ -2305,6 +2366,93 @@ void __init xen_hvm_init_mmu_ops(void)
 }
 #endif
 
+/* Map a foreign gmfn, fgmfn, to the local pfn, lpfn. This is for user space
+ * on PVH dom0 creating a new guest, which needs to map domU pages. Called
+ * from an exported function, so no need to export this one.
+ */
+static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
+                              unsigned int domid)
+{
+        int rc;
+        struct xen_add_to_physmap pmb = {.foreign_domid = domid};
+
+        pmb.gpfn = lpfn;
+        pmb.idx = fgmfn;
+        pmb.space = XENMAPSPACE_gmfn_foreign;
+        rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &pmb);
+        if (rc) {
+                pr_warn("Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n",
+                        rc, lpfn, fgmfn);
+                return 1;
+        }
+        return 0;
+}
+
+/* Unmap an entry from the xen p2m table */
+int pvh_rem_xen_p2m(unsigned long spfn, int count)
+{
+        struct xen_remove_from_physmap xrp;
+        int i, rc;
+
+        for (i = 0; i < count; i++) {
+                xrp.domid = DOMID_SELF;
+                xrp.gpfn = spfn + i;
+                rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
+                if (rc) {
+                        pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
+                                spfn + i, rc, i);
+                        return 1;
+                }
+        }
+        return 0;
+}
+EXPORT_SYMBOL_GPL(pvh_rem_xen_p2m);
+
+struct pvh_remap_data {
+        unsigned long fgmfn;    /* foreign domain's gmfn */
+        pgprot_t prot;
+        domid_t domid;
+        struct vm_area_struct *vma;
+};
+
+static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
+                          void *data)
+{
+        struct pvh_remap_data *pvhp = data;
+        struct xen_pvh_sav_pfn_info *savp = pvhp->vma->vm_private_data;
+        unsigned long pfn = page_to_pfn(savp->sp_paga[savp->sp_next_todo++]);
+        pte_t pteval = pte_mkspecial(pfn_pte(pfn, pvhp->prot));
+
+        native_set_pte(ptep, pteval);
+        if (pvh_add_to_xen_p2m(pfn, pvhp->fgmfn, pvhp->domid))
+                return -EFAULT;
+
+        return 0;
+}
+
+/* The only caller at the moment passes one gmfn at a time.
+ * PVH TBD/FIXME: expand this in the future to honor batch requests.
+ */
+static int pvh_remap_gmfn_range(struct vm_area_struct *vma,
+                                unsigned long addr, unsigned long mfn, int nr,
+                                pgprot_t prot, unsigned domid)
+{
+        int err;
+        struct pvh_remap_data pvhdata;
+
+        if (nr > 1)
+                return -EINVAL;
+
+        pvhdata.fgmfn = mfn;
+        pvhdata.prot = prot;
+        pvhdata.domid = domid;
+        pvhdata.vma = vma;
+        err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+                                  pvh_map_pte_fn, &pvhdata);
+        flush_tlb_all();
+        return err;
+}
+
 #define REMAP_BATCH_SIZE 16
 
 struct remap_data {
@@ -2342,6 +2490,11 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
         BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
                                 (VM_PFNMAP | VM_RESERVED | VM_IO)));
 
+        if (xen_pvh_domain()) {
+                /* We need to update the local page tables and the xen HAP */
+                return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid);
+        }
+
         rmd.mfn = mfn;
         rmd.prot = prot;
 
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 73809bb..6d0bb56 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -23,4 +23,6 @@ unsigned long xen_read_cr2_direct(void);
 
 extern void xen_init_mmu_ops(void);
 extern void xen_hvm_init_mmu_ops(void);
+extern void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+                                     int nr_mfns, int add_mapping);
 #endif  /* _XEN_MMU_H */
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index eac3ce1..1b213b1 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -163,10 +163,19 @@ struct xen_add_to_physmap {
     /* Which domain to change the mapping for. */
     domid_t domid;
 
+    /* Number of pages to go through for gmfn_range */
+    uint16_t size;
+
     /* Source mapping space. */
 #define XENMAPSPACE_shared_info  0 /* shared info page */
 #define XENMAPSPACE_grant_table  1 /* grant table page */
-    unsigned int space;
+#define XENMAPSPACE_gmfn         2 /* GMFN */
+#define XENMAPSPACE_gmfn_range   3 /* GMFN range */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
+    uint16_t space;
+    domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
+
+#define XENMAPIDX_grant_table_status 0x80000000
 
     /* Index into source mapping space. */
     unsigned long idx;
@@ -234,4 +243,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
  * during a driver critical region.
  */
 extern spinlock_t xen_reservation_lock;
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap 15
+struct xen_remove_from_physmap {
+    /* Which domain to change the mapping for. */
+    domid_t domid;
+
+    /* GPFN of the current mapping of the page. */
+    unsigned long gpfn;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
+
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index 9ce788d..80f792e 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -258,6 +258,16 @@ struct physdev_pci_device {
     uint8_t devfn;
 };
 
+#define PHYSDEVOP_pvh_map_iomem 29
+struct physdev_map_iomem {
+    /* IN */
+    uint64_t first_gfn;
+    uint64_t first_mfn;
+    uint32_t nr_mfns;
+    uint32_t add_mapping;   /* 1 == add mapping; 0 == unmap */
+
+};
+
 /*
  * Notify that some PIRQ-bound event channels have been unmasked.
  * ** This command is obsolete since interface version 0x00030202 and is **
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 6a198e4..fa595e1 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -29,4 +29,11 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
                                unsigned long mfn, int nr,
                                pgprot_t prot, unsigned domid);
 
+struct xen_pvh_sav_pfn_info {
+        struct page **sp_paga;  /* save pfn (info) page array */
+        int sp_num_pgs;
+        int sp_next_todo;
+};
+extern int pvh_rem_xen_p2m(unsigned long spfn, int count);
+
 #endif /* INCLUDE_XEN_OPS_H */
-- 
1.7.2.3
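
A few usage sketches follow, to show how the new interfaces are expected to
be driven. First, the PHYSDEVOP_pvh_map_iomem hypercall wrapped by
xen_set_clr_mmio_pvh_pte(): on a PVH dom0 the MMIO ranges of devices have to
be entered into the EPT explicitly. A minimal sketch of a caller, assuming a
hypothetical helper name and a BAR discovered elsewhere (not part of this
patch):

/* Hypothetical example: map a device BAR 1:1 (gfn == mfn) into the
 * EPT of a PVH dom0, then tear the mapping down again. */
static void example_map_bar(resource_size_t bar_phys, resource_size_t bar_len)
{
        unsigned long pfn = bar_phys >> PAGE_SHIFT;
        int nr_mfns = DIV_ROUND_UP(bar_len, PAGE_SIZE);

        /* add_mapping == 1: establish the gfn -> mfn mapping in the EPT */
        xen_set_clr_mmio_pvh_pte(pfn, pfn, nr_mfns, 1);

        /* ... device in use ... */

        /* add_mapping == 0: remove the mapping again */
        xen_set_clr_mmio_pvh_pte(pfn, pfn, nr_mfns, 0);
}

Note that xen_set_clr_mmio_pvh_pte() BUG()s on any hypercall failure, so a
caller should only pass ranges the domain is known to be privileged to map.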
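
Second, the foreign-mapping path. pvh_remap_gmfn_range() assumes the caller
has hung a struct xen_pvh_sav_pfn_info off vma->vm_private_data, with
sp_paga[] holding the local pages whose pfns will shadow the foreign frames
in the p2m. A rough sketch of a privcmd-style caller under that assumption
(all names below are illustrative, not part of this patch):

/* Hypothetical example: map one foreign gmfn of domain 'domid' at
 * 'addr' inside a VM_PFNMAP | VM_RESERVED | VM_IO vma. */
static int example_map_foreign_gmfn(struct vm_area_struct *vma,
                                    unsigned long addr,
                                    unsigned long fgmfn, domid_t domid)
{
        struct xen_pvh_sav_pfn_info *savp;

        savp = kzalloc(sizeof(*savp), GFP_KERNEL);
        if (!savp)
                return -ENOMEM;

        savp->sp_num_pgs = 1;
        savp->sp_paga = kcalloc(1, sizeof(*savp->sp_paga), GFP_KERNEL);
        if (!savp->sp_paga) {
                kfree(savp);
                return -ENOMEM;
        }

        /* Local page whose pfn will carry the foreign frame in our p2m. */
        savp->sp_paga[0] = alloc_page(GFP_KERNEL);
        if (!savp->sp_paga[0]) {
                kfree(savp->sp_paga);
                kfree(savp);
                return -ENOMEM;
        }
        vma->vm_private_data = savp;

        /* On PVH this routes to pvh_remap_gmfn_range() above. */
        return xen_remap_domain_mfn_range(vma, addr, fgmfn, 1,
                                          vma->vm_page_prot, domid);
}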
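
Finally, teardown. The p2m entries have to be removed again before the
backing pages go back to the allocator, which is why pvh_rem_xen_p2m() is
exported. Continuing the illustrative example above:

/* Hypothetical example: undo example_map_foreign_gmfn() for a vma
 * whose private data covers 'nr' mapped pages. */
static void example_unmap_foreign(struct vm_area_struct *vma, int nr)
{
        struct xen_pvh_sav_pfn_info *savp = vma->vm_private_data;
        int i;

        for (i = 0; i < nr; i++) {
                /* Drop the pfn -> foreign-mfn entry from the xen p2m. */
                pvh_rem_xen_p2m(page_to_pfn(savp->sp_paga[i]), 1);
                __free_page(savp->sp_paga[i]);
        }
        kfree(savp->sp_paga);
        kfree(savp);
        vma->vm_private_data = NULL;
}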