Re: [Xen-devel] [RFC PATCH 3/8]: PVH: memory manager and paging related changes
On Thu, 16 Aug 2012, Mukesh Rathor wrote:
>  arch/x86/xen/mmu.c              |  179 ++++++++++++++++++++++++++++++++++++---
>  arch/x86/xen/mmu.h              |    2 +
>  include/xen/interface/memory.h  |   27 ++++++-
>  include/xen/interface/physdev.h |   10 ++
>  include/xen/xen-ops.h           |    7 ++
>  5 files changed, 211 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index b65a761..44a6477 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -330,6 +330,38 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval)
>          __xen_set_pte(ptep, pteval);
>  }
>  
> +/* This for PV guest in hvm container */
> +void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
> +                              int nr_mfns, int add_mapping)
> +{
> +        int rc;
> +        struct physdev_map_iomem iomem;
> +
> +        iomem.first_gfn = pfn;
> +        iomem.first_mfn = mfn;
> +        iomem.nr_mfns = nr_mfns;
> +        iomem.add_mapping = add_mapping;
> +
> +        rc = HYPERVISOR_physdev_op(PHYSDEVOP_pvh_map_iomem, &iomem);
> +        BUG_ON(rc);
> +}
> +
> +/* This for PV guest in hvm container.
> + * We need this because during boot early_ioremap path eventually calls
> + * set_pte that maps io space. Also, ACPI pages are not mapped into to the
> + * EPT during dom0 creation. The pages are mapped initially here from
> + * kernel_physical_mapping_init() then later the memtype is changed. */
> +static void xen_dom0pvh_set_pte(pte_t *ptep, pte_t pteval)
> +{
> +        native_set_pte(ptep, pteval);
> +}
> +
> +static void xen_dom0pvh_set_pte_at(struct mm_struct *mm, unsigned long addr,
> +                                   pte_t *ptep, pte_t pteval)
> +{
> +        native_set_pte(ptep, pteval);
> +}
> +
>  static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
>                             pte_t *ptep, pte_t pteval)
>  {
> @@ -1197,6 +1229,10 @@ static void xen_post_allocator_init(void);
>  static void __init xen_pagetable_setup_done(pgd_t *base)
>  {
>          xen_setup_shared_info();
> +
> +        if (xen_pvh_domain())
> +                return;
> +
>          xen_post_allocator_init();
>  }
>  
> @@ -1652,6 +1688,10 @@ static void set_page_prot(void *addr, pgprot_t prot)
>          unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
>          pte_t pte = pfn_pte(pfn, prot);
>  
> +        /* for PVH, page tables are native. */
> +        if (xen_pvh_domain())
> +                return;
> +
>          if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
>                  BUG();
>  }
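To make the intended use of the new xen_set_clr_mmio_pvh_pte() above a
bit more concrete, a caller on the PVH dom0 side would presumably look
roughly like the sketch below. This is an illustration only, not part of
the patch: the helper names, the 1:1 gfn == mfn choice and the lack of
error handling are assumptions.

    /* Hypothetical sketch: expose one MMIO range to a PVH dom0 by adding
     * 1:1 (gfn == mfn) mappings to the EPT, and tear them down again on
     * release.  Relies only on xen_set_clr_mmio_pvh_pte() from the hunk
     * above. */
    static void pvh_dom0_expose_iomem(unsigned long mfn, int nr_mfns)
    {
            if (!xen_pvh_domain() || !xen_initial_domain())
                    return;         /* classic PV/HVM: nothing to do */

            /* add_mapping == 1: create the gfn -> mfn mappings */
            xen_set_clr_mmio_pvh_pte(mfn, mfn, nr_mfns, 1);
    }

    static void pvh_dom0_unexpose_iomem(unsigned long mfn, int nr_mfns)
    {
            /* add_mapping == 0: remove the same mappings again */
            xen_set_clr_mmio_pvh_pte(mfn, mfn, nr_mfns, 0);
    }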
> @@ -1745,6 +1785,7 @@ static void convert_pfn_mfn(void *v)
>   * but that's enough to get __va working. We need to fill in the rest
>   * of the physical mapping once some sort of allocator has been set
>   * up.
> + * NOTE: for PVH, the page tables are native with HAP required.
>   */
>  pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
>                                            unsigned long max_pfn)
> @@ -1761,10 +1802,12 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
>          /* Zap identity mapping */
>          init_level4_pgt[0] = __pgd(0);
>  
> -        /* Pre-constructed entries are in pfn, so convert to mfn */
> -        convert_pfn_mfn(init_level4_pgt);
> -        convert_pfn_mfn(level3_ident_pgt);
> -        convert_pfn_mfn(level3_kernel_pgt);
> +        if (!xen_pvh_domain()) {
> +                /* Pre-constructed entries are in pfn, so convert to mfn */
> +                convert_pfn_mfn(init_level4_pgt);
> +                convert_pfn_mfn(level3_ident_pgt);
> +                convert_pfn_mfn(level3_kernel_pgt);
> +        }
>  
>          l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
>          l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
> @@ -1787,12 +1830,14 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
>          set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
>          set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
>  
> -        /* Pin down new L4 */
> -        pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
> -                          PFN_DOWN(__pa_symbol(init_level4_pgt)));
> +        if (!xen_pvh_domain()) {
> +                /* Pin down new L4 */
> +                pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
> +                                  PFN_DOWN(__pa_symbol(init_level4_pgt)));
>  
> -        /* Unpin Xen-provided one */
> -        pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
> +                /* Unpin Xen-provided one */
> +                pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
> +        }
>  
>          /* Switch over */
>          pgd = init_level4_pgt;
> @@ -1802,9 +1847,13 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
>           * structure to attach it to, so make sure we just set kernel
>           * pgd.
>           */
> -        xen_mc_batch();
> -        __xen_write_cr3(true, __pa(pgd));
> -        xen_mc_issue(PARAVIRT_LAZY_CPU);
> +        if (xen_pvh_domain()) {
> +                native_write_cr3(__pa(pgd));
> +        } else {
> +                xen_mc_batch();
> +                __xen_write_cr3(true, __pa(pgd));
> +                xen_mc_issue(PARAVIRT_LAZY_CPU);
> +        }
>  
>          memblock_reserve(__pa(xen_start_info->pt_base),
>                           xen_start_info->nr_pt_frames * PAGE_SIZE);
> @@ -2067,9 +2116,21 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
>  
>  void __init xen_init_mmu_ops(void)
>  {
> +        x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
> +
> +        if (xen_pvh_domain()) {
> +                pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
> +
> +                /* set_pte* for PCI devices to map iomem. */
> +                if (xen_initial_domain()) {
> +                        pv_mmu_ops.set_pte = xen_dom0pvh_set_pte;
> +                        pv_mmu_ops.set_pte_at = xen_dom0pvh_set_pte_at;
> +                }
> +                return;
> +        }

Considering that the implementation of xen_dom0pvh_set_pte is
native_set_pte, can't we just leave it to the default that is
native_set_pte?

>          x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
>          x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
> -        x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
>          pv_mmu_ops = xen_mmu_ops;
>  
>          memset(dummy_mapping, 0xff, PAGE_SIZE);
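To spell out the suggestion above: if the default pv_mmu_ops hooks really
are native_set_pte/native_set_pte_at at this point, the PVH branch could
shrink to something like the sketch below (illustrative only; it assumes
no other PVH-specific pte hook is needed for the dom0 iomem case).

    void __init xen_init_mmu_ops(void)
    {
            x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;

            if (xen_pvh_domain()) {
                    /* Page tables are native under PVH; only remote TLB
                     * flushes need Xen's help.  set_pte/set_pte_at keep
                     * their native defaults, so no dom0 override. */
                    pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
                    return;
            }

            x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
            x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
            pv_mmu_ops = xen_mmu_ops;

            memset(dummy_mapping, 0xff, PAGE_SIZE);
    }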
> @@ -2305,6 +2366,93 @@ void __init xen_hvm_init_mmu_ops(void)
>  }
>  #endif
>  
> +/* Map foreign gmfn, fgmfn, to local pfn, lpfn. This for the user space
> + * creating new guest on PVH dom0 and needs to map domU pages. Called from
> + * exported function, so no need to export this.
> + */
> +static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
> +                              unsigned int domid)
> +{
> +        int rc;
> +        struct xen_add_to_physmap pmb = {.foreign_domid = domid};
> +
> +        pmb.gpfn = lpfn;
> +        pmb.idx = fgmfn;
> +        pmb.space = XENMAPSPACE_gmfn_foreign;
> +        rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &pmb);
> +        if (rc) {
> +                pr_warn("Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n",
> +                        rc, lpfn, fgmfn);
> +                return 1;
> +        }
> +        return 0;
> +}
> +
> +/* Unmap an entry from xen p2m table */
> +int pvh_rem_xen_p2m(unsigned long spfn, int count)
> +{
> +        struct xen_remove_from_physmap xrp;
> +        int i, rc;
> +
> +        for (i=0; i < count; i++) {
> +                xrp.domid = DOMID_SELF;
> +                xrp.gpfn = spfn+i;
> +                rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
> +                if (rc) {
> +                        pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
> +                                spfn+i, rc, i);
> +                        return 1;
> +                }
> +        }
> +        return 0;
> +}
> +EXPORT_SYMBOL_GPL(pvh_rem_xen_p2m);
> +
> +struct pvh_remap_data {
> +        unsigned long fgmfn; /* foreign domain's gmfn */
> +        pgprot_t prot;
> +        domid_t domid;
> +        struct vm_area_struct *vma;
> +};
> +
> +static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
> +                          void *data)
> +{
> +        struct pvh_remap_data *pvhp = data;
> +        struct xen_pvh_sav_pfn_info *savp = pvhp->vma->vm_private_data;
> +        unsigned long pfn = page_to_pfn(savp->sp_paga[savp->sp_next_todo++]);
> +        pte_t pteval = pte_mkspecial(pfn_pte(pfn, pvhp->prot));
> +
> +        native_set_pte(ptep, pteval);
> +        if (pvh_add_to_xen_p2m(pfn, pvhp->fgmfn, pvhp->domid))
> +                return -EFAULT;
> +
> +        return 0;
> +}
> +
> +/* The only caller at moment passes one gmfn at a time.
> + * PVH TBD/FIXME: expand this in future to honor batch requests.
> + */
> +static int pvh_remap_gmfn_range(struct vm_area_struct *vma,
> +                                unsigned long addr, unsigned long mfn, int nr,
> +                                pgprot_t prot, unsigned domid)
> +{
> +        int err;
> +        struct pvh_remap_data pvhdata;
> +
> +        if (nr > 1)
> +                return -EINVAL;
> +
> +        pvhdata.fgmfn = mfn;
> +        pvhdata.prot = prot;
> +        pvhdata.domid = domid;
> +        pvhdata.vma = vma;
> +        err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
> +                                  pvh_map_pte_fn, &pvhdata);
> +        flush_tlb_all();
> +        return err;
> +}
> +
>  #define REMAP_BATCH_SIZE 16
>  
>  struct remap_data {
> @@ -2342,6 +2490,11 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
>          BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
>                   (VM_PFNMAP | VM_RESERVED | VM_IO)));
>  
> +        if (xen_pvh_domain()) {
> +                /* We need to update the local page tables and the xen HAP */
> +                return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid);
> +        }
> +
>          rmd.mfn = mfn;
>          rmd.prot = prot;
>  
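Since pvh_rem_xen_p2m() is exported for the unmap side, the eventual
teardown path presumably has to mirror pvh_remap_gmfn_range(): drop the
local mappings first, then remove the p2m (EPT) entries. A rough sketch
of what such a helper could look like (hypothetical, not part of this
patch; the real caller would sit in the exported privcmd/unmap path, and
zap_page_range() is just one way to clear the ptes):

    static int pvh_unmap_gmfn_range(struct vm_area_struct *vma,
                                    unsigned long addr, unsigned long pfn,
                                    int nr)
    {
            /* Clear the local ptes first so nothing touches the pages
             * while their p2m entries disappear... */
            zap_page_range(vma, addr, nr << PAGE_SHIFT, NULL);

            /* ...then drop pfn .. pfn+nr-1 from the p2m (EPT). */
            if (pvh_rem_xen_p2m(pfn, nr))
                    return -EFAULT;

            return 0;
    }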
> diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
> index 73809bb..6d0bb56 100644
> --- a/arch/x86/xen/mmu.h
> +++ b/arch/x86/xen/mmu.h
> @@ -23,4 +23,6 @@ unsigned long xen_read_cr2_direct(void);
>  
>  extern void xen_init_mmu_ops(void);
>  extern void xen_hvm_init_mmu_ops(void);
> +extern void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
> +                                     int nr_mfns, int add_mapping);
>  #endif /* _XEN_MMU_H */
> diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
> index eac3ce1..1b213b1 100644
> --- a/include/xen/interface/memory.h
> +++ b/include/xen/interface/memory.h
> @@ -163,10 +163,19 @@ struct xen_add_to_physmap {
>      /* Which domain to change the mapping for. */
>      domid_t domid;
>  
> +    /* Number of pages to go through for gmfn_range */
> +    uint16_t size;
> +
>      /* Source mapping space. */
>  #define XENMAPSPACE_shared_info 0 /* shared info page */
>  #define XENMAPSPACE_grant_table 1 /* grant table page */
> -    unsigned int space;
> +#define XENMAPSPACE_gmfn 2 /* GMFN */
> +#define XENMAPSPACE_gmfn_range 3 /* GMFN range */
> +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
> +    uint16_t space;
> +    domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
> +
> +#define XENMAPIDX_grant_table_status 0x80000000

As you have seen, I have a very similar patch in my series.

>      /* Index into source mapping space. */
>      unsigned long idx;
> @@ -234,4 +243,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
>   * during a driver critical region.
>   */
>  extern spinlock_t xen_reservation_lock;
> +
> +/*
> + * Unmaps the page appearing at a particular GPFN from the specified guest's
> + * pseudophysical address space.
> + * arg == addr of xen_remove_from_physmap_t.
> + */
> +#define XENMEM_remove_from_physmap 15
> +struct xen_remove_from_physmap {
> +    /* Which domain to change the mapping for. */
> +    domid_t domid;
> +
> +    /* GPFN of the current mapping of the page. */
> +    unsigned long gpfn;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
> +
>  #endif /* __XEN_PUBLIC_MEMORY_H__ */
> diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
> index 9ce788d..80f792e 100644
> --- a/include/xen/interface/physdev.h
> +++ b/include/xen/interface/physdev.h
> @@ -258,6 +258,16 @@ struct physdev_pci_device {
>      uint8_t devfn;
>  };
>  
> +#define PHYSDEVOP_pvh_map_iomem 29
> +struct physdev_map_iomem {
> +    /* IN */
> +    uint64_t first_gfn;
> +    uint64_t first_mfn;
> +    uint32_t nr_mfns;
> +    uint32_t add_mapping; /* 1 == add mapping; 0 == unmap */
> +
> +};
> +
>  /*
>   * Notify that some PIRQ-bound event channels have been unmasked.
>   * ** This command is obsolete since interface version 0x00030202 and is **
> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> index 6a198e4..fa595e1 100644
> --- a/include/xen/xen-ops.h
> +++ b/include/xen/xen-ops.h
> @@ -29,4 +29,11 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
>                                 unsigned long mfn, int nr,
>                                 pgprot_t prot, unsigned domid);
>  
> +struct xen_pvh_sav_pfn_info {
> +        struct page **sp_paga; /* save pfn (info) page array */
> +        int sp_num_pgs;
> +        int sp_next_todo;
> +};
> +extern int pvh_rem_xen_p2m(unsigned long spfn, int count);
> +
>  #endif /* INCLUDE_XEN_OPS_H */
> -- 
> 1.7.2.3
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel