Xen project Mailing List

Re: [Xen-devel] [PATCH v1 2/8]: PVH mmu changes

To: Ian Campbell <Ian.Campbell@xxxxxxxxxx>

From: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx>

Date: Fri, 5 Oct 2012 19:00:11 -0700

Cc: "Xen-devel@xxxxxxxxxxxxxxxxxxx" <Xen-devel@xxxxxxxxxxxxxxxxxxx>, Stefano Stabellini <Stefano.Stabellini@xxxxxxxxxxxxx>, Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>

Delivery-date: Sat, 06 Oct 2012 02:01:01 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

On Thu, 4 Oct 2012 09:27:59 +0100 Ian Campbell <Ian.Campbell@xxxxxxxxxx> wrote: > On Wed, 2012-10-03 at 23:29 +0100, Mukesh Rathor wrote: > > On Wed, 3 Oct 2012 16:42:43 +0100 > > Ian Campbell <Ian.Campbell@xxxxxxxxxx> wrote: > > > While implementing the ARM version of this interface it occurred to me > that the num_pgs and next_todo fields here are not really needed > across the arch interface e.g. you already get pi_num_pgs from the nr > argument to xen_remap_domain_mfn_range and pi_next_todo is really > state which fits in struct pvh_remap_data. That would mean that > remap_foo could take a struct page * directly and I think you would > save an allocation. Ok, take a look at the attached and inlined diffs. I redid it, passing struct page to the api, and getting rid of the pi_* struct completely. thanks Mukesh diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index ef63895..6cbf59a 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -33,6 +33,7 @@ #include <xen/features.h> #include <xen/page.h> #include <xen/xen-ops.h> +#include <xen/balloon.h> #include "privcmd.h" @@ -178,7 +179,7 @@ static int mmap_mfn_range(void *data, void *state) msg->va & PAGE_MASK, msg->mfn, msg->npages, vma->vm_page_prot, - st->domain); + st->domain, NULL); if (rc < 0) return rc; @@ -199,6 +200,10 @@ static long privcmd_ioctl_mmap(void __user *udata) if (!xen_initial_domain()) return -EPERM; + /* We only support privcmd_ioctl_mmap_batch for auto translated. */ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -ENOSYS; + if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) return -EFAULT; @@ -246,6 +251,7 @@ struct mmap_batch_state { domid_t domain; unsigned long va; struct vm_area_struct *vma; + int index; /* A tristate: * 0 for no errors * 1 if at least one error has happened (and no @@ -260,14 +266,20 @@ struct mmap_batch_state { xen_pfn_t __user *user_mfn; }; +/* PVH dom0 fyi: if domU being created is PV, then mfn is mfn(addr on bus). If + * it's PVH then mfn is pfn (input to HAP). */ static int mmap_batch_fn(void *data, void *state) { xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; + struct vm_area_struct *vma = st->vma; + struct page **pages = vma ? vma->vm_private_data : NULL; + struct page *cur_page = pages[st->index++]; int ret; ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, - st->vma->vm_page_prot, st->domain); + st->vma->vm_page_prot, st->domain, + &cur_page); /* Store error code for second pass. */ *(st->err++) = ret; @@ -303,6 +315,32 @@ static int mmap_return_errors_v1(void *data, void *state) return __put_user(*mfnp, st->user_mfn++); } +/* Allocate pfns that are then mapped with gmfns from foreign domid. Update + * the vma with the page info to use later. + * Returns: 0 if success, otherwise -errno + */ +static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, int numpgs) +{ + int rc; + struct page **pages; + + pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL); + if (pages == NULL) + return -ENOMEM; + + rc = alloc_xenballooned_pages(numpgs, pages, 0); + if (rc != 0) { + pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, + numpgs, rc); + kfree(pages); + return -ENOMEM; + } + BUG_ON(vma->vm_private_data); + vma->vm_private_data = pages; + + return 0; +} + static struct vm_operations_struct privcmd_vm_ops; static long privcmd_ioctl_mmap_batch(void __user *udata, int version) @@ -370,10 +408,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) up_write(&mm->mmap_sem); goto out; } + if (xen_feature(XENFEAT_auto_translated_physmap)) { + if ((ret = pvh_privcmd_resv_pfns(vma, m.num)) < 0) { + up_write(&mm->mmap_sem); + goto out; + } + } state.domain = m.dom; state.vma = vma; state.va = m.addr; + state.index = 0; state.global_error = 0; state.err = err_array; @@ -438,6 +483,20 @@ static long privcmd_ioctl(struct file *file, return ret; } +static void privcmd_close(struct vm_area_struct *vma) +{ + struct page **pages = vma ? vma->vm_private_data : NULL; + int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + if (!pages || !numpgs || !xen_feature(XENFEAT_auto_translated_physmap)) + return; + + xen_unmap_domain_mfn_range(vma); + while (numpgs--) + free_xenballooned_pages(1, &pages[numpgs]); + kfree(pages); +} + static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", @@ -448,6 +507,7 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } static struct vm_operations_struct privcmd_vm_ops = { + .close = privcmd_close, .fault = privcmd_fault }; @@ -464,6 +524,10 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) { + if (xen_feature(XENFEAT_auto_translated_physmap)) { + int sz = sizeof(vma->vm_private_data); + return (!__cmpxchg(&vma->vm_private_data, NULL, NULL, sz)); + } return (xchg(&vma->vm_private_data, (void *)1) == NULL); } =============================================================================================== diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5a16824..d5e53ad 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -73,6 +73,7 @@ #include <xen/interface/version.h> #include <xen/interface/memory.h> #include <xen/hvc-console.h> +#include <xen/balloon.h> #include "multicalls.h" #include "mmu.h" @@ -331,6 +332,20 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval) __xen_set_pte(ptep, pteval); } +void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn, + int nr_mfns, int add_mapping) +{ + struct physdev_map_iomem iomem; + + iomem.first_gfn = pfn; + iomem.first_mfn = mfn; + iomem.nr_mfns = nr_mfns; + iomem.add_mapping = add_mapping; + + if (HYPERVISOR_physdev_op(PHYSDEVOP_pvh_map_iomem, &iomem)) + BUG(); +} + static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { @@ -1220,6 +1235,8 @@ static void __init xen_pagetable_init(void) #endif paging_init(); xen_setup_shared_info(); + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; #ifdef CONFIG_X86_64 if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long new_mfn_list; @@ -1527,6 +1544,10 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) { struct mmuext_op op; + + if (xen_feature(XENFEAT_writable_page_tables)) + return; + op.cmd = cmd; op.arg1.mfn = pfn_to_mfn(pfn); if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) @@ -1724,6 +1745,10 @@ static void set_page_prot(void *addr, pgprot_t prot) unsigned long pfn = __pa(addr) >> PAGE_SHIFT; pte_t pte = pfn_pte(pfn, prot); + /* recall for PVH, page tables are native. */ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) BUG(); } @@ -1801,6 +1826,9 @@ static void convert_pfn_mfn(void *v) pte_t *pte = v; int i; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; + /* All levels are converted the same way, so just treat them as ptes. */ for (i = 0; i < PTRS_PER_PTE; i++) @@ -1820,6 +1848,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, (*pt_end)--; } } + /* * Set up the initial kernel pagetable. * @@ -1830,6 +1859,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, * but that's enough to get __va working. We need to fill in the rest * of the physical mapping once some sort of allocator has been set * up. + * NOTE: for PVH, the page tables are native. */ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { @@ -1907,10 +1937,13 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) * structure to attach it to, so make sure we just set kernel * pgd. */ - xen_mc_batch(); - __xen_write_cr3(true, __pa(init_level4_pgt)); - xen_mc_issue(PARAVIRT_LAZY_CPU); - + if (xen_feature(XENFEAT_writable_page_tables)) { + native_write_cr3(__pa(init_level4_pgt)); + } else { + xen_mc_batch(); + __xen_write_cr3(true, __pa(init_level4_pgt)); + xen_mc_issue(PARAVIRT_LAZY_CPU); + } /* We can't that easily rip out L3 and L2, as the Xen pagetables are * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for * the initial domain. For guests using the toolstack, they are in: @@ -2177,8 +2210,19 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { void __init xen_init_mmu_ops(void) { - x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; x86_init.paging.pagetable_init = xen_pagetable_init; + + if (xen_feature(XENFEAT_auto_translated_physmap)) { + pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others; + + /* set_pte* for PCI devices to map iomem. */ + if (xen_initial_domain()) { + pv_mmu_ops.set_pte = native_set_pte; + pv_mmu_ops.set_pte_at = native_set_pte_at; + } + return; + } + x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); @@ -2414,6 +2458,94 @@ void __init xen_hvm_init_mmu_ops(void) } #endif +/* Map foreign gmfn, fgmfn, to local pfn, lpfn. This for the user space + * creating new guest on PVH dom0 and needs to map domU pages. + */ +static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn, + unsigned int domid) +{ + int rc; + struct xen_add_to_physmap xatp = { .u.foreign_domid = domid }; + + xatp.gpfn = lpfn; + xatp.idx = fgmfn; + xatp.domid = DOMID_SELF; + xatp.space = XENMAPSPACE_gmfn_foreign; + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp); + if (rc) + pr_warn("d0: Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n", + rc, lpfn, fgmfn); + return rc; +} + +int pvh_rem_xen_p2m(unsigned long spfn, int count) +{ + struct xen_remove_from_physmap xrp; + int i, rc; + + for (i=0; i < count; i++) { + xrp.domid = DOMID_SELF; + xrp.gpfn = spfn+i; + rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); + if (rc) { + pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n", + spfn+i, rc, i); + return 1; + } + } + return 0; +} +EXPORT_SYMBOL_GPL(pvh_rem_xen_p2m); + +struct pvh_remap_data { + unsigned long fgmfn; /* foreign domain's gmfn */ + pgprot_t prot; + domid_t domid; + int index; + struct page **pages; +}; + +static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, + void *data) +{ + int rc; + struct pvh_remap_data *remap = data; + unsigned long pfn = page_to_pfn(remap->pages[remap->index++]); + pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot)); + + if ((rc=pvh_add_to_xen_p2m(pfn, remap->fgmfn, remap->domid))) + return rc; + native_set_pte(ptep, pteval); + + return 0; +} + +/* The only caller at moment passes one gmfn at a time. + * PVH TBD/FIXME: expand this in future to honor batch requests. + */ +static int pvh_remap_gmfn_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long mfn, int nr, + pgprot_t prot, unsigned domid, + struct page **pages) +{ + int err; + struct pvh_remap_data pvhdata; + + if (nr > 1) + return -EINVAL; + + pvhdata.fgmfn = mfn; + pvhdata.prot = prot; + pvhdata.domid = domid; + pvhdata.index = 0; + pvhdata.pages = pages; + err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, + pvh_map_pte_fn, &pvhdata); + flush_tlb_all(); + /* flush_tlb_page(vma, addr); */ + return err; +} + #define REMAP_BATCH_SIZE 16 struct remap_data { @@ -2438,7 +2570,9 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long mfn, int nr, - pgprot_t prot, unsigned domid) + pgprot_t prot, unsigned domid, + struct page **pages) + { struct remap_data rmd; struct mmu_update mmu_update[REMAP_BATCH_SIZE]; @@ -2446,14 +2580,17 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long range; int err = 0; - if (xen_feature(XENFEAT_auto_translated_physmap)) - return -EINVAL; - prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == (VM_PFNMAP | VM_RESERVED | VM_IO))); + if (xen_feature(XENFEAT_auto_translated_physmap)) { + /* We need to update the local page tables and the xen HAP */ + return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid, + pages); + } + rmd.mfn = mfn; rmd.prot = prot; @@ -2483,3 +2620,25 @@ out: return err; } EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); + +/* Returns: 0 success */ +int xen_unmap_domain_mfn_range(struct vm_area_struct *vma) +{ + int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + struct page **pages = vma ? vma->vm_private_data : NULL; + + if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + + while (numpgs--) { + + /* the mmu has already cleaned up the process mmu resources at + * this point (lookup_address will return NULL). */ + unsigned long pfn = page_to_pfn(pages[numpgs]); + + pvh_rem_xen_p2m(pfn, 1); + } + flush_tlb_all(); + return 0; +} +EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);

Attachment: priv.diff
Description: Text Data

Attachment: mmu.diff
Description: Text Data

_______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.