
Re: [Xen-devel] [RFC PATCH 3/8]: PVH: memory manager and paging related changes



On Thu, 16 Aug 2012, Mukesh Rathor wrote:
>  arch/x86/xen/mmu.c              |  179 ++++++++++++++++++++++++++++++++++++---
>  arch/x86/xen/mmu.h              |    2 +
>  include/xen/interface/memory.h  |   27 ++++++-
>  include/xen/interface/physdev.h |   10 ++
>  include/xen/xen-ops.h           |    7 ++
>  5 files changed, 211 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index b65a761..44a6477 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -330,6 +330,38 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval)
>         __xen_set_pte(ptep, pteval);
>  }
> 
> +/* This is for a PV guest in an HVM container. */
> +void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
> +                             int nr_mfns, int add_mapping)
> +{
> +       int rc;
> +       struct physdev_map_iomem iomem;
> +
> +       iomem.first_gfn = pfn;
> +       iomem.first_mfn = mfn;
> +       iomem.nr_mfns = nr_mfns;
> +       iomem.add_mapping = add_mapping;
> +
> +       rc = HYPERVISOR_physdev_op(PHYSDEVOP_pvh_map_iomem, &iomem);
> +       BUG_ON(rc);
> +}
> +
> +/* This is for a PV guest in an HVM container.
> + * We need this because during boot the early_ioremap path eventually calls
> + * set_pte to map I/O space. Also, ACPI pages are not mapped into the
> + * EPT during dom0 creation. The pages are mapped initially here from
> + * kernel_physical_mapping_init(); the memtype is changed later.  */
> +static void xen_dom0pvh_set_pte(pte_t *ptep, pte_t pteval)
> +{
> +       native_set_pte(ptep, pteval);
> +}
> +
> +static void xen_dom0pvh_set_pte_at(struct mm_struct *mm, unsigned long addr,
> +                                  pte_t *ptep, pte_t pteval)
> +{
> +       native_set_pte(ptep, pteval);
> +}
> +
>  static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
>                     pte_t *ptep, pte_t pteval)
>  {
> @@ -1197,6 +1229,10 @@ static void xen_post_allocator_init(void);
>  static void __init xen_pagetable_setup_done(pgd_t *base)
>  {
>         xen_setup_shared_info();
> +
> +       if (xen_pvh_domain())
> +               return;
> +
>         xen_post_allocator_init();
>  }
> 
> @@ -1652,6 +1688,10 @@ static void set_page_prot(void *addr, pgprot_t prot)
>         unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
>         pte_t pte = pfn_pte(pfn, prot);
> 
> +       /* for PVH, page tables are native. */
> +       if (xen_pvh_domain())
> +               return;
> +
>         if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
>                 BUG();
>  }
> @@ -1745,6 +1785,7 @@ static void convert_pfn_mfn(void *v)
>   * but that's enough to get __va working.  We need to fill in the rest
>   * of the physical mapping once some sort of allocator has been set
>   * up.
> + * NOTE: for PVH, the page tables are native with HAP required.
>   */
>  pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
>                                          unsigned long max_pfn)
> @@ -1761,10 +1802,12 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
>         /* Zap identity mapping */
>         init_level4_pgt[0] = __pgd(0);
> 
> -       /* Pre-constructed entries are in pfn, so convert to mfn */
> -       convert_pfn_mfn(init_level4_pgt);
> -       convert_pfn_mfn(level3_ident_pgt);
> -       convert_pfn_mfn(level3_kernel_pgt);
> +       if (!xen_pvh_domain()) {
> +               /* Pre-constructed entries are in pfn, so convert to mfn */
> +               convert_pfn_mfn(init_level4_pgt);
> +               convert_pfn_mfn(level3_ident_pgt);
> +               convert_pfn_mfn(level3_kernel_pgt);
> +       }
> 
>         l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
>         l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
> @@ -1787,12 +1830,14 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
>         set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
>         set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
> 
> -       /* Pin down new L4 */
> -       pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
> -                         PFN_DOWN(__pa_symbol(init_level4_pgt)));
> +       if (!xen_pvh_domain()) {
> +               /* Pin down new L4 */
> +               pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
> +                               PFN_DOWN(__pa_symbol(init_level4_pgt)));
> 
> -       /* Unpin Xen-provided one */
> -       pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
> +               /* Unpin Xen-provided one */
> +               pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
> +       }
> 
>         /* Switch over */
>         pgd = init_level4_pgt;
> @@ -1802,9 +1847,13 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
>          * structure to attach it to, so make sure we just set kernel
>          * pgd.
>          */
> -       xen_mc_batch();
> -       __xen_write_cr3(true, __pa(pgd));
> -       xen_mc_issue(PARAVIRT_LAZY_CPU);
> +       if (xen_pvh_domain()) {
> +               native_write_cr3(__pa(pgd));
> +       } else {
> +               xen_mc_batch();
> +               __xen_write_cr3(true, __pa(pgd));
> +               xen_mc_issue(PARAVIRT_LAZY_CPU);
> +       }
> 
>         memblock_reserve(__pa(xen_start_info->pt_base),
>                          xen_start_info->nr_pt_frames * PAGE_SIZE);
> @@ -2067,9 +2116,21 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
> 
>  void __init xen_init_mmu_ops(void)
>  {
> +       x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
> +
> +       if (xen_pvh_domain()) {
> +               pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
> +
> +               /* set_pte* for PCI devices to map iomem. */
> +               if (xen_initial_domain()) {
> +                       pv_mmu_ops.set_pte = xen_dom0pvh_set_pte;
> +                       pv_mmu_ops.set_pte_at = xen_dom0pvh_set_pte_at;
> +               }
> +               return;
> +       }

Considering that the implementation of xen_dom0pvh_set_pte is just
native_set_pte, can't we leave it as the default, which is already
native_set_pte?

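A minimal sketch of what I mean (untested; it assumes the boot-time
pv_mmu_ops defaults in paravirt.c, which already point at native_set_pte
and native_set_pte_at, are sufficient for the PVH dom0 iomem case):

    void __init xen_init_mmu_ops(void)
    {
            x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;

            if (xen_pvh_domain()) {
                    pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
                    /*
                     * No set_pte/set_pte_at overrides: we return before
                     * pv_mmu_ops is replaced with xen_mmu_ops below, so
                     * the native defaults from paravirt.c stay in place.
                     */
                    return;
            }

            /* ... the PV path below is unchanged ... */
    }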

>         x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
>         x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
> -       x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
>         pv_mmu_ops = xen_mmu_ops;
> 
>         memset(dummy_mapping, 0xff, PAGE_SIZE);
> @@ -2305,6 +2366,93 @@ void __init xen_hvm_init_mmu_ops(void)
>  }
>  #endif
> 
> +/* Map a foreign gmfn, fgmfn, to a local pfn, lpfn. This is for user space
> + * on PVH dom0 creating a new guest, which needs to map domU pages. Called
> + * from an exported function, so no need to export this.
> + */
> +static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
> +                             unsigned int domid)
> +{
> +       int rc;
> +       struct xen_add_to_physmap pmb = {.foreign_domid = domid};
> +
> +       pmb.gpfn = lpfn;
> +       pmb.idx = fgmfn;
> +       pmb.space = XENMAPSPACE_gmfn_foreign;
> +       rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &pmb);
> +       if (rc) {
> +               pr_warn("Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n",
> +                       rc, lpfn, fgmfn);
> +               return 1;
> +       }
> +       return 0;
> +}
> +
> +/* Unmap an entry from xen p2m table */
> +int pvh_rem_xen_p2m(unsigned long spfn, int count)
> +{
> +       struct xen_remove_from_physmap xrp;
> +       int i, rc;
> +
> +       for (i = 0; i < count; i++) {
> +               xrp.domid = DOMID_SELF;
> +               xrp.gpfn = spfn+i;
> +               rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
> +               if (rc) {
> +                       pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
> +                               spfn+i, rc, i);
> +                       return 1;
> +               }
> +       }
> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(pvh_rem_xen_p2m);
> +
> +struct pvh_remap_data {
> +       unsigned long fgmfn;            /* foreign domain's gmfn */
> +       pgprot_t prot;
> +       domid_t  domid;
> +       struct vm_area_struct *vma;
> +};
> +
> +static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
> +                       void *data)
> +{
> +       struct pvh_remap_data *pvhp = data;
> +       struct xen_pvh_sav_pfn_info *savp = pvhp->vma->vm_private_data;
> +       unsigned long pfn = page_to_pfn(savp->sp_paga[savp->sp_next_todo++]);
> +       pte_t pteval = pte_mkspecial(pfn_pte(pfn, pvhp->prot));
> +
> +       native_set_pte(ptep, pteval);
> +       if (pvh_add_to_xen_p2m(pfn, pvhp->fgmfn, pvhp->domid))
> +               return -EFAULT;
> +
> +       return 0;
> +}
> +
> +/* The only caller at the moment passes one gmfn at a time.
> + * PVH TBD/FIXME: expand this in the future to honor batch requests.
> + */
> +static int pvh_remap_gmfn_range(struct vm_area_struct *vma,
> +                               unsigned long addr, unsigned long mfn, int nr,
> +                               pgprot_t prot, unsigned domid)
> +{
> +       int err;
> +       struct pvh_remap_data pvhdata;
> +
> +       if (nr > 1)
> +               return -EINVAL;
> +
> +       pvhdata.fgmfn = mfn;
> +       pvhdata.prot = prot;
> +       pvhdata.domid = domid;
> +       pvhdata.vma = vma;
> +       err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
> +                                 pvh_map_pte_fn, &pvhdata);
> +       flush_tlb_all();
> +       return err;
> +}
> +
>  #define REMAP_BATCH_SIZE 16
> 
>  struct remap_data {
> @@ -2342,6 +2490,11 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
>         BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
>                                 (VM_PFNMAP | VM_RESERVED | VM_IO)));
> 
> +       if (xen_pvh_domain()) {
> +               /* We need to update the local page tables and the xen HAP */
> +               return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid);
> +       }
> +
>         rmd.mfn = mfn;
>         rmd.prot = prot;
> 
> diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
> index 73809bb..6d0bb56 100644
> --- a/arch/x86/xen/mmu.h
> +++ b/arch/x86/xen/mmu.h
> @@ -23,4 +23,6 @@ unsigned long xen_read_cr2_direct(void);
> 
>  extern void xen_init_mmu_ops(void);
>  extern void xen_hvm_init_mmu_ops(void);
> +extern void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
> +                                    int nr_mfns, int add_mapping);
>  #endif /* _XEN_MMU_H */
> diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
> index eac3ce1..1b213b1 100644
> --- a/include/xen/interface/memory.h
> +++ b/include/xen/interface/memory.h
> @@ -163,10 +163,19 @@ struct xen_add_to_physmap {
>      /* Which domain to change the mapping for. */
>      domid_t domid;
> 
> +    /* Number of pages to go through for gmfn_range */
> +    uint16_t    size;
> +
>      /* Source mapping space. */
>  #define XENMAPSPACE_shared_info 0 /* shared info page */
>  #define XENMAPSPACE_grant_table 1 /* grant table page */
> -    unsigned int space;
> +#define XENMAPSPACE_gmfn        2 /* GMFN */
> +#define XENMAPSPACE_gmfn_range  3 /* GMFN range */
> +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
> +    uint16_t space;
> +    domid_t foreign_domid;         /* IFF XENMAPSPACE_gmfn_foreign */
> +
> +#define XENMAPIDX_grant_table_status 0x80000000

As you have seen, I have a very similar patch in my series. 

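For reference, a caller would fill in the extended struct like this
(just a sketch, mirroring what pvh_add_to_xen_p2m above already does):

    struct xen_add_to_physmap xatp = {
            .domid         = DOMID_SELF,               /* change our own physmap */
            .space         = XENMAPSPACE_gmfn_foreign,
            .foreign_domid = domid,                    /* guest owning the page */
            .idx           = fgmfn,                    /* gmfn in that guest */
            .gpfn          = lpfn,                     /* local pfn to map it at */
    };
    rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);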

>      /* Index into source mapping space. */
>      unsigned long idx;
> @@ -234,4 +243,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
>   * during a driver critical region.
>   */
>  extern spinlock_t xen_reservation_lock;
> +
> +/*
> + * Unmaps the page appearing at a particular GPFN from the specified guest's
> + * pseudophysical address space.
> + * arg == addr of xen_remove_from_physmap_t.
> + */
> +#define XENMEM_remove_from_physmap      15
> +struct xen_remove_from_physmap {
> +    /* Which domain to change the mapping for. */
> +    domid_t domid;
> +
> +    /* GPFN of the current mapping of the page. */
> +    unsigned long     gpfn;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
> +
>  #endif /* __XEN_PUBLIC_MEMORY_H__ */
> diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
> index 9ce788d..80f792e 100644
> --- a/include/xen/interface/physdev.h
> +++ b/include/xen/interface/physdev.h
> @@ -258,6 +258,16 @@ struct physdev_pci_device {
>      uint8_t devfn;
>  };
> 
> +#define PHYSDEVOP_pvh_map_iomem        29
> +struct physdev_map_iomem {
> +    /* IN */
> +    uint64_t first_gfn;
> +    uint64_t first_mfn;
> +    uint32_t nr_mfns;
> +    uint32_t add_mapping;        /* 1 == add mapping;  0 == unmap */
> +
> +};
> +
>  /*
>   * Notify that some PIRQ-bound event channels have been unmasked.
>   * ** This command is obsolete since interface version 0x00030202 and is **
> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> index 6a198e4..fa595e1 100644
> --- a/include/xen/xen-ops.h
> +++ b/include/xen/xen-ops.h
> @@ -29,4 +29,11 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
>                                unsigned long mfn, int nr,
>                                pgprot_t prot, unsigned domid);
> 
> +struct xen_pvh_sav_pfn_info {
> +       struct page **sp_paga;  /* save pfn (info) page array */
> +       int sp_num_pgs;
> +       int sp_next_todo;
> +};
> +extern int pvh_rem_xen_p2m(unsigned long spfn, int count);
> +
>  #endif /* INCLUDE_XEN_OPS_H */
> --
> 1.7.2.3
> 
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
