[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/2] x86/xen: make mfn_to_pfn() work with MFNs that are 1:1 in the p2m



On Fri, Jan 03, 2014 at 04:45:22PM +0000, David Vrabel wrote:
> From: David Vrabel <david.vrabel@xxxxxxxxxx>
> 
> The _PAGE_IOMAP PTE flag is used to indicate that the PTE contains an
> MFN that is an identity frame (1:1) in the p2m.  This is so the
> correct conversion of MFN to PFN can be done when reading a PTE.
> 
> If mfn_to_pfn() returned the correct PFN instead, the _PAGE_IOMAP flag
> would not be required and could be removed.
> 
> In mfn_to_pfn() the PFN found in the M2P is checked in P2M.  If the
> two MFNs differ then the MFN is one of three possibilities:
> 
>   a) it's a foreign MFN with an m2p override.
> 
>   b) it's a foreign MFN with /no/ m2p override.
> 
>   c) it's an identity MFN.
> 
> It is not permitted to call mfn_to_pfn() on a foreign MFN without an
> override, as the resulting PFN would be incorrect.  We can therefore
> assume case (c) and return PFN == MFN.
> 
> [ This patch should probably be split into the Xen-specific parts and
> an x86 patch to remove _PAGE_IOMAP. ]

Yeah :-)

> 
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/pgtable_types.h |   12 +++---
>  arch/x86/include/asm/xen/page.h      |   18 +++++++---
>  arch/x86/mm/init_32.c                |    2 +-
>  arch/x86/mm/init_64.c                |    2 +-
>  arch/x86/pci/i386.c                  |    2 -
>  arch/x86/xen/enlighten.c             |    2 -
>  arch/x86/xen/mmu.c                   |   58 
> +++++-----------------------------
>  7 files changed, 28 insertions(+), 68 deletions(-)
> 
> diff --git a/arch/x86/include/asm/pgtable_types.h 
> b/arch/x86/include/asm/pgtable_types.h
> index 0ecac25..0b12657 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -17,7 +17,7 @@
>  #define _PAGE_BIT_PAT                7       /* on 4KB pages */
>  #define _PAGE_BIT_GLOBAL     8       /* Global TLB entry PPro+ */
>  #define _PAGE_BIT_UNUSED1    9       /* available for programmer */
> -#define _PAGE_BIT_IOMAP              10      /* flag used to indicate IO 
> mapping */
> +#define _PAGE_BIT_UNUSED2    10      /* available for programmer */
>  #define _PAGE_BIT_HIDDEN     11      /* hidden by kmemcheck */
>  #define _PAGE_BIT_PAT_LARGE  12      /* On 2MB or 1GB pages */
>  #define _PAGE_BIT_SPECIAL    _PAGE_BIT_UNUSED1
> @@ -41,7 +41,7 @@
>  #define _PAGE_PSE    (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
>  #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
>  #define _PAGE_UNUSED1        (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
> -#define _PAGE_IOMAP  (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
> +#define _PAGE_UNUSED2        (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED2)
>  #define _PAGE_PAT    (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
>  #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
>  #define _PAGE_SPECIAL        (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
> @@ -163,10 +163,10 @@
>  #define __PAGE_KERNEL_LARGE_NOCACHE  (__PAGE_KERNEL | _PAGE_CACHE_UC | 
> _PAGE_PSE)
>  #define __PAGE_KERNEL_LARGE_EXEC     (__PAGE_KERNEL_EXEC | _PAGE_PSE)
>  
> -#define __PAGE_KERNEL_IO             (__PAGE_KERNEL | _PAGE_IOMAP)
> -#define __PAGE_KERNEL_IO_NOCACHE     (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
> -#define __PAGE_KERNEL_IO_UC_MINUS    (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
> -#define __PAGE_KERNEL_IO_WC          (__PAGE_KERNEL_WC | _PAGE_IOMAP)
> +#define __PAGE_KERNEL_IO             (__PAGE_KERNEL)
> +#define __PAGE_KERNEL_IO_NOCACHE     (__PAGE_KERNEL_NOCACHE)
> +#define __PAGE_KERNEL_IO_UC_MINUS    (__PAGE_KERNEL_UC_MINUS)
> +#define __PAGE_KERNEL_IO_WC          (__PAGE_KERNEL_WC)
>  
>  #define PAGE_KERNEL                  __pgprot(__PAGE_KERNEL)
>  #define PAGE_KERNEL_RO                       __pgprot(__PAGE_KERNEL_RO)
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index b913915..eb11963 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -112,11 +112,18 @@ static inline unsigned long mfn_to_pfn(unsigned long 
> mfn)
>       pfn = mfn_to_pfn_no_overrides(mfn);
>       if (get_phys_to_machine(pfn) != mfn) {
>               /*
> -              * If this appears to be a foreign mfn (because the pfn
> -              * doesn't map back to the mfn), then check the local override
> -              * table to see if there's a better pfn to use.
> +              * This is either:
>                *
> -              * m2p_find_override_pfn returns ~0 if it doesn't find anything.
> +              * a) a foreign MFN with an override.
> +              *
> +              * b) a foreign MFN without an override.
> +              *
> +              * c) an identity MFN that is not in the the p2m.
> +              *
> +              * For (a), look in the m2p overrides.  For (b) and
> +              * (c) assume identify MFN since mfn_to_pfn() will
> +              * only be called on foreign MFNs iff they have
> +              * overrides.
>                */
>               pfn = m2p_find_override_pfn(mfn, ~0);
>       }
> @@ -126,8 +133,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
>        * entry doesn't map back to the mfn and m2p_override doesn't have a
>        * valid entry for it.
>        */
> -     if (pfn == ~0 &&
> -                     get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
> +     if (pfn == ~0)
>               pfn = mfn;
>  
>       return pfn;
> diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
> index 4287f1f..9031593 100644
> --- a/arch/x86/mm/init_32.c
> +++ b/arch/x86/mm/init_32.c
> @@ -537,7 +537,7 @@ static void __init pagetable_init(void)
>       permanent_kmaps_init(pgd_base);
>  }
>  
> -pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | 
> _PAGE_IOMAP);
> +pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
>  EXPORT_SYMBOL_GPL(__supported_pte_mask);
>  
>  /* user-defined highmem size */
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index 104d56a..68bf948 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -151,7 +151,7 @@ early_param("gbpages", parse_direct_gbpages_on);
>   * around without checking the pgd every time.
>   */
>  
> -pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
> +pteval_t __supported_pte_mask __read_mostly = ~0;
>  EXPORT_SYMBOL_GPL(__supported_pte_mask);
>  
>  int force_personality32;
> diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
> index db6b1ab..1f642d6 100644
> --- a/arch/x86/pci/i386.c
> +++ b/arch/x86/pci/i386.c
> @@ -433,8 +433,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct 
> vm_area_struct *vma,
>                */
>               prot |= _PAGE_CACHE_UC_MINUS;
>  
> -     prot |= _PAGE_IOMAP;    /* creating a mapping for IO */
> -
>       vma->vm_page_prot = __pgprot(prot);
>  
>       if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index fa6ade7..f9c2d71 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1458,8 +1458,6 @@ asmlinkage void __init xen_start_kernel(void)
>  #endif
>               __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
>  
> -     __supported_pte_mask |= _PAGE_IOMAP;
> -
>       /*
>        * Prevent page tables from being allocated in highmem, even
>        * if CONFIG_HIGHPTE is enabled.
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index ce563be..5fa77a1 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -368,11 +368,9 @@ static pteval_t pte_mfn_to_pfn(pteval_t val)
>       if (val & _PAGE_PRESENT) {
>               unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
>               unsigned long pfn = mfn_to_pfn(mfn);
> -
>               pteval_t flags = val & PTE_FLAGS_MASK;
> -             if (unlikely(pfn == ~0))
> -                     val = flags & ~_PAGE_PRESENT;
> -             else
> +
> +             if (likely(pfn != ~0))
>                       val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
>       }
>  
> @@ -390,6 +388,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
>                       mfn = get_phys_to_machine(pfn);
>               else
>                       mfn = pfn;
> +
>               /*
>                * If there's no mfn for the pfn, then just create an
>                * empty non-present pte.  Unfortunately this loses
> @@ -399,38 +398,15 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
>               if (unlikely(mfn == INVALID_P2M_ENTRY)) {
>                       mfn = 0;
>                       flags = 0;
> -             } else {
> -                     /*
> -                      * Paramount to do this test _after_ the
> -                      * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
> -                      * IDENTITY_FRAME_BIT resolves to true.
> -                      */
> -                     mfn &= ~FOREIGN_FRAME_BIT;
> -                     if (mfn & IDENTITY_FRAME_BIT) {
> -                             mfn &= ~IDENTITY_FRAME_BIT;
> -                             flags |= _PAGE_IOMAP;
> -                     }
> -             }
> +                     mfn = pfn;
> +             } else
> +                     mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
>               val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
>       }
>  
>       return val;
>  }
>  
> -static pteval_t iomap_pte(pteval_t val)
> -{
> -     if (val & _PAGE_PRESENT) {
> -             unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
> -             pteval_t flags = val & PTE_FLAGS_MASK;
> -
> -             /* We assume the pte frame number is a MFN, so
> -                just use it as-is. */
> -             val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
> -     }
> -
> -     return val;
> -}
> -
>  static pteval_t xen_pte_val(pte_t pte)
>  {
>       pteval_t pteval = pte.pte;
> @@ -441,9 +417,6 @@ static pteval_t xen_pte_val(pte_t pte)
>               pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
>       }
>  #endif
> -     if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
> -             return pteval;
> -
>       return pte_mfn_to_pfn(pteval);
>  }
>  PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
> @@ -481,7 +454,6 @@ void xen_set_pat(u64 pat)
>  
>  static pte_t xen_make_pte(pteval_t pte)
>  {
> -     phys_addr_t addr = (pte & PTE_PFN_MASK);
>  #if 0
>       /* If Linux is trying to set a WC pte, then map to the Xen WC.
>        * If _PAGE_PAT is set, then it probably means it is really
> @@ -496,19 +468,7 @@ static pte_t xen_make_pte(pteval_t pte)
>                       pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
>       }
>  #endif
> -     /*
> -      * Unprivileged domains are allowed to do IOMAPpings for
> -      * PCI passthrough, but not map ISA space.  The ISA
> -      * mappings are just dummy local mappings to keep other
> -      * parts of the kernel happy.
> -      */
> -     if (unlikely(pte & _PAGE_IOMAP) &&
> -         (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
> -             pte = iomap_pte(pte);
> -     } else {
> -             pte &= ~_PAGE_IOMAP;
> -             pte = pte_pfn_to_mfn(pte);
> -     }
> +     pte = pte_pfn_to_mfn(pte);
>  
>       return native_make_pte(pte);
>  }
> @@ -2084,7 +2044,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t 
> phys, pgprot_t prot)
>  
>       default:
>               /* By default, set_fixmap is used for hardware mappings */
> -             pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
> +             pte = mfn_pte(phys, prot);
>               break;
>       }
>  
> @@ -2524,8 +2484,6 @@ int xen_remap_domain_mfn_range(struct vm_area_struct 
> *vma,
>       if (xen_feature(XENFEAT_auto_translated_physmap))
>               return -EINVAL;
>  
> -     prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
> -
>       BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
>  
>       rmd.mfn = mfn;
> -- 
> 1.7.2.5
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.