Re: [Xen-devel] [PATCHv2] x86/xen: avoid m2p lookup when setting early page table entries
On 06/22/2016 07:19 AM, David Vrabel wrote:
> On 22/06/16 11:54, David Vrabel wrote:
>> On 21/06/16 20:31, Boris Ostrovsky wrote:
>>> On 06/21/2016 12:09 PM, David Vrabel wrote:
>>>> When page tables entries are set using xen_set_pte_init() during early
>>>> boot there is no page fault handler that could handle a fault when
>>>> performing an M2P lookup.
>>>>
>>>> In a 64-bit guest (usually dom0), early_ioremap() would fault in
>>>> xen_set_pte_init() because the M2P lookup faults: the MFN is in
>>>> MMIO space and not mapped in the M2P. This lookup is done to see
>>>> if the PFN is in the range used for the initial page table pages,
>>>> so that the PTE may be set as read-only.
>>>>
>>>> The M2P lookup can be avoided by moving the check (and clear of RW)
>>>> earlier when the PFN is still available.
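Some context on why the lookup faults rather than failing cleanly: the M2P
table is a hypervisor-maintained array indexed by MFN, and MFNs in MMIO
space simply have no entry in it. A rough sketch of the access pattern,
paraphrasing the kernel's mfn_to_pfn() rather than quoting it:

    /*
     * The load from machine_to_phys_mapping[] faults for an unmapped
     * MFN. __get_user() normally recovers via the exception-table
     * fixup, but that path needs a working page fault handler --
     * which is not yet installed at this point in early boot.
     */
    unsigned long pfn;
    if (__get_user(pfn, &machine_to_phys_mapping[mfn]))
            pfn = ~0UL;     /* unmapped M2P entry: treat as invalid */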
>> [...]
>>>> --- a/arch/x86/xen/mmu.c
>>>> +++ b/arch/x86/xen/mmu.c
>>>> @@ -1562,7 +1562,7 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
>>>> return pte;
>>>> }
>>>> #else /* CONFIG_X86_64 */
>>>> -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
>>>> +static pteval_t __init mask_rw_pte(pteval_t pte)
>>>> {
>>>> unsigned long pfn;
>>>>
>>>> @@ -1577,10 +1577,10 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
>>>> * page tables for mapping the p2m list, too, and page tables MUST be
>>>> * mapped read-only.
>>>> */
>>>> - pfn = pte_pfn(pte);
>>>> + pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;
>>> Is it obvious that we are holding valid PFN at this point? It wasn't
>>> immediately obvious to me so I wonder whether a comment stating this
>>> would be useful here (yes, you mention it in the commit messages).
>> I don't understand what you mean by a "valid PFN"?
>>
>> This is only called from xen_make_pte_init(), which is for converting
>> ptevals containing PFNs to MFNs. Does that answer your question?
Yes.
>>
>> Would it be clearer if I just inlined the two functions like so:
Yes, I think it's much better this way. Otherwise, just by looking at
mask_rw_pte(), it may not be clear that we can get the PFN directly from
the pte (which is what I meant when I said "valid PFN").
Not to mention that now we won't have two versions of mask_rw_pte().
-boris
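
As background for the PFN-to-MFN conversion discussed above:
xen_make_pte_init() ends by calling pte_pfn_to_mfn(), which swaps the
frame number in the pteval while preserving the flag bits. A minimal,
self-contained sketch of that substitution -- the toy_p2m array and the
constants here are invented stand-ins for the real p2m table and kernel
defines:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT    12
    #define PTE_PFN_MASK  0x000ffffffffff000ULL
    #define _PAGE_PRESENT 0x1ULL

    /* Invented pfn -> mfn mapping standing in for the real p2m table. */
    static const uint64_t toy_p2m[] = { 0x80, 0x81, 0x95, 0x7f };

    static uint64_t pte_pfn_to_mfn_sketch(uint64_t val)
    {
            if (val & _PAGE_PRESENT) {
                    uint64_t pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
                    uint64_t flags = val & ~PTE_PFN_MASK;

                    /* Replace the frame number, keep the flag bits. */
                    val = (toy_p2m[pfn] << PAGE_SHIFT) | flags;
            }
            return val;
    }

    int main(void)
    {
            uint64_t pte = (2ULL << PAGE_SHIFT) | _PAGE_PRESENT;

            printf("pfn-based pte %#llx -> mfn-based pte %#llx\n",
                   (unsigned long long)pte,
                   (unsigned long long)pte_pfn_to_mfn_sketch(pte));
            return 0;
    }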
> It would help if I included the right diff.
>
> 8<---------------------
> x86/xen: avoid m2p lookup when setting early page table entries
>
> When page tables entries are set using xen_set_pte_init() during early
> boot there is no page fault handler that could handle a fault when
> performing an M2P lookup.
>
> In a 64-bit guest (usually dom0), early_ioremap() would fault in
> xen_set_pte_init() because the M2P lookup faults: the MFN is in
> MMIO space and not mapped in the M2P. This lookup is done to see
> if the PFN is in the range used for the initial page table pages,
> so that the PTE may be set as read-only.
>
> The M2P lookup can be avoided by moving the check (and clear of RW)
> earlier when the PFN is still available.
>
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> Tested-by: Kevin Moraga <kmoragas@xxxxxxxxxx>
> ---
> v3:
> - fold mask_rw_pte()/mask_rw_pteval() into their callers.
>
> v2:
> - Remove __init annotation from xen_make_pte_init() since
> PV_CALLEE_SAVE_REGS_THUNK always puts the thunk in .text.
>
> - mask_rw_pte() -> mask_rw_pteval() for x86-64.
> ---
> arch/x86/xen/mmu.c | 76 +++++++++++++++++++++++++-----------------------------
> 1 file changed, 35 insertions(+), 41 deletions(-)
>
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index 478a2de..64d8f0b 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -1551,41 +1551,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
> #endif
> }
>
> -#ifdef CONFIG_X86_32
> -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
> -{
> - /* If there's an existing pte, then don't allow _PAGE_RW to be set */
> - if (pte_val_ma(*ptep) & _PAGE_PRESENT)
> - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
> - pte_val_ma(pte));
> -
> - return pte;
> -}
> -#else /* CONFIG_X86_64 */
> -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
> -{
> - unsigned long pfn;
> -
> - if (xen_feature(XENFEAT_writable_page_tables) ||
> - xen_feature(XENFEAT_auto_translated_physmap) ||
> - xen_start_info->mfn_list >= __START_KERNEL_map)
> - return pte;
> -
> - /*
> - * Pages belonging to the initial p2m list mapped outside the default
> - * address range must be mapped read-only. This region contains the
> - * page tables for mapping the p2m list, too, and page tables MUST be
> - * mapped read-only.
> - */
> - pfn = pte_pfn(pte);
> - if (pfn >= xen_start_info->first_p2m_pfn &&
> - pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
> - pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW);
> -
> - return pte;
> -}
> -#endif /* CONFIG_X86_64 */
> -
> /*
> * Init-time set_pte while constructing initial pagetables, which
> * doesn't allow RO page table pages to be remapped RW.
> @@ -1600,13 +1565,41 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
> * so always write the PTE directly and rely on Xen trapping and
> * emulating any updates as necessary.
> */
> -static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
> +__visible pte_t xen_make_pte_init(pteval_t pte)
> {
> - if (pte_mfn(pte) != INVALID_P2M_ENTRY)
> - pte = mask_rw_pte(ptep, pte);
> - else
> - pte = __pte_ma(0);
> +#ifdef CONFIG_X86_64
> + unsigned long pfn;
> +
> + /*
> + * Pages belonging to the initial p2m list mapped outside the default
> + * address range must be mapped read-only. This region contains the
> + * page tables for mapping the p2m list, too, and page tables MUST be
> + * mapped read-only.
> + */
> + pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;
> + if (xen_start_info->mfn_list < __START_KERNEL_map &&
> + pfn >= xen_start_info->first_p2m_pfn &&
> + pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
> + pte &= ~_PAGE_RW;
> +#endif
> + pte = pte_pfn_to_mfn(pte);
>
> + if ((pte & PTE_PFN_MASK) >> PAGE_SHIFT == INVALID_P2M_ENTRY)
> + pte = 0;
> +
> + return native_make_pte(pte);
> +}
> +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
> +
> +static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
> +{
> +#ifdef CONFIG_X86_32
> + /* If there's an existing pte, then don't allow _PAGE_RW to be set */
> + if (pte_mfn(pte) != INVALID_P2M_ENTRY
> + && pte_val_ma(*ptep) & _PAGE_PRESENT)
> + pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
> + pte_val_ma(pte));
> +#endif
> native_set_pte(ptep, pte);
> }
>
> @@ -2407,6 +2400,7 @@ static void __init xen_post_allocator_init(void)
> pv_mmu_ops.alloc_pud = xen_alloc_pud;
> pv_mmu_ops.release_pud = xen_release_pud;
> #endif
> + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte);
>
> #ifdef CONFIG_X86_64
> pv_mmu_ops.write_cr3 = &xen_write_cr3;
> @@ -2455,7 +2449,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
> .pte_val = PV_CALLEE_SAVE(xen_pte_val),
> .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
>
> - .make_pte = PV_CALLEE_SAVE(xen_make_pte),
> + .make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
> .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
>
> #ifdef CONFIG_X86_PAE
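
Putting the patched flow together (a reading of the diff, not text from
the thread): pfn_pte() now reaches xen_make_pte_init(), which clears
_PAGE_RW while the PFN is still in hand and only then converts PFN to
MFN; xen_set_pte_init() just writes the resulting PTE; and
xen_post_allocator_init() later switches make_pte back to the regular
xen_make_pte. A standalone sketch of the range check, with invented
values standing in for the xen_start_info fields:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT   12
    #define PTE_PFN_MASK 0x000ffffffffff000ULL
    #define _PAGE_RW     0x2ULL

    /* Invented stand-ins for xen_start_info->first_p2m_pfn and
     * xen_start_info->nr_p2m_frames. */
    static const uint64_t first_p2m_pfn = 0x1000;
    static const uint64_t nr_p2m_frames = 0x10;

    /* Clear _PAGE_RW for frames backing the initial p2m list, which
     * double as page table pages and so must be mapped read-only. */
    static uint64_t mask_rw_if_initial_p2m(uint64_t pte)
    {
            uint64_t pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;

            if (pfn >= first_p2m_pfn &&
                pfn < first_p2m_pfn + nr_p2m_frames)
                    pte &= ~_PAGE_RW;
            return pte;
    }

    int main(void)
    {
            uint64_t pte = (0x1004ULL << PAGE_SHIFT) | _PAGE_RW;

            printf("before %#llx, after %#llx\n",
                   (unsigned long long)pte,
                   (unsigned long long)mask_rw_if_initial_p2m(pte));
            return 0;
    }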