
Re: [Xen-devel] [PATCH v02 1/7] arm: introduce remoteprocessor iommu module



On Thu, 26 Jun 2014, Andrii Tseglytskyi wrote:
> This is the first patch of a series developed to handle
> the memory management units of remote (external)
> processors. Remote processors are typically used for
> graphics rendering (GPUs) and high quality video
> decoding (IPUs). They are typically found on multimedia
> SoCs such as OMAP4 / OMAP5.
> 
> Since a remote processor MMU works with pagetables
> filled with physical addresses allocated by the domU
> kernel, they are almost impossible to use under Xen
> as-is: the intermediate physical addresses allocated
> by the kernel need to be translated to machine
> addresses.
> 
> This patch introduces a simple framework to perform
> pfn -> mfn translation for external MMUs, along with
> the basic data structures and algorithms needed for
> the translation.
> 
> Typically, when an MMU is configured, some of its
> registers are updated with new values. The introduced
> framework uses traps on these register writes as the
> starting point of remoteproc MMU pagetable translation.
> 
> Change-Id: Ia4d311a40289df46a003f5ae8706c150bee1885d
> Signed-off-by: Andrii Tseglytskyi <andrii.tseglytskyi@xxxxxxxxxxxxxxx>

There is one problem with this patch: you need to find a way to "pin"
the p2m entries for the pfns and mfns found in the pagetables translated
by remoteproc_iommu.c.  Otherwise Xen might decide to change the pfn to
mfn mappings for the domain, breaking the pagetables already translated
by remoteproc_iommu.c.  pfn to mfn mappings are not guaranteed to be
stable. At the moment Xen on ARM wouldn't change pfn to mfn mappings
under the guest's feet, but features like memory sharing or swapping,
supported on x86, could cause it to happen.

In the past I tried to introduce a way to "pin" the mappings, but never
finished upstreaming the patches, because they were no longer needed.


Have a look at:

http://marc.info/?l=xen-devel&m=138029864707973

In particular you would need to introduce something like the pin
function and call it from mmu_translate_pagetable; a rough sketch
follows.
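
Something along these lines, as a sketch only: it assumes a pin helper
in the spirit of the guest_physmap_pin_range from that old series, so
the name and signature are illustrative, not an existing API:

static int mmu_pin_pagetable(struct mmu_info *mmu, struct mmu_pagetable *pgt)
{
    u32 i;
    int res;

    /* pin every pfn backing the guest pagetable, so that the p2m
     * entries cannot change under our feet once translated */
    for ( i = 0; i < MMU_PGD_TABLE_SIZE(mmu) / PAGE_SIZE; i++ )
    {
        /* hypothetical helper: mark the p2m entry as immovable */
        res = guest_physmap_pin_range(current->domain,
                                      paddr_to_pfn(pgt->paddr) + i, 0);
        if ( res )
            return res;
    }

    return 0;
}

The pfns referenced by the pagetable entries themselves would need the
same treatment, plus an unpin path when a pagetable is released.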



>  xen/arch/arm/Makefile              |    1 +
>  xen/arch/arm/remoteproc_iommu.c    |  412 ++++++++++++++++++++++++++++++++++++
>  xen/include/xen/remoteproc_iommu.h |   79 +++++++
>  3 files changed, 492 insertions(+)
>  create mode 100644 xen/arch/arm/remoteproc_iommu.c
>  create mode 100644 xen/include/xen/remoteproc_iommu.h
> 
> diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
> index d70f6d5..0204d1c 100644
> --- a/xen/arch/arm/Makefile
> +++ b/xen/arch/arm/Makefile
> @@ -15,6 +15,7 @@ obj-y += io.o
>  obj-y += irq.o
>  obj-y += kernel.o
>  obj-y += mm.o
> +obj-y += remoteproc_iommu.o
>  obj-y += p2m.o
>  obj-y += percpu.o
>  obj-y += guestcopy.o
> diff --git a/xen/arch/arm/remoteproc_iommu.c b/xen/arch/arm/remoteproc_iommu.c
> new file mode 100644
> index 0000000..b4d22d9
> --- /dev/null
> +++ b/xen/arch/arm/remoteproc_iommu.c
> @@ -0,0 +1,412 @@
> +/*
> + * xen/arch/arm/remoteproc_iommu.c
> + *
> + * Andrii Tseglytskyi <andrii.tseglytskyi@xxxxxxxxxxxxxxx>
> + * Copyright (c) 2014 GlobalLogic
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <xen/config.h>
> +#include <xen/lib.h>
> +#include <xen/errno.h>
> +#include <xen/mm.h>
> +#include <xen/vmap.h>
> +#include <xen/init.h>
> +#include <xen/sched.h>
> +#include <xen/stdbool.h>
> +#include <asm/system.h>
> +#include <asm/current.h>
> +#include <asm/io.h>
> +#include <asm/p2m.h>
> +
> +#include <xen/remoteproc_iommu.h>
> +
> +#include "io.h"
> +
> +static struct mmu_info *mmu_list[] = {
> +};
> +
> +#define mmu_for_each(pfunc, data)                       \
> +({                                                      \
> +    u32 __i;                                            \
> +    int __res = 0;                                      \
> +                                                        \
> +    for ( __i = 0; __i < ARRAY_SIZE(mmu_list); __i++ )  \
> +    {                                                   \
> +        __res |= pfunc(mmu_list[__i], data);            \
> +    }                                                   \
> +    __res;                                              \
> +})
> +
> +static int mmu_check_mem_range(struct mmu_info *mmu, paddr_t addr)
> +{
> +    if ( (addr >= mmu->mem_start) && (addr < (mmu->mem_start + mmu->mem_size)) )
> +        return 1;
> +
> +    return 0;
> +}
> +
> +static inline struct mmu_info *mmu_lookup(u32 addr)
> +{
> +    u32 i;
> +
> +    /* enumerate all registered MMUs and check whether the address is in range */
> +    for ( i = 0; i < ARRAY_SIZE(mmu_list); i++ )
> +    {
> +        if ( mmu_check_mem_range(mmu_list[i], addr) )
> +            return mmu_list[i];
> +    }
> +
> +    return NULL;
> +}
> +
> +static int mmu_mmio_check(struct vcpu *v, paddr_t addr)
> +{
> +    return mmu_for_each(mmu_check_mem_range, addr);
> +}
> +
> +static int mmu_copy_pagetable(struct mmu_info *mmu, struct mmu_pagetable *pgt)
> +{
> +    void __iomem *pagetable = NULL;
> +    u32 maddr, i;
> +
> +    ASSERT(mmu);
> +    ASSERT(pgt);
> +
> +    if ( !pgt->paddr )
> +        return -EINVAL;
> +
> +    /* pagetable size can be more than one page */
> +    for ( i = 0; i < MMU_PGD_TABLE_SIZE(mmu) / PAGE_SIZE; i++ )
> +    {
> +        /* lookup address where remoteproc pagetable is stored by kernel */
> +        maddr = p2m_lookup(current->domain, pgt->paddr + i * PAGE_SIZE, NULL);
> +        if ( !maddr )
> +        {
> +            pr_mmu("failed to translate 0x%08lx to maddr", pgt->paddr + i * PAGE_SIZE);
> +            return -EINVAL;
> +        }
> +
> +        pagetable = ioremap_nocache(maddr, MMU_PGD_TABLE_SIZE(mmu));
> +        if ( !pagetable )
> +        {
> +            pr_mmu("failed to map pagetable");
> +            return -EINVAL;
> +        }
> +
> +        /* copy pagetable to hypervisor memory */
> +        clean_and_invalidate_xen_dcache_va_range(pagetable, PAGE_SIZE);
> +        memcpy((u32*)((u32)pgt->kern_pagetable + i * PAGE_SIZE), pagetable, PAGE_SIZE);
> +
> +        iounmap(pagetable);
> +    }
> +
> +    return 0;
> +}
> +
> +struct mmu_pagetable *mmu_pagetable_lookup(struct mmu_info *mmu, u32 addr, bool is_maddr)
> +{
> +    struct mmu_pagetable *pgt;
> +    u32 pgt_addr;
> +
> +    list_for_each_entry(pgt, &mmu->pagetables_list, link_node)
> +    {
> +        if ( is_maddr )
> +            pgt_addr = pgt->maddr;
> +        else
> +            pgt_addr = pgt->paddr;
> +
> +        if ( pgt_addr == addr )
> +            return pgt;
> +    }
> +
> +    return NULL;
> +}
> +
> +static struct mmu_pagetable *mmu_alloc_pagetable(struct mmu_info *mmu, u32 paddr)
> +{
> +    struct mmu_pagetable *pgt;
> +    u32 pgt_size = MMU_PGD_TABLE_SIZE(mmu);
> +
> +    pgt = xzalloc_bytes(sizeof(struct mmu_pagetable));
> +    if ( !pgt )
> +    {
> +        pr_mmu("failed to alloc pagetable structure");
> +        return NULL;
> +    }
> +
> +    /* allocate pagetable managed by hypervisor */
> +    pgt->hyp_pagetable = xzalloc_bytes(pgt_size);
> +    if ( !pgt->hyp_pagetable )
> +    {
> +        pr_mmu("failed to alloc private hyp_pagetable");
> +        return NULL;
> +    }
> +
> +    /* allocate pagetable for IPA storing */
> +    pgt->kern_pagetable = xzalloc_bytes(pgt_size);
> +    if ( !pgt->kern_pagetable )
> +    {
> +        pr_mmu("failed to alloc private kern_pagetable");
> +        return NULL;
> +    }
> +
> +    pr_mmu("private pagetables for 0x%08x paddr %u bytes (main 0x%08x, temp 
> 0x%08x)",
> +           paddr, pgt_size, (u32)__pa(pgt->hyp_pagetable), 
> (u32)__pa(pgt->kern_pagetable));
> +
> +    pgt->paddr = paddr;
> +
> +    list_add(&pgt->link_node, &mmu->pagetables_list);
> +
> +    return pgt;
> +}
> +
> +static u32 mmu_translate_pagetable(struct mmu_info *mmu, u32 paddr)
> +{
> +    struct mmu_pagetable *pgt;
> +    int res;
> +
> +    /* lookup using machine address first */
> +    pgt = mmu_pagetable_lookup(mmu, paddr, true);
> +    if ( !pgt )
> +    {
> +        /* lookup using kernel physical address */
> +        pgt = mmu_pagetable_lookup(mmu, paddr, false);
> +        if ( !pgt )
> +        {
> +            /* if the pagetable doesn't exist in the lookup list - allocate it */
> +            pgt = mmu_alloc_pagetable(mmu, paddr);
> +        }
> +    }
> +
> +    pgt->maddr = MMU_INVALID_ADDRESS;
> +
> +    /* copy pagetable from domain to hypervisor */
> +    res = mmu_copy_pagetable(mmu, pgt);
> +     if ( res )
> +        return res;
> +
> +    /* translate pagetable */
> +    pgt->maddr = mmu->translate_pfunc(mmu, pgt);
> +    return pgt->maddr;
> +}
> +
> +static u32 mmu_trap_translate_pagetable(struct mmu_info *mmu, mmio_info_t *info)
> +{
> +    register_t *reg;
> +    bool valid_trap = false;
> +    u32 i, paddr;
> +
> +    reg = select_user_reg(guest_cpu_user_regs(), info->dabt.reg);
> +
> +    ASSERT(reg);
> +
> +    paddr = *reg;
> +    if ( !paddr )
> +        return MMU_INVALID_ADDRESS;
> +
> +    /* check whether the register is a valid TTB register */
> +    for ( i = 0; i < mmu->num_traps; i++ )
> +    {
> +        if ( mmu->trap_offsets[i] == (info->gpa - mmu->mem_start) )
> +        {
> +            valid_trap = true;
> +            break;
> +        }
> +    }
> +
> +    if ( !valid_trap )
> +        return MMU_INVALID_ADDRESS;
> +
> +    return mmu_translate_pagetable(mmu, paddr);
> +}
> +
> +u32 mmu_translate_second_level(struct mmu_info *mmu, struct mmu_pagetable *pgt,
> +                               u32 maddr, u32 hyp_addr)

Is this actually used anywhere?


> +{
> +    u32 *pte_table = NULL, *hyp_pte_table = NULL, pte_table_size = PAGE_SIZE;
> +    u32 i;
> +
> +    /* remap second level translation table */
> +    pte_table = ioremap_nocache(maddr, MMU_PTE_TABLE_SIZE(mmu));
> +    if ( !pte_table )
> +    {
> +        pr_mmu("failed to map pte_table");
> +        return MMU_INVALID_ADDRESS;
> +    }
> +
> +    /* allocate new second level pagetable once */
> +    if ( 0 == hyp_addr )
> +    {
> +        if ( MMU_PTE_TABLE_SIZE(mmu) > PAGE_SIZE )
> +            pte_table_size = MMU_PTE_TABLE_SIZE(mmu);
> +
> +        hyp_pte_table = xzalloc_bytes(pte_table_size);
> +        if ( !hyp_pte_table )
> +        {
> +            pr_mmu("failed to alloc new iopt");
> +            return MMU_INVALID_ADDRESS;
> +        }
> +    }
> +    else
> +    {
> +        hyp_pte_table = __va(hyp_addr & MMU_SECTION_MASK(mmu->pg_data->pte_shift));
> +    }
> +
> +    /* 2-nd level translation */
> +    for ( i = 0; i < MMU_PTRS_PER_PTE(mmu); i++ )
> +    {
> +        paddr_t pt_maddr, pt_paddr, pt_flags;
> +        u32 pt_mask = MMU_SECTION_MASK(mmu->pg_data->pte_shift);
> +
> +        if ( !pte_table[i] )
> +        {
> +            /* handle the case when page was removed */
> +            if ( unlikely(hyp_pte_table[i]) )
> +            {
> +                hyp_pte_table[i] = 0;
> +            }
> +
> +            continue;
> +        }
> +
> +        pt_paddr = pte_table[i] & pt_mask;
> +        pt_flags = pte_table[i] & ~pt_mask;
> +        pt_maddr = p2m_lookup(current->domain, pt_paddr, NULL);
> +        ASSERT(pt_maddr != INVALID_PADDR);
> +
> +        hyp_pte_table[i] = pt_maddr | pt_flags;
> +        pgt->page_counter++;
> +    }
> +
> +    iounmap(pte_table);
> +
> +    clean_and_invalidate_xen_dcache_va_range(hyp_pte_table, MMU_PTE_TABLE_SIZE(mmu));
> +    return __pa(hyp_pte_table);
> +}
> +
> +static int mmu_mmio_read(struct vcpu *v, mmio_info_t *info)
> +{
> +    struct mmu_info *mmu = NULL;
> +    unsigned long flags;
> +    register_t *r;
> +
> +    r = select_user_reg(guest_cpu_user_regs(), info->dabt.reg);
> +
> +    ASSERT(r);

If you check on the domid in mmu_mmio_write, I would add the same check
here too, e.g. (sketch only, mirroring whatever policy the write side
ends up with; see also my comment below about generalizing the check):
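
    /* mirror the write side: don't let dom0 poke the remoteproc MMU */
    if ( 0 == current->domain->domain_id )
    {
        *r = 0;
        return 1;
    }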


> +    mmu = mmu_lookup(info->gpa);
> +    if ( !mmu )
> +    {
> +        pr_mmu("can't get mmu for addr 0x%08x", (u32)info->gpa);
> +        return -EINVAL;
> +    }
> +
> +    spin_lock_irqsave(&mmu->lock, flags);
> +    *r = readl(mmu->mem_map + ((u32)(info->gpa) - mmu->mem_start));
> +    spin_unlock_irqrestore(&mmu->lock, flags);
> +
> +    return 1;

It looks like you are returning the mfns here, is that correct?
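
If so, you probably don't want to leak machine addresses to the guest.
A possible shape, reusing your mmu_pagetable_lookup (sketch only):

    struct mmu_pagetable *pgt;
    u32 val;

    val = readl(mmu->mem_map + ((u32)(info->gpa) - mmu->mem_start));

    /* if the register holds a translated pagetable address, hand the
     * guest back its original paddr rather than the maddr */
    pgt = mmu_pagetable_lookup(mmu, val, true);
    *r = pgt ? pgt->paddr : val;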



> +}
> +
> +static int mmu_mmio_write(struct vcpu *v, mmio_info_t *info)
> +{
> +    struct mmu_info *mmu = NULL;
> +    unsigned long flags;
> +    register_t *r;
> +    u32 new_addr, val;
> +
> +    r = select_user_reg(guest_cpu_user_regs(), info->dabt.reg);
> +
> +    ASSERT(r);
> +
> +    /* dom0 should not access remoteproc MMU */
> +    if ( 0 == current->domain->domain_id )
> +        return 1;

This is too specific to one particular configuration.
Would it be possible to generalize this somehow? At the very least you
could introduce an XSM label to access the pagetables, so that you can
dynamically configure the domains that can write to them. For example:
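
    /* sketch only: xsm_remoteproc_iommu would be a new hook to define,
     * the name is made up; the point is to let the security policy,
     * not a hardcoded domid, decide who may write these registers */
    if ( xsm_remoteproc_iommu(XSM_PRIV, current->domain) != 0 )
        return 1;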


> +    /* find corresponding MMU */
> +    mmu = mmu_lookup(info->gpa);
> +    if ( !mmu )
> +    {
> +        pr_mmu("can't get mmu for addr 0x%08x", (u32)info->gpa);
> +        return -EINVAL;
> +    }
> +
> +    ASSERT(v->domain == current->domain);

You can remove this assert.


> +    spin_lock_irqsave(&mmu->lock, flags);
> +
> +    /* get new address of translated pagetable */
> +    new_addr = mmu_trap_translate_pagetable(mmu, info);
> +    if ( MMU_INVALID_ADDRESS != new_addr )
> +        val = new_addr;
> +    else
> +        val = *r;
> +
> +    writel(val, mmu->mem_map + ((u32)(info->gpa) - mmu->mem_start));
> +    spin_unlock_irqrestore(&mmu->lock, flags);
> +
> +    return 1;
> +}
> +
> +static int mmu_init(struct mmu_info *mmu, u32 data)
> +{
> +    ASSERT(mmu);
> +    ASSERT(!mmu->mem_map);
> +
> +    INIT_LIST_HEAD(&mmu->pagetables_list);
> +
> +    /* map MMU memory */
> +    mmu->mem_map = ioremap_nocache(mmu->mem_start, mmu->mem_size);
> +     if ( !mmu->mem_map )
> +    {
> +        pr_mmu("failed to map memory");
> +        return -EINVAL;
> +    }
> +
> +    pr_mmu("memory map = 0x%pS", _p(mmu->mem_map));
> +
> +    spin_lock_init(&mmu->lock);
> +
> +    return 0;
> +}
> +
> +static int mmu_init_all(void)
> +{
> +    int res;
> +
> +    res = mmu_for_each(mmu_init, 0);
> +    if ( res )
> +    {
> +        printk("%s error during init %d\n", __func__, res);
> +        return res;
> +    }
> +
> +    return 0;
> +}
> +
> +const struct mmio_handler remoteproc_mmio_handler = {
> +    .check_handler = mmu_mmio_check,
> +    .read_handler  = mmu_mmio_read,
> +    .write_handler = mmu_mmio_write,
> +};
> +
> +__initcall(mmu_init_all);
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/xen/remoteproc_iommu.h b/xen/include/xen/remoteproc_iommu.h
> new file mode 100644
> index 0000000..22e2951
> --- /dev/null
> +++ b/xen/include/xen/remoteproc_iommu.h
> @@ -0,0 +1,79 @@
> +/*
> + * xen/include/xen/remoteproc_iommu.h
> + *
> + * Andrii Tseglytskyi <andrii.tseglytskyi@xxxxxxxxxxxxxxx>
> + * Copyright (c) 2014 GlobalLogic
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#ifndef _REMOTEPROC_IOMMU_H_
> +#define _REMOTEPROC_IOMMU_H_
> +
> +#include <asm/types.h>
> +
> +#define MMU_SECTION_SIZE(shift)     (1UL << (shift))
> +#define MMU_SECTION_MASK(shift)     (~(MMU_SECTION_SIZE(shift) - 1))
> +
> +/* 4096 first level descriptors for "supersection" and "section" */
> +#define MMU_PTRS_PER_PGD(mmu)       (1UL << (32 - (mmu->pg_data->pgd_shift)))
> +#define MMU_PGD_TABLE_SIZE(mmu)     (MMU_PTRS_PER_PGD(mmu) * sizeof(u32))
> +
> +/* 256 second level descriptors for "small" and "large" pages */
> +#define MMU_PTRS_PER_PTE(mmu)       (1UL << ((mmu->pg_data->pgd_shift) - (mmu->pg_data->pte_shift)))
> +#define MMU_PTE_TABLE_SIZE(mmu)     (MMU_PTRS_PER_PTE(mmu) * sizeof(u32))
> +
> +/* 16 sections in supersection */
> +#define MMU_SECTION_PER_SUPER(mmu)  (1UL << ((mmu->pg_data->super_shift) - (mmu->pg_data->section_shift)))
> +
> +#define MMU_INVALID_ADDRESS ((u32)(-1))
> +
> +#define pr_mmu(fmt, ...) \
> +     printk("%s: %s: "fmt"\n", __func__, ((mmu) ? (mmu)->name : ""), ##__VA_ARGS__)
> +
> +struct pagetable_data {
> +     /* 1st level translation */
> +     u32 pgd_shift;
> +     u32 pte_shift;
> +     u32 super_shift;
> +     u32 section_shift;
> +     /* 2nd level translation */
> +     u32 pte_large_shift;
> +};
> +
> +struct mmu_pagetable {
> +     u32                                     *hyp_pagetable;
> +     u32                                     *kern_pagetable;
> +     u32                                     paddr;
> +     u32                                     maddr;
> +     struct list_head        link_node;
> +     u32                                     page_counter;
> +};
> +
> +struct mmu_info {
> +     const char                              *name;
> +     const struct pagetable_data *pg_data;
> +     /* register where phys pointer to pagetable is stored */
> +     u32                                     *trap_offsets;
> +     paddr_t                         mem_start;
> +     u32                                     mem_size;
> +     spinlock_t                      lock;
> +     struct list_head        pagetables_list;
> +     u32                                     num_traps;
> +     void __iomem            *mem_map;
> +     u32     (*translate_pfunc)(struct mmu_info *, struct mmu_pagetable *);
> +     void (*print_pagetable_pfunc)(struct mmu_info *);
> +};
> +
> +u32 mmu_translate_second_level(struct mmu_info *mmu, struct mmu_pagetable *pgt,
> +                               u32 maddr, u32 hyp_addr);
> +
> +#endif /* _REMOTEPROC_IOMMU_H_ */
> -- 
> 1.7.9.5

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel