[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH v1 2/7] iommu/arm: ipmmu-vmsa: Add Xen changes for main driver



Hi,

On 26/07/17 16:09, Oleksandr Tyshchenko wrote:
From: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>

Modify the Linux IPMMU driver to be functional inside Xen.
All devices within a single Xen domain must use the same
IOMMU context no matter what IOMMU domains they are attached to.
This is the main difference between drivers in Linux
and Xen. Having 8 separate contexts allow us to passthrough
devices to 8 guest domain at the same time.

Also wrap following code in #if 0:
- All DMA related stuff
- Linux PM callbacks
- Driver remove callback
- iommu_group management

Maybe, it would be more correct to move different Linux2Xen wrappers,
define-s, helpers from IPMMU-VMSA and SMMU to some common file
before introducing IPMMU-VMSA patch series. And this common file
might be reused by possible future IOMMUs on ARM.

Yes please if we go forward with the Linux way.


Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>
CC: Julien Grall <julien.grall@xxxxxxx>
CC: Stefano Stabellini <sstabellini@xxxxxxxxxx>
---
 xen/drivers/passthrough/arm/ipmmu-vmsa.c | 984 +++++++++++++++++++++++++++++--
 1 file changed, 948 insertions(+), 36 deletions(-)

diff --git a/xen/drivers/passthrough/arm/ipmmu-vmsa.c 
b/xen/drivers/passthrough/arm/ipmmu-vmsa.c
index 2b380ff..e54b507 100644
--- a/xen/drivers/passthrough/arm/ipmmu-vmsa.c
+++ b/xen/drivers/passthrough/arm/ipmmu-vmsa.c
@@ -6,31 +6,212 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; version 2 of the License.
+ *
+ * Based on Linux drivers/iommu/ipmmu-vmsa.c
+ * => commit f4747eba89c9b5d90fdf0a5458866283c47395d8
+ * (iommu/ipmmu-vmsa: Restrict IOMMU Domain Geometry to 32-bit address space)
+ *
+ * Xen modification:
+ * Oleksandr Tyshchenko <Oleksandr_Tyshchenko@xxxxxxxx>
+ * Copyright (C) 2016-2017 EPAM Systems Inc.
  */

-#include <linux/bitmap.h>
-#include <linux/delay.h>
-#include <linux/dma-iommu.h>
-#include <linux/dma-mapping.h>
-#include <linux/err.h>
-#include <linux/export.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/iommu.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_iommu.h>
-#include <linux/platform_device.h>
-#include <linux/sizes.h>
-#include <linux/slab.h>
-
-#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
-#include <asm/dma-iommu.h>
-#include <asm/pgalloc.h>
-#endif
+#include <xen/config.h>
+#include <xen/delay.h>
+#include <xen/errno.h>
+#include <xen/err.h>
+#include <xen/irq.h>
+#include <xen/lib.h>
+#include <xen/list.h>
+#include <xen/mm.h>
+#include <xen/vmap.h>
+#include <xen/rbtree.h>
+#include <xen/sched.h>
+#include <xen/sizes.h>
+#include <asm/atomic.h>
+#include <asm/device.h>
+#include <asm/io.h>
+#include <asm/platform.h>

 #include "io-pgtable.h"

+/* TODO:
+ * 1. Optimize xen_domain->lock usage.
+ * 2. Show domain_id in every printk which is per Xen domain.
+ *
+ */
+
+/***** Start of Xen specific code *****/
+
+#define IOMMU_READ     (1 << 0)
+#define IOMMU_WRITE    (1 << 1)
+#define IOMMU_CACHE    (1 << 2) /* DMA cache coherency */
+#define IOMMU_NOEXEC   (1 << 3)
+#define IOMMU_MMIO     (1 << 4) /* e.g. things like MSI doorbells */
+
+#define __fls(x) (fls(x) - 1)
+#define __ffs(x) (ffs(x) - 1)
+
+#define IO_PGTABLE_QUIRK_ARM_NS                BIT(0)
+
+#define ioread32 readl
+#define iowrite32 writel
+
+#define dev_info dev_notice
+
+#define devm_request_irq(unused, irq, func, flags, name, dev) \
+       request_irq(irq, flags, func, name, dev)
+
+/* Alias to Xen device tree helpers */
+#define device_node dt_device_node
+#define of_phandle_args dt_phandle_args
+#define of_device_id dt_device_match
+#define of_match_node dt_match_node
+#define of_parse_phandle_with_args dt_parse_phandle_with_args
+#define of_find_property dt_find_property
+#define of_count_phandle_with_args dt_count_phandle_with_args
+
+/* Xen: Helpers to get device MMIO and IRQs */
+struct resource
+{
+       u64 addr;
+       u64 size;
+       unsigned int type;
+};
+
+#define resource_size(res) (res)->size;
+
+#define platform_device dt_device_node
+
+#define IORESOURCE_MEM 0
+#define IORESOURCE_IRQ 1
+
+static struct resource *platform_get_resource(struct platform_device *pdev,
+                                             unsigned int type,
+                                             unsigned int num)
+{
+       /*
+        * The resource is only used between 2 calls of platform_get_resource.
+        * It's quite ugly but it's avoid to add too much code in the part
+        * imported from Linux
+        */
+       static struct resource res;
+       int ret = 0;
+
+       res.type = type;
+
+       switch (type) {
+       case IORESOURCE_MEM:
+               ret = dt_device_get_address(pdev, num, &res.addr, &res.size);
+
+               return ((ret) ? NULL : &res);
+
+       case IORESOURCE_IRQ:
+               ret = platform_get_irq(pdev, num);
+               if (ret < 0)
+                       return NULL;
+
+               res.addr = ret;
+               res.size = 1;
+
+               return &res;
+
+       default:
+               return NULL;
+       }
+}
+
+enum irqreturn {
+       IRQ_NONE        = (0 << 0),
+       IRQ_HANDLED     = (1 << 0),
+};
+
+typedef enum irqreturn irqreturn_t;
+
+/* Device logger functions */
+#define dev_print(dev, lvl, fmt, ...)                                          
\
+        printk(lvl "ipmmu: %s: " fmt, dt_node_full_name(dev_to_dt(dev)), ## 
__VA_ARGS__)
+
+#define dev_dbg(dev, fmt, ...) dev_print(dev, XENLOG_DEBUG, fmt, ## 
__VA_ARGS__)
+#define dev_notice(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## 
__VA_ARGS__)
+#define dev_warn(dev, fmt, ...) dev_print(dev, XENLOG_WARNING, fmt, ## 
__VA_ARGS__)
+#define dev_err(dev, fmt, ...) dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
+
+#define dev_err_ratelimited(dev, fmt, ...)                                     
\
+        dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
+
+#define dev_name(dev) dt_node_full_name(dev_to_dt(dev))
+
+/* Alias to Xen allocation helpers */
+#define kfree xfree
+#define kmalloc(size, flags)           _xmalloc(size, sizeof(void *))
+#define kzalloc(size, flags)           _xzalloc(size, sizeof(void *))
+#define devm_kzalloc(dev, size, flags) _xzalloc(size, sizeof(void *))
+#define kmalloc_array(size, n, flags)  _xmalloc_array(size, sizeof(void *), n)
+#define kcalloc(size, n, flags)                _xzalloc_array(size, 
sizeof(void *), n)
+
+static void __iomem *devm_ioremap_resource(struct device *dev,
+                                          struct resource *res)
+{
+       void __iomem *ptr;
+
+       if (!res || res->type != IORESOURCE_MEM) {
+               dev_err(dev, "Invalid resource\n");
+               return ERR_PTR(-EINVAL);
+       }
+
+       ptr = ioremap_nocache(res->addr, res->size);
+       if (!ptr) {
+               dev_err(dev,
+                       "ioremap failed (addr 0x%"PRIx64" size 0x%"PRIx64")\n",
+                       res->addr, res->size);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       return ptr;
+}
+
+/* Xen doesn't handle IOMMU fault */
+#define report_iommu_fault(...)        1
+
+#define MODULE_DEVICE_TABLE(type, name)
+#define module_param_named(name, value, type, perm)
+#define MODULE_PARM_DESC(_parm, desc)
+
+/* Xen: Dummy iommu_domain */
+struct iommu_domain
+{
+       atomic_t ref;
+       /* Used to link iommu_domain contexts for a same domain.
+        * There is at least one per-IPMMU to used by the domain.
+        * */
+       struct list_head                list;
+};
+
+/* Xen: Describes informations required for a Xen domain */
+struct ipmmu_vmsa_xen_domain {
+       spinlock_t                      lock;
+       /* List of context (i.e iommu_domain) associated to this domain */
+       struct list_head                contexts;
+       struct iommu_domain             *base_context;
+};
+
+/*
+ * Xen: Information about each device stored in dev->archdata.iommu
+ *
+ * On Linux the dev->archdata.iommu only stores the arch specific information,
+ * but, on Xen, we also have to store the iommu domain.
+ */
+struct ipmmu_vmsa_xen_device {
+       struct iommu_domain *domain;
+       struct ipmmu_vmsa_archdata *archdata;
+};
+
+#define dev_iommu(dev) ((struct ipmmu_vmsa_xen_device *)dev->archdata.iommu)
+#define dev_iommu_domain(dev) (dev_iommu(dev)->domain)
+
+/***** Start of Linux IPMMU code *****/
+
 #define IPMMU_CTX_MAX 8

 struct ipmmu_features {
@@ -64,7 +245,9 @@ struct ipmmu_vmsa_device {
        struct hw_register *reg_backup[IPMMU_CTX_MAX];
 #endif

+#if 0 /* Xen: Not needed */
        struct dma_iommu_mapping *mapping;
+#endif
 };

 struct ipmmu_vmsa_domain {
@@ -77,6 +260,9 @@ struct ipmmu_vmsa_domain {

        unsigned int context_id;
        spinlock_t lock;                        /* Protects mappings */
+
+       /* Xen: Domain associated to this configuration */
+       struct domain *d;
 };

 struct ipmmu_vmsa_archdata {
@@ -94,14 +280,20 @@ struct ipmmu_vmsa_archdata {
 static DEFINE_SPINLOCK(ipmmu_devices_lock);
 static LIST_HEAD(ipmmu_devices);

+#if 0 /* Xen: Not needed */
 static DEFINE_SPINLOCK(ipmmu_slave_devices_lock);
 static LIST_HEAD(ipmmu_slave_devices);
+#endif

 static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom)
 {
        return container_of(dom, struct ipmmu_vmsa_domain, io_domain);
 }

+/*
+ * Xen: Rewrite Linux helpers to manipulate with archdata on Xen.
+ */
+#if 0
 #if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
 static struct ipmmu_vmsa_archdata *to_archdata(struct device *dev)
 {
@@ -120,6 +312,16 @@ static void set_archdata(struct device *dev, struct 
ipmmu_vmsa_archdata *p)
 {
 }
 #endif
+#else
+static struct ipmmu_vmsa_archdata *to_archdata(struct device *dev)
+{
+       return dev_iommu(dev)->archdata;
+}
+static void set_archdata(struct device *dev, struct ipmmu_vmsa_archdata *p)
+{
+       dev_iommu(dev)->archdata = p;
+}
+#endif

 #define TLB_LOOP_TIMEOUT               100     /* 100us */

@@ -355,6 +557,10 @@ static struct hw_register *root_pgtable[IPMMU_CTX_MAX] = {

 static bool ipmmu_is_root(struct ipmmu_vmsa_device *mmu)
 {
+       /* Xen: Fix */

Hmmm. Can we get a bit more details?

+       if (!mmu)
+               return false;
+
        if (mmu->features->has_cache_leaf_nodes)
                return mmu->is_leaf ? false : true;
        else
@@ -405,14 +611,28 @@ static void ipmmu_ctx_write(struct ipmmu_vmsa_domain 
*domain, unsigned int reg,
        ipmmu_write(domain->root, domain->context_id * IM_CTX_SIZE + reg, data);
 }

-static void ipmmu_ctx_write2(struct ipmmu_vmsa_domain *domain, unsigned int 
reg,
+/* Xen: Write the context for cache IPMMU only. */

Same here. Why does it need to be different with Xen?

+static void ipmmu_ctx_write1(struct ipmmu_vmsa_domain *domain, unsigned int 
reg,
                             u32 data)
 {
        if (domain->mmu != domain->root)
-               ipmmu_write(domain->mmu,
-                           domain->context_id * IM_CTX_SIZE + reg, data);
+               ipmmu_write(domain->mmu, domain->context_id * IM_CTX_SIZE + 
reg, data);
+}

-       ipmmu_write(domain->root, domain->context_id * IM_CTX_SIZE + reg, data);
+/*
+ * Xen: Write the context for both root IPMMU and all cache IPMMUs
+ * that assigned to this Xen domain.
+ */
+static void ipmmu_ctx_write2(struct ipmmu_vmsa_domain *domain, unsigned int 
reg,
+                            u32 data)
+{
+       struct ipmmu_vmsa_xen_domain *xen_domain = 
dom_iommu(domain->d)->arch.priv;
+       struct iommu_domain *io_domain;
+
+       list_for_each_entry(io_domain, &xen_domain->contexts, list)
+               ipmmu_ctx_write1(to_vmsa_domain(io_domain), reg, data);
+
+       ipmmu_ctx_write(domain, reg, data);
 }

 /* 
-----------------------------------------------------------------------------
@@ -488,6 +708,10 @@ static void ipmmu_tlb_flush_all(void *cookie)
 {
        struct ipmmu_vmsa_domain *domain = cookie;

+       /* Xen: Just return if context_id has non-existent value */

Same here.

+       if (domain->context_id >= domain->root->num_ctx)
+               return;
+
        ipmmu_tlb_invalidate(domain);
 }

@@ -549,8 +773,10 @@ static int ipmmu_domain_init_context(struct 
ipmmu_vmsa_domain *domain)
        domain->cfg.ias = 32;
        domain->cfg.oas = 40;
        domain->cfg.tlb = &ipmmu_gather_ops;
+#if 0 /* Xen: Not needed */
        domain->io_domain.geometry.aperture_end = DMA_BIT_MASK(32);
        domain->io_domain.geometry.force_aperture = true;
+#endif
        /*
         * TODO: Add support for coherent walk through CCI with DVM and remove
         * cache handling. For now, delegate it to the io-pgtable code.
@@ -562,6 +788,9 @@ static int ipmmu_domain_init_context(struct 
ipmmu_vmsa_domain *domain)
        if (!domain->iop)
                return -EINVAL;

+       /* Xen: Initialize context_id with non-existent value */
+       domain->context_id = domain->root->num_ctx;

Why do you need to do that for Xen? Overall I think you need a bit more explanation of why you need those changes for Xen compare to the Linux driver.

+
        /*
         * Find an unused context.
         */
@@ -578,6 +807,11 @@ static int ipmmu_domain_init_context(struct 
ipmmu_vmsa_domain *domain)

        /* TTBR0 */
        ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0];
+
+       /* Xen: */
+       dev_notice(domain->root->dev, "d%d: Set IPMMU context %u (pgd 
0x%"PRIx64")\n",
+                       domain->d->domain_id, domain->context_id, ttbr);

If you want to keep driver close to Linux, then you need to avoid unecessary change.

+
        ipmmu_ctx_write(domain, IMTTLBR0, ttbr);
        ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32);

@@ -616,8 +850,9 @@ static int ipmmu_domain_init_context(struct 
ipmmu_vmsa_domain *domain)
         * translation table format doesn't use TEX remapping. Don't enable AF
         * software management as we have no use for it. Flush the TLB as
         * required when modifying the context registers.
+        * Xen: Enable the context for the root IPMMU only.
         */
-       ipmmu_ctx_write2(domain, IMCTR,
+       ipmmu_ctx_write(domain, IMCTR,
                         IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN);

        return 0;
@@ -638,13 +873,18 @@ static void ipmmu_domain_free_context(struct 
ipmmu_vmsa_device *mmu,

 static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
 {
+       /* Xen: Just return if context_id has non-existent value */
+       if (domain->context_id >= domain->root->num_ctx)
+               return;
+
        /*
         * Disable the context. Flush the TLB as required when modifying the
         * context registers.
         *
         * TODO: Is TLB flush really needed ?
+        * Xen: Disable the context for the root IPMMU only.
         */
-       ipmmu_ctx_write2(domain, IMCTR, IMCTR_FLUSH);
+       ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH);
        ipmmu_tlb_sync(domain);

 #ifdef CONFIG_RCAR_DDR_BACKUP
@@ -652,12 +892,16 @@ static void ipmmu_domain_destroy_context(struct 
ipmmu_vmsa_domain *domain)
 #endif

        ipmmu_domain_free_context(domain->root, domain->context_id);
+
+       /* Xen: Initialize context_id with non-existent value */
+       domain->context_id = domain->root->num_ctx;
 }

 /* 
-----------------------------------------------------------------------------
  * Fault Handling
  */

+/* Xen: Show domain_id in every printk */
 static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain)
 {
        const u32 err_mask = IMSTR_MHIT | IMSTR_ABORT | IMSTR_PF | IMSTR_TF;
@@ -681,11 +925,11 @@ static irqreturn_t ipmmu_domain_irq(struct 
ipmmu_vmsa_domain *domain)

        /* Log fatal errors. */
        if (status & IMSTR_MHIT)
-               dev_err_ratelimited(mmu->dev, "Multiple TLB hits @0x%08x\n",
-                                   iova);
+               dev_err_ratelimited(mmu->dev, "d%d: Multiple TLB hits 
@0x%08x\n",
+                               domain->d->domain_id, iova);
        if (status & IMSTR_ABORT)
-               dev_err_ratelimited(mmu->dev, "Page Table Walk Abort @0x%08x\n",
-                                   iova);
+               dev_err_ratelimited(mmu->dev, "d%d: Page Table Walk Abort 
@0x%08x\n",
+                               domain->d->domain_id, iova);

        if (!(status & (IMSTR_PF | IMSTR_TF)))
                return IRQ_NONE;
@@ -700,8 +944,8 @@ static irqreturn_t ipmmu_domain_irq(struct 
ipmmu_vmsa_domain *domain)
                return IRQ_HANDLED;

        dev_err_ratelimited(mmu->dev,
-                           "Unhandled fault: status 0x%08x iova 0x%08x\n",
-                           status, iova);
+                       "d%d: Unhandled fault: status 0x%08x iova 0x%08x\n",
+                       domain->d->domain_id, status, iova);

        return IRQ_HANDLED;
 }
@@ -730,6 +974,16 @@ static irqreturn_t ipmmu_irq(int irq, void *dev)
        return status;
 }

+/* Xen: Interrupt handlers wrapper */
+static void ipmmu_irq_xen(int irq, void *dev,
+                                     struct cpu_user_regs *regs)
+{
+       ipmmu_irq(irq, dev);
+}
+
+#define ipmmu_irq ipmmu_irq_xen
+
+#if 0 /* Xen: Not needed */
 /* 
-----------------------------------------------------------------------------
  * IOMMU Operations
  */
@@ -759,6 +1013,7 @@ static void ipmmu_domain_free(struct iommu_domain 
*io_domain)
        free_io_pgtable_ops(domain->iop);
        kfree(domain);
 }
+#endif

 static int ipmmu_attach_device(struct iommu_domain *io_domain,
                               struct device *dev)
@@ -787,7 +1042,20 @@ static int ipmmu_attach_device(struct iommu_domain 
*io_domain,
                /* The domain hasn't been used yet, initialize it. */
                domain->mmu = mmu;
                domain->root = root;
+
+/*
+ * Xen: We have already initialized and enabled context for root IPMMU
+ * for this Xen domain. Enable context for given cache IPMMU only.
+ * Flush the TLB as required when modifying the context registers.

Why?

+ */
+#if 0
                ret = ipmmu_domain_init_context(domain);
+#endif
+               ipmmu_ctx_write1(domain, IMCTR,
+                               ipmmu_ctx_read(domain, IMCTR) | IMCTR_FLUSH);
+
+               dev_info(dev, "Using IPMMU context %u\n", domain->context_id);
+#if 0 /* Xen: Not needed */
                if (ret < 0) {
                        dev_err(dev, "Unable to initialize IPMMU context\n");
                        domain->mmu = NULL;
@@ -795,6 +1063,7 @@ static int ipmmu_attach_device(struct iommu_domain 
*io_domain,
                        dev_info(dev, "Using IPMMU context %u\n",
                                 domain->context_id);
                }
+#endif
        } else if (domain->mmu != mmu) {
                /*
                 * Something is wrong, we can't attach two devices using
@@ -834,6 +1103,14 @@ static void ipmmu_detach_device(struct iommu_domain 
*io_domain,
         */
 }

+/*
+ * Xen: The current implementation of these callbacks is insufficient for us
+ * since they are intended to be called from Linux IOMMU core that
+ * has already done all required actions such as doing various checks,
+ * splitting into memory block the hardware supports and so on.

Can you expand it here? Why can't our IOMMU framework could do that?

IHMO, if we want to get driver from Linux, we need to get an interface very close to it. Otherwise it is not worth it because you would have to implement for each IOMMU.

My overall feeling at the moment is Xen is not ready to welcome this driver directly from Linux. This is also a BSP driver, so no thorough review done by the community.

I have been told the BSP driver was in pretty bad state, so I think we really need to weight pros and cons of using it.

Cheers,

--
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.