[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 11/13] xen/iommu: smmu: Introduce automatic stream-id-masking



From: Andreas Herrmann <andreas.herrmann@xxxxxxxxxxx>

Try to determine mask/id values that match several stream IDs of a
master device when doing Stream ID matching. Thus the number of used
SMR groups that are required to map all stream IDs of a master device
to a context should be less than the number of SMR groups used so far
(currently one SMR group is used for one stream ID).

Taken from the Linux ML:
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-January/226100.html

Changes compare to the Linux ML version:
    - _fls doesn't exist on Xen so use fls
    - Use num_s2crs rather than num_streamids in the arm_smmu_free_smrs.
    This former is the field used to configure SRMS

Cc: Andreas Herrmann <herrmann.der.user@xxxxxxxxxxxxxx>
Signed-off-by: Andreas Herrmann <andreas.herrmann@xxxxxxxxxxx>
Signed-off-by: Julien Grall <julien.grall@xxxxxxxxxx>

---

    This patch was sent on Linux ML back to January 2014. It has never
    been pushed upstream as it was only useful for Calxeda.

    The SMMU used to protect the SATA on this platform has more stream
    id than the number of stream matching registers. Therefore we have
    to use stream id masking to configure correctly all the stream IDs.

    Note that SMMU bindings for Calxeda is not upstreamed.
---
 xen/drivers/passthrough/arm/smmu.c | 177 +++++++++++++++++++++++++++++++++----
 1 file changed, 162 insertions(+), 15 deletions(-)

diff --git a/xen/drivers/passthrough/arm/smmu.c 
b/xen/drivers/passthrough/arm/smmu.c
index bfc1069..8a6514f 100644
--- a/xen/drivers/passthrough/arm/smmu.c
+++ b/xen/drivers/passthrough/arm/smmu.c
@@ -43,6 +43,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/bitops.h>
 
 #include <linux/amba/bus.h>
 
@@ -346,8 +347,10 @@ struct arm_smmu_smr {
 };
 
 struct arm_smmu_master_cfg {
-       int                             num_streamids;
+       u32                             num_streamids;
        u16                             streamids[MAX_MASTER_STREAMIDS];
+       int                             num_s2crs;
+
        struct arm_smmu_smr             *smrs;
 };
 
@@ -392,6 +395,9 @@ struct arm_smmu_device {
        u32                             num_context_irqs;
        unsigned int                    *irqs;
 
+       u32                             smr_mask_mask;
+       u32                             smr_id_mask;
+
        struct list_head                list;
        struct rb_root                  masters;
 };
@@ -1113,6 +1119,137 @@ static void arm_smmu_free_pgtables(struct 
arm_smmu_domain *smmu_domain)
        kfree(pgd_base);
 }
 
+/*
+ * For a given set N of 2**order different stream IDs (no duplicates
+ * please!) we determine values mask and id such that
+ *
+ * (1)          (x & mask) == id
+ *
+ * for each stream ID x from the given set N.
+ *
+ * If the number of bits that are set in mask equals n, then there
+ * exist 2**n different values y for which
+ *
+ * (2)          (y & mask) == id
+ *
+ * Thus if n equals order we know that for the calculated mask and id
+ * values there are exactly 2**order == 2**n stream IDs for which (1)
+ * is true. And we finally can use mask and id to configure an SMR to
+ * match all stream IDs in the set N.
+ */
+static int determine_smr_mask(struct arm_smmu_device *smmu,
+                             struct arm_smmu_master_cfg *cfg,
+                             struct arm_smmu_smr *smr, int start, int order)
+{
+       u16 i, zero_bits_mask, one_bits_mask, const_mask;
+       int nr;
+
+       nr = 1 << order;
+
+       if (nr == 1) {
+               /* no mask, use streamid to match and be done with it */
+               smr->mask = 0;
+               smr->id = cfg->streamids[start];
+               return 0;
+       }
+
+       zero_bits_mask = 0;
+       one_bits_mask = 0xffff;
+       for (i = start; i < start + nr; i++) {
+               zero_bits_mask |= cfg->streamids[i];    /* const 0 bits */
+               one_bits_mask &= cfg->streamids[i];     /* const 1 bits */
+       }
+       zero_bits_mask = ~zero_bits_mask;
+
+       /* bits having constant values (either 0 or 1) */
+       const_mask = zero_bits_mask | one_bits_mask;
+
+       i = hweight16(~const_mask);
+       if (i == order) {
+               /*
+                * We have found a mask/id pair that matches exactly
+                * nr = 2**order stream IDs which we used for its
+                * calculation.
+                */
+               smr->mask = ~const_mask;
+               smr->id = one_bits_mask;
+       } else {
+               /*
+                * No usable mask/id pair for this set of streamids.
+                * If i > order then mask/id would match more than nr
+                * streamids.
+                * If i < order then mask/id would match less than nr
+                * streamids. (In this case we potentially have used
+                * some duplicate streamids for the calculation.)
+                */
+               return 1;
+       }
+
+       if (((smr->mask & smmu->smr_mask_mask) != smr->mask) ||
+               ((smr->id & smmu->smr_id_mask) != smr->id))
+               /* insufficient number of mask/id bits */
+               return 1;
+
+       return 0;
+}
+
+static int determine_smr_mapping(struct arm_smmu_device *smmu,
+                                struct arm_smmu_master_cfg *cfg,
+                                struct arm_smmu_smr *smrs, int max_smrs)
+{
+       int nr_sid, nr, i, bit, start;
+
+       /*
+        * This function is called only once -- when a master is added
+        * to a domain. If cfg->num_s2crs != 0 then this master
+        * was already added to a domain.
+        */
+       if (cfg->num_s2crs)
+               return -EINVAL;
+
+       start = nr = 0;
+       nr_sid = cfg->num_streamids;
+       do {
+               /*
+                * largest power-of-2 number of streamids for which to
+                * determine a usable mask/id pair for stream matching
+                */
+               bit = fls(nr_sid) - 1;
+               if (bit < 0)
+                       return 0;
+
+               /*
+                * iterate over power-of-2 numbers to determine
+                * largest possible mask/id pair for stream matching
+                * of next 2**i streamids
+                */
+               for (i = bit; i >= 0; i--) {
+                       if (!determine_smr_mask(smmu, cfg,
+                                               &smrs[cfg->num_s2crs],
+                                               start, i))
+                               break;
+               }
+
+               if (i < 0)
+                       goto out;
+
+               nr = 1 << i;
+               nr_sid -= nr;
+               start += nr;
+               cfg->num_s2crs++;
+       } while (cfg->num_s2crs <= max_smrs);
+
+out:
+       if (nr_sid) {
+               /* not enough mapping groups available */
+               cfg->num_s2crs = 0;
+               return -ENOSPC;
+       }
+
+       return 0;
+}
+
+
 static void arm_smmu_domain_destroy(struct iommu_domain *domain)
 {
        struct arm_smmu_domain *smmu_domain = domain->priv;
@@ -1129,7 +1266,7 @@ static void arm_smmu_domain_destroy(struct iommu_domain 
*domain)
 static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
                                          struct arm_smmu_master_cfg *cfg)
 {
-       int i;
+       int i, max_smrs, ret;
        struct arm_smmu_smr *smrs;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
 
@@ -1139,31 +1276,32 @@ static int arm_smmu_master_configure_smrs(struct 
arm_smmu_device *smmu,
        if (cfg->smrs)
                return -EEXIST;
 
-       smrs = kmalloc_array(cfg->num_streamids, sizeof(*smrs), GFP_KERNEL);
+       max_smrs = min(smmu->num_mapping_groups, cfg->num_streamids);
+       smrs = kmalloc(sizeof(*smrs) * max_smrs, GFP_KERNEL);
        if (!smrs) {
                dev_err(smmu->dev, "failed to allocate %d SMRs\n",
-                       cfg->num_streamids);
+                       max_smrs);
                return -ENOMEM;
        }
 
+       ret = determine_smr_mapping(smmu, cfg, smrs, max_smrs);
+       if (ret)
+               goto err_free_smrs;
+
        /* Allocate the SMRs on the SMMU */
-       for (i = 0; i < cfg->num_streamids; ++i) {
+       for (i = 0; i < cfg->num_s2crs; ++i) {
                int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
                                                  smmu->num_mapping_groups);
                if (IS_ERR_VALUE(idx)) {
                        dev_err(smmu->dev, "failed to allocate free SMR\n");
-                       goto err_free_smrs;
+                       goto err_free_bitmap;
                }
 
-               smrs[i] = (struct arm_smmu_smr) {
-                       .idx    = idx,
-                       .mask   = 0, /* We don't currently share SMRs */
-                       .id     = cfg->streamids[i],
-               };
+               smrs[i].idx = idx;
        }
 
        /* It worked! Now, poke the actual hardware */
-       for (i = 0; i < cfg->num_streamids; ++i) {
+       for (i = 0; i < cfg->num_s2crs; ++i) {
                u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT |
                          smrs[i].mask << SMR_MASK_SHIFT;
                writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx));
@@ -1172,9 +1310,11 @@ static int arm_smmu_master_configure_smrs(struct 
arm_smmu_device *smmu,
        cfg->smrs = smrs;
        return 0;
 
-err_free_smrs:
+err_free_bitmap:
        while (--i >= 0)
                __arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx);
+       cfg->num_s2crs = 0;
+err_free_smrs:
        kfree(smrs);
        return -ENOSPC;
 }
@@ -1190,13 +1330,15 @@ static void arm_smmu_master_free_smrs(struct 
arm_smmu_device *smmu,
                return;
 
        /* Invalidate the SMRs before freeing back to the allocator */
-       for (i = 0; i < cfg->num_streamids; ++i) {
+       for (i = 0; i < cfg->num_s2crs; ++i) {
                u8 idx = smrs[i].idx;
 
                writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
                __arm_smmu_free_bitmap(smmu->smr_map, idx);
        }
 
+       cfg->num_s2crs = 0;
+
        cfg->smrs = NULL;
        kfree(smrs);
 }
@@ -1213,12 +1355,15 @@ static int arm_smmu_domain_add_master(struct 
arm_smmu_domain *smmu_domain,
        if (ret)
                return ret == -EEXIST ? 0 : ret;
 
-       for (i = 0; i < cfg->num_streamids; ++i) {
+       if (!cfg->num_s2crs)
+               cfg->num_s2crs = cfg->num_streamids;
+       for (i = 0; i < cfg->num_s2crs; ++i) {
                u32 idx, s2cr;
 
                idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
                s2cr = S2CR_TYPE_TRANS |
                       (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT);
+               dev_dbg(smmu->dev, "S2CR%d: 0x%x\n", idx, s2cr);
                writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
        }
 
@@ -1890,6 +2035,8 @@ static int arm_smmu_device_cfg_probe(struct 
arm_smmu_device *smmu)
                                mask, sid);
                        return -ENODEV;
                }
+               smmu->smr_mask_mask = mask;
+               smmu->smr_id_mask = sid;
 
                dev_notice(smmu->dev,
                           "\tstream matching with %u register groups, mask 
0x%x",
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.