[Xen-devel] [PATCH v7 6/7] x86: add xen_iommu_ops to modify IOMMU mappings
This patch adds iommu_op sub-ops to add (map) or remove (unmap) frames in the
domain's IOMMU mappings.
Currently the flags value for each op must include the
XEN_IOMMUOP_map/unmap_all flag, as the implementation does not yet support
per-device mappings; the sbdf field of each op is accordingly ignored.
Mappings added by the map operation are tracked and only those mappings
may be removed by a subsequent unmap operation. Frames are specified by the
owning domain and GFN. It is, of course, permissible for a domain to map
and unmap its own frames using DOMID_SELF.
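For illustration, once modification has been enabled via the
XEN_IOMMUOP_enable_modification sub-op, a domain could map one of its own
frames along the following lines. This is a guest-side sketch, not part of
this patch: hypercall_iommu_op() is a hypothetical wrapper around
__HYPERVISOR_iommu_op, and the guest-handle plumbing is whatever the guest
kernel provides.

    /* Sketch: map the caller's own GFN 0x1234 at DFN 0x1234. */
    xen_iommu_op_t op = {
        .op = XEN_IOMMUOP_map,
        .u.map = {
            .domid = DOMID_SELF,          /* frames owned by the caller */
            .flags = XEN_IOMMUOP_map_all, /* currently mandatory; sbdf ignored */
            .dfn = 0x1234,
            .gfn = 0x1234,
        },
    };
    xen_iommu_op_buf_t buf = { .size = sizeof(op) };
    int rc;

    set_xen_guest_handle(buf.h, &op);  /* point the op buffer at the op */
    rc = hypercall_iommu_op(1, &buf);  /* hypothetical guest wrapper */
    if ( !rc )
        rc = op.status;                /* per-op status is copied back */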
NOTE: The owning domain and GFN must also be specified in the unmap
operation, along with the DFN, so that they can be cross-checked
against the existing mapping.
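To illustrate the cross-check, the matching unmap repeats all three
coordinates and is issued in the same way as the map above (again only a
sketch):

    /* Sketch: remove the mapping established above. The (domid, gfn) pair
     * is checked against the MFN actually mapped at the given DFN: a
     * mismatch fails with -EINVAL, and a missing (or non-reference-counted)
     * entry fails with -ENOENT. */
    xen_iommu_op_t op = {
        .op = XEN_IOMMUOP_unmap,
        .u.unmap = {
            .domid = DOMID_SELF,            /* owner specified at map time */
            .flags = XEN_IOMMUOP_unmap_all, /* currently mandatory */
            .dfn = 0x1234,                  /* mapping to remove */
            .gfn = 0x1234,                  /* must match the mapped frame */
        },
    };

Note that there is no longer an explicit flush sub-op: if any map or unmap
in a batch succeeds, a single iommu_iotlb_flush_all() is performed after
all buffers have been processed, so batching multiple ops into one
hypercall amortizes the flush cost.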
Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: George Dunlap <George.Dunlap@xxxxxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Julien Grall <julien.grall@xxxxxxx>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx>
Cc: Tim Deegan <tim@xxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
v7:
- Get rid of explicit flush xen_iommu_op. Flush at the end of a batch
instead.
v6:
- Add placeholder sbdf field and flag to control scope of map, unmap and
flush.
v4:
- Fixed logic inversion when checking return of iommu_unmap_page().
v3:
- Add type pinning.
v2:
- Heavily re-worked in v2, including explicit tracking of mappings.
This avoids the need to clear non-reserved mappings from IOMMU
at start of day, which would be prohibitively slow on a large host.
---
xen/common/iommu_op.c | 280 ++++++++++++++++++++++++++++++++++++++--
xen/drivers/passthrough/iommu.c | 2 +
xen/include/public/iommu_op.h | 98 ++++++++++++++
xen/include/xen/iommu.h | 6 +
xen/include/xlat.lst | 3 +
5 files changed, 376 insertions(+), 13 deletions(-)
diff --git a/xen/common/iommu_op.c b/xen/common/iommu_op.c
index 9d914a67db..0876414df5 100644
--- a/xen/common/iommu_op.c
+++ b/xen/common/iommu_op.c
@@ -78,7 +78,205 @@ static int iommu_op_query_reserved(struct xen_iommu_op_query_reserved *op)
return 0;
}
-static void iommu_op(xen_iommu_op_t *op)
+static int iommu_op_enable_modification(
+ struct xen_iommu_op_enable_modification *op)
+{
+ struct domain *currd = current->domain;
+ struct domain_iommu *iommu = dom_iommu(currd);
+ const struct iommu_ops *ops = iommu->platform_ops;
+ int rc;
+
+ if ( op->cap || op->pad )
+ return -EINVAL;
+
+ spin_lock(&iommu->lock);
+
+ /* Has modification already been enabled? */
+ rc = 0;
+ if ( iommu->domain_control )
+ goto unlock;
+
+ /*
+ * Modification of IOMMU mappings cannot be put under domain control if:
+ * - this domain does not have IOMMU page tables, or
+ * - HAP is enabled for this domain and the IOMMU shares the tables.
+ */
+ rc = -EACCES;
+ if ( !has_iommu_pt(currd) || iommu_use_hap_pt(currd) )
+ goto unlock;
+
+ /*
+ * The IOMMU implementation must provide the lookup method if
+ * modification of the mappings is to be supported.
+ */
+ rc = -EOPNOTSUPP;
+ if ( !ops->lookup_page )
+ goto unlock;
+
+ rc = 0;
+ iommu->need_sync = false; /* Disable synchronization, if enabled */
+ iommu->domain_control = true; /* Enable control */
+
+ unlock:
+ /*
+ * XEN_IOMMU_CAP_per_device_mappings is not supported yet so we can
+ * leave op->cap alone.
+ */
+
+ spin_unlock(&iommu->lock);
+
+ return rc;
+}
+
+static int iommuop_map(struct xen_iommu_op_map *op)
+{
+ struct domain *d, *currd = current->domain;
+ struct domain_iommu *iommu = dom_iommu(currd);
+ bool readonly = op->flags & XEN_IOMMUOP_map_readonly;
+ dfn_t dfn = _dfn(op->dfn);
+ p2m_type_t p2mt;
+ struct page_info *page;
+ mfn_t ignore;
+ unsigned int flags;
+ int rc;
+
+ if ( op->pad || (op->flags & ~(XEN_IOMMUOP_map_all |
+ XEN_IOMMUOP_map_readonly)) )
+ return -EINVAL;
+
+ if ( !iommu->domain_control )
+ return -EOPNOTSUPP;
+
+ /* Per-device mapping not yet supported */
+ if ( !(op->flags & XEN_IOMMUOP_map_all) )
+ return -EINVAL;
+
+ /* Check whether the specified DFN falls in a reserved region */
+ if ( rangeset_contains_singleton(iommu->reserved_ranges, dfn_x(dfn)) )
+ return -EINVAL;
+
+ d = rcu_lock_domain_by_any_id(op->domid);
+ if ( !d )
+ return -ESRCH;
+
+ rc = check_get_page_from_gfn(d, _gfn(op->gfn), readonly, &p2mt, &page);
+ if ( rc )
+ goto unlock_domain;
+
+ rc = -EINVAL;
+ if ( p2mt != p2m_ram_rw ||
+ (!readonly && !get_page_type(page, PGT_writable_page)) )
+ {
+ put_page(page);
+ goto unlock_domain;
+ }
+
+ spin_lock(&iommu->lock);
+
+ rc = iommu_lookup_page(currd, dfn, &ignore, &flags);
+
+ /* Treat a non-reference-counted entry as non-existent */
+ if ( !rc )
+ rc = !(flags & IOMMUF_refcount) ? -ENOENT : -EEXIST;
+
+ if ( rc != -ENOENT )
+ goto unlock_iommu;
+
+ flags = IOMMUF_readable | IOMMUF_refcount;
+ if ( !readonly )
+ flags |= IOMMUF_writable;
+
+ rc = iommu_map_page_nocrash(currd, dfn, page_to_mfn(page), flags);
+
+ unlock_iommu:
+ spin_unlock(&iommu->lock);
+
+ if ( rc ) /* mapping failed: drop the references taken above */
+ {
+ if ( !readonly )
+ put_page_type(page);
+ put_page(page);
+ }
+
+ unlock_domain:
+ rcu_unlock_domain(d);
+ return rc;
+}
+
+static int iommuop_unmap(struct xen_iommu_op_unmap *op)
+{
+ struct domain *d, *currd = current->domain;
+ struct domain_iommu *iommu = dom_iommu(currd);
+ dfn_t dfn = _dfn(op->dfn);
+ mfn_t mfn;
+ unsigned int flags;
+ bool readonly;
+ p2m_type_t p2mt;
+ struct page_info *page;
+ int rc;
+
+ if ( op->pad ||
+ (op->flags & ~XEN_IOMMUOP_unmap_all) )
+ return -EINVAL;
+
+ if ( !iommu->domain_control )
+ return -EOPNOTSUPP;
+
+ /* Per-device unmapping not yet supported */
+ if ( !(op->flags & XEN_IOMMUOP_unmap_all) )
+ return -EINVAL;
+
+ d = rcu_lock_domain_by_any_id(op->domid);
+ if ( !d )
+ return -ESRCH;
+
+ spin_lock(&iommu->lock);
+
+ rc = iommu_lookup_page(currd, dfn, &mfn, &flags);
+
+ /* Treat a non-reference-counted entry as non-existent */
+ if ( !rc )
+ rc = !(flags & IOMMUF_refcount) ? -ENOENT : 0;
+
+ if ( rc )
+ goto unlock;
+
+ readonly = !(flags & IOMMUF_writable);
+
+ /* Make sure the mapped frame matches */
+ rc = check_get_page_from_gfn(d, _gfn(op->gfn), readonly, &p2mt, &page);
+ if ( rc )
+ goto unlock;
+
+ rc = !mfn_eq(mfn, page_to_mfn(page)) ? -EINVAL : 0;
+
+ /* Release reference taken above */
+ put_page(page);
+
+ if ( rc )
+ goto unlock;
+
+ /* Release references taken in map */
+ if ( !readonly )
+ put_page_type(page);
+ put_page(page);
+
+ /*
+ * This really should not fail. If it does, there is an implicit
+ * domain_crash() (except in the case of the hardware domain) since
+ * there is not a lot else that can be done to ensure the released
+ * page can be safely re-used.
+ */
+ rc = iommu_unmap_page(currd, dfn);
+
+ unlock:
+ spin_unlock(&iommu->lock);
+ rcu_unlock_domain(d);
+
+ return rc;
+}
+
+static void iommu_op(xen_iommu_op_t *op, bool *need_flush)
{
switch ( op->op )
{
@@ -86,13 +284,30 @@ static void iommu_op(xen_iommu_op_t *op)
op->status = iommu_op_query_reserved(&op->u.query_reserved);
break;
+ case XEN_IOMMUOP_enable_modification:
+ op->status =
+ iommu_op_enable_modification(&op->u.enable_modification);
+ break;
+
+ case XEN_IOMMUOP_map:
+ op->status = iommuop_map(&op->u.map);
+ if ( !op->status )
+ *need_flush = true;
+ break;
+
+ case XEN_IOMMUOP_unmap:
+ op->status = iommuop_unmap(&op->u.unmap);
+ if ( !op->status )
+ *need_flush = true;
+ break;
+
default:
op->status = -EOPNOTSUPP;
break;
}
}
-int do_one_iommu_op(xen_iommu_op_buf_t *buf)
+int do_one_iommu_op(xen_iommu_op_buf_t *buf, bool *need_flush)
{
const XEN_GUEST_HANDLE(xen_iommu_op_t) h =
guest_handle_cast(buf->h, xen_iommu_op_t);
@@ -101,6 +316,10 @@ int do_one_iommu_op(xen_iommu_op_buf_t *buf)
static const size_t op_size[] = {
[XEN_IOMMUOP_query_reserved] =
sizeof(struct xen_iommu_op_query_reserved),
+ [XEN_IOMMUOP_enable_modification] =
+ sizeof(struct xen_iommu_op_enable_modification),
+ [XEN_IOMMUOP_map] = sizeof(struct xen_iommu_op_map),
+ [XEN_IOMMUOP_unmap] = sizeof(struct xen_iommu_op_unmap),
};
size_t size;
int rc;
@@ -130,10 +349,12 @@ int do_one_iommu_op(xen_iommu_op_buf_t *buf)
if ( copy_from_guest_offset((void *)&op.u, buf->h, offset, size) )
return -EFAULT;
- iommu_op(&op);
+ iommu_op(&op, need_flush);
- if ( op.op == XEN_IOMMUOP_query_reserved &&
- __copy_field_to_guest(h, &op, u.query_reserved.nr_entries) )
+ if ( (op.op == XEN_IOMMUOP_query_reserved &&
+ __copy_field_to_guest(h, &op, u.query_reserved.nr_entries)) ||
+ (op.op == XEN_IOMMUOP_enable_modification &&
+ __copy_field_to_guest(h, &op, u.enable_modification.cap)) )
return -EFAULT;
if ( __copy_field_to_guest(h, &op, status) )
@@ -146,8 +367,11 @@ long do_iommu_op(unsigned int nr_bufs,
XEN_GUEST_HANDLE_PARAM(xen_iommu_op_buf_t) bufs)
{
unsigned int i;
+ bool need_flush = false;
long rc = 0;
+ this_cpu(iommu_dont_flush_iotlb) = 1;
+
for ( i = 0; i < nr_bufs; i++ )
{
xen_iommu_op_buf_t buf;
@@ -164,11 +388,13 @@ long do_iommu_op(unsigned int nr_bufs,
break;
}
- rc = do_one_iommu_op(&buf);
+ rc = do_one_iommu_op(&buf, &need_flush);
if ( rc )
break;
}
+ this_cpu(iommu_dont_flush_iotlb) = 0;
+
if ( rc > 0 )
{
ASSERT(rc < nr_bufs);
@@ -177,7 +403,8 @@ long do_iommu_op(unsigned int nr_bufs,
rc = hypercall_create_continuation(__HYPERVISOR_iommu_op,
"ih", nr_bufs, bufs);
- }
+ } else if ( !rc && need_flush )
+ rc = iommu_iotlb_flush_all(current->domain);
return rc;
}
@@ -186,7 +413,7 @@ long do_iommu_op(unsigned int nr_bufs,
CHECK_iommu_reserved_range;
-int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
+int compat_one_iommu_op(compat_iommu_op_buf_t *buf, bool *need_flush)
{
const COMPAT_HANDLE(compat_iommu_op_t) h =
compat_handle_cast(buf->h, compat_iommu_op_t);
@@ -195,6 +422,10 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
static const size_t op_size[] = {
[XEN_IOMMUOP_query_reserved] =
sizeof(struct compat_iommu_op_query_reserved),
+ [XEN_IOMMUOP_enable_modification] =
+ sizeof(struct compat_iommu_op_enable_modification),
+ [XEN_IOMMUOP_map] = sizeof(struct compat_iommu_op_map),
+ [XEN_IOMMUOP_unmap] = sizeof(struct compat_iommu_op_unmap),
};
size_t size;
xen_iommu_op_t nat;
@@ -228,9 +459,15 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
/*
* The xlat magic doesn't quite know how to handle the union so
- * we need to fix things up here.
+ * we need to fix things up here. Also, none of the sub-ops, apart from
+ * query_reserved, actually need any translation but the xlat magic
+ * can't deal with that either so all sub-ops must be marked for
+ * translation in xlat.lst.
*/
#define XLAT_iommu_op_u_query_reserved XEN_IOMMUOP_query_reserved
+#define XLAT_iommu_op_u_enable_modification XEN_IOMMUOP_enable_modification
+#define XLAT_iommu_op_u_map XEN_IOMMUOP_map
+#define XLAT_iommu_op_u_unmap XEN_IOMMUOP_unmap
u = cmp.op;
#define XLAT_iommu_op_query_reserved_HNDL_ranges(_d_, _s_) \
@@ -258,9 +495,12 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
XLAT_iommu_op(&nat, &cmp);
#undef XLAT_iommu_op_query_reserved_HNDL_ranges
+#undef XLAT_iommu_op_u_unmap
+#undef XLAT_iommu_op_u_map
+#undef XLAT_iommu_op_u_enable_modification
#undef XLAT_iommu_op_u_query_reserved
- iommu_op(&nat);
+ iommu_op(&nat, need_flush);
#define XLAT_iommu_op_query_reserved_HNDL_ranges(_d_, _s_) \
do \
@@ -282,7 +522,8 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
/*
* Avoid the full (and lengthy) XLAT code as the only things that
* need copying back are the reserved ranges (in the case of the
- * query op) and the status field (for all ops).
+ * query op), capabilities (in the case of the enable op) and the
+ * status field (for all ops).
*/
cmp.status = nat.status;
@@ -296,6 +537,13 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
if ( __copy_field_to_compat(h, &cmp, u.query_reserved.nr_entries) )
return -EFAULT;
}
+ else if ( cmp.op == XEN_IOMMUOP_enable_modification )
+ {
+ cmp.u.enable_modification.cap = nat.u.enable_modification.cap;
+
+ if ( __copy_field_to_compat(h, &cmp, u.enable_modification.cap) )
+ return -EFAULT;
+ }
#undef XLAT_iommu_op_query_reserved_HNDL_ranges
@@ -309,8 +557,11 @@ int compat_iommu_op(unsigned int nr_bufs,
XEN_GUEST_HANDLE_PARAM(compat_iommu_op_buf_t) bufs)
{
unsigned int i;
+ bool need_flush = false;
long rc = 0;
+ this_cpu(iommu_dont_flush_iotlb) = 1;
+
for ( i = 0; i < nr_bufs; i++ )
{
compat_iommu_op_buf_t buf;
@@ -327,11 +578,13 @@ int compat_iommu_op(unsigned int nr_bufs,
break;
}
- rc = compat_one_iommu_op(&buf);
+ rc = compat_one_iommu_op(&buf, &need_flush);
if ( rc )
break;
}
+ this_cpu(iommu_dont_flush_iotlb) = 0;
+
if ( rc > 0 )
{
ASSERT(rc < nr_bufs);
@@ -340,7 +593,8 @@ int compat_iommu_op(unsigned int nr_bufs,
rc = hypercall_create_continuation(__HYPERVISOR_iommu_op,
"ih", nr_bufs, bufs);
- }
+ } else if ( !rc && need_flush )
+ rc = iommu_iotlb_flush_all(current->domain);
return rc;
}
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index bc67cfe843..47c608cc89 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -184,6 +184,8 @@ int iommu_domain_init(struct domain *d)
if ( !hd->reserved_ranges )
return -ENOMEM;
+ spin_lock_init(&hd->lock);
+
hd->platform_ops = iommu_get_ops();
return hd->platform_ops->init(d);
}
diff --git a/xen/include/public/iommu_op.h b/xen/include/public/iommu_op.h
index 001f515bb3..fcca47e8d2 100644
--- a/xen/include/public/iommu_op.h
+++ b/xen/include/public/iommu_op.h
@@ -61,6 +61,101 @@ struct xen_iommu_op_query_reserved {
XEN_GUEST_HANDLE(xen_iommu_reserved_range_t) ranges;
};
+/*
+ * XEN_IOMMUOP_enable_modification: Enable operations that modify IOMMU
+ * mappings.
+ */
+#define XEN_IOMMUOP_enable_modification 2
+
+struct xen_iommu_op_enable_modification {
+ /*
+ * OUT - On successful return this is set to the bitwise OR of capabilities
+ * defined below. On entry this must be set to zero.
+ */
+ uint32_t cap;
+ uint32_t pad;
+
+ /* Does the implementation support per-device mappings? */
+#define _XEN_IOMMU_CAP_per_device_mappings 0
+#define XEN_IOMMU_CAP_per_device_mappings (1u << _XEN_IOMMU_CAP_per_device_mappings)
+};
+
+/*
+ * XEN_IOMMUOP_map: Map a guest page in the IOMMU.
+ */
+#define XEN_IOMMUOP_map 3
+
+struct xen_iommu_op_map {
+ /* IN - The domid of the guest */
+ domid_t domid;
+ /*
+ * IN - flags controlling the mapping. This should be a bitwise OR of the
+ * flags defined below.
+ */
+ uint16_t flags;
+
+ /*
+ * Should the mapping be created for all initiators?
+ *
+ * NOTE: This flag is currently required as the implementation does not yet
+ * support per-device mappings.
+ */
+#define _XEN_IOMMUOP_map_all 0
+#define XEN_IOMMUOP_map_all (1 << (_XEN_IOMMUOP_map_all))
+
+ /* Should the mapping be read-only to the initiator(s)? */
+#define _XEN_IOMMUOP_map_readonly 1
+#define XEN_IOMMUOP_map_readonly (1 << (_XEN_IOMMUOP_map_readonly))
+
+ uint32_t pad;
+ /*
+ * IN - Segment/Bus/Device/Function of the initiator.
+ *
+ * NOTE: This is ignored if XEN_IOMMUOP_map_all is set.
+ */
+ uint64_t sbdf;
+ /* IN - The IOMMU frame number which will hold the new mapping */
+ xen_dfn_t dfn;
+ /* IN - The guest frame number of the page to be mapped */
+ xen_pfn_t gfn;
+};
+
+/*
+ * XEN_IOMMUOP_unmap: Remove a mapping from the IOMMU.
+ */
+#define XEN_IOMMUOP_unmap 4
+
+struct xen_iommu_op_unmap {
+ /* IN - The domid of the guest */
+ domid_t domid;
+ /*
+ * IN - flags controlling the unmapping. This should be a bitwise OR of the
+ * flags defined below.
+ */
+ uint16_t flags;
+
+ /*
+ * Should the mapping be destroyed for all initiators?
+ *
+ * NOTE: This flag is currently required as the implementation does not yet
+ * support per-device mappings.
+ */
+#define _XEN_IOMMUOP_unmap_all 0
+#define XEN_IOMMUOP_unmap_all (1 << (_XEN_IOMMUOP_unmap_all))
+
+ uint32_t pad;
+ /*
+ * IN - Segment/Bus/Device/Function of the initiator.
+ *
+ * NOTE: This is ignored if XEN_IOMMUOP_unmap_all is set.
+ */
+ uint64_t sbdf;
+ /* IN - The IOMMU frame number which holds the mapping to be removed */
+ xen_dfn_t dfn;
+ /* IN - The guest frame number of the page that is mapped */
+ xen_pfn_t gfn;
+};
+
struct xen_iommu_op {
uint16_t op; /* op type */
uint16_t pad;
@@ -68,6 +163,9 @@ struct xen_iommu_op {
/* 0 for success otherwise, negative errno */
union {
struct xen_iommu_op_query_reserved query_reserved;
+ struct xen_iommu_op_enable_modification enable_modification;
+ struct xen_iommu_op_map map;
+ struct xen_iommu_op_unmap unmap;
} u;
};
typedef struct xen_iommu_op xen_iommu_op_t;
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index a56d03b719..a04c312aeb 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -143,6 +143,12 @@ struct domain_iommu {
* must not be modified after initialization.
*/
struct rangeset *reserved_ranges;
+
+ /*
+ * PV-IOMMU fields
+ */
+ bool domain_control;
+ spinlock_t lock;
};
#define dom_iommu(d) (&(d)->iommu)
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index d2f9b1034b..3f5b0ac004 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -79,7 +79,10 @@
? vcpu_hvm_x86_64 hvm/hvm_vcpu.h
! iommu_op iommu_op.h
! iommu_op_buf iommu_op.h
+! iommu_op_enable_modification iommu_op.h
+! iommu_op_map iommu_op.h
! iommu_op_query_reserved iommu_op.h
+! iommu_op_unmap iommu_op.h
? iommu_reserved_range iommu_op.h
? kexec_exec kexec.h
! kexec_image kexec.h
--
2.11.0
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel