|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v7 6/7] x86: add xen_iommu_ops to modify IOMMU mappings
>>> On 15.10.18 at 12:35, <paul.durrant@xxxxxxxxxx> wrote:
> --- a/xen/common/iommu_op.c
> +++ b/xen/common/iommu_op.c
> @@ -78,7 +78,205 @@ static int iommu_op_query_reserved(struct
> xen_iommu_op_query_reserved *op)
> return 0;
> }
>
> -static void iommu_op(xen_iommu_op_t *op)
> +static int iommu_op_enable_modification(
> + struct xen_iommu_op_enable_modification *op)
> +{
> + struct domain *currd = current->domain;
> + struct domain_iommu *iommu = dom_iommu(currd);
> + const struct iommu_ops *ops = iommu->platform_ops;
> + int rc;
> +
> + if ( op->cap || op->pad )
> + return -EINVAL;
> +
> + spin_lock(&iommu->lock);
> +
> + /* Has modification already been enabled? */
> + rc = 0;
> + if ( iommu->domain_control )
> + goto unlock;
> +
> + /*
> + * Modification of IOMMU mappings cannot be put under domain control if:
> + * - this domain does not have IOMMU page tables, or
> + * - HAP is enabled for this domain and the IOMMU shares the tables.
> + */
> + rc = -EACCES;
> + if ( !has_iommu_pt(currd) || iommu_use_hap_pt(currd) )
> + goto unlock;
I understand the latter restriction, but why the former? Wouldn't
it be reasonable for a domain to request self management at boot
time even if a first pass-through device was assigned only at a
later point in time?
> +static int iommuop_map(struct xen_iommu_op_map *op)
const?
> +{
> + struct domain *d, *currd = current->domain;
> + struct domain_iommu *iommu = dom_iommu(currd);
> + bool readonly = op->flags & XEN_IOMMUOP_map_readonly;
> + dfn_t dfn = _dfn(op->dfn);
> + p2m_type_t p2mt;
> + struct page_info *page;
> + mfn_t ignore;
> + unsigned int flags;
> + int rc;
> +
> + if ( op->pad || (op->flags & ~(XEN_IOMMUOP_map_all |
> + XEN_IOMMUOP_map_readonly)) )
> + return -EINVAL;
> +
> + if ( !iommu->domain_control )
> + return -EOPNOTSUPP;
-EACCES or -EPERM?
> + /* Per-device mapping not yet supported */
> + if ( !(op->flags & XEN_IOMMUOP_map_all) )
> + return -EINVAL;
> +
> + /* Check whether the specified DFN falls in a reserved region */
> + if ( rangeset_contains_singleton(iommu->reserved_ranges, dfn_x(dfn)) )
> + return -EINVAL;
> +
> + d = rcu_lock_domain_by_any_id(op->domid);
> + if ( !d )
> + return -ESRCH;
> +
> + rc = check_get_page_from_gfn(d, _gfn(op->gfn), readonly, &p2mt, &page);
> + if ( rc )
> + goto unlock_domain;
> +
> + rc = -EINVAL;
> + if ( p2mt != p2m_ram_rw ||
> + (!readonly && !get_page_type(page, PGT_writable_page)) )
Why would r/o mappings of p2m_ram_ro not be permitted?
> + {
> + put_page(page);
> + goto unlock_domain;
> + }
> +
> + spin_lock(&iommu->lock);
> +
> + rc = iommu_lookup_page(currd, dfn, &ignore, &flags);
> +
> + /* Treat a non-reference-counted entry as non-existent */
> + if ( !rc )
> + rc = !(flags & IOMMUF_refcount) ? -ENOENT : -EEXIST;
> +
> + if ( rc != -ENOENT )
> + goto unlock_iommu;
Isn't the combination of the two if()s the wrong way round, allowing
non-ref-counted entries to get replaced? I.e. don't you need to
swap the two latter operands of the conditional expression above?
The comment also looks to be somewhat off: Aiui you mean to
permit mapping into holes (-ENOENT), i.e. what you call "non-
existent" above. But maybe I'm confused ...
> +static int iommuop_unmap(struct xen_iommu_op_unmap *op)
> +{
> + struct domain *d, *currd = current->domain;
> + struct domain_iommu *iommu = dom_iommu(currd);
> + dfn_t dfn = _dfn(op->dfn);
> + mfn_t mfn;
> + unsigned int flags;
> + bool readonly;
> + p2m_type_t p2mt;
> + struct page_info *page;
> + int rc;
> +
> + if ( op->pad ||
> + (op->flags & ~XEN_IOMMUOP_unmap_all) )
> + return -EINVAL;
> +
> + if ( !iommu->domain_control )
> + return -EOPNOTSUPP;
> +
> + /* Per-device unmapping not yet supported */
> + if ( !(op->flags & XEN_IOMMUOP_unmap_all) )
> + return -EINVAL;
> +
> + d = rcu_lock_domain_by_any_id(op->domid);
> + if ( !d )
> + return -ESRCH;
> +
> + spin_lock(&iommu->lock);
> +
> + rc = iommu_lookup_page(currd, dfn, &mfn, &flags);
> +
> + /* Treat a non-reference-counted entry as non-existent */
> + if ( !rc )
> + rc = !(flags & IOMMUF_refcount) ? -ENOENT : 0;
> +
> + if ( rc )
> + goto unlock;
> +
> + readonly = !(flags & IOMMUF_writable);
> +
> + /* Make sure the mapped frame matches */
> + rc = check_get_page_from_gfn(d, _gfn(op->gfn), readonly, &p2mt, &page);
I'm not convinced that P2M_UNSHARE is really intended or necessary
for unmaps of writable mappings. With this there's then little point
having the local variable.
> + if ( rc )
> + goto unlock;
> +
> + rc = !mfn_eq(mfn, page_to_mfn(page)) ? -EINVAL : 0;
> +
> + /* Release reference taken above */
> + put_page(page);
> +
> + if ( rc )
> + goto unlock;
> +
> + /* Release references taken in map */
> + if ( !readonly )
> + put_page_type(page);
> + put_page(page);
You may not drop these references before ...
> + /*
> + * This really should not fail. If it does, there is an implicit
> + * domain_crash() (except in the case of the hardware domain) since
> + * there is not a lot else that can be done to ensure the released
> + * page can be safely re-used.
> + */
> + rc = iommu_unmap_page(currd, dfn);
... having completed the unmap _and_ the associated flush.
> @@ -86,13 +284,30 @@ static void iommu_op(xen_iommu_op_t *op)
> op->status = iommu_op_query_reserved(&op->u.query_reserved);
> break;
>
> + case XEN_IOMMUOP_enable_modification:
> + op->status =
> + iommu_op_enable_modification(&op->u.enable_modification);
> + break;
> +
> + case XEN_IOMMUOP_map:
> + op->status = iommuop_map(&op->u.map);
> + if ( !op->status )
> + *need_flush = true;
Aren't you going quite a bit too far here? Replacing a not-present
entry with another one should, in the common case, not require
a flush. There may be a hardware dependency here (different
IOMMU kinds may behave differently).
> @@ -177,7 +403,8 @@ long do_iommu_op(unsigned int nr_bufs,
>
> rc = hypercall_create_continuation(__HYPERVISOR_iommu_op,
> "ih", nr_bufs, bufs);
> - }
> + } else if ( !rc && need_flush )
> + rc = iommu_iotlb_flush_all(current->domain);
Seems rather heavy a flush for perhaps just a single page map/unmap.
> @@ -296,6 +537,13 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
> if ( __copy_field_to_compat(h, &cmp, u.query_reserved.nr_entries) )
> return -EFAULT;
> }
> + else if ( cmp.op == XEN_IOMMUOP_enable_modification )
Convert to switch()?
> --- a/xen/include/public/iommu_op.h
> +++ b/xen/include/public/iommu_op.h
> @@ -61,6 +61,101 @@ struct xen_iommu_op_query_reserved {
> XEN_GUEST_HANDLE(xen_iommu_reserved_range_t) ranges;
> };
>
> +/*
> + * XEN_IOMMUOP_enable_modification: Enable operations that modify IOMMU
> + * mappings.
> + */
> +#define XEN_IOMMUOP_enable_modification 2
> +
> +struct xen_iommu_op_enable_modification {
> + /*
> + * OUT - On successful return this is set to the bitwise OR of
> capabilities
> + * defined below. On entry this must be set to zero.
> + */
> + uint32_t cap;
> + uint32_t pad;
What's the point of this padding field?
> +struct xen_iommu_op_map {
> + /* IN - The domid of the guest */
> + domid_t domid;
> + /*
> + * IN - flags controlling the mapping. This should be a bitwise OR of the
> + * flags defined below.
> + */
> + uint16_t flags;
> +
> + /*
> + * Should the mapping be created for all initiators?
> + *
> + * NOTE: This flag is currently required as the implementation does not
> yet
> + * support per-device mappings.
> + */
> +#define _XEN_IOMMUOP_map_all 0
> +#define XEN_IOMMUOP_map_all (1 << (_XEN_IOMMUOP_map_all))
> +
> + /* Should the mapping be read-only to the initiator(s)? */
> +#define _XEN_IOMMUOP_map_readonly 1
> +#define XEN_IOMMUOP_map_readonly (1 << (_XEN_IOMMUOP_map_readonly))
> +
> + uint32_t pad;
> + /*
> + * IN - Segment/Bus/Device/Function of the initiator.
> + *
> + * NOTE: This is ignored if XEN_IOMMUOP_map_all is set.
> + */
> + uint64_t sbdf;
Does this need to be 64 bits wide? Iirc segment numbers are 16 bits,
bus ones 8 bits, device ones 5, and function ones 3. Sums up to 32.
Another question is whether, for portability's sake, this shouldn't
be a union with a PCI sub-structure. In this case for PCI 32 bits
would suffice, but reserving 64 bits for the general case would
perhaps indeed be a good idea.
> --- a/xen/include/xlat.lst
> +++ b/xen/include/xlat.lst
> @@ -79,7 +79,10 @@
> ? vcpu_hvm_x86_64 hvm/hvm_vcpu.h
> ! iommu_op iommu_op.h
> ! iommu_op_buf iommu_op.h
> +! iommu_op_enable_modification iommu_op.h
Shouldn't this be ? instead?
Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |