Ping: [PATCH v3 1/2] IOMMU/x86: disallow device assignment to PoD guests
On 04.01.2022 10:41, Jan Beulich wrote:
> While it is okay for IOMMU page tables to get set up for guests starting
> in PoD mode, actual device assignment may only occur once all PoD
> entries have been removed from the P2M. So far this was enforced only
> for boot-time assignment, and only in the tool stack.
>
> Also use the new p2m_pod_active() function to replace p2m_pod_entry_count():
> Its unlocked access to p2m->pod.entry_count wasn't really okay (irrespective
> of the result being stale by the time the caller gets to see it).
>
> To allow the tool stack to see a consistent snapshot of PoD state, move
> the tail of XENMEM_{get,set}_pod_target handling into a function, adding
> proper locking there.
>
> In libxl take the liberty to use the new local variable r also for a
> pre-existing call into libxc.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Ping? (Anthony, I realize you weren't Cc-ed on the original submission.)
Jan
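
The libxl hunk below gates assignment on xc_domain_get_pod_target() reporting
zero outstanding PoD entries. A minimal, self-contained sketch of that
toolstack-side pattern, assuming only the libxc interface used in the patch
(the helper name and error handling are illustrative, not part of the change),
could look like this:

#include <stdint.h>
#include <stdio.h>
#include <xenctrl.h>

/*
 * Illustrative helper (not part of the patch): report whether a domain
 * still has outstanding PoD entries, in which case device assignment
 * has to be refused, matching the new libxl__device_pci_add() check.
 * Returns 1 if assignment is permissible, 0 if PoD entries remain,
 * and -1 on error.
 */
int pod_allows_assignment(xc_interface *xch, uint32_t domid)
{
    uint64_t cache = 0, ents = 0;

    /* tot_pages is of no interest here, hence NULL. */
    if ( xc_domain_get_pod_target(xch, domid, NULL, &cache, &ents) )
    {
        fprintf(stderr, "cannot determine PoD status of d%u\n", domid);
        return -1;
    }

    /* Refuse as long as any PoD entries are left, as the patch does. */
    return ents ? 0 : 1;
}

A caller would obtain the xc_interface handle via xc_interface_open()
beforehand, much as libxl does internally.
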
> ---
> If p2m->pod.entry_count == p2m->pod.count it is in principle possible to
> permit device assignment by actively resolving all remaining PoD entries.
>
> Initially I thought this was introduced by f89f555827a6 ("remove late
> (on-demand) construction of IOMMU page tables"), but without
> arch_iommu_use_permitted() checking for PoD I think the issue was already
> there before that.
> ---
> v3: In p2m_pod_set_mem_target() move check down.
> v2: New.
>
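
Regarding the first remark above (permitting assignment once
p2m->pod.entry_count == p2m->pod.count by actively resolving the remaining
entries): such a relaxation would presumably still sample both counters under
the PoD lock, much like the p2m_pod_active() helper added below, merely
comparing them instead of requiring both to be zero. A purely illustrative
sketch, not part of the patch (the helper name is made up, and the actual
resolving of the remaining entries is left out):

/*
 * Hypothetical predicate, for illustration only: assignment could in
 * principle proceed once every outstanding PoD entry is backed by a
 * cache page (entry_count == count), provided those entries then get
 * actively resolved before any mapping is handed to a device.
 */
static bool p2m_pod_assignment_possible(const struct domain *d)
{
    struct p2m_domain *p2m;
    bool ok;

    if ( !is_hvm_domain(d) )
        return true;

    p2m = p2m_get_hostp2m(d);

    pod_lock(p2m);
    ok = p2m->pod.entry_count == p2m->pod.count;
    pod_unlock(p2m);

    return ok;
}
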
> --- a/tools/libs/light/libxl_pci.c
> +++ b/tools/libs/light/libxl_pci.c
> @@ -1619,8 +1619,13 @@ void libxl__device_pci_add(libxl__egc *e
> pas->callback = device_pci_add_stubdom_done;
>
> if (libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM) {
> - rc = xc_test_assign_device(ctx->xch, domid, pci_encode_bdf(pci));
> - if (rc) {
> + int r;
> + uint64_t cache, ents;
> +
> + rc = ERROR_FAIL;
> +
> + r = xc_test_assign_device(ctx->xch, domid, pci_encode_bdf(pci));
> + if (r) {
> LOGD(ERROR, domid,
> "PCI device %04x:%02x:%02x.%u %s?",
> pci->domain, pci->bus, pci->dev, pci->func,
> @@ -1628,6 +1633,22 @@ void libxl__device_pci_add(libxl__egc *e
> : "already assigned to a different guest");
> goto out;
> }
> +
> + r = xc_domain_get_pod_target(ctx->xch, domid, NULL, &cache, &ents);
> + if (r) {
> + LOGED(ERROR, domid, "Cannot determine PoD status");
> + goto out;
> + }
> + /*
> + * In principle it is sufficient for the domain to have ballooned down
> + * enough such that ents <= cache. But any remaining entries would
> + * need resolving first. Until such time when this gets effected,
> + * refuse assignment as long as any entries are left.
> + */
> + if (ents /* > cache */) {
> + LOGD(ERROR, domid, "Cannot assign device with PoD still active");
> + goto out;
> + }
> }
>
> rc = libxl__device_pci_setdefault(gc, domid, pci, !starting);
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -20,6 +20,7 @@
> */
>
> #include <xen/event.h>
> +#include <xen/iocap.h>
> #include <xen/ioreq.h>
> #include <xen/mm.h>
> #include <xen/sched.h>
> @@ -359,7 +360,10 @@ p2m_pod_set_mem_target(struct domain *d,
>
> ASSERT( pod_target >= p2m->pod.count );
>
> - ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/);
> + if ( has_arch_pdevs(d) || cache_flush_permitted(d) )
> + ret = -ENOTEMPTY;
> + else
> + ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/);
>
> out:
> pod_unlock(p2m);
> @@ -367,6 +371,23 @@ out:
> return ret;
> }
>
> +void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target)
> +{
> + struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +
> + ASSERT(is_hvm_domain(d));
> +
> + pod_lock(p2m);
> + lock_page_alloc(p2m);
> +
> + target->tot_pages = domain_tot_pages(d);
> + target->pod_cache_pages = p2m->pod.count;
> + target->pod_entries = p2m->pod.entry_count;
> +
> + unlock_page_alloc(p2m);
> + pod_unlock(p2m);
> +}
> +
> int p2m_pod_empty_cache(struct domain *d)
> {
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> @@ -1384,6 +1405,9 @@ guest_physmap_mark_populate_on_demand(st
> if ( !paging_mode_translate(d) )
> return -EINVAL;
>
> + if ( has_arch_pdevs(d) || cache_flush_permitted(d) )
> + return -ENOTEMPTY;
> +
> do {
> rc = mark_populate_on_demand(d, gfn, chunk_order);
>
> @@ -1405,3 +1429,20 @@ void p2m_pod_init(struct p2m_domain *p2m
> for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i )
> p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN);
> }
> +
> +bool p2m_pod_active(const struct domain *d)
> +{
> + struct p2m_domain *p2m;
> + bool res;
> +
> + if ( !is_hvm_domain(d) )
> + return false;
> +
> + p2m = p2m_get_hostp2m(d);
> +
> + pod_lock(p2m);
> + res = p2m->pod.entry_count | p2m->pod.count;
> + pod_unlock(p2m);
> +
> + return res;
> +}
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -4778,7 +4778,6 @@ long arch_memory_op(unsigned long cmd, X
> {
> xen_pod_target_t target;
> struct domain *d;
> - struct p2m_domain *p2m;
>
> if ( copy_from_guest(&target, arg, 1) )
> return -EFAULT;
> @@ -4787,7 +4786,9 @@ long arch_memory_op(unsigned long cmd, X
> if ( d == NULL )
> return -ESRCH;
>
> - if ( cmd == XENMEM_set_pod_target )
> + if ( !is_hvm_domain(d) )
> + rc = -EINVAL;
> + else if ( cmd == XENMEM_set_pod_target )
> rc = xsm_set_pod_target(XSM_PRIV, d);
> else
> rc = xsm_get_pod_target(XSM_PRIV, d);
> @@ -4813,10 +4814,7 @@ long arch_memory_op(unsigned long cmd, X
> }
> else if ( rc >= 0 )
> {
> - p2m = p2m_get_hostp2m(d);
> - target.tot_pages = domain_tot_pages(d);
> - target.pod_cache_pages = p2m->pod.count;
> - target.pod_entries = p2m->pod.entry_count;
> + p2m_pod_get_mem_target(d, &target);
>
> if ( __copy_to_guest(arg, &target, 1) )
> {
> --- a/xen/common/vm_event.c
> +++ b/xen/common/vm_event.c
> @@ -639,7 +639,7 @@ int vm_event_domctl(struct domain *d, st
>
> rc = -EXDEV;
> /* Disallow paging in a PoD guest */
> - if ( p2m_pod_entry_count(p2m_get_hostp2m(d)) )
> + if ( p2m_pod_active(d) )
> break;
>
> /* domain_pause() not required here, see XSA-99 */
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -454,11 +454,12 @@ bool arch_iommu_use_permitted(const stru
> {
> /*
> * Prevent device assign if mem paging, mem sharing or log-dirty
> - * have been enabled for this domain.
> + * have been enabled for this domain, or if PoD is still in active use.
> */
> return d == dom_io ||
> (likely(!mem_sharing_enabled(d)) &&
> likely(!mem_paging_enabled(d)) &&
> + likely(!p2m_pod_active(d)) &&
> likely(!p2m_get_hostp2m(d)->global_logdirty));
> }
>
> --- a/xen/arch/x86/include/asm/p2m.h
> +++ b/xen/arch/x86/include/asm/p2m.h
> @@ -661,6 +661,12 @@ int p2m_pod_empty_cache(struct domain *d
> * domain matches target */
> int p2m_pod_set_mem_target(struct domain *d, unsigned long target);
>
> +/* Obtain a consistent snapshot of PoD related domain state. */
> +void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target);
> +
> +/* Check whether PoD is (still) active in a domain. */
> +bool p2m_pod_active(const struct domain *d);
> +
> /* Scan pod cache when offline/broken page triggered */
> int
> p2m_pod_offline_or_broken_hit(struct page_info *p);
> @@ -669,11 +675,6 @@ p2m_pod_offline_or_broken_hit(struct pag
> void
> p2m_pod_offline_or_broken_replace(struct page_info *p);
>
> -static inline long p2m_pod_entry_count(const struct p2m_domain *p2m)
> -{
> - return p2m->pod.entry_count;
> -}
> -
> void p2m_pod_init(struct p2m_domain *p2m);
>
> #else
> @@ -689,6 +690,11 @@ static inline int p2m_pod_empty_cache(st
> return 0;
> }
>
> +static inline bool p2m_pod_active(const struct domain *d)
> +{
> + return false;
> +}
> +
> static inline int p2m_pod_offline_or_broken_hit(struct page_info *p)
> {
> return 0;
> @@ -699,11 +705,6 @@ static inline void p2m_pod_offline_or_br
> ASSERT_UNREACHABLE();
> }
>
> -static inline long p2m_pod_entry_count(const struct p2m_domain *p2m)
> -{
> - return 0;
> -}
> -
> static inline void p2m_pod_init(struct p2m_domain *p2m) {}
>
> #endif
>
>