Ping: [PATCH v3 1/2] IOMMU/x86: disallow device assignment to PoD guests
On 04.01.2022 10:41, Jan Beulich wrote:
> While it is okay for IOMMU page tables to get set up for guests starting
> in PoD mode, actual device assignment may only occur once all PoD
> entries have been removed from the P2M. So far this was enforced only
> for boot-time assignment, and only in the tool stack.
> 
> Also use the new function to replace p2m_pod_entry_count(): Its unlocked
> access to p2m->pod.entry_count wasn't really okay (irrespective of the
> result being stale by the time the caller gets to see it).
> 
> To allow the tool stack to see a consistent snapshot of PoD state, move
> the tail of XENMEM_{get,set}_pod_target handling into a function, adding
> proper locking there.
> 
> In libxl take the liberty to use the new local variable r also for a
> pre-existing call into libxc.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

Ping? (Anthony, I realize you weren't Cc-ed on the original submission.)

Jan

> ---
> If p2m->pod.entry_count == p2m->pod.count it is in principle possible to
> permit device assignment by actively resolving all remaining PoD entries.
> 
> Initially I thought this was introduced by f89f555827a6 ("remove late
> (on-demand) construction of IOMMU page tables"), but without
> arch_iommu_use_permitted() checking for PoD I think the issue has been
> there before that.
> ---
> v3: In p2m_pod_set_mem_target() move check down.
> v2: New.
> 
> --- a/tools/libs/light/libxl_pci.c
> +++ b/tools/libs/light/libxl_pci.c
> @@ -1619,8 +1619,13 @@ void libxl__device_pci_add(libxl__egc *e
>      pas->callback = device_pci_add_stubdom_done;
>  
>      if (libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM) {
> -        rc = xc_test_assign_device(ctx->xch, domid, pci_encode_bdf(pci));
> -        if (rc) {
> +        int r;
> +        uint64_t cache, ents;
> +
> +        rc = ERROR_FAIL;
> +
> +        r = xc_test_assign_device(ctx->xch, domid, pci_encode_bdf(pci));
> +        if (r) {
>              LOGD(ERROR, domid,
>                   "PCI device %04x:%02x:%02x.%u %s?",
>                   pci->domain, pci->bus, pci->dev, pci->func,
> @@ -1628,6 +1633,22 @@ void libxl__device_pci_add(libxl__egc *e
>                   : "already assigned to a different guest");
>              goto out;
>          }
> +
> +        r = xc_domain_get_pod_target(ctx->xch, domid, NULL, &cache, &ents);
> +        if (r) {
> +            LOGED(ERROR, domid, "Cannot determine PoD status");
> +            goto out;
> +        }
> +        /*
> +         * In principle it is sufficient for the domain to have ballooned down
> +         * enough such that ents <= cache. But any remaining entries would
> +         * need resolving first. Until such time when this gets effected,
> +         * refuse assignment as long as any entries are left.
> +         */
> +        if (ents /* > cache */) {
> +            LOGD(ERROR, domid, "Cannot assign device with PoD still active");
> +            goto out;
> +        }
>      }
>  
>      rc = libxl__device_pci_setdefault(gc, domid, pci, !starting);
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -20,6 +20,7 @@
>   */
>  
>  #include <xen/event.h>
> +#include <xen/iocap.h>
>  #include <xen/ioreq.h>
>  #include <xen/mm.h>
>  #include <xen/sched.h>
> @@ -359,7 +360,10 @@ p2m_pod_set_mem_target(struct domain *d,
>  
>      ASSERT( pod_target >= p2m->pod.count );
>  
> -    ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/);
> +    if ( has_arch_pdevs(d) || cache_flush_permitted(d) )
> +        ret = -ENOTEMPTY;
> +    else
> +        ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/);
>  
>  out:
>      pod_unlock(p2m);
> @@ -367,6 +371,23 @@ out:
>      return ret;
>  }
>  
> +void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target)
> +{
> +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +
> +    ASSERT(is_hvm_domain(d));
> +
> +    pod_lock(p2m);
> +    lock_page_alloc(p2m);
> +
> +    target->tot_pages = domain_tot_pages(d);
> +    target->pod_cache_pages = p2m->pod.count;
> +    target->pod_entries = p2m->pod.entry_count;
> +
> +    unlock_page_alloc(p2m);
> +    pod_unlock(p2m);
> +}
> +
>  int p2m_pod_empty_cache(struct domain *d)
>  {
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
> @@ -1384,6 +1405,9 @@ guest_physmap_mark_populate_on_demand(st
>      if ( !paging_mode_translate(d) )
>          return -EINVAL;
>  
> +    if ( has_arch_pdevs(d) || cache_flush_permitted(d) )
> +        return -ENOTEMPTY;
> +
>      do {
>          rc = mark_populate_on_demand(d, gfn, chunk_order);
>  
> @@ -1405,3 +1429,20 @@ void p2m_pod_init(struct p2m_domain *p2m
>      for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i )
>          p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN);
>  }
> +
> +bool p2m_pod_active(const struct domain *d)
> +{
> +    struct p2m_domain *p2m;
> +    bool res;
> +
> +    if ( !is_hvm_domain(d) )
> +        return false;
> +
> +    p2m = p2m_get_hostp2m(d);
> +
> +    pod_lock(p2m);
> +    res = p2m->pod.entry_count | p2m->pod.count;
> +    pod_unlock(p2m);
> +
> +    return res;
> +}
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -4778,7 +4778,6 @@ long arch_memory_op(unsigned long cmd, X
>      {
>          xen_pod_target_t target;
>          struct domain *d;
> -        struct p2m_domain *p2m;
>  
>          if ( copy_from_guest(&target, arg, 1) )
>              return -EFAULT;
> @@ -4787,7 +4786,9 @@ long arch_memory_op(unsigned long cmd, X
>          if ( d == NULL )
>              return -ESRCH;
>  
> -        if ( cmd == XENMEM_set_pod_target )
> +        if ( !is_hvm_domain(d) )
> +            rc = -EINVAL;
> +        else if ( cmd == XENMEM_set_pod_target )
>              rc = xsm_set_pod_target(XSM_PRIV, d);
>          else
>              rc = xsm_get_pod_target(XSM_PRIV, d);
> @@ -4813,10 +4814,7 @@ long arch_memory_op(unsigned long cmd, X
>          }
>          else if ( rc >= 0 )
>          {
> -            p2m = p2m_get_hostp2m(d);
> -            target.tot_pages = domain_tot_pages(d);
> -            target.pod_cache_pages = p2m->pod.count;
> -            target.pod_entries = p2m->pod.entry_count;
> +            p2m_pod_get_mem_target(d, &target);
>  
>              if ( __copy_to_guest(arg, &target, 1) )
>              {
> --- a/xen/common/vm_event.c
> +++ b/xen/common/vm_event.c
> @@ -639,7 +639,7 @@ int vm_event_domctl(struct domain *d, st
>  
>          rc = -EXDEV;
>          /* Disallow paging in a PoD guest */
> -        if ( p2m_pod_entry_count(p2m_get_hostp2m(d)) )
> +        if ( p2m_pod_active(d) )
>              break;
>  
>          /* domain_pause() not required here, see XSA-99 */
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -454,11 +454,12 @@ bool arch_iommu_use_permitted(const stru
>  {
>      /*
>       * Prevent device assign if mem paging, mem sharing or log-dirty
> -     * have been enabled for this domain.
> +     * have been enabled for this domain, or if PoD is still in active use.
>       */
>      return d == dom_io ||
>             (likely(!mem_sharing_enabled(d)) &&
>              likely(!mem_paging_enabled(d)) &&
> +            likely(!p2m_pod_active(d)) &&
>              likely(!p2m_get_hostp2m(d)->global_logdirty));
>  }
>  
> --- a/xen/arch/x86/include/asm/p2m.h
> +++ b/xen/arch/x86/include/asm/p2m.h
> @@ -661,6 +661,12 @@ int p2m_pod_empty_cache(struct domain *d
>   * domain matches target */
>  int p2m_pod_set_mem_target(struct domain *d, unsigned long target);
>  
> +/* Obtain a consistent snapshot of PoD related domain state. */
> +void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target);
> +
> +/* Check whether PoD is (still) active in a domain. */
> +bool p2m_pod_active(const struct domain *d);
> +
>  /* Scan pod cache when offline/broken page triggered */
>  int
>  p2m_pod_offline_or_broken_hit(struct page_info *p);
> @@ -669,11 +675,6 @@ p2m_pod_offline_or_broken_hit(struct pag
>  void
>  p2m_pod_offline_or_broken_replace(struct page_info *p);
>  
> -static inline long p2m_pod_entry_count(const struct p2m_domain *p2m)
> -{
> -    return p2m->pod.entry_count;
> -}
> -
>  void p2m_pod_init(struct p2m_domain *p2m);
>  
>  #else
> @@ -689,6 +690,11 @@ static inline int p2m_pod_empty_cache(st
>      return 0;
>  }
>  
> +static inline bool p2m_pod_active(const struct domain *d)
> +{
> +    return false;
> +}
> +
>  static inline int p2m_pod_offline_or_broken_hit(struct page_info *p)
>  {
>      return 0;
>  }
> @@ -699,11 +705,6 @@ static inline void p2m_pod_offline_or_br
>      ASSERT_UNREACHABLE();
>  }
>  
> -static inline long p2m_pod_entry_count(const struct p2m_domain *p2m)
> -{
> -    return 0;
> -}
> -
>  static inline void p2m_pod_init(struct p2m_domain *p2m) {}
>  
>  #endif
> 
> 
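For readers who want to replicate the same gate outside libxl, below is a minimal sketch (not part of the patch) built only on the two libxc calls the patch itself uses, xc_test_assign_device() and xc_domain_get_pod_target(); the helper name check_pod_before_assign() and its error reporting are hypothetical:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#include <xenctrl.h>

/*
 * Hypothetical helper mirroring the check libxl__device_pci_add() gains:
 * returns 0 if assignment may proceed, -1 otherwise.
 */
int check_pod_before_assign(xc_interface *xch, uint32_t domid,
                            uint32_t machine_sbdf)
{
    uint64_t cache, ents;

    /* Would the IOMMU side accept this device for the domain at all? */
    if (xc_test_assign_device(xch, domid, machine_sbdf)) {
        fprintf(stderr, "device %#x not assignable to domain %u\n",
                machine_sbdf, domid);
        return -1;
    }

    /* Fetch PoD state (snapshotted consistently with this patch applied). */
    if (xc_domain_get_pod_target(xch, domid, NULL, &cache, &ents)) {
        fprintf(stderr, "cannot determine PoD status of domain %u\n", domid);
        return -1;
    }

    /* Refuse as long as any PoD entries remain in the P2M. */
    if (ents /* > cache */) {
        fprintf(stderr, "domain %u still has %" PRIu64 " PoD entries\n",
                domid, ents);
        return -1;
    }

    return 0;
}

As in the libxl hunk above, the conservative condition is "any entries left" rather than "ents > cache", since remaining entries would first need resolving before assignment could be made safe.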