
Re: [Xen-devel] [PATCH RFC v2] x86/p2m: use large pages for MMIO mappings



On 09/22/2015 01:56 PM, Jan Beulich wrote:
> When mapping large BARs (e.g. the frame buffer of a graphics card) the
> overhead of establishing such mappings using only 4k pages has,
> particularly after the XSA-125 fix, become unacceptable. Alter the
> XEN_DOMCTL_memory_mapping semantics once again, so that there's no
> longer a fixed amount of guest frames that represents the upper limit
> of what a single invocation can map. Instead bound execution time by
> limiting the number of iterations (regardless of page size).

FWIW most of the code looks OK to me.

The final version should probably mention somewhere exactly what the
changed semantics are -- specifically, that returning >0 means, "I
mapped this many, keep mapping the rest".
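
I.e., for a raw domctl user the retry would look roughly like this (just a
sketch using the domctl fields from the libxc hunk below, not code from the
patch):

---
rc = do_domctl(xch, &domctl);
if ( rc > 0 )
{
    /* Only 'rc' of the requested nr_mfns frames were handled... */
    domctl.u.memory_mapping.first_gfn += rc;
    domctl.u.memory_mapping.first_mfn += rc;
    domctl.u.memory_mapping.nr_mfns   -= rc;
    /* ...so re-issue the domctl for the remainder. */
}
---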

> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> RFC reasons:
> - ARM side unimplemented (and hence libxc for now made cope with both
>   models), the main issue (besides my inability to test any change
>   there) being the many internal uses of map_mmio_regions())
> - error unmapping in map_mmio_regions() and error propagation to caller
>   from unmap_mmio_regions() are not really satisfactory (for the latter
>   a possible model might be to have the function - and hence the
>   domctl - return the [non-zero] number of completed entries upon
>   error, requiring the caller to re-invoke the hypercall to then obtain
>   the actual error for the failed slot)
> - iommu_{,un}map_page() interfaces don't support "order" (hence
>   mmio_order() for now returns zero when !iommu_hap_pt_share, which in
>   particular means the AMD side isn't being taken care of just yet)
> ---
> v2: Produce valid entries for large p2m_mmio_direct mappings in
>     p2m_pt_set_entry(). Don't open code iommu_use_hap_pt() in
>     mmio_order(). Update function comment of set_typed_p2m_entry() and
>     clear_mmio_p2m_entry(). Use PRI_mfn. Add ASSERT()s to
>     {,un}map_mmio_regions() to detect otherwise endless loops.
> ---
> TODO: The p2m_pt_set_entry() change in v2 points out an apparent
>       inconsistency with PoD handling: 2M mappings get valid entries
>       created, while 4k mappings don't. It would seem to me that the
>       4k case needs changing.
> 
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -2215,7 +2215,7 @@ int xc_domain_memory_mapping(
>  {
>      DECLARE_DOMCTL;
>      xc_dominfo_t info;
> -    int ret = 0, err;
> +    int ret = 0, rc;
>      unsigned long done = 0, nr, max_batch_sz;
>  
>      if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 ||
> @@ -2240,19 +2240,24 @@ int xc_domain_memory_mapping(
>          domctl.u.memory_mapping.nr_mfns = nr;
>          domctl.u.memory_mapping.first_gfn = first_gfn + done;
>          domctl.u.memory_mapping.first_mfn = first_mfn + done;
> -        err = do_domctl(xch, &domctl);
> -        if ( err && errno == E2BIG )
> +        rc = do_domctl(xch, &domctl);
> +        if ( rc < 0 && errno == E2BIG )
>          {
>              if ( max_batch_sz <= 1 )
>                  break;
>              max_batch_sz >>= 1;
>              continue;
>          }
> +        if ( rc > 0 )
> +        {
> +            done += rc;
> +            continue;
> +        }
>          /* Save the first error... */
>          if ( !ret )
> -            ret = err;
> +            ret = rc;
>          /* .. and ignore the rest of them when removing. */
> -        if ( err && add_mapping != DPCI_REMOVE_MAPPING )
> +        if ( rc && add_mapping != DPCI_REMOVE_MAPPING )
>              break;

Would it make more sense to structure this something like this:
---
rc = do_domctl(xch, &domctl);

if ( rc < 0 ) {
  if ( errno == E2BIG ) { blah; continue; }
  /* Other error stuff */
} else if ( rc > 0 )
  nr = rc;

done += nr;
---
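
Fleshed out a bit (still just a sketch against the loop body as quoted,
keeping the existing E2BIG batch halving and the "save the first error"
behaviour; untested):

---
rc = do_domctl(xch, &domctl);

if ( rc < 0 )
{
    if ( errno == E2BIG )
    {
        /* Hypercall rejected as too big: halve the batch and retry. */
        if ( max_batch_sz <= 1 )
            break;
        max_batch_sz >>= 1;
        continue;
    }
    /* Save the first error... */
    if ( !ret )
        ret = rc;
    /* ... and ignore the rest of them when removing. */
    if ( add_mapping != DPCI_REMOVE_MAPPING )
        break;
}
else if ( rc > 0 )
    nr = rc;    /* partial completion: only 'rc' frames were done */

done += nr;
---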


>  
>          done += nr;
> --- a/xen/arch/x86/domain_build.c
> +++ b/xen/arch/x86/domain_build.c
> @@ -436,7 +436,7 @@ static __init void pvh_add_mem_mapping(s
>          else
>              a = p2m_access_rwx;
>  
> -        if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), a)) )
> +        if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), 0, a)) )
>              panic("pvh_add_mem_mapping: gfn:%lx mfn:%lx i:%ld rc:%d\n",
>                    gfn, mfn, i, rc);
>          if ( !(i & 0xfffff) )
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2396,7 +2396,8 @@ static int vmx_alloc_vlapic_mapping(stru
>      share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
>      d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
>      set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
> -        _mfn(virt_to_mfn(apic_va)), p2m_get_hostp2m(d)->default_access);
> +                       _mfn(virt_to_mfn(apic_va)), 0,
> +                       p2m_get_hostp2m(d)->default_access);
>  
>      return 0;
>  }
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -896,41 +896,55 @@ void p2m_change_type_range(struct domain
>      p2m_unlock(p2m);
>  }
>  
> -/* Returns: 0 for success, -errno for failure */
> +/*
> + * Returns:
> + *    0        for success
> + *    -errno   for failure
> + *    order+1  for caller to retry with order (guaranteed smaller than
> + *             the order value passed in)
> + */
> static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
> -                               p2m_type_t gfn_p2mt, p2m_access_t access)
> +                               unsigned int order, p2m_type_t gfn_p2mt,
> +                               p2m_access_t access)
>  {
>      int rc = 0;
>      p2m_access_t a;
>      p2m_type_t ot;
>      mfn_t omfn;
> +    unsigned int cur_order = 0;
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
>      if ( !paging_mode_translate(d) )
>          return -EIO;
>  
> -    gfn_lock(p2m, gfn, 0);
> -    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
> +    gfn_lock(p2m, gfn, order);
> +    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
> +    if ( cur_order < order )
> +    {
> +        gfn_unlock(p2m, gfn, order);
> +        return cur_order + 1;
> +    }
>      if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
>      {
> -        gfn_unlock(p2m, gfn, 0);
> +        gfn_unlock(p2m, gfn, order);
>          domain_crash(d);
>          return -ENOENT;
>      }
>      else if ( p2m_is_ram(ot) )
>      {
> +        unsigned long i;
> +
>          ASSERT(mfn_valid(omfn));
> -        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
> +        for ( i = 0; i < (1UL << order); ++i )
> +            set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
>      }
>  
>      P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn, mfn_x(mfn));
> -    rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, gfn_p2mt,
> -                       access);
> -    gfn_unlock(p2m, gfn, 0);
> +    rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
> +    gfn_unlock(p2m, gfn, order);
>      if ( rc )
> -        gdprintk(XENLOG_ERR,
> -                 "p2m_set_entry failed! mfn=%08lx rc:%d\n",
> -                 mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot)), rc);
> +        gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n",
> +                 gfn, order, rc, mfn_x(mfn));
>      return rc;
>  }
>  
> @@ -938,14 +952,14 @@ static int set_typed_p2m_entry(struct do
>  static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
>                                   mfn_t mfn)
>  {
> -    return set_typed_p2m_entry(d, gfn, mfn, p2m_map_foreign,
> +    return set_typed_p2m_entry(d, gfn, mfn, 0, p2m_map_foreign,
>                                 p2m_get_hostp2m(d)->default_access);
>  }
>  
>  int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
> -                       p2m_access_t access)
> +                       unsigned int order, p2m_access_t access)
>  {
> -    return set_typed_p2m_entry(d, gfn, mfn, p2m_mmio_direct, access);
> +    return set_typed_p2m_entry(d, gfn, mfn, order, p2m_mmio_direct, access);
>  }
>  
>  int set_identity_p2m_entry(struct domain *d, unsigned long gfn,
> @@ -989,20 +1003,33 @@ int set_identity_p2m_entry(struct domain
>      return ret;
>  }
>  
> -/* Returns: 0 for success, -errno for failure */
> -int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
> +/*
> + * Returns:
> + *    0        for success
> + *    -errno   for failure
> + *    order+1  for caller to retry with order (guaranteed smaller than
> + *             the order value passed in)
> + */
> +int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
> +                         unsigned int order)
>  {
>      int rc = -EINVAL;
>      mfn_t actual_mfn;
>      p2m_access_t a;
>      p2m_type_t t;
> +    unsigned int cur_order = 0;
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
>      if ( !paging_mode_translate(d) )
>          return -EIO;
>  
> -    gfn_lock(p2m, gfn, 0);
> -    actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, NULL, NULL);
> +    gfn_lock(p2m, gfn, order);
> +    actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, &cur_order, NULL);
> +    if ( cur_order < order )
> +    {
> +        rc = cur_order + 1;
> +        goto out;
> +    }
>  
>      /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
>      if ( (INVALID_MFN == mfn_x(actual_mfn)) || (t != p2m_mmio_direct) )
> @@ -1015,11 +1042,11 @@ int clear_mmio_p2m_entry(struct domain *
>          gdprintk(XENLOG_WARNING,
>                   "no mapping between mfn %08lx and gfn %08lx\n",
>                   mfn_x(mfn), gfn);
> -    rc = p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_4K, p2m_invalid,
> +    rc = p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), order, p2m_invalid,
>                         p2m->default_access);
>  
>   out:
> -    gfn_unlock(p2m, gfn, 0);
> +    gfn_unlock(p2m, gfn, order);
>  
>      return rc;
>  }
> @@ -2038,6 +2065,25 @@ unsigned long paging_gva_to_gfn(struct v
>      return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
>  }
>  
> +static unsigned int mmio_order(const struct domain *d,
> +                               unsigned long start_fn, unsigned long nr)
> +{
> +    if ( !need_iommu(d) || !iommu_use_hap_pt(d) ||
> +         (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
> +        return 0;
> +
> +    if ( !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) &&
> +         opt_hap_1gb && hvm_hap_has_1gb(d) )
> +        return PAGE_ORDER_1G;
> +
> +    if ( opt_hap_2mb && hvm_hap_has_2mb(d) )
> +        return PAGE_ORDER_2M;
> +
> +    return 0;
> +}
> +
> +#define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */
> +
>  int map_mmio_regions(struct domain *d,
>                       unsigned long start_gfn,
>                       unsigned long nr,
> @@ -2045,22 +2091,47 @@ int map_mmio_regions(struct domain *d,
>  {
>      int ret = 0;
>      unsigned long i;
> +    unsigned int iter, order;
>  
>      if ( !paging_mode_translate(d) )
>          return 0;
>  
> -    for ( i = 0; !ret && i < nr; i++ )
> +    for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
> +          i += 1UL << order, ++iter )
>      {
> -        ret = set_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i),
> -                                 p2m_get_hostp2m(d)->default_access);
> -        if ( ret )
> +        for ( order = mmio_order(d, (start_gfn + i) | (mfn + i), nr - i); ;
> +              order = ret - 1 )

I think you need a comment explaining what this (start_gfn + i) | (mfn + i)
stuff is about; maybe something like:

/* OR'ing the gfn and mfn values will return an order suitable to both */

(I assume that's what's going on here?)
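
I.e., something along these lines (just to illustrate my reading of it, not
code from the patch):

---
/*
 * Any low bit set in either value survives the OR, so if
 * ((start_gfn + i) | (mfn + i)) is 2M-aligned then both the gfn and the
 * mfn are 2M-aligned (and likewise for 1G), which is what mmio_order()
 * checks via its start_fn parameter.
 */
if ( !(((start_gfn + i) | (mfn + i)) & ((1UL << PAGE_ORDER_2M) - 1)) )
    /* a 2M mapping is permissible as far as alignment goes */;
---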

No comments otherwise.

 -George

