[xen master] IOMMU/x86: perform PV Dom0 mappings in batches
commit c1e1564c8995d8a08891bd9313e4289bbe4662b4
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Mon Jul 25 15:32:59 2022 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Mon Jul 25 15:32:59 2022 +0200

    IOMMU/x86: perform PV Dom0 mappings in batches

    For large page mappings to be easily usable (i.e. in particular without
    un-shattering of smaller page mappings) and for mapping operations to
    then also be more efficient, pass batches of Dom0 memory to iommu_map().
    In dom0_construct_pv() and its helpers (covering strict mode) this
    additionally requires establishing the type of those pages (albeit with
    zero type references).

    The earlier establishing of PGT_writable_page | PGT_validated requires
    the existing places where this gets done (through get_page_and_type())
    to be updated: For pages which actually have a mapping, the type
    refcount needs to be 1.

    There is actually a related bug that gets fixed here as a side effect:
    Typically the last L1 table would get marked as such only after
    get_page_and_type(..., PGT_writable_page). While this is fine as far as
    refcounting goes, the page did remain mapped in the IOMMU in this case
    (when "iommu=dom0-strict").

    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Paul Durrant <paul@xxxxxxx>
---
 xen/arch/x86/pv/dom0_build.c        | 97 ++++++++++++++++++++++++++++++++++---
 xen/drivers/passthrough/x86/iommu.c | 52 +++++++++++++++-----
 2 files changed, 129 insertions(+), 20 deletions(-)

diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index e501979a86..323c49b0bd 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -46,7 +46,8 @@ void __init dom0_update_physmap(bool compat, unsigned long pfn,
 static __init void mark_pv_pt_pages_rdonly(struct domain *d,
                                            l4_pgentry_t *l4start,
                                            unsigned long vpt_start,
-                                           unsigned long nr_pt_pages)
+                                           unsigned long nr_pt_pages,
+                                           unsigned int *flush_flags)
 {
     unsigned long count;
     struct page_info *page;
@@ -71,6 +72,14 @@ static __init void mark_pv_pt_pages_rdonly(struct domain *d,
         ASSERT((page->u.inuse.type_info & PGT_type_mask) <= PGT_root_page_table);
         ASSERT(!(page->u.inuse.type_info & ~(PGT_type_mask | PGT_pae_xen_l2)));

+        /*
+         * Page table pages need to be removed from the IOMMU again in case
+         * iommu_memory_setup() ended up mapping them.
+         */
+        if ( need_iommu_pt_sync(d) &&
+             iommu_unmap(d, _dfn(mfn_x(page_to_mfn(page))), 1, 0,
+                         flush_flags) )
+            BUG();
+
         /* Read-only mapping + PGC_allocated + page-table page. */
         page->count_info         = PGC_allocated | 3;
         page->u.inuse.type_info |= PGT_validated | 1;
@@ -107,11 +116,56 @@ static __init void mark_pv_pt_pages_rdonly(struct domain *d,
     unmap_domain_page(pl3e);
 }

+static void __init iommu_memory_setup(struct domain *d, const char *what,
+                                      struct page_info *page, unsigned long nr,
+                                      unsigned int *flush_flags)
+{
+    long rc;
+    mfn_t mfn = page_to_mfn(page);
+
+    if ( !need_iommu_pt_sync(d) )
+        return;
+
+    while ( (rc = iommu_map(d, _dfn(mfn_x(mfn)), mfn, nr,
+                            IOMMUF_readable | IOMMUF_writable | IOMMUF_preempt,
+                            flush_flags)) > 0 )
+    {
+        mfn_add(mfn, rc);
+        nr -= rc;
+        /* See comment below. */
+        for ( ; rc--; ++page )
+        {
+            ASSERT(!page->u.inuse.type_info);
+            page->u.inuse.type_info = PGT_writable_page | PGT_validated;
+        }
+        process_pending_softirqs();
+    }
+    if ( rc )
+    {
+        printk(XENLOG_ERR
+               "pre-mapping %s MFN [%lx,%lx) into IOMMU failed: %ld\n",
+               what, mfn_x(mfn), mfn_x(mfn) + nr, rc);
+        return;
+    }
+
+    /*
+     * For successfully established IOMMU mappings the type of the page(s)
+     * needs to match (for _get_page_type() to unmap upon type change). Set
+     * the page(s) to writable with no type ref.
+     */
+    for ( ; nr--; ++page )
+    {
+        ASSERT(!page->u.inuse.type_info);
+        page->u.inuse.type_info = PGT_writable_page | PGT_validated;
+    }
+}
+
 static __init void setup_pv_physmap(struct domain *d, unsigned long pgtbl_pfn,
                                     unsigned long v_start, unsigned long v_end,
                                     unsigned long vphysmap_start,
                                     unsigned long vphysmap_end,
-                                    unsigned long nr_pages)
+                                    unsigned long nr_pages,
+                                    unsigned int *flush_flags)
 {
     struct page_info *page = NULL;
     l4_pgentry_t *pl4e, *l4start = map_domain_page(_mfn(pgtbl_pfn));
@@ -177,6 +231,10 @@ static __init void setup_pv_physmap(struct domain *d, unsigned long pgtbl_pfn,
                                              L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                                              MEMF_no_scrub)) != NULL )
             {
+                iommu_memory_setup(d, "P2M 1G", page,
+                                   SUPERPAGE_PAGES * SUPERPAGE_PAGES,
+                                   flush_flags);
+
                 *pl3e = l3e_from_page(page, L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
                 vphysmap_start += 1UL << L3_PAGETABLE_SHIFT;
                 continue;
@@ -203,6 +261,9 @@ static __init void setup_pv_physmap(struct domain *d, unsigned long pgtbl_pfn,
                                              L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                                              MEMF_no_scrub)) != NULL )
             {
+                iommu_memory_setup(d, "P2M 2M", page, SUPERPAGE_PAGES,
+                                   flush_flags);
+
                 *pl2e = l2e_from_page(page, L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
                 vphysmap_start += 1UL << L2_PAGETABLE_SHIFT;
                 continue;
@@ -311,6 +372,7 @@ int __init dom0_construct_pv(struct domain *d,
     unsigned long initrd_pfn = -1, initrd_mfn = 0;
     unsigned long count;
     struct page_info *page = NULL;
+    unsigned int flush_flags = 0;
     start_info_t *si;
     struct vcpu *v = d->vcpu[0];
     void *image_base = bootstrap_map(image);
@@ -573,6 +635,9 @@ int __init dom0_construct_pv(struct domain *d,
                     BUG();
         }
         initrd->mod_end = 0;
+
+        iommu_memory_setup(d, "initrd", mfn_to_page(_mfn(initrd_mfn)),
+                           PFN_UP(initrd_len), &flush_flags);
     }

     printk("PHYSICAL MEMORY ARRANGEMENT:\n"
@@ -606,6 +671,13 @@ int __init dom0_construct_pv(struct domain *d,

     process_pending_softirqs();

+    /*
+     * Map the full range here and then punch holes for page tables
+     * alongside marking them as such in mark_pv_pt_pages_rdonly().
+     */
+    iommu_memory_setup(d, "init-alloc", mfn_to_page(_mfn(alloc_spfn)),
+                       alloc_epfn - alloc_spfn, &flush_flags);
+
     mpt_alloc = (vpt_start - v_start) + pfn_to_paddr(alloc_spfn);
     if ( vinitrd_start )
         mpt_alloc -= PAGE_ALIGN(initrd_len);
@@ -690,7 +762,8 @@ int __init dom0_construct_pv(struct domain *d,
         l1tab++;

         page = mfn_to_page(_mfn(mfn));
-        if ( !page->u.inuse.type_info &&
+        if ( (!page->u.inuse.type_info ||
+              page->u.inuse.type_info == (PGT_writable_page | PGT_validated)) &&
             !get_page_and_type(page, d, PGT_writable_page) )
            BUG();
     }
@@ -719,7 +792,7 @@ int __init dom0_construct_pv(struct domain *d,
     }

     /* Pages that are part of page tables must be read only. */
-    mark_pv_pt_pages_rdonly(d, l4start, vpt_start, nr_pt_pages);
+    mark_pv_pt_pages_rdonly(d, l4start, vpt_start, nr_pt_pages, &flush_flags);

     /* Mask all upcalls... */
     for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
@@ -794,7 +867,7 @@ int __init dom0_construct_pv(struct domain *d,
     {
         pfn = pagetable_get_pfn(v->arch.guest_table);
         setup_pv_physmap(d, pfn, v_start, v_end, vphysmap_start, vphysmap_end,
-                         nr_pages);
+                         nr_pages, &flush_flags);
     }

     /* Write the phys->machine and machine->phys table entries. */
@@ -825,7 +898,9 @@ int __init dom0_construct_pv(struct domain *d,
         if ( get_gpfn_from_mfn(mfn) >= count )
         {
             BUG_ON(compat);
-            if ( !page->u.inuse.type_info &&
+            if ( (!page->u.inuse.type_info ||
+                  page->u.inuse.type_info == (PGT_writable_page |
+                                              PGT_validated)) &&
                  !get_page_and_type(page, d, PGT_writable_page) )
                 BUG();

@@ -841,8 +916,12 @@ int __init dom0_construct_pv(struct domain *d,
 #endif
     while ( pfn < nr_pages )
     {
-        if ( (page = alloc_chunk(d, nr_pages - domain_tot_pages(d))) == NULL )
+        count = domain_tot_pages(d);
+        if ( (page = alloc_chunk(d, nr_pages - count)) == NULL )
             panic("Not enough RAM for DOM0 reservation\n");
+
+        iommu_memory_setup(d, "chunk", page, domain_tot_pages(d) - count,
+                           &flush_flags);
         while ( pfn < domain_tot_pages(d) )
         {
             mfn = mfn_x(page_to_mfn(page));
@@ -857,6 +936,10 @@ int __init dom0_construct_pv(struct domain *d,
         }
     }

+    /* Use while() to avoid compiler warning. */
+    while ( iommu_iotlb_flush_all(d, flush_flags) )
+        break;
+
     if ( initrd_len != 0 )
     {
         si->mod_start = vinitrd_start ?: initrd_pfn;
diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c
index 0ba95473c9..be7617b1eb 100644
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -348,8 +348,8 @@ static unsigned int __hwdom_init hwdom_iommu_map(const struct domain *d,

 void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
 {
-    unsigned long i, top, max_pfn;
-    unsigned int flush_flags = 0;
+    unsigned long i, top, max_pfn, start, count;
+    unsigned int flush_flags = 0, start_perms = 0;

     BUG_ON(!is_hardware_domain(d));

@@ -380,31 +380,57 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
      * First Mb will get mapped in one go by pvh_populate_p2m(). Avoid
      * setting up potentially conflicting mappings here.
      */
-    i = paging_mode_translate(d) ? PFN_DOWN(MB(1)) : 0;
+    start = paging_mode_translate(d) ? PFN_DOWN(MB(1)) : 0;

-    for ( ; i < top; i++ )
+    for ( i = start, count = 0; i < top; )
     {
         unsigned long pfn = pdx_to_pfn(i);
         unsigned int perms = hwdom_iommu_map(d, pfn, max_pfn);
-        int rc;

         if ( !perms )
-            rc = 0;
+            /* nothing */;
         else if ( paging_mode_translate(d) )
+        {
+            int rc;
+
             rc = p2m_add_identity_entry(d, pfn,
                                         perms & IOMMUF_writable ? p2m_access_rw
                                                                 : p2m_access_r,
                                         0);
-        else
-            rc = iommu_map(d, _dfn(pfn), _mfn(pfn), 1ul << PAGE_ORDER_4K,
-                           perms, &flush_flags);
+            if ( rc )
+                printk(XENLOG_WARNING
+                       "%pd: identity mapping of %lx failed: %d\n",
+                       d, pfn, rc);
+        }
+        else if ( pfn != start + count || perms != start_perms )
+        {
+            long rc;

-        if ( rc )
-            printk(XENLOG_WARNING "%pd: identity %smapping of %lx failed: %d\n",
-                   d, !paging_mode_translate(d) ? "IOMMU " : "", pfn, rc);
+        commit:
+            while ( (rc = iommu_map(d, _dfn(start), _mfn(start), count,
+                                    start_perms | IOMMUF_preempt,
+                                    &flush_flags)) > 0 )
+            {
+                start += rc;
+                count -= rc;
+                process_pending_softirqs();
+            }
+            if ( rc )
+                printk(XENLOG_WARNING
+                       "%pd: IOMMU identity mapping of [%lx,%lx) failed: %ld\n",
+                       d, start, start + count, rc);
+            start = pfn;
+            count = 1;
+            start_perms = perms;
+        }
+        else
+            ++count;

-        if (!(i & 0xfffff))
+        if ( !(++i & 0xfffff) )
            process_pending_softirqs();
+
+        if ( i == top && count )
+            goto commit;
     }

     /* Use if to avoid compiler warning */
--
generated by git-patchbot for /home/xen/git/xen.git#master