[PATCH v3 02/23] VT-d: have callers specify the target level for page table walks
In order to be able to insert/remove super-pages we need to allow
callers of the walking function to specify at which point to stop the
walk. For intel_iommu_lookup_page() integrate the last level access into
the main walking function. dma_pte_clear_one() gets only partly adjusted
for now: Error handling and order parameter get put in place, but the
order parameter remains ignored (just like intel_iommu_map_page()'s
order part of the flags).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
I was actually wondering whether it wouldn't make sense to integrate
dma_pte_clear_one() into its only caller intel_iommu_unmap_page(), for
better symmetry with intel_iommu_map_page().
---
v2: Fix build.

--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -347,63 +347,116 @@ static u64 bus_to_context_maddr(struct v
     return maddr;
 }
 
-static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
+/*
+ * This function walks (and if requested allocates) page tables to the
+ * designated target level. It returns
+ * - 0 when a non-present entry was encountered and no allocation was
+ *   requested,
+ * - a small positive value (the level, i.e. below PAGE_SIZE) upon allocation
+ *   failure,
+ * - for target > 0 the physical address of the page table holding the leaf
+ *   PTE for the requested address,
+ * - for target == 0 the full PTE.
+ */
+static uint64_t addr_to_dma_page_maddr(struct domain *domain, daddr_t addr,
+                                       unsigned int target,
+                                       unsigned int *flush_flags, bool alloc)
 {
     struct domain_iommu *hd = dom_iommu(domain);
     int addr_width = agaw_to_width(hd->arch.vtd.agaw);
     struct dma_pte *parent, *pte = NULL;
-    int level = agaw_to_level(hd->arch.vtd.agaw);
-    int offset;
+    unsigned int level = agaw_to_level(hd->arch.vtd.agaw), offset;
     u64 pte_maddr = 0;
 
     addr &= (((u64)1) << addr_width) - 1;
     ASSERT(spin_is_locked(&hd->arch.mapping_lock));
+    ASSERT(target || !alloc);
+
     if ( !hd->arch.vtd.pgd_maddr )
     {
         struct page_info *pg;
 
-        if ( !alloc || !(pg = iommu_alloc_pgtable(domain)) )
+        if ( !alloc )
+            goto out;
+
+        pte_maddr = level;
+        if ( !(pg = iommu_alloc_pgtable(domain)) )
             goto out;
 
         hd->arch.vtd.pgd_maddr = page_to_maddr(pg);
     }
 
-    parent = (struct dma_pte *)map_vtd_domain_page(hd->arch.vtd.pgd_maddr);
-    while ( level > 1 )
+    pte_maddr = hd->arch.vtd.pgd_maddr;
+    parent = map_vtd_domain_page(pte_maddr);
+    while ( level > target )
     {
         offset = address_level_offset(addr, level);
         pte = &parent[offset];
 
         pte_maddr = dma_pte_addr(*pte);
-        if ( !pte_maddr )
+        if ( !dma_pte_present(*pte) || (level > 1 && dma_pte_superpage(*pte)) )
         {
             struct page_info *pg;
+            /*
+             * Higher level tables always set r/w, last level page table
+             * controls read/write.
+             */
+            struct dma_pte new_pte = { DMA_PTE_PROT };
 
             if ( !alloc )
-                break;
+            {
+                pte_maddr = 0;
+                if ( !dma_pte_present(*pte) )
+                    break;
+
+                /*
+                 * When the leaf entry was requested, pass back the full PTE,
+                 * with the address adjusted to account for the residual of
+                 * the walk.
+                 */
+                pte_maddr = pte->val +
+                    (addr & ((1UL << level_to_offset_bits(level)) - 1) &
+                     PAGE_MASK);
+                if ( !target )
+                    break;
+            }
 
+            pte_maddr = level - 1;
             pg = iommu_alloc_pgtable(domain);
             if ( !pg )
                 break;
 
             pte_maddr = page_to_maddr(pg);
-            dma_set_pte_addr(*pte, pte_maddr);
+            dma_set_pte_addr(new_pte, pte_maddr);
 
-            /*
-             * high level table always sets r/w, last level
-             * page table control read/write
-             */
-            dma_set_pte_readable(*pte);
-            dma_set_pte_writable(*pte);
+            if ( dma_pte_present(*pte) )
+            {
+                struct dma_pte *split = map_vtd_domain_page(pte_maddr);
+                unsigned long inc = 1UL << level_to_offset_bits(level - 1);
+
+                split[0].val = pte->val;
+                if ( inc == PAGE_SIZE )
+                    split[0].val &= ~DMA_PTE_SP;
+
+                for ( offset = 1; offset < PTE_NUM; ++offset )
+                    split[offset].val = split[offset - 1].val + inc;
+
+                iommu_sync_cache(split, PAGE_SIZE);
+                unmap_vtd_domain_page(split);
+
+                if ( flush_flags )
+                    *flush_flags |= IOMMU_FLUSHF_modified;
+            }
+
+            write_atomic(&pte->val, new_pte.val);
             iommu_sync_cache(pte, sizeof(struct dma_pte));
         }
 
-        if ( level == 2 )
+        if ( --level == target )
             break;
 
         unmap_vtd_domain_page(parent);
         parent = map_vtd_domain_page(pte_maddr);
-        level--;
     }
 
     unmap_vtd_domain_page(parent);
@@ -430,7 +483,7 @@ static uint64_t domain_pgd_maddr(struct
     if ( !hd->arch.vtd.pgd_maddr )
     {
         /* Ensure we have pagetables allocated down to leaf PTE. */
-        addr_to_dma_page_maddr(d, 0, 1);
+        addr_to_dma_page_maddr(d, 0, 1, NULL, true);
 
         if ( !hd->arch.vtd.pgd_maddr )
             return 0;
@@ -770,8 +823,9 @@ static int __must_check iommu_flush_iotl
 }
 
 /* clear one page's page table */
-static void dma_pte_clear_one(struct domain *domain, uint64_t addr,
-                              unsigned int *flush_flags)
+static int dma_pte_clear_one(struct domain *domain, daddr_t addr,
+                             unsigned int order,
+                             unsigned int *flush_flags)
 {
     struct domain_iommu *hd = dom_iommu(domain);
     struct dma_pte *page = NULL, *pte = NULL;
@@ -779,11 +833,11 @@ static void dma_pte_clear_one(struct dom
     spin_lock(&hd->arch.mapping_lock);
 
     /* get last level pte */
-    pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
-    if ( pg_maddr == 0 )
+    pg_maddr = addr_to_dma_page_maddr(domain, addr, 1, flush_flags, false);
+    if ( pg_maddr < PAGE_SIZE )
    {
         spin_unlock(&hd->arch.mapping_lock);
-        return;
+        return pg_maddr ? -ENOMEM : 0;
     }
 
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
@@ -793,7 +847,7 @@ static void dma_pte_clear_one(struct dom
     {
         spin_unlock(&hd->arch.mapping_lock);
         unmap_vtd_domain_page(page);
-        return;
+        return 0;
     }
 
     dma_clear_pte(*pte);
@@ -803,6 +857,8 @@ static void dma_pte_clear_one(struct dom
     iommu_sync_cache(pte, sizeof(struct dma_pte));
 
     unmap_vtd_domain_page(page);
+
+    return 0;
 }
 
 static int iommu_set_root_entry(struct vtd_iommu *iommu)
@@ -1914,8 +1970,9 @@ static int __must_check intel_iommu_map_
         return 0;
     }
 
-    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 1);
-    if ( !pg_maddr )
+    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 1, flush_flags,
+                                      true);
+    if ( pg_maddr < PAGE_SIZE )
     {
         spin_unlock(&hd->arch.mapping_lock);
         return -ENOMEM;
@@ -1965,17 +2022,14 @@ static int __must_check intel_iommu_unma
     if ( iommu_hwdom_passthrough && is_hardware_domain(d) )
         return 0;
 
-    dma_pte_clear_one(d, dfn_to_daddr(dfn), flush_flags);
-
-    return 0;
+    return dma_pte_clear_one(d, dfn_to_daddr(dfn), 0, flush_flags);
 }
 
 static int intel_iommu_lookup_page(struct domain *d, dfn_t dfn, mfn_t *mfn,
                                    unsigned int *flags)
 {
     struct domain_iommu *hd = dom_iommu(d);
-    struct dma_pte *page, val;
-    u64 pg_maddr;
+    uint64_t val;
 
     /*
      * If VT-d shares EPT page table or if the domain is the hardware
@@ -1987,25 +2041,16 @@ static int intel_iommu_lookup_page(struc
 
     spin_lock(&hd->arch.mapping_lock);
 
-    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 0);
-    if ( !pg_maddr )
-    {
-        spin_unlock(&hd->arch.mapping_lock);
-        return -ENOENT;
-    }
-
-    page = map_vtd_domain_page(pg_maddr);
-    val = page[dfn_x(dfn) & LEVEL_MASK];
+    val = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 0, NULL, false);
 
-    unmap_vtd_domain_page(page);
     spin_unlock(&hd->arch.mapping_lock);
 
-    if ( !dma_pte_present(val) )
+    if ( val < PAGE_SIZE )
         return -ENOENT;
 
-    *mfn = maddr_to_mfn(dma_pte_addr(val));
-    *flags = dma_pte_read(val) ? IOMMUF_readable : 0;
-    *flags |= dma_pte_write(val) ? IOMMUF_writable : 0;
+    *mfn = maddr_to_mfn(val);
+    *flags = val & DMA_PTE_READ ? IOMMUF_readable : 0;
+    *flags |= val & DMA_PTE_WRITE ? IOMMUF_writable : 0;
 
     return 0;
 }
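
As an aside, the new return convention may be easier to follow when the
caller-side pattern is spelled out on its own. The fragment below is only a
sketch restating what the hunks above already do (names d, dfn, hd and
flush_flags are the ones used there); it is not additional patch content.
Values below PAGE_SIZE can never be valid page table addresses, which is what
allows a single comparison to separate all cases:

    uint64_t pg_maddr;

    /* Walk down to the level-1 (4k) table, allocating missing levels. */
    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 1, flush_flags,
                                      true);
    if ( pg_maddr < PAGE_SIZE )
    {
        /*
         * Small values are not table addresses: zero means "nothing present
         * and no allocation requested", while a non-zero value below
         * PAGE_SIZE is the level at which allocation failed.
         */
        spin_unlock(&hd->arch.mapping_lock);
        return pg_maddr ? -ENOMEM : 0;
    }

    /* Otherwise pg_maddr is the maddr of the table holding the leaf PTE. */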
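A second, standalone sketch (purely illustrative values, compileable outside
Xen) of the superpage-split arithmetic in the first hunk: when a 2M mapping is
split, inc is 1 << level_to_offset_bits(1), i.e. PAGE_SIZE, the SP bit is
cleared because the resulting entries are leaf PTEs, and each of the PTE_NUM
entries simply advances the address by 4k. The constants below mirror the
VT-d leaf PTE layout but are defined here only for the demonstration:

    /* Standalone illustration only, not part of the patch. */
    #include <inttypes.h>
    #include <stdio.h>

    #define PTE_NUM    512              /* entries per VT-d page table */
    #define PAGE_SIZE  0x1000UL         /* 4k */
    #define DMA_PTE_SP (1UL << 7)       /* superpage (PS) bit */

    int main(void)
    {
        /* Hypothetical present, writable 2M superpage PTE at maddr 0x80000000. */
        uint64_t super = 0x80000000UL | DMA_PTE_SP | 3 /* read + write */;
        uint64_t split[PTE_NUM];
        unsigned long inc = PAGE_SIZE;  /* level_to_offset_bits(1) == 12 */
        unsigned int i;

        split[0] = super & ~DMA_PTE_SP;       /* leaf entries: clear SP */
        for ( i = 1; i < PTE_NUM; ++i )
            split[i] = split[i - 1] + inc;    /* advance address by 4k */

        printf("first %#" PRIx64 " last %#" PRIx64 "\n",
               split[0], split[PTE_NUM - 1]);
        return 0;
    }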