[PATCH v3 02/23] VT-d: have callers specify the target level for page table walks
In order to be able to insert/remove super-pages we need to allow
callers of the walking function to specify at which point to stop the
walk. For intel_iommu_lookup_page() integrate the last level access into
the main walking function. dma_pte_clear_one() gets only partly adjusted
for now: Error handling and order parameter get put in place, but the
order parameter remains ignored (just like intel_iommu_map_page()'s
order part of the flags).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
I was actually wondering whether it wouldn't make sense to integrate
dma_pte_clear_one() into its only caller intel_iommu_unmap_page(), for
better symmetry with intel_iommu_map_page().
---
v2: Fix build.

--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -347,63 +347,116 @@ static u64 bus_to_context_maddr(struct v
     return maddr;
 }
 
-static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
+/*
+ * This function walks (and if requested allocates) page tables to the
+ * designated target level. It returns
+ * - 0 when a non-present entry was encountered and no allocation was
+ *   requested,
+ * - a small positive value (the level, i.e. below PAGE_SIZE) upon allocation
+ *   failure,
+ * - for target > 0 the physical address of the page table holding the leaf
+ *   PTE for the requested address,
+ * - for target == 0 the full PTE.
+ */
+static uint64_t addr_to_dma_page_maddr(struct domain *domain, daddr_t addr,
+                                       unsigned int target,
+                                       unsigned int *flush_flags, bool alloc)
 {
     struct domain_iommu *hd = dom_iommu(domain);
     int addr_width = agaw_to_width(hd->arch.vtd.agaw);
     struct dma_pte *parent, *pte = NULL;
-    int level = agaw_to_level(hd->arch.vtd.agaw);
-    int offset;
+    unsigned int level = agaw_to_level(hd->arch.vtd.agaw), offset;
     u64 pte_maddr = 0;
 
     addr &= (((u64)1) << addr_width) - 1;
     ASSERT(spin_is_locked(&hd->arch.mapping_lock));
+    ASSERT(target || !alloc);
+
     if ( !hd->arch.vtd.pgd_maddr )
     {
         struct page_info *pg;
 
-        if ( !alloc || !(pg = iommu_alloc_pgtable(domain)) )
+        if ( !alloc )
+            goto out;
+
+        pte_maddr = level;
+        if ( !(pg = iommu_alloc_pgtable(domain)) )
             goto out;
 
         hd->arch.vtd.pgd_maddr = page_to_maddr(pg);
     }
 
-    parent = (struct dma_pte *)map_vtd_domain_page(hd->arch.vtd.pgd_maddr);
-    while ( level > 1 )
+    pte_maddr = hd->arch.vtd.pgd_maddr;
+    parent = map_vtd_domain_page(pte_maddr);
+    while ( level > target )
     {
         offset = address_level_offset(addr, level);
         pte = &parent[offset];
 
         pte_maddr = dma_pte_addr(*pte);
-        if ( !pte_maddr )
+        if ( !dma_pte_present(*pte) || (level > 1 && dma_pte_superpage(*pte)) )
         {
             struct page_info *pg;
+            /*
+             * Higher level tables always set r/w, last level page table
+             * controls read/write.
+             */
+            struct dma_pte new_pte = { DMA_PTE_PROT };
 
             if ( !alloc )
-                break;
+            {
+                pte_maddr = 0;
+                if ( !dma_pte_present(*pte) )
+                    break;
+
+                /*
+                 * When the leaf entry was requested, pass back the full PTE,
+                 * with the address adjusted to account for the residual of
+                 * the walk.
+                 */
+                pte_maddr = pte->val +
+                    (addr & ((1UL << level_to_offset_bits(level)) - 1) &
+                     PAGE_MASK);
+                if ( !target )
+                    break;
+            }
 
+            pte_maddr = level - 1;
             pg = iommu_alloc_pgtable(domain);
             if ( !pg )
                 break;
 
             pte_maddr = page_to_maddr(pg);
-            dma_set_pte_addr(*pte, pte_maddr);
+            dma_set_pte_addr(new_pte, pte_maddr);
 
-            /*
-             * high level table always sets r/w, last level
-             * page table control read/write
-             */
-            dma_set_pte_readable(*pte);
-            dma_set_pte_writable(*pte);
+            if ( dma_pte_present(*pte) )
+            {
+                struct dma_pte *split = map_vtd_domain_page(pte_maddr);
+                unsigned long inc = 1UL << level_to_offset_bits(level - 1);
+
+                split[0].val = pte->val;
+                if ( inc == PAGE_SIZE )
+                    split[0].val &= ~DMA_PTE_SP;
+
+                for ( offset = 1; offset < PTE_NUM; ++offset )
+                    split[offset].val = split[offset - 1].val + inc;
+
+                iommu_sync_cache(split, PAGE_SIZE);
+                unmap_vtd_domain_page(split);
+
+                if ( flush_flags )
+                    *flush_flags |= IOMMU_FLUSHF_modified;
+            }
+
+            write_atomic(&pte->val, new_pte.val);
             iommu_sync_cache(pte, sizeof(struct dma_pte));
         }
 
-        if ( level == 2 )
+        if ( --level == target )
             break;
 
         unmap_vtd_domain_page(parent);
         parent = map_vtd_domain_page(pte_maddr);
-        level--;
     }
 
     unmap_vtd_domain_page(parent);
@@ -430,7 +483,7 @@ static uint64_t domain_pgd_maddr(struct
     if ( !hd->arch.vtd.pgd_maddr )
     {
         /* Ensure we have pagetables allocated down to leaf PTE. */
-        addr_to_dma_page_maddr(d, 0, 1);
+        addr_to_dma_page_maddr(d, 0, 1, NULL, true);
 
         if ( !hd->arch.vtd.pgd_maddr )
             return 0;
@@ -770,8 +823,9 @@ static int __must_check iommu_flush_iotl
 }
 
 /* clear one page's page table */
-static void dma_pte_clear_one(struct domain *domain, uint64_t addr,
-                              unsigned int *flush_flags)
+static int dma_pte_clear_one(struct domain *domain, daddr_t addr,
+                             unsigned int order,
+                             unsigned int *flush_flags)
 {
     struct domain_iommu *hd = dom_iommu(domain);
     struct dma_pte *page = NULL, *pte = NULL;
@@ -779,11 +833,11 @@ static void dma_pte_clear_one(struct dom
     spin_lock(&hd->arch.mapping_lock);
 
     /* get last level pte */
-    pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
-    if ( pg_maddr == 0 )
+    pg_maddr = addr_to_dma_page_maddr(domain, addr, 1, flush_flags, false);
+    if ( pg_maddr < PAGE_SIZE )
    {
         spin_unlock(&hd->arch.mapping_lock);
-        return;
+        return pg_maddr ? -ENOMEM : 0;
     }
 
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
@@ -793,7 +847,7 @@ static void dma_pte_clear_one(struct dom
     {
         spin_unlock(&hd->arch.mapping_lock);
         unmap_vtd_domain_page(page);
-        return;
+        return 0;
     }
 
     dma_clear_pte(*pte);
@@ -803,6 +857,8 @@ static void dma_pte_clear_one(struct dom
     iommu_sync_cache(pte, sizeof(struct dma_pte));
 
     unmap_vtd_domain_page(page);
+
+    return 0;
 }
 
 static int iommu_set_root_entry(struct vtd_iommu *iommu)
@@ -1914,8 +1970,9 @@ static int __must_check intel_iommu_map_
         return 0;
     }
 
-    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 1);
-    if ( !pg_maddr )
+    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 1, flush_flags,
+                                      true);
+    if ( pg_maddr < PAGE_SIZE )
     {
         spin_unlock(&hd->arch.mapping_lock);
         return -ENOMEM;
@@ -1965,17 +2022,14 @@ static int __must_check intel_iommu_unma
     if ( iommu_hwdom_passthrough && is_hardware_domain(d) )
         return 0;
 
-    dma_pte_clear_one(d, dfn_to_daddr(dfn), flush_flags);
-
-    return 0;
+    return dma_pte_clear_one(d, dfn_to_daddr(dfn), 0, flush_flags);
 }
 
 static int intel_iommu_lookup_page(struct domain *d, dfn_t dfn, mfn_t *mfn,
                                    unsigned int *flags)
 {
     struct domain_iommu *hd = dom_iommu(d);
-    struct dma_pte *page, val;
-    u64 pg_maddr;
+    uint64_t val;
 
     /*
      * If VT-d shares EPT page table or if the domain is the hardware
@@ -1987,25 +2041,16 @@ static int intel_iommu_lookup_page(struc
 
     spin_lock(&hd->arch.mapping_lock);
 
-    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 0);
-    if ( !pg_maddr )
-    {
-        spin_unlock(&hd->arch.mapping_lock);
-        return -ENOENT;
-    }
-
-    page = map_vtd_domain_page(pg_maddr);
-    val = page[dfn_x(dfn) & LEVEL_MASK];
+    val = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 0, NULL, false);
 
-    unmap_vtd_domain_page(page);
     spin_unlock(&hd->arch.mapping_lock);
 
-    if ( !dma_pte_present(val) )
+    if ( val < PAGE_SIZE )
         return -ENOENT;
 
-    *mfn = maddr_to_mfn(dma_pte_addr(val));
-    *flags = dma_pte_read(val) ? IOMMUF_readable : 0;
-    *flags |= dma_pte_write(val) ? IOMMUF_writable : 0;
+    *mfn = maddr_to_mfn(val);
+    *flags = val & DMA_PTE_READ ? IOMMUF_readable : 0;
+    *flags |= val & DMA_PTE_WRITE ? IOMMUF_writable : 0;
 
     return 0;
 }
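
As an aside, the new return convention may be easier to follow when the
caller-side pattern is spelled out on its own. The fragment below is only a
sketch restating what the hunks above already do (names d, dfn, hd and
flush_flags are the ones used there); it is not additional patch content.
Values below PAGE_SIZE can never be valid page table addresses, which is what
allows a single comparison to separate all cases:

    uint64_t pg_maddr;

    /* Walk down to the level-1 (4k) table, allocating missing levels. */
    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 1, flush_flags,
                                      true);
    if ( pg_maddr < PAGE_SIZE )
    {
        /*
         * Small values are not table addresses: zero means "nothing present
         * and no allocation requested", while a non-zero value below
         * PAGE_SIZE is the level at which allocation failed.
         */
        spin_unlock(&hd->arch.mapping_lock);
        return pg_maddr ? -ENOMEM : 0;
    }

    /* Otherwise pg_maddr is the maddr of the table holding the leaf PTE. */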
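A second, standalone sketch (purely illustrative values, compileable outside
Xen) of the superpage-split arithmetic in the first hunk: when a 2M mapping is
split, inc is 1 << level_to_offset_bits(1), i.e. PAGE_SIZE, the SP bit is
cleared because the resulting entries are leaf PTEs, and each of the PTE_NUM
entries simply advances the address by 4k. The constants below mirror the
VT-d leaf PTE layout but are defined here only for the demonstration:

    /* Standalone illustration only, not part of the patch. */
    #include <inttypes.h>
    #include <stdio.h>

    #define PTE_NUM    512              /* entries per VT-d page table */
    #define PAGE_SIZE  0x1000UL         /* 4k */
    #define DMA_PTE_SP (1UL << 7)       /* superpage (PS) bit */

    int main(void)
    {
        /* Hypothetical present, writable 2M superpage PTE at maddr 0x80000000. */
        uint64_t super = 0x80000000UL | DMA_PTE_SP | 3 /* read + write */;
        uint64_t split[PTE_NUM];
        unsigned long inc = PAGE_SIZE;  /* level_to_offset_bits(1) == 12 */
        unsigned int i;

        split[0] = super & ~DMA_PTE_SP;       /* leaf entries: clear SP */
        for ( i = 1; i < PTE_NUM; ++i )
            split[i] = split[i - 1] + inc;    /* advance address by 4k */

        printf("first %#" PRIx64 " last %#" PRIx64 "\n",
               split[0], split[PTE_NUM - 1]);
        return 0;
    }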