|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH 3/3] x86/iommu: use a rangeset for hwdom setup
On Fri, Nov 17, 2023 at 10:47:49AM +0100, Roger Pau Monne wrote:
> The current loop that iterates from 0 to the maximum RAM address in order to
> setup the IOMMU mappings is highly inefficient, and it will get worse as the
> amount of RAM increases. It's also not accounting for any reserved regions
> past the last RAM address.
>
> Instead of iterating over memory addresses, iterate over the memory map
> regions
> and use a rangeset in order to keep track of which ranges need to be identity
> mapped in the hardware domain physical address space.
>
> On an AMD EPYC 7452 with 512GiB of RAM, the time to execute
> arch_iommu_hwdom_init() in nanoseconds is:
>
> x old
> + new
> N Min Max Median Avg Stddev
> x 5 2.2364154e+10 2.338244e+10 2.2474685e+10 2.2622409e+10 4.2949869e+08
> + 5 1025012 1033036 1026188 1028276.2 3623.1194
> Difference at 95.0% confidence
> -2.26214e+10 +/- 4.42931e+08
> -99.9955% +/- 9.05152e-05%
> (Student's t, pooled s = 3.03701e+08)
>
> Execution time of arch_iommu_hwdom_init() goes down from ~22s to ~0.001s.
>
> Note there's a change for HVM domains (ie: PVH dom0) that get switched to
> create the p2m mappings using map_mmio_regions() instead of
> p2m_add_identity_entry(), so that ranges can be mapped with a single function
> call if possible. Note that the interface of map_mmio_regions() doesn't
> allow creating read-only mappings, but so far there are no such mappings
> created for PVH dom0 in arch_iommu_hwdom_init().
>
> No change intended in the resulting mappings that a hardware domain gets.
>
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> ---
> xen/arch/x86/hvm/io.c | 15 +-
> xen/arch/x86/include/asm/hvm/io.h | 4 +-
> xen/drivers/passthrough/x86/iommu.c | 355 +++++++++++++++++-----------
> 3 files changed, 231 insertions(+), 143 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
> index d75af83ad01f..7c4b7317b13a 100644
> --- a/xen/arch/x86/hvm/io.c
> +++ b/xen/arch/x86/hvm/io.c
> @@ -364,9 +364,20 @@ static const struct hvm_mmcfg *vpci_mmcfg_find(const
> struct domain *d,
> return NULL;
> }
>
> -bool vpci_is_mmcfg_address(const struct domain *d, paddr_t addr)
> +int vpci_subtract_mmcfg(const struct domain *d, struct rangeset *r)
> {
> - return vpci_mmcfg_find(d, addr);
> + const struct hvm_mmcfg *mmcfg;
> +
> + list_for_each_entry ( mmcfg, &d->arch.hvm.mmcfg_regions, next )
> + {
> + int rc = rangeset_remove_range(r, PFN_DOWN(mmcfg->addr),
> + PFN_DOWN(mmcfg->addr + mmcfg->size -
> 1));
> +
> + if ( rc )
> + return rc;
> + }
> +
> + return 0;
> }
>
> static unsigned int vpci_mmcfg_decode_addr(const struct hvm_mmcfg *mmcfg,
> diff --git a/xen/arch/x86/include/asm/hvm/io.h
> b/xen/arch/x86/include/asm/hvm/io.h
> index e5225e75ef26..c9d058fd5695 100644
> --- a/xen/arch/x86/include/asm/hvm/io.h
> +++ b/xen/arch/x86/include/asm/hvm/io.h
> @@ -153,8 +153,8 @@ int register_vpci_mmcfg_handler(struct domain *d, paddr_t
> addr,
> /* Destroy tracked MMCFG areas. */
> void destroy_vpci_mmcfg(struct domain *d);
>
> -/* Check if an address is between a MMCFG region for a domain. */
> -bool vpci_is_mmcfg_address(const struct domain *d, paddr_t addr);
> +/* Remove MMCFG regions from a given rangeset. */
> +int vpci_subtract_mmcfg(const struct domain *d, struct rangeset *r);
>
> #endif /* __ASM_X86_HVM_IO_H__ */
>
> diff --git a/xen/drivers/passthrough/x86/iommu.c
> b/xen/drivers/passthrough/x86/iommu.c
> index d70cee9fea77..be2c909f61d8 100644
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -301,129 +301,133 @@ void iommu_identity_map_teardown(struct domain *d)
> }
> }
>
> -static int __hwdom_init xen_in_range(unsigned long mfn)
> +static int __hwdom_init remove_xen_ranges(struct rangeset *r)
> {
> paddr_t start, end;
> - int i;
> -
> - enum { region_s3, region_ro, region_rw, region_bss, nr_regions };
> - static struct {
> - paddr_t s, e;
> - } xen_regions[nr_regions] __hwdom_initdata;
> + int rc;
>
> - /* initialize first time */
> - if ( !xen_regions[0].s )
> - {
> - /* S3 resume code (and other real mode trampoline code) */
> - xen_regions[region_s3].s = bootsym_phys(trampoline_start);
> - xen_regions[region_s3].e = bootsym_phys(trampoline_end);
> + /* S3 resume code (and other real mode trampoline code) */
> + rc = rangeset_remove_range(r, PFN_DOWN(bootsym_phys(trampoline_start)),
> + PFN_DOWN(bootsym_phys(trampoline_end)));
> + if ( rc )
> + return rc;
>
> - /*
> - * This needs to remain in sync with the uses of the same symbols in
> - * - __start_xen()
> - * - is_xen_fixed_mfn()
> - * - tboot_shutdown()
> - */
> + /*
> + * This needs to remain in sync with the uses of the same symbols in
> + * - __start_xen()
> + * - is_xen_fixed_mfn()
> + * - tboot_shutdown()
> + */
> + /* hypervisor .text + .rodata */
> + rc = rangeset_remove_range(r, PFN_DOWN(__pa(&_stext)),
> + PFN_DOWN(__pa(&__2M_rodata_end)));
> + if ( rc )
> + return rc;
>
> - /* hypervisor .text + .rodata */
> - xen_regions[region_ro].s = __pa(&_stext);
> - xen_regions[region_ro].e = __pa(&__2M_rodata_end);
> - /* hypervisor .data + .bss */
> - xen_regions[region_rw].s = __pa(&__2M_rwdata_start);
> - xen_regions[region_rw].e = __pa(&__2M_rwdata_end);
> - if ( efi_boot_mem_unused(&start, &end) )
> - {
> - ASSERT(__pa(start) >= xen_regions[region_rw].s);
> - ASSERT(__pa(end) <= xen_regions[region_rw].e);
> - xen_regions[region_rw].e = __pa(start);
> - xen_regions[region_bss].s = __pa(end);
> - xen_regions[region_bss].e = __pa(&__2M_rwdata_end);
> - }
> + /* hypervisor .data + .bss */
> + if ( efi_boot_mem_unused(&start, &end) )
> + {
> + ASSERT(__pa(start) >= __pa(&__2M_rwdata_start));
> + rc = rangeset_remove_range(r, PFN_DOWN(__pa(&__2M_rwdata_start)),
> + PFN_DOWN(__pa(start)));
> + if ( rc )
> + return rc;
> + ASSERT(__pa(end) <= __pa(&__2M_rwdata_end));
> + rc = rangeset_remove_range(r, PFN_DOWN(__pa(end)),
> + PFN_DOWN(__pa(&__2M_rwdata_end)));
> + if ( rc )
> + return rc;
> + }
> + else
> + {
> + rc = rangeset_remove_range(r, PFN_DOWN(__pa(&__2M_rwdata_start)),
> + PFN_DOWN(__pa(&__2M_rwdata_end)));
> + if ( rc )
> + return rc;
> }
> -
> - start = (paddr_t)mfn << PAGE_SHIFT;
> - end = start + PAGE_SIZE;
> - for ( i = 0; i < nr_regions; i++ )
> - if ( (start < xen_regions[i].e) && (end > xen_regions[i].s) )
> - return 1;
>
> return 0;
> }
>
> -static unsigned int __hwdom_init hwdom_iommu_map(const struct domain *d,
> - unsigned long pfn,
> - unsigned long max_pfn)
> +static int __hwdom_init map_subtract(unsigned long s, unsigned long e,
Bah, this (and others below) are missing cf_check attribute.
Will fix in v2.
Roger.
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |