
Re: [PATCH V4 07/15] x86/domain_page: Remove the fast paths when mfn is not in the directmap



I'm still head-scratching about various things, but the build errors show up
on release builds without PMAP enabled. I've highlighted them here.

On Mon Nov 11, 2024 at 1:11 PM GMT, Elias El Yandouzi wrote:
> From: Hongyan Xia <hongyxia@xxxxxxxxxx>
>
> When mfn is not in direct map, never use mfn_to_virt for any mappings.
>
> We replace mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) with
> arch_mfns_in_direct_map(mfn, 1) because these two are equivalent. The
> extra comparison in arch_mfns_in_direct_map() looks different but because
> DIRECTMAP_VIRT_END is always higher, it does not make any difference.
>
> Lastly, domain_page_map_to_mfn() needs to gain to a special case for
> the PMAP.
>
> Signed-off-by: Hongyan Xia <hongyxia@xxxxxxxxxx>
> Signed-off-by: Julien Grall <jgrall@xxxxxxxxxx>
>
> ----
>
>     Changes in v4:
>         * Introduce helper functions virt_is_fixmap and virt_in_fixmap_range
>
>     Changes since Hongyan's version:
>         * arch_mfn_in_direct_map() was renamed to arch_mfns_in_directmap()
>         * add a special case for the PMAP in domain_page_map_to_mfn()
>
> diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
> index 55e337aaf703..df7d4750ef05 100644
> --- a/xen/arch/x86/domain_page.c
> +++ b/xen/arch/x86/domain_page.c
> @@ -14,8 +14,10 @@
>  #include <xen/sched.h>
>  #include <xen/vmap.h>
>  #include <asm/current.h>
> +#include <asm/fixmap.h>
>  #include <asm/flushtlb.h>
>  #include <asm/hardirq.h>
> +#include <asm/pmap.h>
>  #include <asm/setup.h>
>  
>  static DEFINE_PER_CPU(struct vcpu *, override);
> @@ -24,6 +26,7 @@ static inline struct vcpu *mapcache_current_vcpu(void)
>  {
>      /* In the common case we use the mapcache of the running VCPU. */
>      struct vcpu *v = this_cpu(override) ?: current;
> +    struct vcpu *idle_v = idle_vcpu[smp_processor_id()];
>  
>      /*
>       * When current isn't properly set up yet, this is equivalent to
> @@ -35,10 +38,11 @@ static inline struct vcpu *mapcache_current_vcpu(void)
>      /*
>       * When using efi runtime page tables, we have the equivalent of the idle
>       * domain's page tables but current may point at another domain's VCPU.
> -     * Return NULL as though current is not properly set up yet.
> +     * Return the idle domains's vcpu on that core because the efi per-domain
> +     * region (where the mapcache is) is in-sync with the idle domain.
>       */
>      if ( efi_rs_using_pgtables() )
> -        return NULL;
> +        return idle_v;
>  
>      /*
>       * If guest_table is NULL, and we are running a paravirtualised guest,
> @@ -48,7 +52,7 @@ static inline struct vcpu *mapcache_current_vcpu(void)
>      if ( unlikely(pagetable_is_null(v->arch.guest_table)) && is_pv_vcpu(v) )
>      {
>          /* If we really are idling, perform lazy context switch now. */
> -        if ( (v = idle_vcpu[smp_processor_id()]) == current )
> +        if ( (v = idle_v) == current )
>              sync_local_execstate();
>          /* We must now be running on the idle page table. */
>          ASSERT(cr3_pa(read_cr3()) == __pa(idle_pg_table));
> @@ -77,18 +81,24 @@ void *map_domain_page(mfn_t mfn)
>      struct vcpu_maphash_entry *hashent;
>  
>  #ifdef NDEBUG
> -    if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
> +    if ( arch_mfns_in_directmap(mfn_x(mfn), 1) )
>          return mfn_to_virt(mfn_x(mfn));
>  #endif
>  
>      v = mapcache_current_vcpu();
> -    if ( !v )
> -        return mfn_to_virt(mfn_x(mfn));
> +    if ( !v || !v->domain->arch.mapcache.inuse )
> +    {
> +        if ( arch_mfns_in_directmap(mfn_x(mfn), 1) )
> +            return mfn_to_virt(mfn_x(mfn));
> +        else
> +        {
> +            BUG_ON(system_state >= SYS_STATE_smp_boot);

Missing CONFIG_HAS_PMAP guards around this return. Without PMAP this path
wants to BUG() instead, I think. I'm also not entirely convinced the current
logic takes into account the extended directmap present for HVM and idle
vCPUs, though.
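
Roughly what I have in mind (untested sketch, keeping your BUG_ON() and
assuming the same CONFIG_HAS_PMAP option used elsewhere in the series):

        else
        {
            BUG_ON(system_state >= SYS_STATE_smp_boot);
#ifdef CONFIG_HAS_PMAP
            return pmap_map(mfn);
#else
            /* No PMAP to fall back on when the MFN isn't in the directmap. */
            BUG();
#endif
        }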

arch_mfns_in_directmap() merely checks that the MFNs fit within
DIRECTMAP_SIZE, doesn't it?

> +            return pmap_map(mfn);
> +        }
> +    }
>  
>      dcache = &v->domain->arch.mapcache;
>      vcache = &v->arch.mapcache;
> -    if ( !dcache->inuse )
> -        return mfn_to_virt(mfn_x(mfn));
>  
>      perfc_incr(map_domain_page_count);
>  
> @@ -184,6 +194,12 @@ void unmap_domain_page(const void *ptr)
>      if ( !va || va >= DIRECTMAP_VIRT_START )
>          return;
>  
> +    if ( virt_is_fixmap(va) )
> +    {
> +        pmap_unmap(ptr);
> +        return;
> +    }
> +

This hunk is also missing CONFIG_HAS_PMAP guards.
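
i.e. something like (untested):

#ifdef CONFIG_HAS_PMAP
    if ( virt_is_fixmap(va) )
    {
        /* Early-boot mappings established via pmap_map() live in the fixmap. */
        pmap_unmap(ptr);
        return;
    }
#endif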

>      ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
>  
>      v = mapcache_current_vcpu();
> @@ -237,7 +253,7 @@ int mapcache_domain_init(struct domain *d)
>      unsigned int bitmap_pages;
>  
>  #ifdef NDEBUG
> -    if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
> +    if ( !mem_hotplug && arch_mfn_in_directmap(0, max_page) )

I suspect you wanted arch_mfns_in_directmap() rather than _mfn_
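
i.e.:

    if ( !mem_hotplug && arch_mfns_in_directmap(0, max_page) )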

>          return 0;
>  #endif
>  
> @@ -308,7 +324,7 @@ void *map_domain_page_global(mfn_t mfn)
>              local_irq_is_enabled()));
>  
>  #ifdef NDEBUG
> -    if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
> +    if ( arch_mfn_in_directmap(mfn_x(mfn, 1)) )

I suspect you wanted 's/mfn_x(mfn, 1)/mfn_x(mfn), 1/' instead?
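
i.e. (together with the arch_mfns_ spelling fix, to match the call in
map_domain_page()):

    if ( arch_mfns_in_directmap(mfn_x(mfn), 1) )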

>          return mfn_to_virt(mfn_x(mfn));
>  #endif
>  
> @@ -335,6 +351,22 @@ mfn_t domain_page_map_to_mfn(const void *ptr)
>      if ( va >= DIRECTMAP_VIRT_START )
>          return _mfn(virt_to_mfn(ptr));
>  
> +    /*
> +     * The fixmap is stealing the top-end of the VMAP. So the check for
> +     * the PMAP *must* happen first.
> +     *
> +     * Also, the fixmap translate a slot to an address backwards. The
> +     * logic will rely on it to avoid any complexity. So check at
> +     * compile time this will always hold.
> +    */
> +    BUILD_BUG_ON(fix_to_virt(FIX_PMAP_BEGIN) < fix_to_virt(FIX_PMAP_END));
> +
> +    if ( virt_in_fixmap_range(va, FIX_PMAP_BEGIN, FIX_PMAP_END) )
> +    {
> +        BUG_ON(system_state >= SYS_STATE_smp_boot);
> +        return l1e_get_mfn(l1_fixmap[l1_table_offset(va)]);
> +    }
> +

This hunk should be surrounded by CONFIG_HAS_PMAP guards or it'll fail to
compile.
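
i.e. wrapping the whole block, BUILD_BUG_ON() included (untested sketch):

#ifdef CONFIG_HAS_PMAP
    /*
     * FIX_PMAP_{BEGIN,END} presumably only exist with CONFIG_HAS_PMAP, so
     * the whole lookup has to be compiled out otherwise.
     */
    BUILD_BUG_ON(fix_to_virt(FIX_PMAP_BEGIN) < fix_to_virt(FIX_PMAP_END));

    if ( virt_in_fixmap_range(va, FIX_PMAP_BEGIN, FIX_PMAP_END) )
    {
        BUG_ON(system_state >= SYS_STATE_smp_boot);
        return l1e_get_mfn(l1_fixmap[l1_table_offset(va)]);
    }
#endif /* CONFIG_HAS_PMAP */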

>      if ( va >= VMAP_VIRT_START && va < VMAP_VIRT_END )
>          return vmap_to_mfn(va);
>  
> diff --git a/xen/arch/x86/include/asm/fixmap.h b/xen/arch/x86/include/asm/fixmap.h
> index 80b7b74fd816..381c95a8b11f 100644
> --- a/xen/arch/x86/include/asm/fixmap.h
> +++ b/xen/arch/x86/include/asm/fixmap.h
> @@ -101,6 +101,31 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
>      return __virt_to_fix(vaddr);
>  }
>  
> +static inline bool virt_is_fixmap(const unsigned long vaddr)
> +{
> +    return vaddr >= FIXADDR_START && vaddr < FIXADDR_TOP;
> +}
> +
> +static inline bool virt_in_fixmap_range(
> +    const unsigned long vaddr,
> +    const unsigned int start_idx,
> +    const unsigned int end_idx
> +)
> +{
> +    unsigned long start_addr = (unsigned long)fix_to_virt(start_idx);
> +    unsigned long end_addr = (unsigned long)fix_to_virt(end_idx);
> +
> +    /*
> +     * The check ensures that the virtual address (vaddr) is within the
> +     * fixmap range. The addresses are allocated backwards, meaning the
> +     * start address is higher than the end address. As a result, the
> +     * check ensures that the virtual address is greater than or equal to
> +     * the end address, and less than or equal to the start address, which
> +     * may appear counterintuitive due to the reverse allocation order.
> +     */
> +    return ((vaddr & PAGE_MASK) <= start_addr) && (vaddr >= end_addr);
> +}
> +
>  enum fixed_addresses_x {
>      /* Index 0 is reserved since fix_x_to_virt(0) == FIXADDR_X_TOP. */
>      FIX_X_RESERVED,

Cheers,
Alejandro



 

