[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 15/18] x86/mm: introduce a per-vCPU mapcache when using ASI



On Wed Jan 8, 2025 at 2:26 PM GMT, Roger Pau Monne wrote:
> When using a unique per-vCPU root page table the per-domain region becomes
> per-vCPU, and hence the mapcache is no longer shared between all vCPUs of a
> domain.  Introduce per-vCPU mapcache structures, and modify map_domain_page()
> to create per-vCPU mappings when possible.  Note the lock is also not needed
> with using per-vCPU map caches, as the structure is no longer shared.
>
> This introduces some duplication in the domain and vcpu structures, as both
> contain a mapcache field to support running with and without per-vCPU
> page-tables.
>
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> ---
>  xen/arch/x86/domain_page.c        | 90 ++++++++++++++++++++-----------
>  xen/arch/x86/include/asm/domain.h | 20 ++++---
>  2 files changed, 71 insertions(+), 39 deletions(-)
>
> diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
> index 1372be20224e..65900d6218f8 100644
> --- a/xen/arch/x86/domain_page.c
> +++ b/xen/arch/x86/domain_page.c
> @@ -74,7 +74,9 @@ void *map_domain_page(mfn_t mfn)
>      struct vcpu *v;
>      struct mapcache_domain *dcache;
>      struct mapcache_vcpu *vcache;
> +    struct mapcache *cache;
>      struct vcpu_maphash_entry *hashent;
> +    struct domain *d;
>  
>  #ifdef NDEBUG
>      if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
> @@ -85,9 +87,12 @@ void *map_domain_page(mfn_t mfn)
>      if ( !v || !is_pv_vcpu(v) )
>          return mfn_to_virt(mfn_x(mfn));
>  
> -    dcache = &v->domain->arch.pv.mapcache;
> +    d = v->domain;
> +    dcache = &d->arch.pv.mapcache;
>      vcache = &v->arch.pv.mapcache;
> -    if ( !dcache->inuse )
> +    cache = d->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> +                            : &d->arch.pv.mapcache.cache;
> +    if ( !cache->inuse )
>          return mfn_to_virt(mfn_x(mfn));
>  
>      perfc_incr(map_domain_page_count);
> @@ -98,17 +103,18 @@ void *map_domain_page(mfn_t mfn)
>      if ( hashent->mfn == mfn_x(mfn) )
>      {
>          idx = hashent->idx;
> -        ASSERT(idx < dcache->entries);
> +        ASSERT(idx < cache->entries);
>          hashent->refcnt++;
>          ASSERT(hashent->refcnt);
>          ASSERT(mfn_eq(l1e_get_mfn(MAPCACHE_L1ENT(idx)), mfn));
>          goto out;
>      }
>  
> -    spin_lock(&dcache->lock);
> +    if ( !d->arch.vcpu_pt )
> +        spin_lock(&dcache->lock);

Hmmm. I wonder whether we might not want a nospec here...

>  
>      /* Has some other CPU caused a wrap? We must flush if so. */
> -    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
> +    if ( unlikely(!d->arch.vcpu_pt && dcache->epoch != vcache->shadow_epoch) 
> )
>      {
>          vcache->shadow_epoch = dcache->epoch;
>          if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) 
> )
> @@ -118,21 +124,21 @@ void *map_domain_page(mfn_t mfn)
>          }
>      }
>  
> -    idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
> -    if ( unlikely(idx >= dcache->entries) )
> +    idx = find_next_zero_bit(cache->inuse, cache->entries, cache->cursor);
> +    if ( unlikely(idx >= cache->entries) )
>      {
>          unsigned long accum = 0, prev = 0;
>  
>          /* /First/, clean the garbage map and update the inuse list. */
> -        for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
> +        for ( i = 0; i < BITS_TO_LONGS(cache->entries); i++ )
>          {
>              accum |= prev;
> -            dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
> -            prev = ~dcache->inuse[i];
> +            cache->inuse[i] &= ~xchg(&cache->garbage[i], 0);
> +            prev = ~cache->inuse[i];
>          }
>  
> -        if ( accum | (prev & BITMAP_LAST_WORD_MASK(dcache->entries)) )
> -            idx = find_first_zero_bit(dcache->inuse, dcache->entries);
> +        if ( accum | (prev & BITMAP_LAST_WORD_MASK(cache->entries)) )
> +            idx = find_first_zero_bit(cache->inuse, cache->entries);
>          else
>          {
>              /* Replace a hash entry instead. */
> @@ -152,19 +158,23 @@ void *map_domain_page(mfn_t mfn)
>                      i = 0;
>              } while ( i != MAPHASH_HASHFN(mfn_x(mfn)) );
>          }
> -        BUG_ON(idx >= dcache->entries);
> +        BUG_ON(idx >= cache->entries);
>  
>          /* /Second/, flush TLBs. */
>          perfc_incr(domain_page_tlb_flush);
>          flush_tlb_local();
> -        vcache->shadow_epoch = ++dcache->epoch;
> -        dcache->tlbflush_timestamp = tlbflush_current_time();
> +        if ( !d->arch.vcpu_pt )
> +        {
> +            vcache->shadow_epoch = ++dcache->epoch;
> +            dcache->tlbflush_timestamp = tlbflush_current_time();
> +        }
>      }
>  
> -    set_bit(idx, dcache->inuse);
> -    dcache->cursor = idx + 1;
> +    set_bit(idx, cache->inuse);
> +    cache->cursor = idx + 1;
>  
> -    spin_unlock(&dcache->lock);
> +    if ( !d->arch.vcpu_pt )
> +        spin_unlock(&dcache->lock);

... and here.

>  
>      l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_mfn(mfn, __PAGE_HYPERVISOR_RW));
>  
> @@ -178,6 +188,7 @@ void unmap_domain_page(const void *ptr)
>      unsigned int idx;
>      struct vcpu *v;
>      struct mapcache_domain *dcache;
> +    struct mapcache *cache;
>      unsigned long va = (unsigned long)ptr, mfn, flags;
>      struct vcpu_maphash_entry *hashent;
>  
> @@ -190,7 +201,9 @@ void unmap_domain_page(const void *ptr)
>      ASSERT(v && is_pv_vcpu(v));
>  
>      dcache = &v->domain->arch.pv.mapcache;
> -    ASSERT(dcache->inuse);
> +    cache = v->domain->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> +                                    : &v->domain->arch.pv.mapcache.cache;
> +    ASSERT(cache->inuse);
>  
>      idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
>      mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
> @@ -213,7 +226,7 @@ void unmap_domain_page(const void *ptr)
>                     hashent->mfn);
>              l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty());
>              /* /Second/, mark as garbage. */
> -            set_bit(hashent->idx, dcache->garbage);
> +            set_bit(hashent->idx, cache->garbage);
>          }
>  
>          /* Add newly-freed mapping to the maphash. */
> @@ -225,7 +238,7 @@ void unmap_domain_page(const void *ptr)
>          /* /First/, zap the PTE. */
>          l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
>          /* /Second/, mark as garbage. */
> -        set_bit(idx, dcache->garbage);
> +        set_bit(idx, cache->garbage);
>      }
>  
>      local_irq_restore(flags);
> @@ -234,7 +247,6 @@ void unmap_domain_page(const void *ptr)
>  void mapcache_domain_init(struct domain *d)
>  {
>      struct mapcache_domain *dcache = &d->arch.pv.mapcache;
> -    unsigned int bitmap_pages;
>  
>      ASSERT(is_pv_domain(d));
>  
> @@ -243,13 +255,12 @@ void mapcache_domain_init(struct domain *d)
>          return;
>  #endif
>  
> +    if ( d->arch.vcpu_pt )
> +        return;
> +
>      BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
>                   2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) 
> >
>                   MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
> -    bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
> -    dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
> -    dcache->garbage = dcache->inuse +
> -                      (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
>  
>      spin_lock_init(&dcache->lock);
>  }
> @@ -258,30 +269,45 @@ int mapcache_vcpu_init(struct vcpu *v)
>  {
>      struct domain *d = v->domain;
>      struct mapcache_domain *dcache = &d->arch.pv.mapcache;
> +    struct mapcache *cache;
>      unsigned long i;
> -    unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
> +    unsigned int ents = (d->arch.vcpu_pt ? 1 : d->max_vcpus) *
> +                        MAPCACHE_VCPU_ENTRIES;
>      unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));
>  
> -    if ( !is_pv_vcpu(v) || !dcache->inuse )
> +    if ( !is_pv_vcpu(v) )
>          return 0;
>  
> -    if ( ents > dcache->entries )
> +    cache = d->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> +                            : &dcache->cache;
> +
> +    if ( !cache->inuse )
> +        return 0;
> +
> +    if ( ents > cache->entries )
>      {
>          /* Populate page tables. */
>          int rc = create_perdomain_mapping(v, MAPCACHE_VIRT_START, ents, 
> false);
> +        const unsigned int bitmap_pages =
> +            PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
> +
> +        cache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
> +        cache->garbage = cache->inuse +
> +                         (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
> +
>  
>          /* Populate bit maps. */
>          if ( !rc )
> -            rc = create_perdomain_mapping(v, (unsigned long)dcache->inuse,
> +            rc = create_perdomain_mapping(v, (unsigned long)cache->inuse,
>                                            nr, true);
>          if ( !rc )
> -            rc = create_perdomain_mapping(v, (unsigned long)dcache->garbage,
> +            rc = create_perdomain_mapping(v, (unsigned long)cache->garbage,
>                                            nr, true);
>  
>          if ( rc )
>              return rc;
>  
> -        dcache->entries = ents;
> +        cache->entries = ents;
>      }
>  
>      /* Mark all maphash entries as not in use. */

Cheers,
Alejandro



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.