
Re: [PATCH V4 01/15] x86: Create per-domain mapping for guest_root_pt



On Mon Nov 11, 2024 at 1:11 PM GMT, Elias El Yandouzi wrote:
> From: Hongyan Xia <hongyxia@xxxxxxxxxx>
>
> This patch introduces a per-domain mapping for the `guest_root_pt` in PV
> guests as part of the effort to remove the direct map in Xen.
>
> For the time being, the `root_pgt` is not mapped or unmapped, as it remains
> a Xenheap page. This will be addressed in subsequent patches.
>
> Signed-off-by: Hongyan Xia <hongyxia@xxxxxxxxxx>
> Signed-off-by: Julien Grall <jgrall@xxxxxxxxxx>
> Signed-off-by: Elias El Yandouzi <eliasely@xxxxxxxxxx>
>
> ----
>     Changes in V4:
>         * Fix over-allocation issue
>         * Update the mappings when switching from kernel to user-mode
>
>     Changes in V3:
>         * Rename SHADOW_ROOT
>         * Haven't addressed the potential over-allocation issue as I don't get it
>
>     Changes in V2:
>         * Rework the shadow perdomain mapping solution in the follow-up patches
>
>     Changes since Hongyan's version:
>         * Remove the final dot in the commit title
>
> diff --git a/xen/arch/x86/include/asm/config.h b/xen/arch/x86/include/asm/config.h
> index f8a5a4913b07..bd360ec4141e 100644
> --- a/xen/arch/x86/include/asm/config.h
> +++ b/xen/arch/x86/include/asm/config.h
> @@ -174,7 +174,7 @@
>  /* Slot 260: per-domain mappings (including map cache). */
>  #define PERDOMAIN_VIRT_START    (PML4_ADDR(260))
>  #define PERDOMAIN_SLOT_MBYTES   (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
> -#define PERDOMAIN_SLOTS         3
> +#define PERDOMAIN_SLOTS         4
>  #define PERDOMAIN_VIRT_SLOT(s)  (PERDOMAIN_VIRT_START + (s) * \
>                                   (PERDOMAIN_SLOT_MBYTES << 20))
>  /* Slot 4: mirror of per-domain mappings (for compat xlat area accesses). */
> @@ -288,6 +288,14 @@ extern unsigned long xen_phys_start;
>  #define ARG_XLAT_START(v)        \
>      (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
>  
> +/* pv_root_pt mapping area. The fourth per-domain-mapping sub-area */
> +#define PV_ROOT_PT_MAPPING_VIRT_START   PERDOMAIN_VIRT_SLOT(3)
> +#define PV_ROOT_PT_MAPPING_ENTRIES      MAX_VIRT_CPUS
> +
> +/* The address of a particular VCPU's PV_ROOT_PT */
> +#define PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v) \
> +    (PV_ROOT_PT_MAPPING_VIRT_START + ((v)->vcpu_id * PAGE_SIZE))
> +
>  #define ELFSIZE 64
>  
>  #define ARCH_CRASH_SAVE_VMCOREINFO
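
One thing I sanity-checked while reading: a per-domain slot covers
PERDOMAIN_SLOT_MBYTES (1 GiB, if I've done the shift arithmetic right),
while MAX_VIRT_CPUS page-sized entries need at most 32 MiB with the
current 8192 limit, so the new sub-area fits with plenty of headroom.
It might be worth encoding that as a build-time assertion somewhere
suitable; just a sketch, not a requested change:

  /* Sketch: one page per possible vCPU must fit in per-domain slot 3. */
  BUILD_BUG_ON(PV_ROOT_PT_MAPPING_ENTRIES * PAGE_SIZE >
               (unsigned long)PERDOMAIN_SLOT_MBYTES << 20);
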
> diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
> index b79d6badd71c..478ce41ad8ca 100644
> --- a/xen/arch/x86/include/asm/domain.h
> +++ b/xen/arch/x86/include/asm/domain.h
> @@ -272,6 +272,7 @@ struct time_scale {
>  struct pv_domain
>  {
>      l1_pgentry_t **gdt_ldt_l1tab;
> +    l1_pgentry_t **root_pt_l1tab;
>  
>      atomic_t nr_l4_pages;
>  
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index d537a799bced..a152e21bb086 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -516,6 +516,13 @@ void make_cr3(struct vcpu *v, mfn_t mfn)
>          v->arch.cr3 |= get_pcid_bits(v, false);
>  }
>  
> +#define pv_root_pt_idx(v) \
> +    ((v)->vcpu_id >> PAGETABLE_ORDER)
> +
> +#define pv_root_pt_pte(v) \
> +    ((v)->domain->arch.pv.root_pt_l1tab[pv_root_pt_idx(v)] + \
> +     ((v)->vcpu_id & (L1_PAGETABLE_ENTRIES - 1)))
> +
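
To spell out the indexing for other readers: root_pt_l1tab[] holds one
L1-table pointer per group of L1_PAGETABLE_ENTRIES (512) vCPUs, so the
high bits of vcpu_id select the table and the low PAGETABLE_ORDER bits
select the slot within it. In function form this would read roughly as
follows (illustrative only, function name invented):

  static l1_pgentry_t *pv_root_pt_pte_of(const struct vcpu *v)
  {
      /* The high bits of vcpu_id pick which L1 table... */
      l1_pgentry_t *l1tab =
          v->domain->arch.pv.root_pt_l1tab[v->vcpu_id >> PAGETABLE_ORDER];

      /* ... and the low PAGETABLE_ORDER bits pick the slot within it. */
      return l1tab + (v->vcpu_id & (L1_PAGETABLE_ENTRIES - 1));
  }
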
>  void write_ptbase(struct vcpu *v)
>  {
>      const struct domain *d = v->domain;
> @@ -527,11 +534,16 @@ void write_ptbase(struct vcpu *v)
>  
>      if ( is_pv_domain(d) && d->arch.pv.xpti )
>      {
> +        mfn_t guest_root_pt = _mfn(MASK_EXTR(v->arch.cr3, X86_CR3_ADDR_MASK));
> +        l1_pgentry_t *pte = pv_root_pt_pte(v);
> +
>          cpu_info->root_pgt_changed = true;
>          cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
>          if ( new_cr4 & X86_CR4_PCIDE )
>              cpu_info->pv_cr3 |= get_pcid_bits(v, true);
>          switch_cr3_cr4(v->arch.cr3, new_cr4);
> +
> +        l1e_write(pte, l1e_from_mfn(guest_root_pt, __PAGE_HYPERVISOR_RO));
>      }
>      else
>      {
> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
> index d5a8564c1cbe..1a1c999743ac 100644
> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -289,6 +289,21 @@ static void pv_destroy_gdt_ldt_l1tab(struct vcpu *v)
>                                1U << GDT_LDT_VCPU_SHIFT);
>  }
>  
> +static int pv_create_root_pt_l1tab(const struct vcpu *v)
> +{
> +    return create_perdomain_mapping(v->domain,
> +                                    PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v),
> +                                    1, v->domain->arch.pv.root_pt_l1tab,
> +                                    NULL);
> +}
> +
> +static void pv_destroy_root_pt_l1tab(const struct vcpu *v)
> +
> +{
> +    destroy_perdomain_mapping(v->domain,
> +                              PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v), 1);
> +}
> +
>  void pv_vcpu_destroy(struct vcpu *v)
>  {
>      if ( is_pv_32bit_vcpu(v) )
> @@ -298,6 +313,7 @@ void pv_vcpu_destroy(struct vcpu *v)
>      }
>  
>      pv_destroy_gdt_ldt_l1tab(v);
> +    pv_destroy_root_pt_l1tab(v);
>      XFREE(v->arch.pv.trap_ctxt);
>  }
>  
> @@ -312,6 +328,13 @@ int pv_vcpu_initialise(struct vcpu *v)
>      if ( rc )
>          return rc;
>  
> +    if ( v->domain->arch.pv.xpti )
> +    {
> +        rc = pv_create_root_pt_l1tab(v);
> +        if ( rc )
> +            goto done;
> +    }
> +
>      BUILD_BUG_ON(X86_NR_VECTORS * sizeof(*v->arch.pv.trap_ctxt) >
>                   PAGE_SIZE);
>      v->arch.pv.trap_ctxt = xzalloc_array(struct trap_info, X86_NR_VECTORS);
> @@ -347,10 +370,12 @@ void pv_domain_destroy(struct domain *d)
>  
>      destroy_perdomain_mapping(d, GDT_LDT_VIRT_START,
>                                GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
> +    destroy_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START, d->max_vcpus);
>  
>      XFREE(d->arch.pv.cpuidmasks);
>  
>      FREE_XENHEAP_PAGE(d->arch.pv.gdt_ldt_l1tab);
> +    FREE_XENHEAP_PAGE(d->arch.pv.root_pt_l1tab);

With root_pt_l1tab coming from xzalloc_array(), this must use XFREE() instead:

  XFREE(d->arch.pv.root_pt_l1tab);

>  }
>  
>  void noreturn cf_check continue_pv_domain(void);
> @@ -382,8 +407,22 @@ int pv_domain_initialise(struct domain *d)
>      if ( rc )
>          goto fail;
>  
> +    rc = create_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START,
> +                                  d->max_vcpus, NULL, NULL);
> +    if ( rc )
> +        goto fail;
> +
>      d->arch.ctxt_switch = &pv_csw;
>  
> +    if ( d->arch.pv.xpti )
> +    {
> +        d->arch.pv.root_pt_l1tab =
> +            xzalloc_array(l1_pgentry_t *,
> +                          DIV_ROUND_UP(d->max_vcpus, L1_PAGETABLE_ENTRIES));
> +        if ( !d->arch.pv.root_pt_l1tab )
> +            goto fail;
> +    }
> +
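
The DIV_ROUND_UP() sizing here has to stay in sync with the
pv_root_pt_idx()/pv_root_pt_pte() split in mm.c, i.e. one pointer per
1 << PAGETABLE_ORDER vCPUs. A build-time check would make that coupling
explicit; sketch only:

  /* Sketch: the l1tab sizing and the mm.c indexing macros must agree. */
  BUILD_BUG_ON((1u << PAGETABLE_ORDER) != L1_PAGETABLE_ENTRIES);
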
>      if ( !is_pv_32bit_domain(d) && use_invpcid && cpu_has_pcid )
>          switch ( ACCESS_ONCE(opt_pcid) )
>          {
> @@ -457,7 +496,8 @@ static void _toggle_guest_pt(struct vcpu *v)
>              guest_update = false;
>          }
>      }
> -    write_cr3(cr3);
> +
> +    write_ptbase(v);
>  
>      if ( !pagetable_is_null(old_shadow) )
>          shadow_put_top_level(v->domain, old_shadow);
> @@ -497,9 +537,6 @@ void toggle_guest_mode(struct vcpu *v)
>      {
>          struct cpu_info *cpu_info = get_cpu_info();
>  
> -        cpu_info->root_pgt_changed = true;
> -        cpu_info->pv_cr3 = __pa(this_cpu(root_pgt)) |
> -                           (d->arch.pv.pcid ? get_pcid_bits(v, true) : 0);
>          /*
>           * As in _toggle_guest_pt() the XPTI CR3 write needs to be a TLB-
>           * flushing one too for shadow mode guests.
> diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
> index 630bdc39451d..c1ae5013af96 100644
> --- a/xen/arch/x86/x86_64/asm-offsets.c
> +++ b/xen/arch/x86/x86_64/asm-offsets.c
> @@ -80,6 +80,7 @@ void __dummy__(void)
>  
>  #undef OFFSET_EF
>  
> +    OFFSET(VCPU_id, struct vcpu, vcpu_id);
>      OFFSET(VCPU_processor, struct vcpu, processor);
>      OFFSET(VCPU_domain, struct vcpu, domain);
>      OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map);
> diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
> index c5c723b5f4d4..91413b905768 100644
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -170,9 +170,16 @@ FUNC_LOCAL(restore_all_guest)
>          movabs $PADDR_MASK & PAGE_MASK, %rsi
>          movabs $DIRECTMAP_VIRT_START, %rcx
>          and   %rsi, %rdi
> -        and   %r9, %rsi
>          add   %rcx, %rdi
> +
> +        /*
> +         * The address in the vCPU cr3 is always mapped in the per-domain
> +         * pv_root_pt virt area.
> +         */
> +        imul  $PAGE_SIZE, VCPU_id(%rbx), %esi
> +        movabs $PV_ROOT_PT_MAPPING_VIRT_START, %rcx
>          add   %rcx, %rsi
> +
>          mov   $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx
>          mov   root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8
>          mov   %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi)
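
For anyone not fluent in this part of the entry code: %rsi previously
held the guest root PT's direct-map address, derived by masking the
guest cr3 value, which is why the "and %r9, %rsi" can go away. It now
points at the fixed per-vCPU mapping instead. The rough C equivalent
(illustrative only, local variable invented) would be:

  /*
   * Illustrative equivalent of the new %rsi computation: read the
   * guest root page table through its per-domain mapping rather than
   * through DIRECTMAP_VIRT_START + (cr3 & PADDR_MASK & PAGE_MASK).
   */
  const l4_pgentry_t *guest_l4 = (const l4_pgentry_t *)
      (PV_ROOT_PT_MAPPING_VIRT_START +
       (unsigned long)v->vcpu_id * PAGE_SIZE);
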
