VMX: allocate VMCS pages from domain heap

There being only very few uses of the virtual address of a VMCS,
convert these cases to establish a mapping and lift the Xen heap
restriction from the VMCS allocation.

Signed-off-by: Jan Beulich

--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -119,8 +119,8 @@ const u32 vmx_introspection_force_enable
 const unsigned int vmx_introspection_force_enabled_msrs_size =
     ARRAY_SIZE(vmx_introspection_force_enabled_msrs);
 
-static DEFINE_PER_CPU_READ_MOSTLY(struct vmcs_struct *, vmxon_region);
-static DEFINE_PER_CPU(struct vmcs_struct *, current_vmcs);
+static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, vmxon_region);
+static DEFINE_PER_CPU(paddr_t, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
 DEFINE_PER_CPU(bool_t, vmxon);
 
@@ -483,25 +483,28 @@ static int vmx_init_vmcs_config(void)
     return 0;
 }
 
-static struct vmcs_struct *vmx_alloc_vmcs(void)
+static paddr_t vmx_alloc_vmcs(void)
 {
+    struct page_info *pg;
     struct vmcs_struct *vmcs;
 
-    if ( (vmcs = alloc_xenheap_page()) == NULL )
+    if ( (pg = alloc_domheap_page(NULL, 0)) == NULL )
     {
         gdprintk(XENLOG_WARNING, "Failed to allocate VMCS.\n");
-        return NULL;
+        return 0;
     }
 
+    vmcs = __map_domain_page(pg);
     clear_page(vmcs);
     vmcs->vmcs_revision_id = vmcs_revision_id;
+    unmap_domain_page(vmcs);
 
-    return vmcs;
+    return page_to_maddr(pg);
 }
 
-static void vmx_free_vmcs(struct vmcs_struct *vmcs)
+static void vmx_free_vmcs(paddr_t pa)
 {
-    free_xenheap_page(vmcs);
+    free_domheap_page(maddr_to_page(pa));
 }
 
 static void __vmx_clear_vmcs(void *info)
@@ -514,7 +517,7 @@ static void __vmx_clear_vmcs(void *info)
 
     if ( arch_vmx->active_cpu == smp_processor_id() )
     {
-        __vmpclear(virt_to_maddr(arch_vmx->vmcs));
+        __vmpclear(arch_vmx->vmcs_pa);
         if ( arch_vmx->vmcs_shadow_maddr )
             __vmpclear(arch_vmx->vmcs_shadow_maddr);
 
@@ -523,8 +526,8 @@ static void __vmx_clear_vmcs(void *info)
 
         list_del(&arch_vmx->active_list);
 
-        if ( arch_vmx->vmcs == this_cpu(current_vmcs) )
-            this_cpu(current_vmcs) = NULL;
+        if ( arch_vmx->vmcs_pa == this_cpu(current_vmcs) )
+            this_cpu(current_vmcs) = 0;
     }
 }
 
@@ -550,8 +553,8 @@ static void vmx_load_vmcs(struct vcpu *v
 
     ASSERT(v->arch.hvm_vmx.active_cpu == smp_processor_id());
 
-    __vmptrld(virt_to_maddr(v->arch.hvm_vmx.vmcs));
-    this_cpu(current_vmcs) = v->arch.hvm_vmx.vmcs;
+    __vmptrld(v->arch.hvm_vmx.vmcs_pa);
+    this_cpu(current_vmcs) = v->arch.hvm_vmx.vmcs_pa;
 
     local_irq_restore(flags);
 }
@@ -565,11 +568,11 @@ int vmx_cpu_up_prepare(unsigned int cpu)
     if ( nvmx_cpu_up_prepare(cpu) != 0 )
         printk("CPU%d: Could not allocate virtual VMCS buffer.\n", cpu);
 
-    if ( per_cpu(vmxon_region, cpu) != NULL )
+    if ( per_cpu(vmxon_region, cpu) )
         return 0;
 
     per_cpu(vmxon_region, cpu) = vmx_alloc_vmcs();
-    if ( per_cpu(vmxon_region, cpu) != NULL )
+    if ( per_cpu(vmxon_region, cpu) )
         return 0;
 
     printk("CPU%d: Could not allocate host VMCS\n", cpu);
@@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
 void vmx_cpu_dead(unsigned int cpu)
 {
     vmx_free_vmcs(per_cpu(vmxon_region, cpu));
-    per_cpu(vmxon_region, cpu) = NULL;
+    per_cpu(vmxon_region, cpu) = 0;
     nvmx_cpu_dead(cpu);
 }
 
@@ -638,7 +641,7 @@ int vmx_cpu_up(void)
     if ( (rc = vmx_cpu_up_prepare(cpu)) != 0 )
         return rc;
 
-    switch ( __vmxon(virt_to_maddr(this_cpu(vmxon_region))) )
+    switch ( __vmxon(this_cpu(vmxon_region)) )
     {
     case -2: /* #UD or #GP */
         if ( bios_locked &&
@@ -710,7 +713,7 @@ bool_t vmx_vmcs_try_enter(struct vcpu *v
      * vmx_vmcs_enter/exit and scheduling tail critical regions.
      */
     if ( likely(v == current) )
-        return v->arch.hvm_vmx.vmcs == this_cpu(current_vmcs);
+        return v->arch.hvm_vmx.vmcs_pa == this_cpu(current_vmcs);
 
     fv = &this_cpu(foreign_vmcs);
 
@@ -906,17 +909,17 @@ int vmx_check_msr_bitmap(unsigned long *
 /*
  * Switch VMCS between layer 1 & 2 guest
  */
-void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to)
+void vmx_vmcs_switch(paddr_t from, paddr_t to)
 {
     struct arch_vmx_struct *vmx = &current->arch.hvm_vmx;
 
     spin_lock(&vmx->vmcs_lock);
 
-    __vmpclear(virt_to_maddr(from));
+    __vmpclear(from);
     if ( vmx->vmcs_shadow_maddr )
         __vmpclear(vmx->vmcs_shadow_maddr);
-    __vmptrld(virt_to_maddr(to));
+    __vmptrld(to);
 
-    vmx->vmcs = to;
+    vmx->vmcs_pa = to;
     vmx->launched = 0;
     this_cpu(current_vmcs) = to;
 
@@ -936,11 +939,11 @@ void virtual_vmcs_enter(void *vvmcs)
 
 void virtual_vmcs_exit(void *vvmcs)
 {
-    struct vmcs_struct *cur = this_cpu(current_vmcs);
+    paddr_t cur = this_cpu(current_vmcs);
 
     __vmpclear(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
     if ( cur )
-        __vmptrld(virt_to_maddr(cur));
+        __vmptrld(cur);
 }
 
@@ -1558,17 +1561,17 @@ int vmx_create_vmcs(struct vcpu *v)
     struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
     int rc;
 
-    if ( (arch_vmx->vmcs = vmx_alloc_vmcs()) == NULL )
+    if ( (arch_vmx->vmcs_pa = vmx_alloc_vmcs()) == 0 )
         return -ENOMEM;
 
     INIT_LIST_HEAD(&arch_vmx->active_list);
-    __vmpclear(virt_to_maddr(arch_vmx->vmcs));
+    __vmpclear(arch_vmx->vmcs_pa);
     arch_vmx->active_cpu = -1;
     arch_vmx->launched = 0;
 
     if ( (rc = construct_vmcs(v)) != 0 )
     {
-        vmx_free_vmcs(arch_vmx->vmcs);
+        vmx_free_vmcs(arch_vmx->vmcs_pa);
         return rc;
     }
 
@@ -1581,7 +1584,7 @@ void vmx_destroy_vmcs(struct vcpu *v)
 
     vmx_clear_vmcs(v);
 
-    vmx_free_vmcs(arch_vmx->vmcs);
+    vmx_free_vmcs(arch_vmx->vmcs_pa);
 
     free_xenheap_page(v->arch.hvm_vmx.host_msr_area);
     free_xenheap_page(v->arch.hvm_vmx.msr_area);
@@ -1612,7 +1615,7 @@ void vmx_do_resume(struct vcpu *v)
 
     if ( v->arch.hvm_vmx.active_cpu == smp_processor_id() )
     {
-        if ( v->arch.hvm_vmx.vmcs != this_cpu(current_vmcs) )
+        if ( v->arch.hvm_vmx.vmcs_pa != this_cpu(current_vmcs) )
             vmx_load_vmcs(v);
     }
     else
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -56,13 +56,14 @@ int nvmx_vcpu_initialise(struct vcpu *v)
 {
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    struct page_info *pg = alloc_domheap_page(NULL, 0);
 
-    nvcpu->nv_n2vmcx = alloc_xenheap_page();
-    if ( !nvcpu->nv_n2vmcx )
+    if ( !pg )
     {
         gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
         return -ENOMEM;
     }
+    nvcpu->nv_n2vmcx_pa = page_to_maddr(pg);
 
     /* non-root VMREAD/VMWRITE bitmap. */
     if ( cpu_has_vmx_vmcs_shadowing )
@@ -129,13 +130,14 @@ void nvmx_vcpu_destroy(struct vcpu *v)
      * in order to avoid double free of L2 VMCS and the possible memory
      * leak of L1 VMCS page.
      */
-    if ( nvcpu->nv_n1vmcx )
-        v->arch.hvm_vmx.vmcs = nvcpu->nv_n1vmcx;
+    if ( nvcpu->nv_n1vmcx_pa )
+        v->arch.hvm_vmx.vmcs_pa = nvcpu->nv_n1vmcx_pa;
 
-    if ( nvcpu->nv_n2vmcx ) {
-        __vmpclear(virt_to_maddr(nvcpu->nv_n2vmcx));
-        free_xenheap_page(nvcpu->nv_n2vmcx);
-        nvcpu->nv_n2vmcx = NULL;
+    if ( nvcpu->nv_n2vmcx_pa )
+    {
+        __vmpclear(nvcpu->nv_n2vmcx_pa);
+        free_domheap_page(maddr_to_page(nvcpu->nv_n2vmcx_pa));
+        nvcpu->nv_n2vmcx_pa = 0;
     }
 
     /* Must also cope with nvmx_vcpu_initialise() not having got called.
      */
 
@@ -726,8 +728,8 @@ static void __clear_current_vvmcs(struct
 {
     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
-    if ( nvcpu->nv_n2vmcx )
-        __vmpclear(virt_to_maddr(nvcpu->nv_n2vmcx));
+    if ( nvcpu->nv_n2vmcx_pa )
+        __vmpclear(nvcpu->nv_n2vmcx_pa);
 }
 
 static bool_t __must_check _map_msr_bitmap(struct vcpu *v)
@@ -1125,7 +1127,7 @@ static void virtual_vmentry(struct cpu_u
     void *vvmcs = nvcpu->nv_vvmcx;
     unsigned long lm_l1, lm_l2;
 
-    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs, nvcpu->nv_n2vmcx);
+    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n2vmcx_pa);
 
     nestedhvm_vcpu_enter_guestmode(v);
     nvcpu->nv_vmentry_pending = 0;
@@ -1335,7 +1337,7 @@ static void virtual_vmexit(struct cpu_us
          !(v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
         shadow_to_vvmcs_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields);
 
-    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs, nvcpu->nv_n1vmcx);
+    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n1vmcx_pa);
 
     nestedhvm_vcpu_exit_guestmode(v);
     nvcpu->nv_vmexit_pending = 0;
@@ -1433,10 +1435,11 @@ int nvmx_handle_vmxon(struct cpu_user_re
      * `fork' the host vmcs to shadow_vmcs
      * vmcs_lock is not needed since we are on current
      */
-    nvcpu->nv_n1vmcx = v->arch.hvm_vmx.vmcs;
-    __vmpclear(virt_to_maddr(v->arch.hvm_vmx.vmcs));
-    memcpy(nvcpu->nv_n2vmcx, v->arch.hvm_vmx.vmcs, PAGE_SIZE);
-    __vmptrld(virt_to_maddr(v->arch.hvm_vmx.vmcs));
+    nvcpu->nv_n1vmcx_pa = v->arch.hvm_vmx.vmcs_pa;
+    __vmpclear(v->arch.hvm_vmx.vmcs_pa);
+    copy_domain_page(_mfn(PFN_DOWN(nvcpu->nv_n2vmcx_pa)),
+                     _mfn(PFN_DOWN(v->arch.hvm_vmx.vmcs_pa)));
+    __vmptrld(v->arch.hvm_vmx.vmcs_pa);
     v->arch.hvm_vmx.launched = 0;
     vmreturn(regs, VMSUCCEED);
 
@@ -1888,9 +1891,15 @@ int nvmx_msr_read_intercept(unsigned int
      */
     switch (msr) {
     case MSR_IA32_VMX_BASIC:
+    {
+        const struct vmcs_struct *vmcs =
+            map_domain_page(_mfn(PFN_DOWN(v->arch.hvm_vmx.vmcs_pa)));
+
         data = (host_data & (~0ul << 32)) |
-               (v->arch.hvm_vmx.vmcs->vmcs_revision_id & 0x7fffffff);
+               (vmcs->vmcs_revision_id & 0x7fffffff);
+        unmap_domain_page(vmcs);
         break;
+    }
     case MSR_IA32_VMX_PINBASED_CTLS:
     case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
         /* 1-seetings */
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -104,8 +104,8 @@ struct nestedvcpu {
     void *nv_n2vmcx; /* shadow VMCB/VMCS used to run l2 guest */
 
     uint64_t nv_vvmcxaddr; /* l1 guest physical address of nv_vvmcx */
-    uint64_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
-    uint64_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
+    paddr_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
+    paddr_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
 
     /* SVM/VMX arch specific */
     union {
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -92,8 +92,8 @@ struct pi_desc {
 #define NR_PML_ENTRIES   512
 
 struct arch_vmx_struct {
-    /* Virtual address of VMCS. */
-    struct vmcs_struct  *vmcs;
+    /* Physical address of VMCS. */
+    paddr_t              vmcs_pa;
 
     /* VMCS shadow machine address. */
     paddr_t              vmcs_shadow_maddr;
@@ -488,7 +488,7 @@ void vmx_enable_intercept_for_msr(struct
 int vmx_read_guest_msr(u32 msr, u64 *val);
 int vmx_write_guest_msr(u32 msr, u64 val);
 int vmx_add_msr(u32 msr, int type);
-void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to);
+void vmx_vmcs_switch(paddr_t from, paddr_t to);
 void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector);
 void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector);
 int vmx_check_msr_bitmap(unsigned long *msr_bitmap, u32 msr, int access_type);
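
Editor's note (not part of the patch): a minimal sketch of the map-on-access
pattern the conversion relies on, mirroring the map_domain_page() /
unmap_domain_page() use in nvmx_msr_read_intercept() above. The helper name
read_vmcs_revision_id() is purely illustrative and does not exist in the tree.

    /*
     * Illustrative only: once a VMCS is tracked by physical address, any
     * software access to its contents goes through a transient mapping of
     * the underlying domheap page.
     */
    static uint32_t read_vmcs_revision_id(paddr_t vmcs_pa)
    {
        const struct vmcs_struct *vmcs =
            map_domain_page(_mfn(PFN_DOWN(vmcs_pa)));
        uint32_t id = vmcs->vmcs_revision_id;

        unmap_domain_page(vmcs);

        return id;
    }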