[Xen-devel] [PATCH 15/17] vmx: nest: virtual ept for nested
This patch adds virtual EPT capability to L1. It is implemented as a
simple per-vCPU, vTLB-like component that is independent of the
domain-wide p2m.

Signed-off-by: Qing He <qing.he@xxxxxxxxx>

---
 b/xen/arch/x86/hvm/vmx/vept.c        |  574 +++++++++++++++++++++++++++++++++++
 b/xen/include/asm-x86/hvm/vmx/vept.h |   10
 xen/arch/x86/hvm/vmx/Makefile        |    1
 xen/arch/x86/hvm/vmx/nest.c          |  136 +++++++-
 xen/arch/x86/hvm/vmx/vmx.c           |   13
 xen/include/asm-x86/hvm/vmx/nest.h   |    7
 6 files changed, 734 insertions(+), 7 deletions(-)
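
[Editor's note: the sketch below is not part of the patch; it is a
simplified, self-contained illustration of the per-vCPU `cache slot'
scheme that vept.c implements -- a guest EPTP is looked up in a small
set of slots, a hit is moved to the most-recently-used position, and a
miss evicts the least-recently-used slot before a fresh shadow root is
built. All names here (eptp_cache, eptp_slot, lookup_or_evict,
fake_alloc_root) are hypothetical; the real code uses Xen's list and
domheap page primitives instead of a flat array.]

/* Stand-alone model of the vTLB-style EPTP slot cache (hypothetical names). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NSLOTS 4                        /* the patch uses VEPT_MAX_SLOTS = 8 */

struct eptp_slot {
    uint64_t eptp;                      /* guest EPT pointer (0 = unused) */
    uint64_t shadow_root;               /* stands in for the shadow root mfn */
};

struct eptp_cache {
    /* index 0 = least recently used, NSLOTS-1 = most recently used */
    struct eptp_slot slots[NSLOTS];
};

/* Return the shadow root for geptp, building one (and evicting) on a miss. */
static uint64_t lookup_or_evict(struct eptp_cache *c, uint64_t geptp,
                                uint64_t (*alloc_root)(void))
{
    struct eptp_slot hit;
    int i, found = -1;

    for ( i = 0; i < NSLOTS; i++ )
        if ( c->slots[i].eptp == geptp )
            found = i;

    if ( found < 0 )
    {
        /* miss: reuse the LRU slot (index 0) with a fresh shadow root */
        hit.eptp = geptp;
        hit.shadow_root = alloc_root();
        found = 0;
    }
    else
        hit = c->slots[found];

    /* shift the slots above the hit down and place the hit at the MRU end */
    memmove(&c->slots[found], &c->slots[found + 1],
            (NSLOTS - 1 - found) * sizeof(struct eptp_slot));
    c->slots[NSLOTS - 1] = hit;

    return hit.shadow_root;
}

static uint64_t fake_alloc_root(void)
{
    static uint64_t next = 0x1000;
    return next += 0x1000;              /* pretend each root is a new page */
}

int main(void)
{
    struct eptp_cache c;
    memset(&c, 0, sizeof(c));

    printf("eptp 0xa000 -> root %#lx\n",
           (unsigned long)lookup_or_evict(&c, 0xa000, fake_alloc_root));
    printf("eptp 0xb000 -> root %#lx\n",
           (unsigned long)lookup_or_evict(&c, 0xb000, fake_alloc_root));
    /* a second lookup of 0xa000 hits the cache and reuses the same root */
    printf("eptp 0xa000 -> root %#lx\n",
           (unsigned long)lookup_or_evict(&c, 0xa000, fake_alloc_root));
    return 0;
}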
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/arch/x86/hvm/vmx/Makefile
--- a/xen/arch/x86/hvm/vmx/Makefile Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/Makefile Thu Apr 22 22:30:10 2010 +0800
@@ -6,3 +6,4 @@
 obj-y += vpmu.o
 obj-y += vpmu_core2.o
 obj-y += nest.o
+obj-y += vept.o
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/arch/x86/hvm/vmx/nest.c
--- a/xen/arch/x86/hvm/vmx/nest.c Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/nest.c Thu Apr 22 22:30:10 2010 +0800
@@ -26,6 +26,7 @@
 #include <asm/hvm/vmx/vmx.h>
 #include <asm/hvm/vmx/vvmcs.h>
 #include <asm/hvm/vmx/nest.h>
+#include <asm/hvm/vmx/vept.h>
 
 /*
  * VMX instructions support functions
 */
@@ -295,6 +296,9 @@
     __vmptrld(virt_to_maddr(nest->hvmcs));
     v->arch.hvm_vmx.launched = 0;
 
+    nest->geptp = 0;
+    nest->vept = vept_init(v);
+
     vmreturn(regs, VMSUCCEED);
 
 out:
@@ -313,6 +317,9 @@
     if ( unlikely(!nest->guest_vmxon_pa) )
         goto invalid_op;
 
+    vept_teardown(nest->vept);
+    nest->vept = 0;
+
     nest->guest_vmxon_pa = 0;
     __vmpclear(virt_to_maddr(nest->svmcs));
 
@@ -529,6 +536,67 @@
     return vmx_nest_handle_vmresume(regs);
 }
 
+int vmx_nest_handle_invept(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmx_inst_decoded decode;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    mfn_t mfn;
+    u64 eptp;
+    int type;
+
+    if ( unlikely(!nest->guest_vmxon_pa) )
+        goto invalid_op;
+
+    decode_vmx_inst(regs, &decode);
+
+    hvm_copy_from_guest_virt(&eptp, decode.mem, sizeof(eptp), 0);
+    type = reg_read(regs, decode.reg2);
+
+    /* TODO: physical invept on other cpus */
+    switch ( type )
+    {
+    case 1:
+        mfn = vept_invalidate(nest->vept, eptp);
+        if ( eptp == nest->geptp )
+            nest->geptp = 0;
+
+        if ( __mfn_valid(mfn_x(mfn)) )
+            __invept(1, mfn_x(mfn) << PAGE_SHIFT | (eptp & 0xfff), 0);
+        break;
+    case 2:
+        vept_invalidate_all(nest->vept);
+        nest->geptp = 0;
+        break;
+    default:
+        gdprintk(XENLOG_ERR, "nest: unsupported invept type %d\n", type);
+        break;
+    }
+
+    vmreturn(regs, VMSUCCEED);
+
+    return X86EMUL_OKAY;
+
+invalid_op:
+    hvm_inject_exception(TRAP_invalid_op, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+int vmx_nest_vept(struct vcpu *v)
+{
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    int r = 0;
+
+    if ( paging_mode_hap(v->domain) &&
+         (__get_vvmcs(nest->vvmcs, CPU_BASED_VM_EXEC_CONTROL) &
+            CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
+         (__get_vvmcs(nest->vvmcs, SECONDARY_VM_EXEC_CONTROL) &
+            SECONDARY_EXEC_ENABLE_EPT) )
+        r = 1;
+
+    return r;
+}
+
 /*
  * Nested VMX context switch
  */
@@ -739,7 +807,14 @@
     vvmcs_to_shadow(nest->vvmcs, CR0_GUEST_HOST_MASK);
     vvmcs_to_shadow(nest->vvmcs, CR4_GUEST_HOST_MASK);
 
-    /* TODO: PDPTRs for nested ept */
+    if ( vmx_nest_vept(v) )
+    {
+        vvmcs_to_shadow(nest->vvmcs, GUEST_PDPTR0);
+        vvmcs_to_shadow(nest->vvmcs, GUEST_PDPTR1);
+        vvmcs_to_shadow(nest->vvmcs, GUEST_PDPTR2);
+        vvmcs_to_shadow(nest->vvmcs, GUEST_PDPTR3);
+    }
+
     /* TODO: CR3 target control */
 }
 
@@ -787,14 +862,32 @@
     }
 #endif
 
+
+    /* loading EPT_POINTER for L2 */
+    if ( vmx_nest_vept(v) )
+    {
+        u64 geptp;
+        mfn_t mfn;
+
+        geptp = __get_vvmcs(nest->vvmcs, EPT_POINTER);
+        if ( geptp != nest->geptp )
+        {
+            mfn = vept_load_eptp(nest->vept, geptp);
+            nest->geptp = geptp;
+
+            __vmwrite(EPT_POINTER, (mfn_x(mfn) << PAGE_SHIFT) | 0x1e);
+#ifdef __i386__
+            __vmwrite(EPT_POINTER_HIGH, (mfn_x(mfn) << PAGE_SHIFT) >> 32);
+#endif
+        }
+    }
+
     regs->rip = __get_vvmcs(nest->vvmcs, GUEST_RIP);
     regs->rsp = __get_vvmcs(nest->vvmcs, GUEST_RSP);
     regs->rflags = __get_vvmcs(nest->vvmcs, GUEST_RFLAGS);
 
     /* updating host cr0 to sync TS bit */
     __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
-
-    /* TODO: EPT_POINTER */
 }
 
 static void sync_vvmcs_guest_state(struct vmx_nest_struct *nest)
@@ -1064,8 +1157,26 @@
         break;
     }
 
+    case EXIT_REASON_EPT_VIOLATION:
+    {
+        unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION);
+        paddr_t gpa = __vmread(GUEST_PHYSICAL_ADDRESS);
+#ifdef __i386__
+        gpa |= (paddr_t)__vmread(GUEST_PHYSICAL_ADDRESS_HIGH) << 32;
+#endif
+        if ( vmx_nest_vept(v) )
+        {
+            if ( !vept_ept_violation(nest->vept, nest->geptp,
+                                     exit_qualification, gpa) )
+                bypass_l0 = 1;
+            else
+                nest->vmexit_pending = 1;
+        }
+
+        break;
+    }
+
     case EXIT_REASON_WBINVD:
-    case EXIT_REASON_EPT_VIOLATION:
     case EXIT_REASON_EPT_MISCONFIG:
     case EXIT_REASON_EXTERNAL_INTERRUPT:
         /* pass to L0 handler */
@@ -1229,11 +1340,14 @@
         data = (data << 32) | eax;
         break;
     case MSR_IA32_VMX_PROCBASED_CTLS:
+        mask = paging_mode_hap(current->domain)?
+                   0: CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+
         rdmsr(regs->ecx, eax, edx);
 #define REMOVED_EXEC_CONTROL_CAP (CPU_BASED_TPR_SHADOW \
-            | CPU_BASED_ACTIVATE_MSR_BITMAP \
-            | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
+            | CPU_BASED_ACTIVATE_MSR_BITMAP)
         data = edx & ~REMOVED_EXEC_CONTROL_CAP;
+        data = edx & ~mask;
         data = (data << 32) | eax;
         break;
     case MSR_IA32_VMX_EXIT_CTLS:
@@ -1254,12 +1368,20 @@
         data = (data << 32) | eax;
         break;
     case MSR_IA32_VMX_PROCBASED_CTLS2:
-        mask = 0;
+        mask = paging_mode_hap(current->domain)?
+                   SECONDARY_EXEC_ENABLE_EPT : 0;
+
         rdmsr(regs->ecx, eax, edx);
         data = edx & mask;
         data = (data << 32) | eax;
         break;
+    case MSR_IA32_VMX_EPT_VPID_CAP:
+        rdmsr(regs->ecx, eax, edx);
+#define REMOVED_EPT_VPID_CAP_HIGH ( 1 | 1<<8 | 1<<9 | 1<<10 | 1<<11 )
+#define REMOVED_EPT_VPID_CAP_LOW ( 1<<16 | 1<<17 | 1<<26 )
+        data = edx & ~REMOVED_EPT_VPID_CAP_HIGH;
+        data = (data << 32) | (eax & ~REMOVED_EPT_VPID_CAP_LOW);
+        break;
 
     /* pass through MSRs */
     case IA32_FEATURE_CONTROL_MSR:
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/arch/x86/hvm/vmx/vept.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vept.c Thu Apr 22 22:30:10 2010 +0800
@@ -0,0 +1,574 @@
+/*
+ * vept.c: virtual EPT for nested virtualization
+ *
+ * Copyright (c) 2010, Intel Corporation.
+ * Author: Qing He <qing.he@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/list.h>
+#include <xen/mm.h>
+#include <xen/paging.h>
+#include <xen/domain_page.h>
+#include <xen/sched.h>
+#include <asm/page.h>
+#include <xen/numa.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vept.h>
+
+#undef mfn_to_page
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
+#undef mfn_valid
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
+#undef page_to_mfn
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
+
+/*
+ * This virtual EPT implementation is independent of the p2m facility
+ * and has somewhat different characteristics. It works in a similar
+ * way to a shadow page table (guest table and host table composition),
+ * but is per-vCPU and of vTLB style:
+ *   - per vCPU, so no lock is required
+ *   - vTLB style signifies honoring all invalidations, and not using
+ *     write protection. Unlike an ordinary page table, since EPT updates
+ *     and invalidations are minimal in a well written VMM, the overhead
+ *     is also minimal.
+ *
+ * The physical root is loaded directly into the L2 sVMCS, without entering
+ * any other host controls. Multiple `cache slots' are maintained
+ * for multiple guest EPTPs, with simple LRU replacement.
+ *
+ * One of the limitations so far is that it doesn't work with the
+ * L0 emulation code, so L1 p2m_mmio_direct on top of L0 p2m_mmio_dm
+ * is not supported for now.
+ */
+
+#define VEPT_MAX_SLOTS        8
+#define VEPT_ALLOCATION_SIZE  512
+
+struct vept_slot {
+    u64              eptp;      /* guest eptp */
+    mfn_t            root;      /* root of phys table */
+    struct list_head list;
+
+    struct page_list_head page_list;
+};
+
+struct vept {
+    struct list_head used_slots;    /* lru: new->tail, old->head */
+    struct list_head free_slots;
+
+    int total_pages;
+    int free_pages;
+    struct page_list_head freelist;
+
+    struct vcpu *vcpu;
+};
+
+
+static struct vept_slot *__get_eptp_slot(struct vept *vept, u64 geptp)
+{
+    struct vept_slot *slot, *tmp;
+
+    list_for_each_entry_safe( slot, tmp, &vept->used_slots, list )
+        if ( slot->eptp == geptp )
+            return slot;
+
+    return NULL;
+}
+
+static struct vept_slot *get_eptp_slot(struct vept *vept, u64 geptp)
+{
+    struct vept_slot *slot;
+
+    slot = __get_eptp_slot(vept, geptp);
+    if ( slot != NULL )
+        list_del(&slot->list);
+
+    return slot;
+}
+
+static void __clear_slot(struct vept *vept, struct vept_slot *slot)
+{
+    struct page_info *pg;
+
+    slot->eptp = 0;
+
+    while ( !page_list_empty(&slot->page_list) )
+    {
+        pg = page_list_remove_head(&slot->page_list);
+        page_list_add_tail(pg, &vept->freelist);
+
+        vept->free_pages++;
+    }
+}
+
+static struct vept_slot *get_free_slot(struct vept *vept)
+{
+    struct vept_slot *slot = NULL;
+
+    if ( !list_empty(&vept->free_slots) )
+    {
+        slot = list_entry(vept->free_slots.next, struct vept_slot, list);
+        list_del(&slot->list);
+    }
+    else if ( !list_empty(&vept->used_slots) )
+    {
+        slot = list_entry(vept->used_slots.next, struct vept_slot, list);
+        list_del(&slot->list);
+        __clear_slot(vept, slot);
+    }
+
+    return slot;
+}
+
+static void clear_all_slots(struct vept *vept)
+{
+    struct vept_slot *slot, *tmp;
+
+    list_for_each_entry_safe( slot, tmp, &vept->used_slots, list )
+    {
+        list_del(&slot->list);
+        __clear_slot(vept, slot);
+        list_add_tail(&slot->list, &vept->free_slots);
+    }
+}
+
+static int free_some_pages(struct vept *vept, struct vept_slot *curr)
+{
+    struct vept_slot *slot;
+    int r = 0;
+
+    if ( !list_empty(&vept->used_slots) )
+    {
+        slot = list_entry(vept->used_slots.next, struct vept_slot, list);
+        if ( slot != curr )
+        {
+            list_del(&slot->list);
+            __clear_slot(vept, slot);
+            list_add_tail(&slot->list, &vept->free_slots);
+
+            r = 1;
+        }
+    }
+
+    return r;
+}
+
+struct vept *vept_init(struct vcpu *v)
+{
+    struct vept *vept;
+    struct vept_slot *slot;
+    struct page_info *pg;
+    int i;
+
+    vept = xmalloc(struct vept);
+    if ( vept == NULL )
+        goto out;
+
+    memset(vept, 0, sizeof(*vept));
+    vept->vcpu = v;
+
+    INIT_PAGE_LIST_HEAD(&vept->freelist);
+    INIT_LIST_HEAD(&vept->used_slots);
+    INIT_LIST_HEAD(&vept->free_slots);
+
+    for ( i = 0; i < VEPT_MAX_SLOTS; i++ )
+    {
+        slot = xmalloc(struct vept_slot);
+        if ( slot == NULL )
+            break;
+
+        memset(slot, 0, sizeof(*slot));
+
+        INIT_LIST_HEAD(&slot->list);
+        INIT_PAGE_LIST_HEAD(&slot->page_list);
+
+        list_add(&slot->list, &vept->free_slots);
+    }
+
+    for ( i = 0; i < VEPT_ALLOCATION_SIZE; i++ )
+    {
+        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(v->domain)));
+        if ( pg == NULL )
+            break;
+
+        page_list_add_tail(pg, &vept->freelist);
+        vept->total_pages++;
+        vept->free_pages++;
+    }
+
+ out:
+    return vept;
+}
+
+void vept_teardown(struct vept *vept)
+{
+    struct page_info *pg;
+    struct vept_slot *slot, *tmp;
+
+    clear_all_slots(vept);
+
+    while ( !page_list_empty(&vept->freelist) )
+    {
+        pg = page_list_remove_head(&vept->freelist);
+        free_domheap_page(pg);
+        vept->free_pages++;
+        vept->total_pages++;
+    }
+
+    list_for_each_entry_safe( slot, tmp, &vept->free_slots, list )
+        xfree(slot);
+
+    xfree(vept);
+}
+
+mfn_t vept_load_eptp(struct vept *vept, u64 geptp)
+{
+    struct page_info *pg;
+    struct vept_slot *slot;
+    mfn_t mfn = _mfn(INVALID_MFN);
+    void *addr;
+
+    ASSERT(vept->vcpu == current);
+
+    slot = get_eptp_slot(vept, geptp);
+    if ( slot == NULL )
+    {
+        slot = get_free_slot(vept);
+        if ( unlikely(slot == NULL) )
+        {
+            gdprintk(XENLOG_ERR, "nest: can't get free slot\n");
+            return mfn;
+        }
+
+        while ( !vept->free_pages )
+            if ( !free_some_pages(vept, slot) )
+            {
+                slot->eptp = 0;
+                list_add_tail(&slot->list, &vept->free_slots);
+                gdprintk(XENLOG_ERR, "nest: vept no free pages\n");
+
+                return mfn;
+            }
+
+        vept->free_pages--;
+        pg = page_list_remove_head(&vept->freelist);
+
+        mfn = page_to_mfn(pg);
+        addr = map_domain_page(mfn_x(mfn));
+        clear_page(addr);
+        unmap_domain_page(addr);
+        page_list_add_tail(pg, &slot->page_list);
+        slot->eptp = geptp;
+        slot->root = mfn;
+    }
+
+    mfn = slot->root;
+    list_add_tail(&slot->list, &vept->used_slots);
+
+    return mfn;
+}
+
+mfn_t vept_invalidate(struct vept *vept, u64 geptp)
+{
+    struct vept_slot *slot;
+    mfn_t mfn = _mfn(INVALID_MFN);
+
+    ASSERT(vept->vcpu == current);
+
+    slot = get_eptp_slot(vept, geptp);
+    if ( slot != NULL )
+    {
+        mfn = slot->root;
+        __clear_slot(vept, slot);
+        list_add_tail(&slot->list, &vept->free_slots);
+    }
+
+    return mfn;
+}
+
+void vept_invalidate_all(struct vept *vept)
+{
+    ASSERT(vept->vcpu == current);
+
+    clear_all_slots(vept);
+}
+
+/*
+ * guest EPT walk and EPT violation
+ */
+struct ept_walk {
+    unsigned long gfn;
+    unsigned long gfn_remainder;
+    ept_entry_t l4e, l3e, l2e, l1e;
+    mfn_t l4mfn, l3mfn, l2mfn, l1mfn;
+    int sp;
+};
+typedef struct ept_walk ept_walk_t;
+
+#define GEPT_NORMAL_PAGE  0
+#define GEPT_SUPER_PAGE   1
+#define GEPT_NOT_PRESENT  2
+static int guest_ept_next_level(struct vcpu *v, ept_entry_t **table,
+                   unsigned long *gfn_remainder, int level, u32 *ar,
+                   ept_entry_t *entry, mfn_t *next_mfn)
+{
+    int index;
+    ept_entry_t *ept_entry;
+    ept_entry_t *next;
+    p2m_type_t p2mt;
+    int rc = GEPT_NORMAL_PAGE;
+    mfn_t mfn;
+
+    index = *gfn_remainder >> (level * EPT_TABLE_ORDER);
+
+    ept_entry = (*table) + index;
+    *entry = *ept_entry;
+    *ar &= entry->epte & 0x7;
+
+    *gfn_remainder &= (1UL << (level * EPT_TABLE_ORDER)) - 1;
+
+    if ( !(ept_entry->epte & 0x7) )
+        rc = GEPT_NOT_PRESENT;
+    else if ( ept_entry->sp_avail )
+        rc = GEPT_SUPER_PAGE;
+    else
+    {
+        mfn = gfn_to_mfn(v->domain, ept_entry->mfn, &p2mt);
+        if ( !p2m_is_ram(p2mt) )
+            return GEPT_NOT_PRESENT;
+
+        if ( next_mfn )
+        {
+            next = map_domain_page(mfn_x(mfn));
+            unmap_domain_page(*table);
+
+            *table = next;
+            *next_mfn = mfn;
+        }
+    }
+
+    return rc;
+}
+
+static u32 guest_walk_ept(struct vcpu *v, ept_walk_t *gw,
+                          u64 geptp, u64 ggpa)
+{
+    ept_entry_t *table;
+    p2m_type_t p2mt;
+    int rc;
+    u32 ar = 0x7;
+
+    unsigned long gfn = (unsigned long) (ggpa >> PAGE_SHIFT);
+    unsigned long gfn_remainder = gfn;
+
+    memset(gw, 0, sizeof(*gw));
+    gw->gfn = gfn;
+    gw->sp = 0;
+
+    gw->l4mfn = gfn_to_mfn(v->domain, geptp >> PAGE_SHIFT, &p2mt);
+    if ( !p2m_is_ram(p2mt) )
+        return 0;
+
+    table = map_domain_page(mfn_x(gw->l4mfn));
+
+    rc = guest_ept_next_level(v, &table, &gfn_remainder, 3, &ar,
+                              &gw->l4e, &gw->l3mfn);
+
+    if ( rc )
+        goto out;
+
+    rc = guest_ept_next_level(v, &table, &gfn_remainder, 2, &ar,
+                              &gw->l3e, &gw->l2mfn);
+
+    if ( rc == GEPT_SUPER_PAGE )
+        gw->sp = 2;
+    if ( rc )
+        goto out;
+
+    rc = guest_ept_next_level(v, &table, &gfn_remainder, 1, &ar,
+                              &gw->l2e, &gw->l1mfn);
+
+    if ( rc == GEPT_SUPER_PAGE )
+        gw->sp = 1;
+    if ( rc )
+        goto out;
+
+    rc = guest_ept_next_level(v, &table, &gfn_remainder, 0, &ar,
+                              &gw->l1e, NULL);
+
+ out:
+    gw->gfn_remainder = gfn_remainder;
+    unmap_domain_page(*table);
+    return ar;
+}
+
+static void epte_set_ar_bits(ept_entry_t *entry, unsigned long ar)
+{
+    entry->epte &= ~0x7f;
+    entry->epte |= ar & 0x7f;
+}
+
+static int shadow_ept_next_level(struct vept *vept, struct vept_slot *slot,
+                    ept_entry_t **table, unsigned long *gfn_remainder,
+                    int level, u32 *ar, ept_entry_t gentry)
+{
+    int index;
+    ept_entry_t *sentry;
+    ept_entry_t *next;
+    mfn_t mfn;
+    struct page_info *pg;
+
+    index = *gfn_remainder >> (level * EPT_TABLE_ORDER);
+
+    sentry = (*table) + index;
+    *ar = sentry->epte & 0x7;
+
+    *gfn_remainder &= (1UL << (level * EPT_TABLE_ORDER)) - 1;
+
+    if ( !(sentry->epte & 0x7) )
+    {
+        while ( !vept->free_pages )
+            if ( !free_some_pages(vept, slot) )
+            {
+                gdprintk(XENLOG_ERR, "nest: vept no free pages\n");
+                return 0;
+            }
+
+        vept->free_pages--;
+        pg = page_list_remove_head(&vept->freelist);
+        page_list_add_tail(pg, &slot->page_list);
+        mfn = page_to_mfn(pg);
+        next = map_domain_page(mfn_x(mfn));
+        clear_page(next);
+
+        sentry->mfn = mfn_x(mfn);
+    }
+    else
+    {
+        next = map_domain_page(sentry->mfn);
+    }
+
+    epte_set_ar_bits(sentry, gentry.epte);
+
+    unmap_domain_page(*table);
+    *table = next;
+
+    return 1;
+}
+
+int vept_ept_violation(struct vept *vept, u64 geptp,
+                       unsigned long qualification, paddr_t addr)
+{
+    ept_walk_t gw;
+    struct vept_slot *slot;
+    ept_entry_t *table, *gept;
+    ept_entry_t *sentry, *gentry;
+    u32 old_entry, sp_ar = 0;
+    p2m_type_t p2mt;
+    unsigned long mfn_start = 0;
+    unsigned long gfn_remainder;
+    int rc, i;
+
+    ASSERT(vept->vcpu == current);
+
+    slot = __get_eptp_slot(vept, geptp);
+    if ( unlikely(slot == NULL) )
+        return 0;
+
+    rc = guest_walk_ept(vept->vcpu, &gw, geptp, addr);
+
+    if ( !(rc & (qualification & 0x7)) )    /* inject to guest */
+        return 1;
+
+    if ( gw.sp == 2 )    /* 1G */
+    {
+        sp_ar = gw.l3e.epte & 0x7;
+        mfn_start = gw.l3e.mfn +
+            (gw.gfn_remainder & (~(1 << EPT_TABLE_ORDER) - 1));
+    }
+    if ( gw.sp == 1 )    /* 2M */
+    {
+        sp_ar = gw.l2e.epte & 0x7;
+        mfn_start = gw.l2e.mfn;
+    }
+    else
+        mfn_start = 0;
+
+    table = map_domain_page(mfn_x(slot->root));
+    gfn_remainder = gw.gfn;
+
+    shadow_ept_next_level(vept, slot, &table, &gfn_remainder, 3,
+                          &old_entry, gw.l4e);
+
+    shadow_ept_next_level(vept, slot, &table, &gfn_remainder, 2,
+                          &old_entry, gw.l3e);
+
+    shadow_ept_next_level(vept, slot, &table, &gfn_remainder, 1,
+                          &old_entry, (gw.sp == 2) ? gw.l3e : gw.l2e);
+
+    /* if l1p is just allocated, do a full prefetch */
+    if ( !old_entry && !gw.sp )
+    {
+        gept = map_domain_page(mfn_x(gw.l1mfn));
+        for ( i = 0; i < 512; i++ )
+        {
+            gentry = gept + i;
+            sentry = table + i;
+            if ( gentry->epte & 0x7 )
+            {
+                sentry->mfn = mfn_x(gfn_to_mfn_guest(vept->vcpu->domain,
+                                        gentry->mfn, &p2mt));
+                epte_set_ar_bits(sentry, gentry->epte);
+            }
+            else
+                sentry->epte = 0;
+        }
+        unmap_domain_page(gept);
+    }
+    else if ( !old_entry && gw.sp )
+    {
+        for ( i = 0; i < 512; i++ )
+        {
+            sentry = table + i;
+            sentry->mfn = mfn_x(gfn_to_mfn_guest(vept->vcpu->domain,
+                                    mfn_start + i, &p2mt));
+            epte_set_ar_bits(sentry, sp_ar);
+        }
+    }
+    else if ( old_entry && !gw.sp )
+    {
+        i = gw.gfn & ((1 << EPT_TABLE_ORDER) - 1);
+        sentry = table + i;
+        sentry->mfn = mfn_x(gfn_to_mfn_guest(vept->vcpu->domain,
+                                gw.l1e.mfn, &p2mt));
+        epte_set_ar_bits(sentry, gw.l1e.epte);
+    }
+    else // old_entry && gw.sp
+    {
+        i = gw.gfn & ((1 << EPT_TABLE_ORDER) - 1);
+        sentry = table + i;
+        sentry->mfn = mfn_x(gfn_to_mfn_guest(vept->vcpu->domain,
+                                mfn_start + i, &p2mt));
+        epte_set_ar_bits(sentry, sp_ar);
+    }
+
+    unmap_domain_page(table);
+    return 0;
+}
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Apr 22 22:30:10 2010 +0800
@@ -1032,6 +1032,14 @@
     p2m_type_t p2mt;
     char *p;
 
+    /*
+     * If in nested EPT operation, L0 doesn't know how to interpret CR3;
+     * it's L1's responsibility to provide GUEST_PDPTRn, and we rely
+     * solely on them.
+     */
+    if ( v->arch.hvm_vcpu.in_nesting && vmx_nest_vept(v) )
+        return;
+
     /* EPT needs to load PDPTRS into VMCS for PAE. */
     if ( !hvm_pae_enabled(v) || (v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
         return;
@@ -2705,6 +2713,11 @@
         if ( vmx_nest_handle_vmxon(regs) == X86EMUL_OKAY )
             __update_guest_eip(inst_len);
         break;
+    case EXIT_REASON_INVEPT:
+        inst_len = __get_instruction_length();
+        if ( vmx_nest_handle_invept(regs) == X86EMUL_OKAY )
+            __update_guest_eip(inst_len);
+        break;
 
     case EXIT_REASON_MWAIT_INSTRUCTION:
     case EXIT_REASON_MONITOR_INSTRUCTION:
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/include/asm-x86/hvm/vmx/nest.h
--- a/xen/include/asm-x86/hvm/vmx/nest.h Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/include/asm-x86/hvm/vmx/nest.h Thu Apr 22 22:30:10 2010 +0800
@@ -47,6 +47,9 @@
     unsigned long        intr_info;
     unsigned long        error_code;
+
+    u64                  geptp;
+    struct vept         *vept;
 };
 
 asmlinkage void vmx_nest_switch_mode(void);
@@ -64,6 +67,8 @@
 int vmx_nest_handle_vmresume(struct cpu_user_regs *regs);
 int vmx_nest_handle_vmlaunch(struct cpu_user_regs *regs);
 
+int vmx_nest_handle_invept(struct cpu_user_regs *regs);
+
 void vmx_nest_update_exec_control(struct vcpu *v, unsigned long value);
 void vmx_nest_update_secondary_exec_control(struct vcpu *v,
                                             unsigned long value);
@@ -81,4 +86,6 @@
 int vmx_nest_msr_write_intercept(struct cpu_user_regs *regs,
                                  u64 msr_content);
 
+int vmx_nest_vept(struct vcpu *v);
+
 #endif /* __ASM_X86_HVM_NEST_H__ */
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/include/asm-x86/hvm/vmx/vept.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vept.h Thu Apr 22 22:30:10 2010 +0800
@@ -0,0 +1,10 @@
+#include <asm/hvm/vmx/vmx.h>
+
+
+struct vept *vept_init(struct vcpu *v);
+void vept_teardown(struct vept *vept);
+mfn_t vept_load_eptp(struct vept *vept, u64 eptp);
+mfn_t vept_invalidate(struct vept *vept, u64 eptp);
+void vept_invalidate_all(struct vept *vept);
+int vept_ept_violation(struct vept *vept, u64 eptp,
+                       unsigned long qualification, paddr_t addr);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
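
[Editor's note: a second illustrative sketch, also not part of the patch.
On an L2 EPT violation, vept_ept_violation() composes two translations:
the guest's EPT maps an L2 guest-physical frame to an L1 frame, and the
host p2m maps that L1 frame to a machine frame; the shadow leaf receives
the machine frame together with the access rights accumulated along the
guest walk (the `*ar &= entry->epte & 0x7' in guest_ept_next_level(),
later applied via epte_set_ar_bits()). The tables and helpers below are
hypothetical stand-ins for that composition, not the patch's code.]

/* Stand-alone model of building a shadow EPT leaf from guest EPT + host p2m. */
#include <stdint.h>
#include <stdio.h>

#define EPT_R 0x1
#define EPT_W 0x2
#define EPT_X 0x4

struct leaf { uint64_t frame; uint32_t ar; };

/* Pretend two-level guest EPT walk: rights accumulate by AND down the walk. */
static struct leaf guest_walk(uint64_t l2_gfn)
{
    uint32_t dir_ar  = EPT_R | EPT_W | EPT_X;   /* directory entry rights */
    uint32_t leaf_ar = EPT_R | EPT_X;           /* leaf entry rights */
    struct leaf g = { .frame = l2_gfn, .ar = dir_ar & leaf_ar };
    return g;                                   /* identity map for the demo */
}

/* Pretend host p2m: L1 frame -> machine frame at a fixed offset. */
static uint64_t host_p2m(uint64_t l1_gfn)
{
    return l1_gfn + 0x100000;
}

/* The shadow leaf: machine frame from the host p2m, rights from the guest. */
static struct leaf make_shadow_leaf(uint64_t l2_gfn)
{
    struct leaf g = guest_walk(l2_gfn);
    struct leaf s = { .frame = host_p2m(g.frame), .ar = g.ar };
    return s;
}

int main(void)
{
    struct leaf s = make_shadow_leaf(0x1234);
    printf("shadow leaf: mfn=%#lx ar=%s%s%s\n", (unsigned long)s.frame,
           (s.ar & EPT_R) ? "r" : "-",
           (s.ar & EPT_W) ? "w" : "-",
           (s.ar & EPT_X) ? "x" : "-");
    return 0;
}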