[Xen-devel] [RFC PATCH 10/16]: PVH xen: introduce vmx_pvh.c
The heart of this patch is the VMX exit handler for PVH guests. It is isolated in a separate module, and a call to it is added to vmx_vmexit_handler().

Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx>

diff -r eca698a4e733 -r 0a38c610f26b xen/arch/x86/hvm/vmx/Makefile
--- a/xen/arch/x86/hvm/vmx/Makefile     Fri Jan 11 16:32:36 2013 -0800
+++ b/xen/arch/x86/hvm/vmx/Makefile     Fri Jan 11 16:34:17 2013 -0800
@@ -5,3 +5,4 @@ obj-y += vmcs.o
 obj-y += vmx.o
 obj-y += vpmu_core2.o
 obj-y += vvmx.o
+obj-y += vmx_pvh.o
diff -r eca698a4e733 -r 0a38c610f26b xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Jan 11 16:32:36 2013 -0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Fri Jan 11 16:34:17 2013 -0800
@@ -1546,7 +1546,9 @@ static struct hvm_function_table __read_
     .nhvm_intr_blocked    = nvmx_intr_blocked,
     .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
     .update_eoi_exit_bitmap = vmx_update_eoi_exit_bitmap,
-    .virtual_intr_delivery_enabled = vmx_virtual_intr_delivery_enabled
+    .virtual_intr_delivery_enabled = vmx_virtual_intr_delivery_enabled,
+    .pvh_set_vcpu_info    = vmx_pvh_set_vcpu_info,
+    .pvh_read_descriptor  = vmx_pvh_read_descriptor
 };
 
 struct hvm_function_table * __init start_vmx(void)
@@ -2280,6 +2282,14 @@ void vmx_vmexit_handler(struct cpu_user_
 
     perfc_incra(vmexits, exit_reason);
 
+    if ( is_pvh_vcpu(v) ) {
+        if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
+            return vmx_failed_vmentry(exit_reason, regs);
+
+        vmx_pvh_vmexit_handler(regs);
+        return;
+    }
+
     /* Handle the interrupt we missed before allowing any more in. */
     switch ( (uint16_t)exit_reason )
     {
diff -r eca698a4e733 -r 0a38c610f26b xen/arch/x86/hvm/vmx/vmx_pvh.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx_pvh.c    Fri Jan 11 16:34:17 2013 -0800
@@ -0,0 +1,849 @@
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/irq.h>
+#include <xen/softirq.h>
+#include <xen/domain_page.h>
+#include <xen/hypercall.h>
+#include <xen/guest_access.h>
+#include <xen/perfc.h>
+#include <asm/current.h>
+#include <asm/io.h>
+#include <asm/regs.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/types.h>
+#include <asm/debugreg.h>
+#include <asm/msr.h>
+#include <asm/spinlock.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
+#include <asm/traps.h>
+#include <asm/mem_sharing.h>
+#include <asm/hvm/emulate.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <public/sched.h>
+#include <public/hvm/ioreq.h>
+#include <asm/hvm/vpic.h>
+#include <asm/hvm/vlapic.h>
+#include <asm/x86_emulate.h>
+#include <asm/hvm/vpt.h>
+#include <public/hvm/save.h>
+#include <asm/hvm/trace.h>
+#include <asm/xenoprof.h>
+#include <asm/debugger.h>
+
+volatile int pvhdbg=0;
+#define dbgp0(...) dprintk(XENLOG_ERR, __VA_ARGS__);
+#define dbgp1(...) {(pvhdbg==1) ? printk(__VA_ARGS__):0;}
+#define dbgp2(...) {(pvhdbg==2) ? printk(__VA_ARGS__):0;}
+
+
+static void read_vmcs_selectors(struct cpu_user_regs *regs)
+{
+    regs->cs = vmr(GUEST_CS_SELECTOR);
+    regs->ss = vmr(GUEST_SS_SELECTOR);
+    regs->ds = vmr(GUEST_DS_SELECTOR);
+    regs->es = vmr(GUEST_ES_SELECTOR);
+    regs->gs = vmr(GUEST_GS_SELECTOR);
+    regs->fs = vmr(GUEST_FS_SELECTOR);
+}
+
+/* returns : 0 success */
+static noinline int vmxit_msr_read(struct cpu_user_regs *regs)
+{
+    int rc=1;
+
+    u64 msr_content = 0;
+    switch (regs->ecx)
+    {
+        case MSR_IA32_MISC_ENABLE:
+        {
+            rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
+            msr_content |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
+                           MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
+            break;
+        }
+        default:
+        {
+            /* fixme: see hvm_msr_read_intercept() */
+            rdmsrl(regs->ecx, msr_content);
+            break;
+        }
+    }
+    regs->eax = (uint32_t)msr_content;
+    regs->edx = (uint32_t)(msr_content >> 32);
+    update_guest_eip();
+    rc = 0;
+
+    dbgp1("msr read c:%lx a:%lx d:%lx RIP:%lx RSP:%lx\n", regs->ecx, regs->eax,
+          regs->edx, vmr(GUEST_RIP), vmr(GUEST_RSP));
+    return rc;
+}
+
+/* returns : 0 success */
+static noinline int vmxit_msr_write(struct cpu_user_regs *regs)
+{
+    uint64_t msr_content = (uint32_t)regs->eax | ((uint64_t)regs->edx << 32);
+    int rc=1;
+
+    dbgp1("PVH: msr write:0x%lx. eax:0x%lx edx:0x%lx\n", regs->ecx,
+          regs->eax,regs->edx);
+
+    if ( hvm_msr_write_intercept(regs->ecx, msr_content) == X86EMUL_OKAY ) {
+        update_guest_eip();
+        rc = 0;
+    }
+    return rc;
+}
+
+/* Returns: rc == 0: handled the MTF vmexit */
+static noinline int vmxit_mtf(struct cpu_user_regs *regs)
+{
+    struct vcpu *vp = current;
+    int rc=1, ss=vp->arch.hvm_vcpu.single_step;
+
+    dbgp2("\n");
+    vp->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
+    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vp->arch.hvm_vmx.exec_control);
+    vp->arch.hvm_vcpu.single_step = 0;
+
+    if ( vp->domain->debugger_attached && ss ) {
+        domain_pause_for_debugger();
+        rc = 0;
+    }
+    return rc;
+}
+
+static noinline int vmxit_int3(struct cpu_user_regs *regs)
+{
+    int ilen = get_instruction_length();
+    struct vcpu *vp = current;
+    struct hvm_trap trap_info = {
+        .vector = TRAP_int3,
+        .type = X86_EVENTTYPE_SW_EXCEPTION,
+        .error_code = HVM_DELIVER_NO_ERROR_CODE,
+        .insn_len = ilen
+    };
+
+    regs->eip += ilen;
+
+    /* gdbsx or another debugger. Never pause dom0 */
+    if ( vp->domain->domain_id != 0 && guest_kernel_mode(vp, regs) )
+    {
+        dbgp1("[%d]PVH: domain pause for debugger\n", smp_processor_id());
+        current->arch.gdbsx_vcpu_event = TRAP_int3;
+        domain_pause_for_debugger();
+        return 0;
+    }
+
+    regs->eip -= ilen;
+    hvm_inject_trap(&trap_info);
+
+    return 0;
+}
+
+/* Returns: rc == 0: handled the exception/NMI */
+static noinline int vmxit_exception(struct cpu_user_regs *regs)
+{
+    unsigned int vector = (__vmread(VM_EXIT_INTR_INFO)) & INTR_INFO_VECTOR_MASK;
+    int rc=1;
+    struct vcpu *vp = current;
+
+    dbgp2(" EXCPT: vec:%d cs:%lx r.IP:%lx\n", vector, vmr(GUEST_CS_SELECTOR),
+          regs->eip);
+
+    if (vector == TRAP_debug) {
+        unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION);
+        write_debugreg(6, exit_qualification | 0xffff0ff0);
+        regs->rip = vmr(GUEST_RIP);
+        regs->rsp = vmr(GUEST_RSP);
+        rc = 0;
+
+        /* gdbsx or another debugger */
+        if ( vp->domain->domain_id != 0 &&    /* never pause dom0 */
+             guest_kernel_mode(vp, regs) &&  vp->domain->debugger_attached )
+        {
+            domain_pause_for_debugger();
+        } else {
+            hvm_inject_hw_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE);
+        }
+    }
+    if (vector == TRAP_int3) {
+        rc = vmxit_int3(regs);
+    }
+
+    if (vector == TRAP_invalid_op) {
+        if ( guest_kernel_mode(vp, regs) || emulate_forced_invalid_op(regs)==0 )
+        {
+            hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);
+            rc = 0;
+        }
+
+    }
+    if (vector == TRAP_no_device) {
+        hvm_funcs.fpu_dirty_intercept();  /* calls vmx_fpu_dirty_intercept */
+        rc = 0;
+    }
+
+    if (vector == TRAP_gp_fault) {
+        regs->error_code = __vmread(VM_EXIT_INTR_ERROR_CODE);
+        /* hvm_inject_hw_exception(TRAP_gp_fault, regs->error_code); */
+        rc = 1;
+    }
+
+    if (vector == TRAP_page_fault) {
+        printk("PVH: Unexpected vector page_fault. IP:%lx\n", regs->eip);
+        rc = 1;
+    }
+
+    if (rc)
+        printk("PVH: Unhandled trap vector:%d. IP:%lx\n", vector, regs->eip);
IP:%lx\n", vector, regs->eip); + return rc; +} + +static noinline int vmxit_invlpg(void) +{ + ulong vaddr = __vmread(EXIT_QUALIFICATION); + + update_guest_eip(); + vpid_sync_vcpu_gva(current, vaddr); + return 0; +} + +static noinline int pvh_grant_table_op( + unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count) +{ + switch (cmd) + { + case GNTTABOP_map_grant_ref: + case GNTTABOP_unmap_grant_ref: + case GNTTABOP_setup_table: + case GNTTABOP_copy: + case GNTTABOP_query_size: + case GNTTABOP_set_version: + return do_grant_table_op(cmd, uop, count); + } + return -ENOSYS; +} + +static long pvh_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg) +{ + long rc = -ENOSYS; + + switch ( cmd ) + { + case VCPUOP_register_runstate_memory_area: + case VCPUOP_get_runstate_info: + case VCPUOP_set_periodic_timer: + case VCPUOP_stop_periodic_timer: + case VCPUOP_set_singleshot_timer: + case VCPUOP_stop_singleshot_timer: + case VCPUOP_is_up: + case VCPUOP_up: + case VCPUOP_initialise: + rc = do_vcpu_op(cmd, vcpuid, arg); + + /* pvh boot vcpu setting context for bringing up smp vcpu */ + if (cmd == VCPUOP_initialise) + vmx_vmcs_enter(current); + } + return rc; +} + +static long pvh_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg) +{ + switch ( cmd ) + { + case PHYSDEVOP_map_pirq: + case PHYSDEVOP_unmap_pirq: + case PHYSDEVOP_eoi: + case PHYSDEVOP_irq_status_query: + case PHYSDEVOP_get_free_pirq: + return do_physdev_op(cmd, arg); + + default: + if ( IS_PRIV(current->domain) ) + return do_physdev_op(cmd, arg); + } + return -ENOSYS; +} + +static noinline long do_pvh_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) +{ + long rc = -EINVAL; + struct xen_hvm_param harg; + struct domain *d; + + if ( copy_from_guest(&harg, arg, 1) ) + return -EFAULT; + + rc = rcu_lock_target_domain_by_id(harg.domid, &d); + if ( rc != 0 ) + return rc; + + if (is_hvm_domain(d)) { + /* pvh dom0 is building an hvm guest */ + rcu_unlock_domain(d); + return do_hvm_op(op, arg); + } + + rc = -ENOSYS; + if (op == HVMOP_set_param) { + if (harg.index == HVM_PARAM_CALLBACK_IRQ) { + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + uint64_t via = harg.value; + uint8_t via_type = (uint8_t)(via >> 56) + 1; + + if (via_type == HVMIRQ_callback_vector) { + hvm_irq->callback_via_type = HVMIRQ_callback_vector; + hvm_irq->callback_via.vector = (uint8_t)via; + rc = 0; + } + } + } + rcu_unlock_domain(d); + return rc; +} + +typedef unsigned long pvh_hypercall_t( + unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, + unsigned long); + +int hcall_a[NR_hypercalls]; + +static pvh_hypercall_t *pvh_hypercall64_table[NR_hypercalls] = { + [__HYPERVISOR_platform_op] = (pvh_hypercall_t *)do_platform_op, + [__HYPERVISOR_memory_op] = (pvh_hypercall_t *)do_memory_op, + /* [__HYPERVISOR_set_timer_op] = (pvh_hypercall_t *)do_set_timer_op, */ + [__HYPERVISOR_xen_version] = (pvh_hypercall_t *)do_xen_version, + [__HYPERVISOR_console_io] = (pvh_hypercall_t *)do_console_io, + [__HYPERVISOR_grant_table_op] = (pvh_hypercall_t *)pvh_grant_table_op, + [__HYPERVISOR_vcpu_op] = (pvh_hypercall_t *)pvh_vcpu_op, + [__HYPERVISOR_mmuext_op] = (pvh_hypercall_t *)do_mmuext_op, + [__HYPERVISOR_xsm_op] = (pvh_hypercall_t *)do_xsm_op, + [__HYPERVISOR_sched_op] = (pvh_hypercall_t *)do_sched_op, + [__HYPERVISOR_event_channel_op]= (pvh_hypercall_t *)do_event_channel_op, + [__HYPERVISOR_physdev_op] = (pvh_hypercall_t *)pvh_physdev_op, + [__HYPERVISOR_hvm_op] = (pvh_hypercall_t *)do_pvh_hvm_op, + [__HYPERVISOR_sysctl] = (pvh_hypercall_t *)do_sysctl, + 
+    [__HYPERVISOR_domctl]          = (pvh_hypercall_t *)do_domctl
+};
+
+/* fixme: Do we need to worry about this and slow things down in this path? */
+static int pvh_long_mode_enabled(void)
+{
+    /* A 64bit linux guest should always run in this mode with CS.L selecting
+     * either 64bit mode or 32bit compat mode */
+    return 1;
+}
+
+/* Check if hypercall is valid
+ * Returns: 0 if hcall is not valid with eax set to the errno to ret to guest
+ */
+static noinline int hcall_valid(struct cpu_user_regs *regs)
+{
+    struct segment_register sreg;
+
+    if (!pvh_long_mode_enabled()) {
+        printk("PVH Error: Expected long mode set\n");
+        return 1;
+    }
+    hvm_get_segment_register(current, x86_seg_ss, &sreg);
+    if ( unlikely(sreg.attr.fields.dpl == 3) ) {
+        regs->eax = -EPERM;
+        return 0;
+    }
+
+    /* domU's are not allowed following hcalls */
+    if ( !IS_PRIV(current->domain) &&
+         (regs->eax == __HYPERVISOR_xsm_op ||
+          regs->eax == __HYPERVISOR_platform_op ||
+          regs->eax == __HYPERVISOR_domctl) ) {      /* for privcmd mmap */
+
+        regs->eax = -EPERM;
+        return 0;
+    }
+    return 1;
+}
+
+static noinline int vmxit_vmcall(struct cpu_user_regs *regs)
+{
+    uint32_t hnum = regs->eax;
+
+    if (hnum >= NR_hypercalls || pvh_hypercall64_table[hnum] ==NULL)
+    {
+        dbgp0("PVH[%d]: UnImplemented HCALL:%ld. Ret -ENOSYS IP:%lx SP:%lx\n",
+              smp_processor_id(), regs->eax, vmr(GUEST_RIP), vmr(GUEST_RSP));
+        regs->eax = -ENOSYS;
+        update_guest_eip();
+        return HVM_HCALL_completed;
+    }
+
+    dbgp2("vmxit_vmcall: hcall eax:$%ld\n", regs->eax);
+    if (regs->eax == __HYPERVISOR_sched_op && regs->rdi == SCHEDOP_shutdown) {
+        /* PVH fixme: go thru this again to make sure nothing is left out */
+        printk("PVH: FIXME: SCHEDOP_shutdown hcall\n");
+        regs->eax = -ENOSYS;
+        update_guest_eip();
+        domain_crash_synchronous();
+        return HVM_HCALL_completed;
+    }
+
+    if ( !hcall_valid(regs) )
+        return HVM_HCALL_completed;
+
+    /* PVH fixme: search for this and do it. PV method will not work */
+    current->arch.hvm_vcpu.hcall_preempted = 0;
+
+    regs->rax = pvh_hypercall64_table[hnum](regs->rdi, regs->rsi, regs->rdx,
+                                            regs->r10, regs->r8, regs->r9);
+
+    if ( !current->arch.hvm_vcpu.hcall_preempted )
+        update_guest_eip();
+    else
+        printk("PVH: Hcall :%d preempted\n", hnum);
+
+    return HVM_HCALL_completed;
+}
+
+static noinline uint64_t *get_gpr_ptr(struct cpu_user_regs *regs, uint gpr)
+{
+    switch (gpr)
+    {
+        case VMX_CONTROL_REG_ACCESS_GPR_EAX:
+            return &regs->eax;
+        case VMX_CONTROL_REG_ACCESS_GPR_ECX:
+            return &regs->ecx;
+        case VMX_CONTROL_REG_ACCESS_GPR_EDX:
+            return &regs->edx;
+        case VMX_CONTROL_REG_ACCESS_GPR_EBX:
+            return &regs->ebx;
+        case VMX_CONTROL_REG_ACCESS_GPR_ESP:
+            return &regs->esp;
+        case VMX_CONTROL_REG_ACCESS_GPR_EBP:
+            return &regs->ebp;
+        case VMX_CONTROL_REG_ACCESS_GPR_ESI:
+            return &regs->esi;
+        case VMX_CONTROL_REG_ACCESS_GPR_EDI:
+            return &regs->edi;
+        case VMX_CONTROL_REG_ACCESS_GPR_R8:
+            return &regs->r8;
+        case VMX_CONTROL_REG_ACCESS_GPR_R9:
+            return &regs->r9;
+        case VMX_CONTROL_REG_ACCESS_GPR_R10:
+            return &regs->r10;
+        case VMX_CONTROL_REG_ACCESS_GPR_R11:
+            return &regs->r11;
+        case VMX_CONTROL_REG_ACCESS_GPR_R12:
+            return &regs->r12;
+        case VMX_CONTROL_REG_ACCESS_GPR_R13:
+            return &regs->r13;
+        case VMX_CONTROL_REG_ACCESS_GPR_R14:
+            return &regs->r14;
+        case VMX_CONTROL_REG_ACCESS_GPR_R15:
+            return &regs->r15;
+        default:
+            return NULL;
+    }
+}
+
+/* Returns: rc == 0: success */
+static noinline int access_cr0(struct cpu_user_regs *regs, uint acc_typ,
+                               uint64_t *regp)
+{
+    struct vcpu *vp = current;
+
+    if (acc_typ == VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR )
+    {
+        unsigned long new_cr0 = *regp;
+        unsigned long old_cr0 = __vmread(GUEST_CR0);
+
+        dbgp2("PVH:writing to CR0. RIP:%lx val:0x%lx\n", vmr(GUEST_RIP),*regp);
+        if ( (u32)new_cr0 != new_cr0 )
+        {
+            HVM_DBG_LOG(DBG_LEVEL_1,
+                        "Guest setting upper 32 bits in CR0: %lx", new_cr0);
+            return 1;
+        }
+
+        new_cr0 &= ~HVM_CR0_GUEST_RESERVED_BITS;
+        /* ET is reserved and should be always be 1. */
+        new_cr0 |= X86_CR0_ET;
+
+        /* pvh cannot change to real mode */
+        if ( (new_cr0 & (X86_CR0_PE|X86_CR0_PG)) != (X86_CR0_PG|X86_CR0_PE) ) {
+            printk("PVH domU attempting to turn off PE/PG. CR0:%lx\n", new_cr0);
CR0:%lx\n", new_cr0); + return 1; + } + /* TS going from 1 to 0 */ + if ( (old_cr0 & X86_CR0_TS) && ((new_cr0 & X86_CR0_TS)==0) ) + vmx_fpu_enter(vp); + + vp->arch.hvm_vcpu.hw_cr[0] = vp->arch.hvm_vcpu.guest_cr[0] = new_cr0; + __vmwrite(GUEST_CR0, new_cr0); + __vmwrite(CR0_READ_SHADOW, new_cr0); + } else { + *regp = __vmread(GUEST_CR0); + } + return 0; +} + +/* Returns: rc == 0: success */ +static noinline int access_cr4(struct cpu_user_regs *regs, uint acc_typ, + uint64_t *regp) +{ + if (acc_typ == VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR ) + { + u64 old_cr4 = __vmread(GUEST_CR4); + + if ( (old_cr4 ^ (*regp)) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) + vpid_sync_all(); + + /* pvh_verify_cr4_wr(*regp)); */ + __vmwrite(GUEST_CR4, *regp); + } else { + *regp = __vmread(GUEST_CR4); + } + return 0; +} + +/* Returns: rc == 0: success */ +static noinline int vmxit_cr_access(struct cpu_user_regs *regs) +{ + unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION); + uint acc_typ = VMX_CONTROL_REG_ACCESS_TYPE(exit_qualification); + int cr, rc = 1; + + switch ( acc_typ ) + { + case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR: + case VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR: + { + uint gpr = VMX_CONTROL_REG_ACCESS_GPR(exit_qualification); + uint64_t *regp = get_gpr_ptr(regs, gpr); + cr = VMX_CONTROL_REG_ACCESS_NUM(exit_qualification); + + if (regp == NULL) + break; + + /* pl don't embed switch statements */ + if (cr == 0) + rc = access_cr0(regs, acc_typ, regp); + else if (cr == 3) { + printk("PVH: d%d: unexpected cr3 access vmexit. rip:%lx\n", + current->domain->domain_id, vmr(GUEST_RIP)); + domain_crash_synchronous(); + } else if (cr == 4) + rc = access_cr4(regs, acc_typ, regp); + + if (rc == 0) + update_guest_eip(); + break; + } + case VMX_CONTROL_REG_ACCESS_TYPE_CLTS: + { + struct vcpu *vp = current; + unsigned long cr0 = vp->arch.hvm_vcpu.guest_cr[0] & ~X86_CR0_TS; + vp->arch.hvm_vcpu.hw_cr[0] = vp->arch.hvm_vcpu.guest_cr[0] = cr0; + vmx_fpu_enter(vp); + __vmwrite(GUEST_CR0, cr0); + __vmwrite(CR0_READ_SHADOW, cr0); + update_guest_eip(); + rc = 0; + } + } + return rc; +} + +/* NOTE: a PVH sets IOPL natively by setting bits in the eflags and not by + * hypercalls used by a PV */ +static int noinline vmxit_io_instr(struct cpu_user_regs *regs) +{ + int curr_lvl; + int requested = (regs->rflags >> 12) & 3; + + read_vmcs_selectors(regs); + curr_lvl = regs->cs & 3; + + if (requested >= curr_lvl && emulate_privileged_op(regs)) + return 0; + + hvm_inject_hw_exception(TRAP_gp_fault, regs->error_code); + return 0; +} + +static noinline int pvh_ept_handle_violation(unsigned long qualification, paddr_t gpa) +{ + unsigned long gla, gfn = gpa >> PAGE_SHIFT; + p2m_type_t p2mt; + mfn_t mfn = get_gfn_query_unlocked(current->domain, gfn, &p2mt); + + gdprintk(XENLOG_ERR, "Dom:%d EPT violation %#lx (%c%c%c/%c%c%c), " + "gpa %#"PRIpaddr", mfn %#lx, type %i.\n", + current->domain->domain_id, qualification, + (qualification & EPT_READ_VIOLATION) ? 'r' : '-', + (qualification & EPT_WRITE_VIOLATION) ? 'w' : '-', + (qualification & EPT_EXEC_VIOLATION) ? 'x' : '-', + (qualification & EPT_EFFECTIVE_READ) ? 'r' : '-', + (qualification & EPT_EFFECTIVE_WRITE) ? 'w' : '-', + (qualification & EPT_EFFECTIVE_EXEC) ? 
+             gpa, mfn_x(mfn), p2mt);
+
+    ept_walk_table(current->domain, gfn);
+
+    if ( qualification & EPT_GLA_VALID )
+    {
+        gla = __vmread(GUEST_LINEAR_ADDRESS);
+        gdprintk(XENLOG_ERR, " --- GLA %#lx\n", gla);
+    }
+
+    hvm_inject_hw_exception(TRAP_gp_fault, 0);
+    return 0;
+}
+
+static noinline void pvh_cpuid(struct cpu_user_regs *regs)
+{
+    unsigned int eax, ebx, ecx, edx;
+
+    asm volatile ( "cpuid"
+                   : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+                   : "0" (regs->eax), "2" (regs->rcx) );
+
+    regs->rax = eax; regs->rbx = ebx; regs->rcx = ecx; regs->rdx = edx;
+}
+
+void vmx_pvh_vmexit_handler(struct cpu_user_regs *regs)
+{
+    unsigned long exit_qualification;
+    unsigned int vector, exit_reason = __vmread(VM_EXIT_REASON);
+    int rc=0, ccpu = smp_processor_id();
+    struct vcpu *vp = current;
+
+    dbgp1("PVH:[%d]left VMCS exitreas:%d RIP:%lx RSP:%lx EFLAGS:%lx CR0:%lx\n",
+          ccpu, exit_reason, vmr(GUEST_RIP), vmr(GUEST_RSP), regs->rflags,
+          vmr(GUEST_CR0));
+
+    /* for guest_kernel_mode() */
+    regs->cs = vmr(GUEST_CS_SELECTOR);
+
+    switch ( (uint16_t)exit_reason )
+    {
+        case EXIT_REASON_EXCEPTION_NMI:
+        case EXIT_REASON_EXTERNAL_INTERRUPT:
+        case EXIT_REASON_MCE_DURING_VMENTRY:
+            break;
+        default:
+            local_irq_enable();
+    }
+
+    switch ( (uint16_t)exit_reason )
+    {
+        case EXIT_REASON_EXCEPTION_NMI:      /* 0 */
+            rc = vmxit_exception(regs);
+            break;
+
+        case EXIT_REASON_EXTERNAL_INTERRUPT:  /* 1 */
+        {
+            vector = __vmread(VM_EXIT_INTR_INFO);
+            vector &= INTR_INFO_VECTOR_MASK;
+            vmx_do_extint(regs);
+            break;
+        }
+
+        case EXIT_REASON_TRIPLE_FAULT:  /* 2 */
+        {
+            dbgp0("PVH:Triple Flt:[%d]exitreas:%d RIP:%lx RSP:%lx EFLAGS:%lx CR3:%lx\n",
+                  ccpu, exit_reason, vmr(GUEST_RIP), vmr(GUEST_RSP),
+                  regs->rflags, vmr(GUEST_CR3));
+
+            vp->arch.hvm_vcpu.guest_cr[3] = vp->arch.hvm_vcpu.hw_cr[3] =
+                __vmread(GUEST_CR3);
+            rc = 1;
+            break;
+        }
+        case EXIT_REASON_PENDING_VIRT_INTR:  /* 7 */
+        {
+            struct vcpu *v = current;
+            /* Disable the interrupt window. */
+            v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
+            __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+            break;
+        }
+
+        case EXIT_REASON_CPUID:      /* 10 */
+        {
+            if ( guest_kernel_mode(vp, regs) ) {
+                pv_cpuid(regs);
+
+                /* Because we are setting CR4.OSFXSR to 0, we need to disable
+                 * this because, during boot, user process "init" (which doesn't
+                 * do cpuid), will do 'pxor xmm0,xmm0' and cause #UD. For now
+                 * disable this. HVM doesn't allow setting of CR4.OSFXSR.
+                 * fixme: this and also look at CR4.OSXSAVE */
+
+                __clear_bit(X86_FEATURE_FXSR, &regs->edx);
+            } else
+                pvh_cpuid(regs);
+
+            /* fixme: investigate and fix the XSAVE/MMX/FPU stuff */
+
+            update_guest_eip();
+            dbgp2("cpuid:%ld RIP:%lx\n", regs->eax, vmr(GUEST_RIP));
+            break;
+        }
+
+        case EXIT_REASON_HLT:     /* 12 */
+        {
+            update_guest_eip();
+            hvm_hlt(regs->eflags);
+            break;
+        }
+
+        case EXIT_REASON_INVLPG:   /* 14 */
+            rc = vmxit_invlpg();
+            break;
+
+        case EXIT_REASON_RDTSC:    /* 16 */
+        {
+            rc = 1;
+            break;
+        }
+
+        case EXIT_REASON_VMCALL:   /* 18 */
+            rc = vmxit_vmcall(regs);
+            break;
+
+        case EXIT_REASON_CR_ACCESS:   /* 28 */
+            rc = vmxit_cr_access(regs);
+            break;
+
+        case EXIT_REASON_DR_ACCESS:   /* 29 */
+        {
+            exit_qualification = __vmread(EXIT_QUALIFICATION);
+            vmx_dr_access(exit_qualification, regs);
+            break;
+        }
+
+        case EXIT_REASON_IO_INSTRUCTION:
+            vmxit_io_instr(regs);
+            break;
+
+        case EXIT_REASON_MSR_READ:    /* 31 */
+            rc = vmxit_msr_read(regs);
+            break;
+
+        case EXIT_REASON_MSR_WRITE:   /* 32 */
+            rc = vmxit_msr_write(regs);
+            break;
+
+        case EXIT_REASON_MONITOR_TRAP_FLAG:  /* 37 */
+            rc = vmxit_mtf(regs);
+            break;
+
+        case EXIT_REASON_EPT_VIOLATION:
+        {
+            paddr_t gpa = __vmread(GUEST_PHYSICAL_ADDRESS);
+            exit_qualification = __vmread(EXIT_QUALIFICATION);
+            rc = pvh_ept_handle_violation(exit_qualification, gpa);
+            break;
+        }
+        default:
+            rc = 1;
+            printk("PVH: Unexpected exit reason:%d 0x%x\n", exit_reason,
+                   exit_reason);
+    }
+
+    if (rc) {
+        exit_qualification = __vmread(EXIT_QUALIFICATION);
+        printk("PVH: [%d] exit_reas:%d 0x%x qual:%ld 0x%lx cr0:0x%016lx\n",
+               ccpu, exit_reason, exit_reason, exit_qualification,
+               exit_qualification, vmr(GUEST_CR0));
+        printk("PVH: [%d] RIP:%lx RSP:%lx\n", ccpu,
+               vmr(GUEST_RIP), vmr(GUEST_RSP));
+        domain_crash_synchronous();
+    }
+}
+
+/*
+ * Sets info for non boot vcpu. VCPU 0 context is set by library which needs
+ * to be modified to send
+ * correct selectors and gs_base. For now, we use this for nonboot vcpu
+ * in which case the call somes from the kernel cpu_initialize_context().
+ */
+int vmx_pvh_set_vcpu_info(struct vcpu *v, struct vcpu_guest_context *ctxtp)
+{
+    if (v->vcpu_id == 0)
+        return 0;
+
+    vmx_vmcs_enter(v);
+    __vmwrite(GUEST_GDTR_BASE, ctxtp->u.pvh.gdtaddr);
+    __vmwrite(GUEST_GDTR_LIMIT, ctxtp->u.pvh.gdtsz);
+    __vmwrite(GUEST_GS_BASE, ctxtp->gs_base_user);
+
+    __vmwrite(GUEST_CS_SELECTOR, ctxtp->user_regs.cs);
+    __vmwrite(GUEST_DS_SELECTOR, ctxtp->user_regs.ds);
+    __vmwrite(GUEST_ES_SELECTOR, ctxtp->user_regs.es);
+    __vmwrite(GUEST_SS_SELECTOR, ctxtp->user_regs.ss);
+    __vmwrite(GUEST_GS_SELECTOR, ctxtp->user_regs.gs);
+
+    if ( vmx_add_guest_msr(MSR_SHADOW_GS_BASE) )
+        return -EINVAL;
+
+    vmx_write_guest_msr(MSR_SHADOW_GS_BASE, ctxtp->gs_base_kernel);
+
+    vmx_vmcs_exit(v);
+    return 0;
+}
+
+int vmx_pvh_read_descriptor(unsigned int sel, const struct vcpu *v,
+                            const struct cpu_user_regs *regs,
+                            unsigned long *base, unsigned long *limit,
+                            unsigned int *ar)
+{
+    unsigned int tmp_ar = 0;
+    BUG_ON(v!=current);
+    BUG_ON(!is_pvh_vcpu(v));
+
+    if (sel == (unsigned int)regs->cs) {
+        *base = vmr(GUEST_CS_BASE);
+        *limit = vmr(GUEST_CS_LIMIT);
+        tmp_ar = vmr(GUEST_CS_AR_BYTES);
+    } else if (sel == (unsigned int)regs->ds) {
+        *base = vmr(GUEST_DS_BASE);
+        *limit = vmr(GUEST_DS_LIMIT);
+        tmp_ar = vmr(GUEST_DS_AR_BYTES);
+    } else if (sel == (unsigned int)regs->ss) {
+        *base = vmr(GUEST_SS_BASE);
+        *limit = vmr(GUEST_SS_LIMIT);
+        tmp_ar = vmr(GUEST_SS_AR_BYTES);
+    } else if (sel == (unsigned int)regs->gs) {
+        *base = vmr(GUEST_GS_BASE);
+        *limit = vmr(GUEST_GS_LIMIT);
+        tmp_ar = vmr(GUEST_GS_AR_BYTES);
+    } else if (sel == (unsigned int)regs->fs) {
+        *base = vmr(GUEST_FS_BASE);
+        *limit = vmr(GUEST_FS_LIMIT);
+        tmp_ar = vmr(GUEST_FS_AR_BYTES);
+    } else if (sel == (unsigned int)regs->es) {
+        *base = vmr(GUEST_ES_BASE);
+        *limit = vmr(GUEST_ES_LIMIT);
+        tmp_ar = vmr(GUEST_ES_AR_BYTES);
+    } else {
+        printk("Unmatched segment selector:%d\n", sel);
+        return 0;
+    }
+
+    if (tmp_ar & X86_SEG_AR_CS_LM_ACTIVE) {   /* x86 mess!! */
+        *base = 0UL;
+        *limit = ~0UL;
+    }
+    /* Fixup ar so that it looks the same as in native mode */
+    *ar = (tmp_ar << 8);
+    return 1;
+}
+
diff -r eca698a4e733 -r 0a38c610f26b xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Fri Jan 11 16:32:36 2013 -0800
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Fri Jan 11 16:34:17 2013 -0800
@@ -156,11 +156,28 @@ void vmx_update_cpu_exec_control(struct
 # define VMX_CONTROL_REG_ACCESS_TYPE_LMSW       3
 /* 10:8 - general purpose register operand */
 #define VMX_CONTROL_REG_ACCESS_GPR(eq)   (((eq) >> 8) & 0xf)
+#define VMX_CONTROL_REG_ACCESS_GPR_EAX   0
+#define VMX_CONTROL_REG_ACCESS_GPR_ECX   1
+#define VMX_CONTROL_REG_ACCESS_GPR_EDX   2
+#define VMX_CONTROL_REG_ACCESS_GPR_EBX   3
+#define VMX_CONTROL_REG_ACCESS_GPR_ESP   4
+#define VMX_CONTROL_REG_ACCESS_GPR_EBP   5
+#define VMX_CONTROL_REG_ACCESS_GPR_ESI   6
+#define VMX_CONTROL_REG_ACCESS_GPR_EDI   7
+#define VMX_CONTROL_REG_ACCESS_GPR_R8    8
+#define VMX_CONTROL_REG_ACCESS_GPR_R9    9
+#define VMX_CONTROL_REG_ACCESS_GPR_R10   10
+#define VMX_CONTROL_REG_ACCESS_GPR_R11   11
+#define VMX_CONTROL_REG_ACCESS_GPR_R12   12
+#define VMX_CONTROL_REG_ACCESS_GPR_R13   13
+#define VMX_CONTROL_REG_ACCESS_GPR_R14   14
+#define VMX_CONTROL_REG_ACCESS_GPR_R15   15
 
 /*
  * Access Rights
  */
 #define X86_SEG_AR_SEG_TYPE     0xf        /* 3:0, segment type */
+#define X86_SEG_AR_SEG_TYPE_CODE (1u << 3) /* code (vs data) segment */
 #define X86_SEG_AR_DESC_TYPE    (1u << 4)  /* 4, descriptor type */
 #define X86_SEG_AR_DPL          0x60       /* 6:5, descriptor privilege level */
 #define X86_SEG_AR_SEG_PRESENT  (1u << 7)  /* 7, segment present */
@@ -420,6 +437,11 @@ void setup_ept_dump(void);
 void update_guest_eip(void);
 void vmx_dr_access(unsigned long exit_qualification,struct cpu_user_regs *regs);
 void vmx_do_extint(struct cpu_user_regs *regs);
+void vmx_pvh_vmexit_handler(struct cpu_user_regs *regs);
+int vmx_pvh_set_vcpu_info(struct vcpu *v, struct vcpu_guest_context *ctxtp);
+int vmx_pvh_read_descriptor(unsigned int sel, const struct vcpu *v,
+                            const struct cpu_user_regs *regs, unsigned long *base,
+                            unsigned long *limit, unsigned int *ar);
 
 /* EPT violation qualifications definitions */
 #define _EPT_READ_VIOLATION         0
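For context on the calling convention that vmxit_vmcall() above relies on: the patch reads the hypercall number from rax and the arguments from rdi, rsi, rdx, r10, r8 and r9, i.e. the usual 64-bit Xen hypercall ABI, but the guest enters Xen with vmcall rather than the PV syscall path. A minimal guest-side sketch of a two-argument call is below; it is illustrative only and not part of this patch, and the helper name pvh_hypercall2 is made up for the example.

    /* Illustrative sketch, not part of the patch: a 64-bit PVH guest issuing
     * a two-argument hypercall. Number in rax, arguments in rdi/rsi, return
     * value back in rax. On the hypervisor side this lands in vmxit_vmcall()
     * and is dispatched through pvh_hypercall64_table[]. */
    static inline long pvh_hypercall2(unsigned long op, unsigned long a1,
                                      unsigned long a2)
    {
        long ret;
        register unsigned long r1 asm("rdi") = a1;
        register unsigned long r2 asm("rsi") = a2;

        asm volatile ( "vmcall"
                       : "=a" (ret), "+r" (r1), "+r" (r2)
                       : "0" (op)
                       : "memory" );
        return ret;
    }

Whether the guest uses vmcall (Intel) or vmmcall (AMD) is the guest kernel's concern; this VMX-only patch handles the Intel side.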