[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH 9/16]: PVH xen: create PVH vmcs and initialization
This patch mainly contains code to create a VMCS for PVH guest, and HVM specific vcpu/domain creation code. Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx> diff -r e51e48fd1d20 -r eca698a4e733 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Fri Jan 11 16:31:33 2013 -0800 +++ b/xen/arch/x86/hvm/hvm.c Fri Jan 11 16:32:36 2013 -0800 @@ -331,6 +331,9 @@ void hvm_do_resume(struct vcpu *v) { ioreq_t *p; + if ( is_pvh_vcpu(v) ) + return; + pt_restore_timer(v); check_wakeup_from_wait(); @@ -499,6 +502,29 @@ static int hvm_print_line( return X86EMUL_OKAY; } +static noinline int hvm_pvh_dom_initialise(struct domain *d) +{ + int rc; + + if (!d->arch.hvm_domain.hap_enabled) + return -EINVAL; + + spin_lock_init(&d->arch.hvm_domain.irq_lock); + hvm_init_guest_time(d); + + hvm_init_cacheattr_region_list(d); + + if ( (rc=paging_enable(d, PG_refcounts|PG_translate|PG_external)) != 0 ) + goto fail1; + + if ( (rc = hvm_funcs.domain_initialise(d)) == 0 ) + return 0; + +fail1: + hvm_destroy_cacheattr_region_list(d); + return rc; +} + int hvm_domain_initialise(struct domain *d) { int rc; @@ -509,6 +535,8 @@ int hvm_domain_initialise(struct domain "on a non-VT/AMDV platform.\n"); return -EINVAL; } + if ( is_pvh_domain(d) ) + return hvm_pvh_dom_initialise(d); spin_lock_init(&d->arch.hvm_domain.pbuf_lock); spin_lock_init(&d->arch.hvm_domain.irq_lock); @@ -572,6 +600,11 @@ int hvm_domain_initialise(struct domain void hvm_domain_relinquish_resources(struct domain *d) { + if ( is_pvh_domain(d) ) + { + pit_deinit(d); + return; + } if ( hvm_funcs.nhvm_domain_relinquish_resources ) hvm_funcs.nhvm_domain_relinquish_resources(d); @@ -597,10 +630,16 @@ void hvm_domain_relinquish_resources(str void hvm_domain_destroy(struct domain *d) { hvm_funcs.domain_destroy(d); + hvm_destroy_cacheattr_region_list(d); + + if ( is_pvh_domain(d) ) { + printk("PVH:skippoing stuff in hvm_domain_destroy(). reexamine me\n"); + return; + } + rtc_deinit(d); stdvga_deinit(d); vioapic_deinit(d); - hvm_destroy_cacheattr_region_list(d); } static int hvm_save_tsc_adjust(struct domain *d, hvm_domain_context_t *h) @@ -1054,12 +1093,44 @@ static int __init __hvm_register_CPU_XSA } __initcall(__hvm_register_CPU_XSAVE_save_and_restore); +static noinline int hvm_pvh_vcpu_initialise(struct vcpu *v) +{ + int rc; + + if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 ) + return rc; + + softirq_tasklet_init( &v->arch.hvm_vcpu.assert_evtchn_irq_tasklet, + (void(*)(unsigned long))hvm_assert_evtchn_irq, + (unsigned long)v ); + + v->arch.hvm_vcpu.hvm_pvh.pvh_vcpu_info_mfn = INVALID_MFN; + v->arch.user_regs.eflags = 2; + v->arch.hvm_vcpu.inject_trap.vector = -1; + + if ( (rc=hvm_vcpu_cacheattr_init(v)) != 0 ) { + hvm_funcs.vcpu_destroy(v); + return rc; + } + + /* during domain shutdown: pvh_vmx_vmexit_handler->emulate_privileged_op + * -> guest_io_read -> pv_pit_handler -> handle_speaker_io -> _spin_lock + * so we call pit_init to initialize the spin lock */ + if ( v->vcpu_id == 0 ) + pit_init(v, cpu_khz); + + return 0; +} + int hvm_vcpu_initialise(struct vcpu *v) { int rc; hvm_asid_flush_vcpu(v); + if ( is_pvh_vcpu(v) ) + return hvm_pvh_vcpu_initialise(v); + if ( (rc = vlapic_init(v)) != 0 ) goto fail1; @@ -1149,7 +1220,10 @@ void hvm_vcpu_destroy(struct vcpu *v) tasklet_kill(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet); hvm_vcpu_cacheattr_destroy(v); - vlapic_destroy(v); + + if ( !is_pvh_vcpu(v) ) + vlapic_destroy(v); + hvm_funcs.vcpu_destroy(v); /* Event channel is already freed by evtchn_destroy(). */ diff -r e51e48fd1d20 -r eca698a4e733 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Jan 11 16:31:33 2013 -0800 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Fri Jan 11 16:32:36 2013 -0800 @@ -606,7 +606,7 @@ void vmx_vmcs_exit(struct vcpu *v) { /* Don't confuse vmx_do_resume (for @v or @current!) */ vmx_clear_vmcs(v); - if ( is_hvm_vcpu(current) ) + if ( is_hvm_or_pvh_vcpu(current) ) vmx_load_vmcs(current); spin_unlock(&v->arch.hvm_vmx.vmcs_lock); @@ -697,6 +697,246 @@ void vmx_vmcs_switch(struct vmcs_struct spin_unlock(&vmx->vmcs_lock); } +static noinline int pvh_construct_vmcs(struct vcpu *v) +{ + struct domain *d = v->domain; + uint16_t sysenter_cs; + unsigned long sysenter_eip; + u32 vmexit_ctl = vmx_vmexit_control; + u32 vmentry_ctl = vmx_vmentry_control; + u64 u64val = -1; + + if ( !paging_mode_hap(d) ) + { + printk("ERROR: HAP is required to run PV in HVM container\n"); + return -EINVAL; + } + + /* VMCS controls. */ + vmx_pin_based_exec_control &= ~PIN_BASED_VIRTUAL_NMIS; + __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control); + + v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control; + + /* if rdtsc exiting is turned on and it goes thru emulate_privileged_op, + * then pv_vcpu.ctrlreg must be added to pvh struct */ + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING; + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING; + + v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING | + CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); + v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG; + v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_MSR_BITMAP; + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW; + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; + + __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control); + + /* I/O access bitmap. */ + __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0)); + __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE)); + + /* MSR access bitmap. */ + if ( cpu_has_vmx_msr_bitmap ) + { + unsigned long *msr_bitmap = alloc_xenheap_page(); + int msr_type = MSR_TYPE_R | MSR_TYPE_W; + + if ( msr_bitmap == NULL ) + return -ENOMEM; + + memset(msr_bitmap, ~0, PAGE_SIZE); + v->arch.hvm_vmx.msr_bitmap = msr_bitmap; + __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap)); + + vmx_disable_intercept_for_msr(v, MSR_FS_BASE, msr_type); + vmx_disable_intercept_for_msr(v, MSR_GS_BASE, msr_type); + vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, msr_type); + vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, msr_type); + vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, msr_type); + vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE, msr_type); + + /* pure hvm doesn't do this. safe? see: long_mode_do_msr_write() */ +#if 0 + vmx_disable_intercept_for_msr(v, MSR_STAR); + vmx_disable_intercept_for_msr(v, MSR_LSTAR); + vmx_disable_intercept_for_msr(v, MSR_CSTAR); + vmx_disable_intercept_for_msr(v, MSR_SYSCALL_MASK); +#endif + } else { + printk("PVH: CPU does NOT have msr bitmap\n"); + return -EINVAL; + } + + if ( !cpu_has_vmx_vpid ) { + printk("PVH: At present VPID support is required to run PVH\n"); + return -EINVAL; + } + + v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control; + + if ( cpu_has_vmx_secondary_exec_control ) { + v->arch.hvm_vmx.secondary_exec_control &= ~0x4FF; /* turn off all */ + v->arch.hvm_vmx.secondary_exec_control |= + SECONDARY_EXEC_PAUSE_LOOP_EXITING; + v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_VPID; + + v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_EPT; + __vmwrite(SECONDARY_VM_EXEC_CONTROL, + v->arch.hvm_vmx.secondary_exec_control); + } else { + printk("PVH: NO Secondary Exec control\n"); + return -EINVAL; + } + + /* __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vcpu.asid); */ + + __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl); + + #define VM_ENTRY_LOAD_DEBUG_CTLS 0x4 + #define VM_ENTRY_LOAD_EFER 0x8000 + vmentry_ctl &= ~VM_ENTRY_LOAD_DEBUG_CTLS; + vmentry_ctl &= ~VM_ENTRY_LOAD_EFER; + vmentry_ctl &= ~VM_ENTRY_SMM; + vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR; + vmentry_ctl |= VM_ENTRY_IA32E_MODE; + __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl); + + /* MSR intercepts. */ + __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); + __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); + __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); + + /* Host data selectors. */ + __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS); + __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS); + __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS); + __vmwrite(HOST_FS_SELECTOR, 0); + __vmwrite(HOST_GS_SELECTOR, 0); + __vmwrite(HOST_FS_BASE, 0); + __vmwrite(HOST_GS_BASE, 0); + + vmx_set_host_env(v); + + /* Host control registers. */ + v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS; + __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0); + __vmwrite(HOST_CR4, mmu_cr4_features|(cpu_has_xsave ? X86_CR4_OSXSAVE : 0)); + + /* Host CS:RIP. */ + __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS); + __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler); + + /* Host SYSENTER CS:RIP. */ + rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs); + __vmwrite(HOST_SYSENTER_CS, sysenter_cs); + rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip); + __vmwrite(HOST_SYSENTER_EIP, sysenter_eip); + + __vmwrite(VM_ENTRY_INTR_INFO, 0); + + __vmwrite(CR3_TARGET_COUNT, 0); + + __vmwrite(GUEST_ACTIVITY_STATE, 0); + + /* Set default guest context values here. Some of these are then overwritten + * in vmx_pvh_set_vcpu_info() by guest itself during vcpu bringup */ + __vmwrite(GUEST_CS_BASE, 0); + __vmwrite(GUEST_CS_LIMIT, ~0u); + __vmwrite(GUEST_CS_AR_BYTES, 0xa09b); /* CS.L == 1 */ + __vmwrite(GUEST_CS_SELECTOR, 0x10); + + __vmwrite(GUEST_DS_BASE, 0); + __vmwrite(GUEST_DS_LIMIT, ~0u); + __vmwrite(GUEST_DS_AR_BYTES, 0xc093); + __vmwrite(GUEST_DS_SELECTOR, 0x18); + + __vmwrite(GUEST_SS_BASE, 0); /* use same seg as DS */ + __vmwrite(GUEST_SS_LIMIT, ~0u); + __vmwrite(GUEST_SS_AR_BYTES, 0xc093); + __vmwrite(GUEST_SS_SELECTOR, 0x18); + + __vmwrite(GUEST_ES_SELECTOR, 0); + __vmwrite(GUEST_FS_SELECTOR, 0); + __vmwrite(GUEST_GS_SELECTOR, 0); + + /* Guest segment bases. */ + __vmwrite(GUEST_ES_BASE, 0); + __vmwrite(GUEST_FS_BASE, 0); + __vmwrite(GUEST_GS_BASE, 0); + + /* Guest segment limits. */ + __vmwrite(GUEST_ES_LIMIT, ~0u); + __vmwrite(GUEST_FS_LIMIT, ~0u); + __vmwrite(GUEST_GS_LIMIT, ~0u); + + /* Guest segment AR bytes. */ + __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */ + __vmwrite(GUEST_FS_AR_BYTES, 0xc093); + __vmwrite(GUEST_GS_AR_BYTES, 0xc093); + + /* Guest IDT. */ + __vmwrite(GUEST_GDTR_BASE, 0); + __vmwrite(GUEST_GDTR_LIMIT, 0); + + /* Guest LDT. */ + __vmwrite(GUEST_LDTR_AR_BYTES, 0x82); /* LDT */ + __vmwrite(GUEST_LDTR_SELECTOR, 0); + __vmwrite(GUEST_LDTR_BASE, 0); + __vmwrite(GUEST_LDTR_LIMIT, 0); + + /* Guest TSS. */ + __vmwrite(GUEST_TR_AR_BYTES, 0x8b); /* 32-bit TSS (busy) */ + __vmwrite(GUEST_TR_BASE, 0); + __vmwrite(GUEST_TR_LIMIT, 0xff); + + __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); + __vmwrite(GUEST_DR7, 0); + __vmwrite(VMCS_LINK_POINTER, ~0UL); + + __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); + __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0); + + v->arch.hvm_vmx.exception_bitmap = + HVM_TRAP_MASK | (1 << TRAP_debug) | + (1U << TRAP_int3) | (1U << TRAP_no_device); + __vmwrite(EXCEPTION_BITMAP, v->arch.hvm_vmx.exception_bitmap); + + __vmwrite(TSC_OFFSET, 0); + + /* Set WP bit so rdonly pages are not written from CPL 0 */ + u64val = X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP; + __vmwrite(GUEST_CR0, u64val); + __vmwrite(CR0_READ_SHADOW, u64val); + v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] = u64val; + + u64val = X86_CR4_PAE | X86_CR4_VMXE; + __vmwrite(GUEST_CR4, u64val); + __vmwrite(CR4_READ_SHADOW, u64val); + v->arch.hvm_vcpu.guest_cr[4] = u64val; + + __vmwrite(CR0_GUEST_HOST_MASK, ~0UL); + __vmwrite(CR4_GUEST_HOST_MASK, ~0UL); + + v->arch.hvm_vmx.vmx_realmode = 0; + + __vmwrite(EPT_POINTER, d->arch.hvm_domain.vmx.ept_control.eptp); + + if ( cpu_has_vmx_pat ) + { + u64 host_pat, guest_pat; + + rdmsrl(MSR_IA32_CR_PAT, host_pat); + guest_pat = MSR_IA32_CR_PAT_RESET; + + __vmwrite(HOST_PAT, host_pat); + __vmwrite(GUEST_PAT, guest_pat); + } + return 0; +} + static int construct_vmcs(struct vcpu *v) { struct domain *d = v->domain; @@ -707,6 +947,12 @@ static int construct_vmcs(struct vcpu *v vmx_vmcs_enter(v); + if ( is_pvh_vcpu(v) ) { + int rc = pvh_construct_vmcs(v); + vmx_vmcs_exit(v); + return rc; + } + /* VMCS controls. */ __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control); @@ -1143,8 +1389,10 @@ void vmx_do_resume(struct vcpu *v) vmx_clear_vmcs(v); vmx_load_vmcs(v); - hvm_migrate_timers(v); - hvm_migrate_pirqs(v); + if ( !is_pvh_vcpu(v) ) { + hvm_migrate_timers(v); + hvm_migrate_pirqs(v); + } vmx_set_host_env(v); /* * Both n1 VMCS and n2 VMCS need to update the host environment after @@ -1156,6 +1404,9 @@ void vmx_do_resume(struct vcpu *v) hvm_asid_flush_vcpu(v); } + if ( is_pvh_vcpu(v) ) + goto skip_inteLdbgr; /* PVH supports gdbsx and gdb inside PVH */ + debug_state = v->domain->debugger_attached || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_INT3] || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP]; @@ -1166,6 +1417,7 @@ void vmx_do_resume(struct vcpu *v) vmx_update_debug_state(v); } +skip_inteLdbgr: hvm_do_resume(v); reset_stack_and_jump(vmx_asm_do_vmentry); } @@ -1331,7 +1583,7 @@ static void vmcs_dump(unsigned char ch) for_each_domain ( d ) { - if ( !is_hvm_domain(d) ) + if ( !is_hvm_or_pvh_domain(d) ) continue; printk("\n>>> Domain %d <<<\n", d->domain_id); for_each_vcpu ( d, v ) diff -r e51e48fd1d20 -r eca698a4e733 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Jan 11 16:31:33 2013 -0800 +++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Jan 11 16:32:36 2013 -0800 @@ -92,6 +92,9 @@ static int vmx_domain_initialise(struct if ( !zalloc_cpumask_var(&d->arch.hvm_domain.vmx.ept_synced) ) return -ENOMEM; + if ( is_pvh_domain(d) ) + return 0; + if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 ) { free_cpumask_var(d->arch.hvm_domain.vmx.ept_synced); @@ -106,6 +109,10 @@ static void vmx_domain_destroy(struct do if ( paging_mode_hap(d) ) on_each_cpu(__ept_sync_domain, d, 1); free_cpumask_var(d->arch.hvm_domain.vmx.ept_synced); + + if ( is_pvh_domain(d) ) + return; + vmx_free_vlapic_mapping(d); } @@ -129,6 +136,12 @@ static int vmx_vcpu_initialise(struct vc vpmu_initialise(v); + if (is_pvh_vcpu(v) ) + { + /* this for hvm_long_mode_enabled(v) */ + v->arch.hvm_vcpu.guest_efer = EFER_SCE | EFER_LMA | EFER_LME; + return 0; + } vmx_install_vlapic_mapping(v); /* %eax == 1 signals full real-mode support to the guest loader. */ @@ -1051,6 +1064,23 @@ static void vmx_update_host_cr3(struct v vmx_vmcs_exit(v); } +static noinline void vmx_update_pvh_cr(struct vcpu *v, unsigned int cr) +{ + vmx_vmcs_enter(v); + switch ( cr ) + { + case 3: + __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.guest_cr[3]); + hvm_asid_flush_vcpu(v); + break; + + default: + printk("pvh: d%d v%d unexpected cr%d rip:%lx update\n", + v->domain->domain_id, v->vcpu_id, cr, __vmread(GUEST_RIP)); + } + vmx_vmcs_exit(v); +} + void vmx_update_debug_state(struct vcpu *v) { unsigned long mask; @@ -1070,6 +1100,11 @@ void vmx_update_debug_state(struct vcpu static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) { + if ( is_pvh_vcpu(v) ) { + vmx_update_pvh_cr(v, cr); + return; + } + vmx_vmcs_enter(v); switch ( cr ) _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |