TODO: remove //temp-s

There are a few intentional but not necessarily obvious (and possibly
subtle) adjustments to behavior:
- VMX now marks all write access faults also as read accesses (whether
  the hardware would do so for read-modify-write instructions depends
  on CPU model and the particular instruction)
- SVM no longer marks instruction fetch faults as read accesses
- __hvmemul_read() no longer blindly bails on instruction fetches
  matching the MMIO GVA (the callers of handle_mmio_with_translation()
  now control the behavior via the NPFEC_* flags they pass, and it
  didn't seem right to bail here rather than just falling through to
  the unaccelerated path)

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -462,6 +462,7 @@ static int __hvmemul_read(
     struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
     paddr_t gpa;
     int rc;
+static unsigned long tot, cnt, thr, xlat;//temp
 
     rc = hvmemul_virtual_to_linear(
         seg, offset, bytes, &reps, access_type, hvmemul_ctxt, &addr);
@@ -481,11 +482,16 @@ static int __hvmemul_read(
         while ( off & (chunk - 1) )
             chunk >>= 1;
 
-    if ( unlikely(vio->mmio_gva == (addr & PAGE_MASK)) && vio->mmio_gva )
+++tot;//temp
+    if ( (vio->mmio_access & (access_type != hvm_access_insn_fetch ?
+                              NPFEC_read_access : NPFEC_insn_fetch)) &&
+         (vio->mmio_gva == (addr & PAGE_MASK)) )
     {
-        if ( access_type == hvm_access_insn_fetch )
-            return X86EMUL_UNHANDLEABLE;
         gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off);
+if(++cnt > thr) {//temp
+ thr |= cnt;
+ printk("read %lx/%lx/%lx [%pv:%lx:%x]\n", cnt, tot, xlat, curr, gpa, bytes);
+}
         while ( (off + chunk) <= PAGE_SIZE )
         {
             rc = hvmemul_do_mmio(gpa, &reps, chunk, 0, IOREQ_READ, 0, p_data);
@@ -522,6 +528,7 @@ static int __hvmemul_read(
                                     hvmemul_ctxt);
         while ( rc == X86EMUL_OKAY )
         {
+++xlat;//temp
             rc = hvmemul_do_mmio(gpa, &reps, chunk, 0, IOREQ_READ, 0, p_data);
             if ( rc != X86EMUL_OKAY || bytes == chunk )
                 break;
@@ -535,6 +542,7 @@ static int __hvmemul_read(
                 gpa += chunk;
             else
             {
+++xlat;//temp
                 rc = hvmemul_linear_to_phys(addr, &gpa, chunk, &reps, pfec,
                                             hvmemul_ctxt);
                 off = 0;
@@ -611,6 +619,7 @@ static int hvmemul_write(
     struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
     paddr_t gpa;
     int rc;
+static unsigned long tot, cnt, thr, xlat;//temp
 
     rc = hvmemul_virtual_to_linear(
         seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr);
@@ -624,9 +633,15 @@ static int hvmemul_write(
         while ( off & (chunk - 1) )
             chunk >>= 1;
 
-    if ( unlikely(vio->mmio_gva == (addr & PAGE_MASK)) && vio->mmio_gva )
+++tot;//temp
+    if ( (vio->mmio_access & NPFEC_write_access) &&
+         (vio->mmio_gva == (addr & PAGE_MASK)) )
     {
         gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off);
+if(++cnt > thr) {//temp
+ thr |= cnt;
+ printk("write %lx/%lx/%lx [%pv:%lx:%x]\n", cnt, tot, xlat, curr, gpa, bytes);
+}
         while ( (off + chunk) <= PAGE_SIZE )
         {
             rc = hvmemul_do_mmio(gpa, &reps, chunk, 0, IOREQ_WRITE, 0, p_data);
@@ -655,6 +670,7 @@ static int hvmemul_write(
     case HVMCOPY_bad_gva_to_gfn:
         return X86EMUL_EXCEPTION;
     case HVMCOPY_bad_gfn_to_mfn:
+++xlat;//temp
         rc = hvmemul_linear_to_phys(addr, &gpa, chunk, &reps, pfec,
                                     hvmemul_ctxt);
         while ( rc == X86EMUL_OKAY )
@@ -672,6 +688,7 @@ static int hvmemul_write(
                 gpa += chunk;
             else
             {
+++xlat;//temp
                 rc = hvmemul_linear_to_phys(addr, &gpa, chunk, &reps, pfec,
                                             hvmemul_ctxt);
                 off = 0;
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2684,11 +2684,8 @@ void hvm_inject_page_fault(int errcode,
 }
 
 int hvm_hap_nested_page_fault(paddr_t gpa,
-                              bool_t gla_valid,
-                              unsigned long gla,
-                              bool_t access_r,
-                              bool_t access_w,
-                              bool_t access_x)
+                              unsigned int npfec,
+                              unsigned long gla)
 {
     unsigned long gfn = gpa >> PAGE_SHIFT;
     p2m_type_t p2mt;
@@ -2718,7 +2715,9 @@ int hvm_hap_nested_page_fault(paddr_t gp
          * the same as for shadow paging.
          */
         rv = nestedhvm_hap_nested_page_fault(v, &gpa,
-                                             access_r, access_w, access_x);
+                                             !!(npfec & NPFEC_read_access),
+                                             !!(npfec & NPFEC_write_access),
+                                             !!(npfec & NPFEC_insn_fetch));
         switch (rv) {
         case NESTEDHVM_PAGEFAULT_DONE:
         case NESTEDHVM_PAGEFAULT_RETRY:
@@ -2749,7 +2748,7 @@ int hvm_hap_nested_page_fault(paddr_t gp
          && is_hvm_vcpu(v)
          && hvm_mmio_internal(gpa) )
     {
-        if ( !handle_mmio() )
+        if ( !handle_mmio_with_translation(gla, gpa >> PAGE_SHIFT, npfec) )
             hvm_inject_hw_exception(TRAP_gp_fault, 0);
         rc = 1;
         goto out;
@@ -2757,7 +2756,9 @@ int hvm_hap_nested_page_fault(paddr_t gp
 
     p2m = p2m_get_hostp2m(v->domain);
     mfn = get_gfn_type_access(p2m, gfn, &p2mt, &p2ma,
-                              P2M_ALLOC | (access_w ? P2M_UNSHARE : 0), NULL);
+                              P2M_ALLOC | (npfec & NPFEC_write_access ?
+                                           P2M_UNSHARE : 0),
+                              NULL);
 
     /* Check access permissions first, then handle faults */
     if ( mfn_x(mfn) != INVALID_MFN )
@@ -2769,26 +2770,27 @@ int hvm_hap_nested_page_fault(paddr_t gp
         case p2m_access_n:
         case p2m_access_n2rwx:
         default:
-            violation = access_r || access_w || access_x;
+            violation = !!(npfec & (NPFEC_read_access | NPFEC_write_access |
+                                    NPFEC_insn_fetch));
            break;
         case p2m_access_r:
-            violation = access_w || access_x;
+            violation = !!(npfec & (NPFEC_write_access | NPFEC_insn_fetch));
            break;
         case p2m_access_w:
-            violation = access_r || access_x;
+            violation = !!(npfec & (NPFEC_read_access | NPFEC_insn_fetch));
            break;
         case p2m_access_x:
-            violation = access_r || access_w;
+            violation = !!(npfec & (NPFEC_read_access | NPFEC_write_access));
            break;
         case p2m_access_rx:
         case p2m_access_rx2rw:
-            violation = access_w;
+            violation = !!(npfec & NPFEC_write_access);
            break;
         case p2m_access_wx:
-            violation = access_r;
+            violation = !!(npfec & NPFEC_read_access);
            break;
         case p2m_access_rw:
-            violation = access_x;
+            violation = !!(npfec & NPFEC_insn_fetch);
            break;
         case p2m_access_rwx:
            break;
@@ -2796,8 +2798,11 @@ int hvm_hap_nested_page_fault(paddr_t gp
 
         if ( violation )
         {
-            if ( p2m_mem_access_check(gpa, gla_valid, gla, access_r,
-                                      access_w, access_x, &req_ptr) )
+            if ( p2m_mem_access_check(gpa, !!(npfec & NPFEC_gla_valid), gla,
+                                      !!(npfec & NPFEC_read_access),
+                                      !!(npfec & NPFEC_write_access),
+                                      !!(npfec & NPFEC_insn_fetch),
+                                      &req_ptr) )
             {
                 fall_through = 1;
             } else {
@@ -2813,7 +2818,7 @@ int hvm_hap_nested_page_fault(paddr_t gp
      * to the mmio handler.
      */
     if ( (p2mt == p2m_mmio_dm) ||
-         (access_w && (p2mt == p2m_ram_ro)) )
+         ((npfec & NPFEC_write_access) && (p2mt == p2m_ram_ro)) )
     {
         put_gfn(p2m->domain, gfn);
 
@@ -2821,7 +2826,7 @@ int hvm_hap_nested_page_fault(paddr_t gp
         if ( unlikely(is_pvh_vcpu(v)) )
             goto out;
 
-        if ( !handle_mmio() )
+        if ( !handle_mmio_with_translation(gla, gpa >> PAGE_SHIFT, npfec) )
             hvm_inject_hw_exception(TRAP_gp_fault, 0);
         rc = 1;
         goto out;
@@ -2832,7 +2837,7 @@ int hvm_hap_nested_page_fault(paddr_t gp
         paged = 1;
 
     /* Mem sharing: unshare the page and try again */
-    if ( access_w && (p2mt == p2m_ram_shared) )
+    if ( (npfec & NPFEC_write_access) && (p2mt == p2m_ram_shared) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
         sharing_enomem =
@@ -2849,7 +2854,7 @@ int hvm_hap_nested_page_fault(paddr_t gp
      * a large page, we do not change other pages type within that large
      * page.
      */
-    if ( access_w )
+    if ( npfec & NPFEC_write_access )
    {
         paging_mark_dirty(v->domain, mfn_x(mfn));
         p2m_change_type_one(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
@@ -2859,7 +2864,7 @@ int hvm_hap_nested_page_fault(paddr_t gp
     }
 
     /* Shouldn't happen: Maybe the guest was writing to a r/o grant mapping? */
-    if ( access_w && (p2mt == p2m_grant_map_ro) )
+    if ( (npfec & NPFEC_write_access) && (p2mt == p2m_grant_map_ro) )
     {
         gdprintk(XENLOG_WARNING,
                  "trying to write to read-only grant mapping\n");
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -95,7 +95,7 @@ int handle_mmio(void)
     if ( vio->io_state == HVMIO_awaiting_completion )
         vio->io_state = HVMIO_handle_mmio_awaiting_completion;
     else
-        vio->mmio_gva = 0;
+        vio->mmio_access = 0;
 
     switch ( rc )
     {
@@ -124,9 +124,12 @@ int handle_mmio(void)
     return 1;
 }
 
-int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn)
+int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn,
+                                 unsigned int access)
 {
     struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
+
+    vio->mmio_access = access & NPFEC_gla_translation ? access : 0;
     vio->mmio_gva = gva & PAGE_MASK;
     vio->mmio_gpfn = gpfn;
     return handle_mmio();
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1396,18 +1396,13 @@ const struct hvm_function_table * __init
 static void svm_do_nested_pgfault(struct vcpu *v,
     struct cpu_user_regs *regs, uint32_t npfec, paddr_t gpa)
 {
-    int ret;
+    unsigned int hapec;
     unsigned long gfn = gpa >> PAGE_SHIFT;
     mfn_t mfn;
     p2m_type_t p2mt;
     p2m_access_t p2ma;
     struct p2m_domain *p2m = NULL;
 
-    ret = hvm_hap_nested_page_fault(gpa, 0, ~0ul,
-                                    1, /* All NPFs count as reads */
-                                    npfec & PFEC_write_access,
-                                    npfec & PFEC_insn_fetch);
-
     if ( tb_init_done )
     {
         struct {
@@ -1426,7 +1421,12 @@ static void svm_do_nested_pgfault(struct
         __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d);
     }
 
-    switch (ret) {
+    hapec = npfec & PFEC_insn_fetch ? NPFEC_insn_fetch : NPFEC_read_access;
+    if ( npfec & PFEC_write_access )
+        hapec |= NPFEC_write_access;
+
+    switch ( hvm_hap_nested_page_fault(gpa, hapec, ~0ul) )
+    {
     case 0:
         break;
     case 1:
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2351,7 +2351,7 @@ static void ept_handle_violation(unsigne
     unsigned long gla, gfn = gpa >> PAGE_SHIFT;
     mfn_t mfn;
     p2m_type_t p2mt;
-    int ret;
+    unsigned int npfec = 0;
     struct domain *d = current->domain;
 
     if ( tb_init_done )
@@ -2370,16 +2370,23 @@ static void ept_handle_violation(unsigne
         __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d);
     }
 
+    if ( qualification & EPT_WRITE_VIOLATION )
+        npfec |= NPFEC_read_access | NPFEC_write_access;
+    else if ( qualification & EPT_READ_VIOLATION )
+        npfec |= NPFEC_read_access;
+    if ( qualification & EPT_EXEC_VIOLATION )
+        npfec |= NPFEC_insn_fetch;
     if ( qualification & EPT_GLA_VALID )
+    {
         __vmread(GUEST_LINEAR_ADDRESS, &gla);
+        npfec |= NPFEC_gla_valid;
+        if ( qualification & EPT_GLA_FAULT )
+            npfec |= NPFEC_gla_translation;
+    }
     else
         gla = ~0ull;
-    ret = hvm_hap_nested_page_fault(gpa,
-                                    !!(qualification & EPT_GLA_VALID), gla,
-                                    !!(qualification & EPT_READ_VIOLATION),
-                                    !!(qualification & EPT_WRITE_VIOLATION),
-                                    !!(qualification & EPT_EXEC_VIOLATION));
-    switch ( ret )
+
+    switch ( hvm_hap_nested_page_fault(gpa, npfec, gla) )
     {
     case 0:         // Unhandled L1 EPT violation
         break;
@@ -2412,7 +2419,8 @@ static void ept_handle_violation(unsigne
 
     ept_walk_table(d, gfn);
     if ( qualification & EPT_GLA_VALID )
-        gdprintk(XENLOG_ERR, " --- GLA %#lx\n", gla);
+        gdprintk(XENLOG_ERR, " --- GLA %#lx (%s)\n", gla,
+                 qualification & EPT_GLA_FAULT ? "translation" : "walk");
 
     domain_crash(d);
 }
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -2824,6 +2824,8 @@ static int sh_page_fault(struct vcpu *v,
     p2m_type_t p2mt;
     uint32_t rc;
     int version;
+    unsigned int npfec = NPFEC_read_access | NPFEC_gla_valid |
+                         NPFEC_gla_translation;
 #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
     int fast_emul = 0;
 #endif
@@ -2834,6 +2836,9 @@ static int sh_page_fault(struct vcpu *v,
 
     perfc_incr(shadow_fault);
 
+    if ( regs->error_code & PFEC_write_access )
+        npfec |= NPFEC_write_access;
+
 #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
     /* If faulting frame is successfully emulated in last shadow fault
      * it's highly likely to reach same emulation action for this frame.
@@ -2935,7 +2940,7 @@ static int sh_page_fault(struct vcpu *v,
             SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
             reset_early_unshadow(v);
             trace_shadow_gen(TRC_SHADOW_FAST_MMIO, va);
-            return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
+            return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT, npfec)
                     ? EXCRET_fault_fixed : 0);
         }
         else
@@ -3424,7 +3429,7 @@ static int sh_page_fault(struct vcpu *v,
     paging_unlock(d);
     put_gfn(d, gfn_x(gfn));
     trace_shadow_gen(TRC_SHADOW_MMIO, va);
-    return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
+    return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT, npfec)
             ? EXCRET_fault_fixed : 0);
 
 not_a_shadow_fault:
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -455,11 +455,14 @@ static inline void hvm_invalidate_regs_f
 #endif
 }
 
-int hvm_hap_nested_page_fault(paddr_t gpa,
-                              bool_t gla_valid, unsigned long gla,
-                              bool_t access_r,
-                              bool_t access_w,
-                              bool_t access_x);
+int hvm_hap_nested_page_fault(paddr_t gpa, unsigned int npfec,
+                              unsigned long gla);
+
+#define NPFEC_read_access     (1U << 0)
+#define NPFEC_write_access    (1U << 1)
+#define NPFEC_insn_fetch      (1U << 2)
+#define NPFEC_gla_valid       (1U << 3)
+#define NPFEC_gla_translation (1U << 4)
 
 #define hvm_msr_tsc_aux(v) ({                                               \
     struct domain *__d = (v)->domain;                                       \
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -119,7 +119,8 @@ static inline void register_buffered_io_
 void send_timeoffset_req(unsigned long timeoff);
 void send_invalidate_req(void);
 int handle_mmio(void);
-int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn);
+int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn,
+                                 unsigned int access);
 int handle_pio(uint16_t port, unsigned int size, int dir);
 void hvm_interrupt_post(struct vcpu *v, int vector, int type);
 void hvm_io_assist(ioreq_t *p);
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -54,8 +54,9 @@ struct hvm_vcpu_io {
     /*
      * HVM emulation:
      *  Virtual address @mmio_gva maps to MMIO physical frame @mmio_gpfn.
      *  The latter is known to be an MMIO frame (not RAM).
-     *  This translation is only valid if @mmio_gva is non-zero.
+     *  This translation is only valid for accesses as per @mmio_access.
      */
+    unsigned int mmio_access; /* using NPFEC_* values */
     unsigned long mmio_gva;
     unsigned long mmio_gpfn;
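
As a reading aid only (not part of the patch), below is a small stand-alone
sketch of the two ideas above: folding an EPT violation exit qualification
into the NPFEC_* flags, and gating the cached GVA->GPFN MMIO translation on
the access rights recorded when the cache entry was created. The NPFEC_*
values are the ones defined in the hvm.h hunk; the EPT_* bit positions are
assumptions made for this example, and npfec_from_ept() /
may_use_mmio_cache() are illustrative names, not functions in the tree.

/*
 * Illustrative sketch -- not hypervisor code.  NPFEC_* values match the
 * hvm.h hunk above; the EPT_* bit positions are assumed here, and both
 * helper functions are hypothetical names used only for this example.
 */
#include <stdio.h>

#define NPFEC_read_access     (1U << 0)
#define NPFEC_write_access    (1U << 1)
#define NPFEC_insn_fetch      (1U << 2)
#define NPFEC_gla_valid       (1U << 3)
#define NPFEC_gla_translation (1U << 4)

/* Assumed EPT exit qualification bits (for illustration only). */
#define EPT_READ_VIOLATION    (1UL << 0)
#define EPT_WRITE_VIOLATION   (1UL << 1)
#define EPT_EXEC_VIOLATION    (1UL << 2)
#define EPT_GLA_VALID         (1UL << 7)
#define EPT_GLA_FAULT         (1UL << 8)

/* Mirrors the flag derivation added to ept_handle_violation(). */
static unsigned int npfec_from_ept(unsigned long qualification)
{
    unsigned int npfec = 0;

    /* Write faults are also marked as reads, per the note at the top. */
    if ( qualification & EPT_WRITE_VIOLATION )
        npfec |= NPFEC_read_access | NPFEC_write_access;
    else if ( qualification & EPT_READ_VIOLATION )
        npfec |= NPFEC_read_access;
    if ( qualification & EPT_EXEC_VIOLATION )
        npfec |= NPFEC_insn_fetch;
    if ( qualification & EPT_GLA_VALID )
    {
        npfec |= NPFEC_gla_valid;
        /* Set when the fault was on the access to the translated address,
         * clear when it occurred during the guest page-table walk. */
        if ( qualification & EPT_GLA_FAULT )
            npfec |= NPFEC_gla_translation;
    }

    return npfec;
}

/*
 * Mirrors the gating added to __hvmemul_read()/hvmemul_write(): the cached
 * GVA->GPFN translation may only be used when the current access type is
 * among the access rights recorded in mmio_access.
 */
static int may_use_mmio_cache(unsigned int mmio_access, unsigned int needed)
{
    return (mmio_access & needed) != 0;
}

int main(void)
{
    /* A data write through a valid linear address (not a page-walk access). */
    unsigned long qual = EPT_WRITE_VIOLATION | EPT_GLA_VALID | EPT_GLA_FAULT;
    unsigned int npfec = npfec_from_ept(qual);

    printf("npfec = %#x\n", npfec);
    printf("cache usable for write: %d\n",
           may_use_mmio_cache(npfec, NPFEC_write_access));
    printf("cache usable for insn fetch: %d\n",
           may_use_mmio_cache(npfec, NPFEC_insn_fetch));
    return 0;
}

For the sample qualification this prints npfec = 0x1b, i.e. the resulting
cache entry would be usable for reads and writes but not for instruction
fetches, matching the new checks in __hvmemul_read() and hvmemul_write().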