[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH 9/9] x86emul+VMX: support {RD,WR}MSRLIST
On 04.04.2023 16:55, Jan Beulich wrote: > These are "compound" instructions to issue a series of RDMSR / WRMSR > respectively. In the emulator we can therefore implement them by using > the existing msr_{read,write}() hooks. The memory accesses utilize that > the HVM ->read() / ->write() hooks are already linear-address > (x86_seg_none) aware (by way of hvmemul_virtual_to_linear() handling > this case). > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> > --- > TODO: Use VMX tertiary execution control (once bit is known; see > //todo-s) and then further adjust cpufeatureset.h. Argh, should have Cc-ed Kevin and Jun, even if there weren't this issue. Jan > RFC: In vmx_vmexit_handler() handling is forwarded to the emulator > blindly. Alternatively we could consult the exit qualification and > process just a single MSR at a time (without involving the > emulator), exiting back to the guest after every iteration. (I > don't think a mix of both models makes a lot of sense.) > > RFC: For PV priv_op_ops would need to gain proper read/write hooks, > which doesn't look desirable (albeit there we could refuse to > handle anything else than x86_seg_none); we may want to consider to > instead not support the feature for PV guests, requiring e.g. Linux > to process the lists in new pvops hooks. > > RFC: I wasn't sure whether to add preemption checks to the loops - > thoughts? > > With the VMX side of the spec still unclear (tertiary execution control > bit unspecified in ISE 046) we can't enable the insn yet for (HVM) guest > use. The precise behavior of MSR_BARRIER is also not spelled out, so the > (minimal) implementation is a guess for now. 
> > --- a/tools/libs/light/libxl_cpuid.c > +++ b/tools/libs/light/libxl_cpuid.c > @@ -240,6 +240,7 @@ int libxl_cpuid_parse_config(libxl_cpuid > {"lkgs", 0x00000007, 1, CPUID_REG_EAX, 18, 1}, > {"wrmsrns", 0x00000007, 1, CPUID_REG_EAX, 19, 1}, > {"avx-ifma", 0x00000007, 1, CPUID_REG_EAX, 23, 1}, > + {"msrlist", 0x00000007, 1, CPUID_REG_EAX, 27, 1}, > > {"avx-vnni-int8",0x00000007, 1, CPUID_REG_EDX, 4, 1}, > {"avx-ne-convert",0x00000007, 1, CPUID_REG_EDX, 5, 1}, > --- a/tools/misc/xen-cpuid.c > +++ b/tools/misc/xen-cpuid.c > @@ -195,6 +195,8 @@ static const char *const str_7a1[32] = > [18] = "lkgs", [19] = "wrmsrns", > > /* 22 */ [23] = "avx-ifma", > + > + /* 26 */ [27] = "msrlist", > }; > > static const char *const str_e21a[32] = > --- a/tools/tests/x86_emulator/predicates.c > +++ b/tools/tests/x86_emulator/predicates.c > @@ -342,6 +342,8 @@ static const struct { > { { 0x01, 0xc4 }, { 2, 2 }, F, N }, /* vmxoff */ > { { 0x01, 0xc5 }, { 2, 2 }, F, N }, /* pconfig */ > { { 0x01, 0xc6 }, { 2, 2 }, F, N }, /* wrmsrns */ > + { { 0x01, 0xc6 }, { 0, 2 }, F, W, pfx_f2 }, /* rdmsrlist */ > + { { 0x01, 0xc6 }, { 0, 2 }, F, R, pfx_f3 }, /* wrmsrlist */ > { { 0x01, 0xc8 }, { 2, 2 }, F, N }, /* monitor */ > { { 0x01, 0xc9 }, { 2, 2 }, F, N }, /* mwait */ > { { 0x01, 0xca }, { 2, 2 }, F, N }, /* clac */ > --- a/tools/tests/x86_emulator/test_x86_emulator.c > +++ b/tools/tests/x86_emulator/test_x86_emulator.c > @@ -589,6 +589,7 @@ static int read( > default: > if ( !is_x86_user_segment(seg) ) > return X86EMUL_UNHANDLEABLE; > + case x86_seg_none: > bytes_read += bytes; > break; > } > @@ -619,7 +620,7 @@ static int write( > if ( verbose ) > printf("** %s(%u, %p,, %u,)\n", __func__, seg, (void *)offset, > bytes); > > - if ( !is_x86_user_segment(seg) ) > + if ( !is_x86_user_segment(seg) && seg != x86_seg_none ) > return X86EMUL_UNHANDLEABLE; > memcpy((void *)offset, p_data, bytes); > return X86EMUL_OKAY; > @@ -711,6 +712,10 @@ static int read_msr( > { > switch ( reg ) > { > + case 
0x0000002f: /* BARRIER */ > + *val = 0; > + return X86EMUL_OKAY; > + > case 0xc0000080: /* EFER */ > *val = ctxt->addr_size > 32 ? 0x500 /* LME|LMA */ : 0; > return X86EMUL_OKAY; > @@ -1499,9 +1504,53 @@ int main(int argc, char **argv) > (gs_base != 0x0000111122224444UL) || > gs_base_shadow ) > goto fail; > + printf("okay\n"); > > cp.extd.nscb = i; > emulops.write_segment = NULL; > + > + printf("%-40s", "Testing rdmsrlist..."); > + instr[0] = 0xf2; instr[1] = 0x0f; instr[2] = 0x01; instr[3] = 0xc6; > + regs.rip = (unsigned long)&instr[0]; > + regs.rsi = (unsigned long)(res + 0x80); > + regs.rdi = (unsigned long)(res + 0x80 + 0x40 * 2); > + regs.rcx = 0x0002000100008000UL; > + gs_base_shadow = 0x0000222244446666UL; > + memset(res + 0x80, ~0, 0x40 * 8 * 2); > + res[0x80 + 0x0f * 2] = 0xc0000101; /* GS_BASE */ > + res[0x80 + 0x0f * 2 + 1] = 0; > + res[0x80 + 0x20 * 2] = 0xc0000102; /* SHADOW_GS_BASE */ > + res[0x80 + 0x20 * 2 + 1] = 0; > + res[0x80 + 0x31 * 2] = 0x2f; /* BARRIER */ > + res[0x80 + 0x31 * 2 + 1] = 0; > + rc = x86_emulate(&ctxt, &emulops); > + if ( (rc != X86EMUL_OKAY) || > + (regs.rip != (unsigned long)&instr[4]) || > + regs.rcx || > + (res[0x80 + (0x40 + 0x0f) * 2] != (unsigned int)gs_base) || > + (res[0x80 + (0x40 + 0x0f) * 2 + 1] != (gs_base >> (8 * > sizeof(int)))) || > + (res[0x80 + (0x40 + 0x20) * 2] != (unsigned int)gs_base_shadow) || > + (res[0x80 + (0x40 + 0x20) * 2 + 1] != (gs_base_shadow >> (8 * > sizeof(int)))) || > + res[0x80 + (0x40 + 0x31) * 2] || res[0x80 + (0x40 + 0x31) * 2 + 1] ) > + goto fail; > + printf("okay\n"); > + > + printf("%-40s", "Testing wrmsrlist..."); > + instr[0] = 0xf3; instr[1] = 0x0f; instr[2] = 0x01; instr[3] = 0xc6; > + regs.eip = (unsigned long)&instr[0]; > + regs.rsi -= 0x11 * 8; > + regs.rdi -= 0x11 * 8; > + regs.rcx = 0x0002000100000000UL; > + res[0x80 + 0x0f * 2] = 0xc0000102; /* SHADOW_GS_BASE */ > + res[0x80 + 0x20 * 2] = 0xc0000101; /* GS_BASE */ > + rc = x86_emulate(&ctxt, &emulops); > + if ( (rc != 
X86EMUL_OKAY) || > + (regs.rip != (unsigned long)&instr[4]) || > + regs.rcx || > + (gs_base != 0x0000222244446666UL) || > + (gs_base_shadow != 0x0000111122224444UL) ) > + goto fail; > + > emulops.write_msr = NULL; > #endif > printf("okay\n"); > --- a/tools/tests/x86_emulator/x86-emulate.c > +++ b/tools/tests/x86_emulator/x86-emulate.c > @@ -88,6 +88,7 @@ bool emul_test_init(void) > cp.feat.rdpid = true; > cp.feat.lkgs = true; > cp.feat.wrmsrns = true; > + cp.feat.msrlist = true; > cp.extd.clzero = true; > > if ( cpu_has_xsave ) > --- a/xen/arch/x86/hvm/vmx/vmx.c > +++ b/xen/arch/x86/hvm/vmx/vmx.c > @@ -835,6 +835,17 @@ static void cf_check vmx_cpuid_policy_ch > else > vmx_set_msr_intercept(v, MSR_PKRS, VMX_MSR_RW); > > + if ( cp->feat.msrlist ) > + { > + vmx_clear_msr_intercept(v, MSR_BARRIER, VMX_MSR_RW); > + //todo enable MSRLIST tertiary execution control > + } > + else > + { > + vmx_set_msr_intercept(v, MSR_BARRIER, VMX_MSR_RW); > + //todo disable MSRLIST tertiary execution control > + } > + > out: > vmx_vmcs_exit(v); > > @@ -3705,6 +3716,22 @@ gp_fault: > return X86EMUL_EXCEPTION; > } > > +static bool cf_check is_msrlist( > + const struct x86_emulate_state *state, const struct x86_emulate_ctxt > *ctxt) > +{ > + > + if ( ctxt->opcode == X86EMUL_OPC(0x0f, 0x01) ) > + { > + unsigned int rm, reg; > + int mode = x86_insn_modrm(state, &rm, &reg); > + > + /* This also includes WRMSRNS; should be okay. 
*/ > + return mode == 3 && rm == 6 && !reg; > + } > + > + return false; > +} > + > static void vmx_do_extint(struct cpu_user_regs *regs) > { > unsigned long vector; > @@ -4513,6 +4540,17 @@ void vmx_vmexit_handler(struct cpu_user_ > } > break; > > + case EXIT_REASON_RDMSRLIST: > + case EXIT_REASON_WRMSRLIST: > + if ( vmx_guest_x86_mode(v) != 8 || !currd->arch.cpuid->feat.msrlist ) > + { > + ASSERT_UNREACHABLE(); > + hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); > + } > + else if ( !hvm_emulate_one_insn(is_msrlist, "MSR list") ) > + hvm_inject_hw_exception(TRAP_gp_fault, 0); > + break; > + > case EXIT_REASON_VMXOFF: > case EXIT_REASON_VMXON: > case EXIT_REASON_VMCLEAR: > --- a/xen/arch/x86/include/asm/hvm/vmx/vmx.h > +++ b/xen/arch/x86/include/asm/hvm/vmx/vmx.h > @@ -211,6 +211,8 @@ static inline void pi_clear_sn(struct pi > #define EXIT_REASON_XRSTORS 64 > #define EXIT_REASON_BUS_LOCK 74 > #define EXIT_REASON_NOTIFY 75 > +#define EXIT_REASON_RDMSRLIST 78 > +#define EXIT_REASON_WRMSRLIST 79 > /* Remember to also update VMX_PERF_EXIT_REASON_SIZE! 
*/ > > /* > --- a/xen/arch/x86/include/asm/msr-index.h > +++ b/xen/arch/x86/include/asm/msr-index.h > @@ -24,6 +24,8 @@ > #define APIC_BASE_ENABLE (_AC(1, ULL) << 11) > #define APIC_BASE_ADDR_MASK 0x000ffffffffff000ULL > > +#define MSR_BARRIER 0x0000002f > + > #define MSR_TEST_CTRL 0x00000033 > #define TEST_CTRL_SPLITLOCK_DETECT (_AC(1, ULL) << 29) > #define TEST_CTRL_SPLITLOCK_DISABLE (_AC(1, ULL) << 31) > --- a/xen/arch/x86/include/asm/perfc_defn.h > +++ b/xen/arch/x86/include/asm/perfc_defn.h > @@ -6,7 +6,7 @@ PERFCOUNTER_ARRAY(exceptions, > > #ifdef CONFIG_HVM > > -#define VMX_PERF_EXIT_REASON_SIZE 76 > +#define VMX_PERF_EXIT_REASON_SIZE 80 > #define VMEXIT_NPF_PERFC 143 > #define SVM_PERF_EXIT_REASON_SIZE (VMEXIT_NPF_PERFC + 1) > PERFCOUNTER_ARRAY(vmexits, "vmexits", > --- a/xen/arch/x86/msr.c > +++ b/xen/arch/x86/msr.c > @@ -223,6 +223,12 @@ int guest_rdmsr(struct vcpu *v, uint32_t > case MSR_AMD_PPIN: > goto gp_fault; > > + case MSR_BARRIER: > + if ( !cp->feat.msrlist ) > + goto gp_fault; > + *val = 0; > + break; > + > case MSR_IA32_FEATURE_CONTROL: > /* > * Architecturally, availability of this MSR is enumerated by the > @@ -493,6 +499,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t > uint64_t rsvd; > > /* Read-only */ > + case MSR_BARRIER: > case MSR_IA32_PLATFORM_ID: > case MSR_CORE_CAPABILITIES: > case MSR_INTEL_CORE_THREAD_COUNT: > --- a/xen/arch/x86/x86_emulate/0f01.c > +++ b/xen/arch/x86/x86_emulate/0f01.c > @@ -40,6 +40,7 @@ int x86emul_0f01(struct x86_emulate_stat > switch ( s->modrm ) > { > unsigned long base, limit, cr0, cr0w, cr4; > + unsigned int n; > struct segment_register sreg; > uint64_t msr_val; > > @@ -54,6 +55,56 @@ int x86emul_0f01(struct x86_emulate_stat > ((uint64_t)regs->r(dx) << 32) | regs->eax, > ctxt); > goto done; > + > + case vex_f3: /* wrmsrlist */ > + vcpu_must_have(msrlist); > + generate_exception_if(!mode_64bit(), X86_EXC_UD); > + generate_exception_if(!mode_ring0() || (regs->r(si) & 7) || > + (regs->r(di) & 7), > + X86_EXC_GP, 
0); > + fail_if(!ops->write_msr); > + while ( regs->r(cx) ) > + { > + n = __builtin_ffsl(regs->r(cx)) - 1; > + if ( (rc = ops->read(x86_seg_none, regs->r(si) + n * 8, > + &msr_val, 8, ctxt)) != X86EMUL_OKAY ) > + break; > + generate_exception_if(msr_val != (uint32_t)msr_val, > + X86_EXC_GP, 0); > + base = msr_val; > + if ( (rc = ops->read(x86_seg_none, regs->r(di) + n * 8, > + &msr_val, 8, ctxt)) != X86EMUL_OKAY || > + (rc = ops->write_msr(base, msr_val, ctxt)) != > X86EMUL_OKAY ) > + break; > + regs->r(cx) &= ~(1UL << n); > + } > + goto done; > + > + case vex_f2: /* rdmsrlist */ > + vcpu_must_have(msrlist); > + generate_exception_if(!mode_64bit(), X86_EXC_UD); > + generate_exception_if(!mode_ring0() || (regs->r(si) & 7) || > + (regs->r(di) & 7), > + X86_EXC_GP, 0); > + fail_if(!ops->read_msr || !ops->write); > + while ( regs->r(cx) ) > + { > + n = __builtin_ffsl(regs->r(cx)) - 1; > + if ( (rc = ops->read(x86_seg_none, regs->r(si) + n * 8, > + &msr_val, 8, ctxt)) != X86EMUL_OKAY ) > + break; > + generate_exception_if(msr_val != (uint32_t)msr_val, > + X86_EXC_GP, 0); > + if ( (rc = ops->read_msr(msr_val, &msr_val, > + ctxt)) != X86EMUL_OKAY || > + (rc = ops->write(x86_seg_none, regs->r(di) + n * 8, > + &msr_val, 8, ctxt)) != X86EMUL_OKAY ) > + break; > + regs->r(cx) &= ~(1UL << n); > + } > + if ( rc != X86EMUL_OKAY ) > + ctxt->regs->r(cx) = regs->r(cx); > + goto done; > } > generate_exception(X86_EXC_UD); > > --- a/xen/arch/x86/x86_emulate/private.h > +++ b/xen/arch/x86/x86_emulate/private.h > @@ -600,6 +600,7 @@ amd_like(const struct x86_emulate_ctxt * > #define vcpu_has_lkgs() (ctxt->cpuid->feat.lkgs) > #define vcpu_has_wrmsrns() (ctxt->cpuid->feat.wrmsrns) > #define vcpu_has_avx_ifma() (ctxt->cpuid->feat.avx_ifma) > +#define vcpu_has_msrlist() (ctxt->cpuid->feat.msrlist) > #define vcpu_has_avx_vnni_int8() (ctxt->cpuid->feat.avx_vnni_int8) > #define vcpu_has_avx_ne_convert() (ctxt->cpuid->feat.avx_ne_convert) > > --- a/xen/arch/x86/x86_emulate/util.c > +++ 
b/xen/arch/x86/x86_emulate/util.c > @@ -112,6 +112,9 @@ bool cf_check x86_insn_is_mem_access(con > break; > > case X86EMUL_OPC(0x0f, 0x01): > + /* {RD,WR}MSRLIST */ > + if ( mode_64bit() && s->modrm == 0xc6 ) > + return s->vex.pfx >= vex_f3; > /* Cover CLZERO. */ > return (s->modrm_rm & 7) == 4 && (s->modrm_reg & 7) == 7; > } > @@ -172,7 +175,11 @@ bool cf_check x86_insn_is_mem_write(cons > case 0xff: /* Grp5 */ > break; > > - case X86EMUL_OPC(0x0f, 0x01): /* CLZERO is the odd one. */ > + case X86EMUL_OPC(0x0f, 0x01): > + /* RDMSRLIST */ > + if ( mode_64bit() && s->modrm == 0xc6 ) > + return s->vex.pfx == vex_f2; > + /* CLZERO is another odd one. */ > return (s->modrm_rm & 7) == 4 && (s->modrm_reg & 7) == 7; > > default: > --- a/xen/include/public/arch-x86/cpufeatureset.h > +++ b/xen/include/public/arch-x86/cpufeatureset.h > @@ -286,6 +286,7 @@ XEN_CPUFEATURE(FRED, 10*32+17) / > XEN_CPUFEATURE(LKGS, 10*32+18) /*S Load Kernel GS Base */ > XEN_CPUFEATURE(WRMSRNS, 10*32+19) /*A WRMSR Non-Serialising */ > XEN_CPUFEATURE(AVX_IFMA, 10*32+23) /*A AVX-IFMA Instructions */ > +XEN_CPUFEATURE(MSRLIST, 10*32+27) /* MSR list instructions */ > > /* AMD-defined CPU features, CPUID level 0x80000021.eax, word 11 */ > XEN_CPUFEATURE(LFENCE_DISPATCH, 11*32+ 2) /*A LFENCE always serializing > */ > >
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |