[RFC PATCH v1 13/15] x86/msr: Use the alternatives mechanism to read MSR



Use the alternatives mechanism, rather than the paravirt MSR read hooks, to
choose between the native RDMSR code and the Xen PV MSR read code at boot
time.

Also add support for the immediate form of MSR reads.
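
For reference, on 64-bit kernels a constant-MSR read is initially emitted
as the legacy sequence sketched below, which the alternatives code can
patch at boot into the immediate form (shown here with a pseudo-mnemonic;
the patch itself encodes the instruction via .insn/.byte since it cannot
rely on assembler mnemonic support), or into a call to the Xen PV read
routine (exception fixup omitted from the sketch):

	/* default: legacy RDMSR */
	mov	$msr, %ecx
	rdmsr				/* result in EDX:EAX */
	shl	$0x20, %rdx
	or	%rdx, %rax		/* full 64-bit result in RAX */

	/* X86_FEATURE_MSR_IMM: MSR number as an imm32 operand */
	rdmsr	$msr, %rax

	/* X86_FEATURE_XENPV */
	mov	$msr, %ecx
	call	asm_xen_read_msr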

Suggested-by: H. Peter Anvin (Intel) <hpa@xxxxxxxxx>
Signed-off-by: Xin Li (Intel) <xin@xxxxxxxxx>
---
 arch/x86/include/asm/msr.h            | 274 ++++++++++++++++++++------
 arch/x86/include/asm/paravirt.h       |  40 ----
 arch/x86/include/asm/paravirt_types.h |   9 -
 arch/x86/kernel/paravirt.c            |   2 -
 arch/x86/xen/enlighten_pv.c           |  45 ++---
 arch/x86/xen/xen-asm.S                |  34 ++++
 arch/x86/xen/xen-ops.h                |   7 +
 7 files changed, 276 insertions(+), 135 deletions(-)
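
Illustrative usage (not part of this patch; MSR_IA32_TSC and
MSR_IA32_MISC_ENABLE are arbitrary examples): the caller-facing API is
unchanged, and the safe variants now report failure through the exception
table instead of a paravirt hook:

	u64 tsc, misc;

	rdmsrl(MSR_IA32_TSC, tsc);	/* faults on an invalid MSR */

	/* rdmsrl_safe() returns -EIO if the read raises #GP */
	if (rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc))
		pr_warn("MSR_IA32_MISC_ENABLE read failed\n");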

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 066cde11254a..fc93c2601853 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -74,6 +74,8 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
 #endif
 
 #ifdef CONFIG_CC_IS_GCC
+#define ASM_RDMSR_IMM                  \
+       " .insn VEX.128.F2.M7.W0 0xf6 /0, %[msr]%{:u32}, %[val]\n\t"
 #define ASM_WRMSRNS_IMM                        \
        " .insn VEX.128.F3.M7.W0 0xf6 /0, %[val], %[msr]%{:u32}\n\t"
 #endif
@@ -85,10 +87,17 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
  * The register operand is encoded as %rax because all uses of the immediate
  * form MSR access instructions reference %rax as the register operand.
  */
+#define ASM_RDMSR_IMM                  \
+       " .byte 0xc4,0xe7,0x7b,0xf6,0xc0; .long %c[msr]"
 #define ASM_WRMSRNS_IMM                        \
        " .byte 0xc4,0xe7,0x7a,0xf6,0xc0; .long %c[msr]"
 #endif
 
+#define RDMSR_AND_SAVE_RESULT          \
+       "rdmsr\n\t"                     \
+       "shl $0x20, %%rdx\n\t"          \
+       "or %%rdx, %[val]\n\t"
+
 #define PREPARE_RDX_FOR_WRMSR          \
        "mov %%rax, %%rdx\n\t"          \
        "shr $0x20, %%rdx\n\t"
@@ -142,6 +151,7 @@ static __always_inline enum pv_msr_action get_pv_msr_action(const u32 msr)
        }
 }
 
+extern void asm_xen_read_msr(void);
 extern void asm_xen_write_msr(void);
 #else
 static __always_inline enum pv_msr_action get_pv_msr_action(const u32 msr)
@@ -150,35 +160,95 @@ static __always_inline enum pv_msr_action get_pv_msr_action(const u32 msr)
 }
 #endif
 
-static __always_inline unsigned long long __rdmsr(unsigned int msr)
+static __always_inline bool __native_rdmsr_variable(const u32 msr, u64 *val, const int type)
 {
-       DECLARE_ARGS(val, low, high);
+#ifdef CONFIG_X86_64
+       BUILD_BUG_ON(__builtin_constant_p(msr));
 
-       asm volatile("1: rdmsr\n"
-                    "2:\n"
-                    _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR)
-                    : EAX_EDX_RET(val, low, high) : "c" (msr));
+       asm_inline volatile goto(
+               "1:\n"
+               RDMSR_AND_SAVE_RESULT
+               _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type])     /* For RDMSR */
 
-       return EAX_EDX_VAL(val, low, high);
+               : [val] "=a" (*val)
+               : "c" (msr), [type] "i" (type)
+               : "memory", "rdx"
+               : badmsr);
+#else
+       asm_inline volatile goto(
+               "1: rdmsr\n\t"
+               _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type])     /* For RDMSR */
+
+               : "=A" (*val)
+               : "c" (msr), [type] "i" (type)
+               : "memory"
+               : badmsr);
+#endif
+
+       return false;
+
+badmsr:
+       return true;
+}
+
+#ifdef CONFIG_X86_64
+static __always_inline bool __native_rdmsr_constant(const u32 msr, u64 *val, const int type)
+{
+       BUILD_BUG_ON(!__builtin_constant_p(msr));
+
+       asm_inline volatile goto(
+               "1:\n"
+               ALTERNATIVE("mov %[msr], %%ecx\n\t"
+                           "2:\n"
+                           RDMSR_AND_SAVE_RESULT,
+                           ASM_RDMSR_IMM,
+                           X86_FEATURE_MSR_IMM)
+               _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type])     /* For RDMSR immediate */
+               _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type])     /* For RDMSR */
+
+               : [val] "=a" (*val)
+               : [msr] "i" (msr), [type] "i" (type)
+               : "memory", "ecx", "rdx"
+               : badmsr);
+
+       return false;
+
+badmsr:
+       return true;
+}
+#endif
+
+static __always_inline bool __native_rdmsr(const u32 msr, u64 *val, const int type)
+{
+#ifdef CONFIG_X86_64
+       if (__builtin_constant_p(msr))
+               return __native_rdmsr_constant(msr, val, type);
+#endif
+
+       return __native_rdmsr_variable(msr, val, type);
 }
 
-#define native_rdmsr(msr, val1, val2)                  \
+#define native_rdmsr(msr, low, high)                   \
 do {                                                   \
-       u64 __val = __rdmsr((msr));                     \
-       (void)((val1) = (u32)__val);                    \
-       (void)((val2) = (u32)(__val >> 32));            \
+       u64 __val = 0;                                  \
+       __native_rdmsr((msr), &__val, EX_TYPE_RDMSR);   \
+       (void)((low) = (u32)__val);                     \
+       (void)((high) = (u32)(__val >> 32));            \
 } while (0)
 
 static __always_inline u64 native_rdmsrl(const u32 msr)
 {
-       return __rdmsr(msr);
+       u64 val = 0;
+
+       __native_rdmsr(msr, &val, EX_TYPE_RDMSR);
+       return val;
 }
 
-static inline unsigned long long native_read_msr(unsigned int msr)
+static inline u64 native_read_msr(const u32 msr)
 {
-       unsigned long long val;
+       u64 val = 0;
 
-       val = __rdmsr(msr);
+       __native_rdmsr(msr, &val, EX_TYPE_RDMSR);
 
        if (tracepoint_enabled(read_msr))
                do_trace_read_msr(msr, val, 0);
@@ -186,19 +256,139 @@ static inline unsigned long long native_read_msr(unsigned int msr)
        return val;
 }
 
-static inline unsigned long long native_read_msr_safe(unsigned int msr,
-                                                     int *err)
+static inline u64 native_read_msr_safe(const u32 msr, int *err)
 {
-       DECLARE_ARGS(val, low, high);
+       u64 val = 0;
+
+       *err = __native_rdmsr(msr, &val, EX_TYPE_RDMSR_SAFE) ? -EIO : 0;
 
-       asm volatile("1: rdmsr ; xor %[err],%[err]\n"
-                    "2:\n\t"
-                    _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err])
-                    : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
-                    : "c" (msr));
        if (tracepoint_enabled(read_msr))
-               do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
-       return EAX_EDX_VAL(val, low, high);
+               do_trace_read_msr(msr, val, *err);
+
+       return val;
+}
+
+static __always_inline bool __rdmsr_variable(const u32 msr, u64 *val, const int type)
+{
+#ifdef CONFIG_X86_64
+       BUILD_BUG_ON(__builtin_constant_p(msr));
+
+       asm_inline volatile goto(
+               "1:\n"
+               ALTERNATIVE(RDMSR_AND_SAVE_RESULT,
+                           "call asm_xen_read_msr\n\t",
+                           X86_FEATURE_XENPV)
+               _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type])     /* For RDMSR and CALL */
+
+               : [val] "=a" (*val), ASM_CALL_CONSTRAINT
+               : "c" (msr), [type] "i" (type)
+               : "memory", "rdx"
+               : badmsr);
+
+       return false;
+
+badmsr:
+       return true;
+#else
+       return __native_rdmsr_variable(msr, val, type);
+#endif
+}
+
+static __always_inline bool __rdmsr_variable_all(const u32 msr, u64 *val, const int type)
+{
+       const enum pv_msr_action action = get_pv_msr_action(msr);
+
+       if (action == PV_MSR_PV) {
+               return __rdmsr_variable(msr, val, type);
+       } else if (action == PV_MSR_IGNORE) {
+               if (cpu_feature_enabled(X86_FEATURE_XENPV))
+                       return false;
+       }
+
+       return __native_rdmsr_variable(msr, val, type);
+}
+
+#ifdef CONFIG_X86_64
+static __always_inline bool __rdmsr_constant(const u32 msr, u64 *val, const int type)
+{
+       BUILD_BUG_ON(!__builtin_constant_p(msr));
+
+       asm_inline volatile goto(
+               "1:\n"
+               ALTERNATIVE_2("mov %[msr], %%ecx\n\t"
+                             "2:\n"
+                             RDMSR_AND_SAVE_RESULT,
+                             ASM_RDMSR_IMM,
+                             X86_FEATURE_MSR_IMM,
+                             "mov %[msr], %%ecx\n\t"
+                             "call asm_xen_read_msr\n\t",
+                             X86_FEATURE_XENPV)
+               _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type])     /* For RDMSR immediate */
+               _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type])     /* For RDMSR and CALL */
+
+               : [val] "=a" (*val), ASM_CALL_CONSTRAINT
+               : [msr] "i" (msr), [type] "i" (type)
+               : "memory", "ecx", "rdx"
+               : badmsr);
+
+       return false;
+
+badmsr:
+       return true;
+}
+
+static __always_inline bool __rdmsr_constant_all(const u32 msr, u64 *val, const int type)
+{
+       const enum pv_msr_action action = get_pv_msr_action(msr);
+
+       if (action == PV_MSR_PV) {
+               return __rdmsr_constant(msr, val, type);
+       } else if (action == PV_MSR_IGNORE) {
+               if (cpu_feature_enabled(X86_FEATURE_XENPV))
+                       return false;
+       }
+
+       return __native_rdmsr_constant(msr, val, type);
+}
+#endif
+
+static __always_inline bool __rdmsr(const u32 msr, u64 *val, const int type)
+{
+#ifdef CONFIG_X86_64
+       if (__builtin_constant_p(msr))
+               return __rdmsr_constant_all(msr, val, type);
+#endif
+
+       return __rdmsr_variable_all(msr, val, type);
+}
+
+#define rdmsr(msr, low, high)                          \
+do {                                                   \
+       u64 __val = 0;                                  \
+       __rdmsr((msr), &__val, EX_TYPE_RDMSR);          \
+       (void)((low) = (u32)__val);                     \
+       (void)((high) = (u32)(__val >> 32));            \
+} while (0)
+
+#define rdmsrl(msr, val)                               \
+do {                                                   \
+       u64 __val = 0;                                  \
+       __rdmsr((msr), &__val, EX_TYPE_RDMSR);          \
+       (val) = __val;                                  \
+} while (0)
+
+#define rdmsr_safe(msr, low, high)                                     \
+({                                                                     \
+       u64 __val = 0;                                                  \
+       int __err = __rdmsr((msr), &__val, EX_TYPE_RDMSR_SAFE) ? -EIO : 0; \
+       (*low) = (u32)__val;                                            \
+       (*high) = (u32)(__val >> 32);                                   \
+       __err;                                                          \
+})
+
+static __always_inline int rdmsrl_safe(const u32 msr, u64 *val)
+{
+       return __rdmsr(msr, val, EX_TYPE_RDMSR_SAFE) ? -EIO : 0;
 }
 
 /*
@@ -503,40 +693,6 @@ static inline unsigned long long native_read_pmc(int counter)
  * Note: the rd* operations modify the parameters directly (without using
  * pointer indirection), this allows gcc to optimize better
  */
-
-#define rdmsr(msr, low, high)                                  \
-do {                                                           \
-       u64 __val = native_read_msr((msr));                     \
-       (void)((low) = (u32)__val);                             \
-       (void)((high) = (u32)(__val >> 32));                    \
-} while (0)
-
-#define rdmsrl(msr, val)                       \
-       ((val) = native_read_msr((msr)))
-
-static inline void wrmsrl(u32 msr, u64 val)
-{
-       native_write_msr(msr, val);
-}
-
-/* rdmsr with exception handling */
-#define rdmsr_safe(msr, low, high)                             \
-({                                                             \
-       int __err;                                              \
-       u64 __val = native_read_msr_safe((msr), &__err);        \
-       (*low) = (u32)__val;                                    \
-       (*high) = (u32)(__val >> 32);                           \
-       __err;                                                  \
-})
-
-static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p)
-{
-       int err;
-
-       *p = native_read_msr_safe(msr, &err);
-       return err;
-}
-
 #define rdpmc(counter, low, high)                      \
 do {                                                   \
        u64 _l = native_read_pmc((counter));            \
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 351feb890ab0..7ddb71ed9d0c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -175,46 +175,6 @@ static inline void __write_cr4(unsigned long x)
        PVOP_VCALL1(cpu.write_cr4, x);
 }
 
-static inline u64 paravirt_read_msr(unsigned msr)
-{
-       return PVOP_CALL1(u64, cpu.read_msr, msr);
-}
-
-static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
-{
-       return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err);
-}
-
-#define rdmsr(msr, val1, val2)                 \
-do {                                           \
-       u64 _l = paravirt_read_msr(msr);        \
-       val1 = (u32)_l;                         \
-       val2 = _l >> 32;                        \
-} while (0)
-
-#define rdmsrl(msr, val)                       \
-do {                                           \
-       val = paravirt_read_msr(msr);           \
-} while (0)
-
-/* rdmsr with exception handling */
-#define rdmsr_safe(msr, a, b)                          \
-({                                                     \
-       int _err;                                       \
-       u64 _l = paravirt_read_msr_safe(msr, &_err);    \
-       (*a) = (u32)_l;                                 \
-       (*b) = _l >> 32;                                \
-       _err;                                           \
-})
-
-static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
-{
-       int err;
-
-       *p = paravirt_read_msr_safe(msr, &err);
-       return err;
-}
-
 static inline unsigned long long paravirt_read_pmc(int counter)
 {
        return PVOP_CALL1(u64, cpu.read_pmc, counter);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8a563576d70e..5c57e6e4115f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -90,15 +90,6 @@ struct pv_cpu_ops {
        void (*cpuid)(unsigned int *eax, unsigned int *ebx,
                      unsigned int *ecx, unsigned int *edx);
 
-       /* Unsafe MSR operations.  These will warn or panic on failure. */
-       u64 (*read_msr)(unsigned int msr);
-
-       /*
-        * Safe MSR operations.
-        * read sets err to 0 or -EIO.  write returns 0 or -EIO.
-        */
-       u64 (*read_msr_safe)(unsigned int msr, int *err);
-
        u64 (*read_pmc)(int counter);
 
        void (*start_context_switch)(struct task_struct *prev);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ffb04445f97e..e3d4f9869779 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -128,8 +128,6 @@ struct paravirt_patch_template pv_ops = {
        .cpu.read_cr0           = native_read_cr0,
        .cpu.write_cr0          = native_write_cr0,
        .cpu.write_cr4          = native_write_cr4,
-       .cpu.read_msr           = native_read_msr,
-       .cpu.read_msr_safe      = native_read_msr_safe,
        .cpu.read_pmc           = native_read_pmc,
        .cpu.load_tr_desc       = native_load_tr_desc,
        .cpu.set_ldt            = native_set_ldt,
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index d02f55bfa869..e49f16278487 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1087,19 +1087,26 @@ static void xen_write_cr4(unsigned long cr4)
        native_write_cr4(cr4);
 }
 
-static u64 xen_do_read_msr(unsigned int msr, int *err)
+/*
+ * Set 'done' to true in the returned xen_rdmsr_ret_type to indicate that
+ * the requested MSR read has been handled successfully.
+ */
+struct xen_rdmsr_ret_type xen_do_read_msr(u32 msr)
 {
-       u64 val = 0;    /* Avoid uninitialized value for safe variant. */
-       bool emulated;
+       struct xen_rdmsr_ret_type ret;
 
-       if (pmu_msr_chk_emulated(msr, &val, true, &emulated) && emulated)
-               return val;
+       ret.done = true;
 
-       if (err)
-               val = native_read_msr_safe(msr, err);
-       else
-               val = native_read_msr(msr);
+       if (pmu_msr_chk_emulated(msr, &ret.val, true, &ret.done) && ret.done)
+               return ret;
+
+       ret.val = 0;
+       ret.done = false;
+       return ret;
+}
 
+u64 xen_do_read_msr_fixup(u32 msr, u64 val)
+{
        switch (msr) {
        case MSR_IA32_APICBASE:
                val &= ~X2APIC_ENABLE;
@@ -1108,7 +1115,11 @@ static u64 xen_do_read_msr(unsigned int msr, int *err)
                else
                        val &= ~MSR_IA32_APICBASE_BSP;
                break;
+
+       default:
+               break;
        }
+
        return val;
 }
 
@@ -1160,18 +1171,6 @@ bool xen_do_write_msr(u32 msr, u64 val)
        }
 }
 
-static u64 xen_read_msr_safe(unsigned int msr, int *err)
-{
-       return xen_do_read_msr(msr, err);
-}
-
-static u64 xen_read_msr(unsigned int msr)
-{
-       int err;
-
-       return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
-}
-
 /* This is called once we have the cpu_possible_mask */
 void __init xen_setup_vcpu_info_placement(void)
 {
@@ -1206,10 +1205,6 @@ static const typeof(pv_ops) xen_cpu_ops __initconst = {
 
                .write_cr4 = xen_write_cr4,
 
-               .read_msr = xen_read_msr,
-
-               .read_msr_safe = xen_read_msr_safe,
-
                .read_pmc = xen_read_pmc,
 
                .load_tr_desc = paravirt_nop,
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index e672632b1cc0..6e7a9daa03d4 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -399,3 +399,37 @@ SYM_CODE_END(xen_entry_SYSCALL_compat)
        RET
 SYM_FUNC_END(asm_xen_write_msr)
 EXPORT_SYMBOL_GPL(asm_xen_write_msr)
+
+/*
+ * The prototype of the Xen C code:
+ *     struct { u64 val; bool done; } xen_do_read_msr(u32 msr)
+ */
+SYM_FUNC_START(asm_xen_read_msr)
+       ENDBR
+       FRAME_BEGIN
+       XEN_SAVE_CALLEE_REGS_FOR_MSR
+       mov %ecx, %edi          /* MSR number */
+       call xen_do_read_msr
+       test %dl, %dl           /* %dl=1, i.e., ZF=0, meaning successfully done */
+       XEN_RESTORE_CALLEE_REGS_FOR_MSR
+       jnz 2f
+
+1:     rdmsr
+       /*
+        * The top of the stack points directly at the return address;
+        * back up by 5 bytes from the return address.
+        */
+       _ASM_EXTABLE_FUNC_REWIND(1b, -5, FRAME_OFFSET / (BITS_PER_LONG / 8))
+       shl $0x20, %rdx
+       or %rdx, %rax
+
+       XEN_SAVE_CALLEE_REGS_FOR_MSR
+       mov %ecx, %edi
+       mov %rax, %rsi
+       call xen_do_read_msr_fixup
+       XEN_RESTORE_CALLEE_REGS_FOR_MSR
+
+2:     FRAME_END
+       RET
+SYM_FUNC_END(asm_xen_read_msr)
+EXPORT_SYMBOL_GPL(asm_xen_read_msr)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index fc3c55871037..46efeaa4bbd3 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -146,7 +146,14 @@ __visible unsigned long xen_read_cr2_direct(void);
 /* These are not functions, and cannot be called normally */
 __visible void xen_iret(void);
 
+struct xen_rdmsr_ret_type {
+       u64 val;
+       bool done;
+};
+
 extern bool xen_do_write_msr(u32 msr, u64 val);
+extern struct xen_rdmsr_ret_type xen_do_read_msr(u32 msr);
+extern u64 xen_do_read_msr_fixup(u32 msr, u64 val);
 
 extern int xen_panic_handler_init(void);
 
-- 
2.49.0