[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH 1/6] x86: Introduce x86_decode_lite()
In order to relocate all IP-relative fields in an alternative replacement block, we need to decode the instructions enough to obtain their length and any relative fields. Full x86_decode() is far too heavyweight, so introduce a minimal form which can make several simplifying assumptions. This logic is sufficient to decode all alternative blocks that exist in Xen right now. Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- CC: Jan Beulich <JBeulich@xxxxxxxx> CC: Roger Pau Monné <roger.pau@xxxxxxxxxx> --- xen/arch/x86/x86_emulate/Makefile | 1 + xen/arch/x86/x86_emulate/decode-lite.c | 245 +++++++++++++++++++++++++ xen/arch/x86/x86_emulate/private.h | 2 + xen/arch/x86/x86_emulate/x86_emulate.h | 17 ++ 4 files changed, 265 insertions(+) create mode 100644 xen/arch/x86/x86_emulate/decode-lite.c diff --git a/xen/arch/x86/x86_emulate/Makefile b/xen/arch/x86/x86_emulate/Makefile index 2e20d65d788a..f06731913d67 100644 --- a/xen/arch/x86/x86_emulate/Makefile +++ b/xen/arch/x86/x86_emulate/Makefile @@ -3,6 +3,7 @@ obj-y += 0fae.o obj-y += 0fc7.o obj-y += blk.o obj-y += decode.o +obj-y += decode-lite.o obj-$(CONFIG_HVM) += fpu.o obj-y += util.o obj-y += util-xen.o diff --git a/xen/arch/x86/x86_emulate/decode-lite.c b/xen/arch/x86/x86_emulate/decode-lite.c new file mode 100644 index 000000000000..050b0d8cf4a8 --- /dev/null +++ b/xen/arch/x86/x86_emulate/decode-lite.c @@ -0,0 +1,245 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "private.h" + +#define Imm8 (1 << 0) +#define Imm (1 << 1) +#define Branch (1 << 5) /* ... that we care about */ +/* ModRM (1 << 6) */ +#define Known (1 << 7) + +#define ALU_OPS \ + (Known|ModRM), \ + (Known|ModRM), \ + (Known|ModRM), \ + (Known|ModRM), \ + (Known|Imm8), \ + (Known|Imm) + +static const uint8_t init_or_livepatch_const onebyte[256] = { + [0x00] = ALU_OPS, /* ADD */ [0x08] = ALU_OPS, /* OR */ + [0x10] = ALU_OPS, /* ADC */ [0x18] = ALU_OPS, /* SBB */ + [0x20] = ALU_OPS, /* AND */ [0x28] = ALU_OPS, /* SUB */ + [0x30] = ALU_OPS, /* XOR */ [0x38] = ALU_OPS, /* CMP */ + + [0x50 ... 0x5f] = (Known), /* PUSH/POP %reg */ + + [0x70 ... 0x7f] = (Known|Branch|Imm8), /* Jcc disp8 */ + [0x80] = (Known|ModRM|Imm8), + [0x81] = (Known|ModRM|Imm), + + [0x83] = (Known|ModRM|Imm8), + [0x84 ... 0x8e] = (Known|ModRM), /* TEST/XCHG/MOV/MOV-SREG/LEA r/rm */ + + [0xb0 ... 0xb7] = (Known|Imm8), /* MOV $imm8, %reg */ + [0xb8 ... 0xbf] = (Known|Imm), /* MOV $imm32, %reg */ + + [0xcc] = (Known), /* INT3 */ + [0xcd] = (Known|Imm8), /* INT $imm8 */ + + [0xe8 ... 0xe9] = (Known|Branch|Imm), /* CALL/JMP disp32 */ + [0xeb] = (Known|Branch|Imm8), /* JMP disp8 */ + + [0xf1] = (Known), /* ICEBP */ + [0xf4] = (Known), /* HLT */ + [0xf5] = (Known), /* CMC */ + [0xf6 ... 0xf7] = (Known|ModRM), /* Grp3 */ + [0xf8 ... 0xfd] = (Known), /* CLC ... STD */ + [0xfe ... 0xff] = (Known|ModRM), /* Grp4 */ +}; +static const uint8_t init_or_livepatch_const twobyte[256] = { + [0x00 ... 0x01] = (Known|ModRM), /* Grp6/Grp7 */ + + [0x18 ... 0x1f] = (Known|ModRM), /* Grp16 (Hint Nop) */ + + [0x20 ... 0x23] = (Known|ModRM), /* MOV CR/DR */ + + [0x30 ... 0x34] = (Known), /* WRMSR ... RDPMC */ + [0x40 ... 0x4f] = (Known|ModRM), /* CMOVcc */ + + [0x80 ... 0x8f] = (Known|Branch|Imm), /* Jcc disp32 */ + [0x90 ... 0x9f] = (Known|ModRM), /* SETcc */ + + [0xab] = (Known|ModRM), /* BTS */ + [0xac] = (Known|ModRM|Imm8), /* SHRD $imm8 */ + [0xad ... 0xaf] = (Known|ModRM), /* SHRD %cl / Grp15 / IMUL */ + + [0xb8 ... 0xb9] = (Known|ModRM), /* POPCNT/Grp10 (UD1) */ + [0xba] = (Known|ModRM|Imm8), /* Grp8 */ + [0xbb ... 0xbf] = (Known|ModRM), /* BSR/BSF/BSR/MOVSX */ +}; + +/* + * Bare minimum x86 instruction decoder to parse the alternative replacement + * instructions and locate the IP-relative references that may need updating. + * + * These are: + * - disp8/32 from near branches + * - RIP-relative memory references + * + * The following simplifications are used: + * - All code is 64bit, and the instruction stream is safe to read. + * - The 67 prefix is not implemented, so the address size is only 64bit. + * + * Inputs: + * @ip The position to start decoding from. + * @end End of the replacement block. Exceeding this is considered an error. + * + * Returns: x86_decode_lite_t + * - On failure, length of -1. + * - On success, length > 0 and REL_TYPE_*. For REL_TYPE != NONE, rel points + * at the relative field in the instruction stream. + */ +x86_decode_lite_t init_or_livepatch x86_decode_lite(void *ip, void *end) +{ + void *start = ip, *rel = NULL; + unsigned int opc, type = REL_TYPE_NONE; + uint8_t b, d, osize = 4; + +#define OPC_TWOBYTE (1 << 8) + + /* Mutates IP, uses END. */ +#define FETCH(ty) \ + ({ \ + ty _val; \ + \ + if ( (ip + sizeof(ty)) > end ) \ + goto overrun; \ + _val = *(ty *)ip; \ + ip += sizeof(ty); \ + _val; \ + }) + + for ( ;; ) /* Prefixes */ + { + switch ( b = FETCH(uint8_t) ) + { + case 0x26: /* ES override */ + case 0x2e: /* CS override */ + case 0x36: /* DS override */ + case 0x3e: /* SS override */ + case 0x64: /* FS override */ + case 0x65: /* GS override */ + case 0xf0: /* Lock */ + case 0xf2: /* REPNE */ + case 0xf3: /* REP */ + break; + + case 0x66: /* Operand size override */ + osize = 2; + break; + + /* case 0x67: Address size override, not implemented */ + + case 0x40 ... 0x4f: /* REX */ + continue; + + default: + goto prefixes_done; + } + } + prefixes_done: + + /* Fetch the main opcode byte(s) */ + if ( b == 0x0f ) + { + b = FETCH(uint8_t); + opc = OPC_TWOBYTE | b; + + d = twobyte[b]; + } + else + { + opc = b; + d = onebyte[b]; + } + + if ( unlikely(!(d & Known)) ) + goto unknown; + + if ( d & ModRM ) + { + uint8_t modrm = FETCH(uint8_t); + uint8_t mod = modrm >> 6; + uint8_t reg = (modrm >> 3) & 7; + uint8_t rm = modrm & 7; + + /* ModRM/SIB decode */ + if ( mod == 0 && rm == 5 ) /* RIP relative */ + { + rel = ip; + type = REL_TYPE_d32; + FETCH(uint32_t); + } + else if ( mod != 3 && rm == 4 ) /* SIB */ + FETCH(uint8_t); + + if ( mod == 1 ) /* disp8 */ + FETCH(uint8_t); + else if ( mod == 2 ) /* disp32 */ + FETCH(uint32_t); + + /* ModRM based decode adjustements */ + switch ( opc ) + { + case 0xf6: /* Grp3 TEST(s) have extra Imm8 */ + if ( reg == 0 || reg == 1 ) + d |= Imm8; + break; + case 0xf7: /* Grp3 TEST(s) have extra Imm */ + if ( reg == 0 || reg == 1 ) + d |= Imm; + break; + } + } + + if ( d & Branch ) + { + /* + * Don't tolerate 66-prefixed call/jmp in alternatives. Some are + * genuinely decoded differently between Intel and AMD CPUs. + */ + if ( osize != 4 ) + goto bad_osize; + + rel = ip; + type = (d & Imm8) ? REL_TYPE_d8 : REL_TYPE_d32; + } + + if ( d & (Imm|Imm8) ) + { + if ( d & Imm8 ) + osize = 1; + + switch ( osize ) + { + case 1: FETCH(uint8_t); break; + case 2: FETCH(uint16_t); break; + case 4: FETCH(uint32_t); break; + default: goto bad_osize; + } + } + + return (x86_decode_lite_t){ ip - start, type, rel }; + + bad_osize: + printk(XENLOG_ERR "%s() Bad osize %u in %*ph\n", + __func__, osize, + (int)(unsigned long)(end - start), start); + return (x86_decode_lite_t){ -1, REL_TYPE_NONE, NULL }; + + unknown: + printk(XENLOG_ERR "%s() Unknown opcode in %*ph <%02x> %*ph\n", + __func__, + (int)(unsigned long)(ip - 1 - start), start, b, + (int)(unsigned long)(end - ip), ip); + return (x86_decode_lite_t){ -1, REL_TYPE_NONE, NULL }; + + overrun: + printk(XENLOG_ERR "%s() Decode overrun, got %*ph\n", + __func__, + (int)(unsigned long)(end - start), start); + return (x86_decode_lite_t){ -1, REL_TYPE_NONE, NULL }; + +#undef FETCH +} diff --git a/xen/arch/x86/x86_emulate/private.h b/xen/arch/x86/x86_emulate/private.h index 0fa26ba00aa5..792b04889ce6 100644 --- a/xen/arch/x86/x86_emulate/private.h +++ b/xen/arch/x86/x86_emulate/private.h @@ -9,7 +9,9 @@ #ifdef __XEN__ # include <xen/bug.h> +# include <xen/init.h> # include <xen/kernel.h> +# include <xen/livepatch.h> # include <asm/endbr.h> # include <asm/msr-index.h> # include <asm/x86-vendors.h> diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h index d92be69d84d9..b7cbcd8b70ba 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -842,4 +842,21 @@ static inline void x86_emul_reset_event(struct x86_emulate_ctxt *ctxt) ctxt->event = (struct x86_event){}; } +/* + * x86_decode_lite(). Very minimal decoder for managing alternatives. + * + * @len is -1 on error, or positive on success. If the instruction has a + * relative field, REL_TYPE_* gives the size, and @rel points at the field. + */ +typedef struct { + int8_t len; + uint8_t rel_type; +#define REL_TYPE_NONE 0 +#define REL_TYPE_d8 1 +#define REL_TYPE_d32 2 + void *rel; +} x86_decode_lite_t; + +x86_decode_lite_t x86_decode_lite(void *ip, void *end); + #endif /* __X86_EMULATE_H__ */ -- 2.30.2
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |