|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 1/3] x86/emul: Optimise decode_register() somewhat
The positions of GPRs inside struct cpu_user_regs don't follow any
particular order, so as compiled, decode_register() becomes a jump table to 16
blocks which calculate the appropriate offset, at a total of 207 bytes.
Instead, pre-compute the offsets at build time and use pointer arithmetic to
calculate the result. The resulting function is far more reasonable:
test %edx,%edx
lea 0xbfb97(%rip),%rax # <cpu_user_regs_high_gpr_offsets>
lea 0xbfba0(%rip),%rdx # <cpu_user_regs_gpr_offsets>
cmove %rdx,%rax
and $0xf,%edi
movzbl (%rax,%rdi,1),%eax
add %rsi,%rax
retq
and by observation, most callers in x86_emulate() inline and
constant-propagate the highbyte_regs value of 0 to drop the test, one lea and
the cmove.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
---
xen/arch/x86/x86_emulate/x86_emulate.c | 82 ++++++++++++++++++++++++----------
1 file changed, 58 insertions(+), 24 deletions(-)
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index ff0a003..3f5636f 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -396,6 +396,51 @@ static const struct {
/* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
static const uint8_t pmov_convert_delta[] = { 1, 2, 3, 1, 2, 1 };
+/*
+ * Map GPRs by ModRM encoding to their offset within struct cpu_user_regs.
+ * The AH,CH,DH,BH offsets are misaligned.
+ */
+static const uint8_t cpu_user_regs_gpr_offsets[] = {
+ offsetof(struct cpu_user_regs, r(ax)),
+ offsetof(struct cpu_user_regs, r(cx)),
+ offsetof(struct cpu_user_regs, r(dx)),
+ offsetof(struct cpu_user_regs, r(bx)),
+ offsetof(struct cpu_user_regs, r(sp)),
+ offsetof(struct cpu_user_regs, r(bp)),
+ offsetof(struct cpu_user_regs, r(si)),
+ offsetof(struct cpu_user_regs, r(di)),
+#if defined(__x86_64__)
+ offsetof(struct cpu_user_regs, r8),
+ offsetof(struct cpu_user_regs, r9),
+ offsetof(struct cpu_user_regs, r10),
+ offsetof(struct cpu_user_regs, r11),
+ offsetof(struct cpu_user_regs, r12),
+ offsetof(struct cpu_user_regs, r13),
+ offsetof(struct cpu_user_regs, r14),
+ offsetof(struct cpu_user_regs, r15),
+#endif
+};
+static const uint8_t cpu_user_regs_high_gpr_offsets[] = {
+ offsetof(struct cpu_user_regs, r(ax)),
+ offsetof(struct cpu_user_regs, r(cx)),
+ offsetof(struct cpu_user_regs, r(dx)),
+ offsetof(struct cpu_user_regs, r(bx)),
+ offsetof(struct cpu_user_regs, ah),
+ offsetof(struct cpu_user_regs, ch),
+ offsetof(struct cpu_user_regs, dh),
+ offsetof(struct cpu_user_regs, bh),
+#if defined(__x86_64__)
+ offsetof(struct cpu_user_regs, r8),
+ offsetof(struct cpu_user_regs, r9),
+ offsetof(struct cpu_user_regs, r10),
+ offsetof(struct cpu_user_regs, r11),
+ offsetof(struct cpu_user_regs, r12),
+ offsetof(struct cpu_user_regs, r13),
+ offsetof(struct cpu_user_regs, r14),
+ offsetof(struct cpu_user_regs, r15),
+#endif
+};
+
static const struct {
uint8_t simd_size:5;
uint8_t to_mem:1;
@@ -1939,32 +1984,21 @@ void *
decode_register(
uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
{
- void *p;
+ const uint8_t *offsets = highbyte_regs ? cpu_user_regs_high_gpr_offsets
+ : cpu_user_regs_gpr_offsets;
- switch ( modrm_reg )
- {
- case 0: p = &regs->r(ax); break;
- case 1: p = &regs->r(cx); break;
- case 2: p = &regs->r(dx); break;
- case 3: p = &regs->r(bx); break;
- case 4: p = (highbyte_regs ? &regs->ah : (void *)&regs->r(sp)); break;
- case 5: p = (highbyte_regs ? &regs->ch : (void *)&regs->r(bp)); break;
- case 6: p = (highbyte_regs ? &regs->dh : (void *)&regs->r(si)); break;
- case 7: p = (highbyte_regs ? &regs->bh : (void *)&regs->r(di)); break;
-#if defined(__x86_64__)
- case 8: p = &regs->r8; break;
- case 9: p = &regs->r9; break;
- case 10: p = &regs->r10; break;
- case 11: p = &regs->r11; break;
- case 12: p = &regs->r12; break;
- case 13: p = &regs->r13; break;
- case 14: p = &regs->r14; break;
- case 15: p = &regs->r15; break;
-#endif
- default: BUG(); p = NULL; break;
- }
+ /* Check that the arrays are the same size, and a power of two. */
+ BUILD_BUG_ON(ARRAY_SIZE(cpu_user_regs_gpr_offsets) !=
+ ARRAY_SIZE(cpu_user_regs_high_gpr_offsets));
+ BUILD_BUG_ON(ARRAY_SIZE(cpu_user_regs_gpr_offsets) &
+ (ARRAY_SIZE(cpu_user_regs_gpr_offsets) - 1));
+
+ ASSERT(modrm_reg < ARRAY_SIZE(cpu_user_regs_gpr_offsets));
+
+ /* For safety in release builds. Debug builds will hit the ASSERT() */
+ modrm_reg &= ARRAY_SIZE(cpu_user_regs_gpr_offsets) - 1;
- return p;
+ return (void *)regs + offsets[modrm_reg];
}
static void *decode_vex_gpr(unsigned int vex_reg, struct cpu_user_regs *regs,
--
2.1.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |