|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
On Mon, Feb 12, 2018 at 11:23:05AM +0000, Andrew Cooper wrote:
> The correct amount of padding in an origin patch site can be calculated
> automatically, based on the relative lengths of the replacements.
>
> This requires a bit of trickery to calculate correctly, especially in the
> ALTENRATIVE_2 case where a branchless max() calculation in needed. The
> calculation is further complicated because GAS's idea of true is -1 rather
> than 1, which is why the extra negations are required.
>
> Additionally, have apply_alternatives() attempt to optimise the padding nops.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
LGTM, just a couple of nits:
Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> ---
> CC: Jan Beulich <JBeulich@xxxxxxxx>
> CC: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
> CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> CC: Wei Liu <wei.liu2@xxxxxxxxxx>
> ---
> xen/arch/x86/alternative.c | 32 ++++++++++++++++++++++++----
> xen/include/asm-x86/alternative-asm.h | 40
> +++++++++++++++++++++++++++--------
> xen/include/asm-x86/alternative.h | 39 ++++++++++++++++++++++++++--------
> 3 files changed, 89 insertions(+), 22 deletions(-)
>
> diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
> index f8ddab5..ec87ff4 100644
> --- a/xen/arch/x86/alternative.c
> +++ b/xen/arch/x86/alternative.c
> @@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct
> alt_instr *start,
> uint8_t *orig = ALT_ORIG_PTR(a);
> uint8_t *repl = ALT_REPL_PTR(a);
> uint8_t buf[MAX_PATCH_LEN];
> + unsigned int total_len = a->orig_len + a->pad_len;
>
> - BUG_ON(a->repl_len > a->orig_len);
> - BUG_ON(a->orig_len > sizeof(buf));
> + BUG_ON(a->repl_len > total_len);
> + BUG_ON(total_len > sizeof(buf));
> BUG_ON(a->cpuid >= NCAPINTS * 32);
>
> if ( !boot_cpu_has(a->cpuid) )
> + {
> + unsigned int i;
> +
> + /* No replacement to make, but try to optimise any padding. */
> + if ( a->pad_len <= 1 )
> + continue;
> +
> + /* Search the padding area for any byte which isn't a nop. */
> + for ( i = a->orig_len; i < total_len; ++i )
> + if ( orig[i] != 0x90 )
Maybe better to compare against ASM_NOP1?
> + break;
> +
> + /*
> + * Only make any changes if all padding bytes are unoptimised
> + * nops. With multiple alternatives over the same origin site,
> we
> + * may have already made a replacement, or optimised the nops.
> + */
> + if ( i != total_len )
> + continue;
> +
> + add_nops(buf, a->pad_len);
> + text_poke(orig + a->orig_len, buf, a->pad_len);
> continue;
> + }
>
> memcpy(buf, repl, a->repl_len);
>
> @@ -194,8 +218,8 @@ void init_or_livepatch apply_alternatives(const struct
> alt_instr *start,
> if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 )
> *(s32 *)(buf + 1) += repl - orig;
>
> - add_nops(buf + a->repl_len, a->orig_len - a->repl_len);
> - text_poke(orig, buf, a->orig_len);
> + add_nops(buf + a->repl_len, total_len - a->repl_len);
> + text_poke(orig, buf, total_len);
> }
> }
>
> diff --git a/xen/include/asm-x86/alternative-asm.h
> b/xen/include/asm-x86/alternative-asm.h
> index 150bd1a..f7e37cb 100644
> --- a/xen/include/asm-x86/alternative-asm.h
> +++ b/xen/include/asm-x86/alternative-asm.h
> @@ -9,30 +9,41 @@
> * enough information for the alternatives patching code to patch an
> * instruction. See apply_alternatives().
> */
> -.macro altinstruction_entry orig repl feature orig_len repl_len
> +.macro altinstruction_entry orig repl feature orig_len repl_len pad_len
> .long \orig - .
> .long \repl - .
> .word \feature
> .byte \orig_len
> .byte \repl_len
> + .byte \pad_len
> .endm
>
> #define orig_len (.L\@_orig_e - .L\@_orig_s)
> +#define pad_len (.L\@_orig_p - .L\@_orig_e)
> +#define total_len (.L\@_orig_p - .L\@_orig_s)
> #define repl_len(nr) (.L\@_repl_e\()nr - .L\@_repl_s\()nr)
> #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr:
> +#define gas_max(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
That seems to work fine at least on newish versions of clang, so I'm
not sure the g prefix is required (as_max).
>
> .macro ALTERNATIVE oldinstr, newinstr, feature
> .L\@_orig_s:
> \oldinstr
> .L\@_orig_e:
> + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90
> +.L\@_orig_p:
>
> .pushsection .altinstructions, "a", @progbits
> altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature, \
> - orig_len, repl_len(1)
> + orig_len, repl_len(1), pad_len
>
> .section .discard, "a", @progbits
> - /* Assembler-time check that \newinstr isn't longer than \oldinstr. */
> - .byte 0xff + repl_len(1) - orig_len
> + /*
> + * Assembler-time checks:
> + * - total_len <= 255
> + * - \newinstr <= total_len
> + */
> + .byte total_len
> + .byte 0xff + repl_len(1) - total_len
>
> .section .altinstr_replacement, "ax", @progbits
>
> @@ -45,18 +56,26 @@
> .L\@_orig_s:
> \oldinstr
> .L\@_orig_e:
> + .skip (-((gas_max(repl_len(1), repl_len(2)) - orig_len) > 0) * \
> + (gas_max(repl_len(1), repl_len(2)) - orig_len)), 0x90
> +.L\@_orig_p:
>
> .pushsection .altinstructions, "a", @progbits
>
> altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature1, \
> - orig_len, repl_len(1)
> + orig_len, repl_len(1), pad_len
> altinstruction_entry .L\@_orig_s, .L\@_repl_s2, \feature2, \
> - orig_len, repl_len(2)
> + orig_len, repl_len(2), pad_len
>
> .section .discard, "a", @progbits
> - /* Assembler-time check that \newinstr{1,2} aren't longer than
> \oldinstr. */
> - .byte 0xff + repl_len(1) - orig_len
> - .byte 0xff + repl_len(2) - orig_len
> + /*
> + * Assembler-time checks:
> + * - total_len <= 255
> + * - \newinstr* <= total_len
> + */
> + .byte total_len
> + .byte 0xff + repl_len(1) - total_len
> + .byte 0xff + repl_len(2) - total_len
>
> .section .altinstr_replacement, "ax", @progbits
>
> @@ -66,8 +85,11 @@
> .popsection
> .endm
>
> +#undef gas_max
> #undef decl_repl
> #undef repl_len
> +#undef total_len
> +#undef pad_len
> #undef orig_len
>
> #endif /* __ASSEMBLY__ */
> diff --git a/xen/include/asm-x86/alternative.h
> b/xen/include/asm-x86/alternative.h
> index 1e5cfbd..20dea22 100644
> --- a/xen/include/asm-x86/alternative.h
> +++ b/xen/include/asm-x86/alternative.h
> @@ -8,12 +8,13 @@
> #include <xen/stringify.h>
> #include <xen/types.h>
>
> -struct alt_instr {
> +struct __packed alt_instr {
> int32_t orig_offset; /* original instruction */
> int32_t repl_offset; /* offset to replacement instruction */
> uint16_t cpuid; /* cpuid bit set for replacement */
> uint8_t orig_len; /* length of original instruction */
> - uint8_t repl_len; /* length of new instruction, <= instrlen */
> + uint8_t repl_len; /* length of new instruction */
> + uint8_t pad_len; /* length of build-time padding */
> };
>
> #define __ALT_PTR(a,f) ((u8 *)((void *)&(a)->f + (a)->f))
> @@ -26,44 +27,64 @@ extern void apply_alternatives(const struct alt_instr
> *start,
> const struct alt_instr *end);
> extern void alternative_instructions(void);
>
> -#define OLDINSTR(oldinstr) ".L%=_orig_s:\n\t" oldinstr
> "\n.L%=_orig_e:\n"
> -
> #define repl_s(num) ".L%=_repl_s"#num
> #define repl_e(num) ".L%=_repl_e"#num
>
> #define alt_orig_len "(.L%=_orig_e - .L%=_orig_s)"
> +#define alt_pad_len "(.L%=_orig_p - .L%=_orig_e)"
> +#define alt_total_len "(.L%=_orig_p - .L%=_orig_s)"
> #define alt_repl_len(num) "(" repl_e(num) " - " repl_s(num) ")"
> +#define gas_max(a, b) \
> + "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
> +
> +#define OLDINSTR_1(oldinstr, n1) \
> + ".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \
> + ".skip (-(("alt_repl_len(n1)"-"alt_orig_len") > 0) * " \
> + "("alt_repl_len(n1)"-"alt_orig_len")), 0x90\n\t" \
ASM_NOP1 instead of 0x90?
Thanks, Roger.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |