[PATCH RFC 1/2] x86/alternatives: allow replacement code snippets to be merged
Prior to b11380f6cd58 ("x86/spec-ctrl: Build with BRANCH_HARDEN lfences
by default") LFENCE was the dominant entry. But even without it there's
still quite a bit of redundancy, much of which can be eliminated by
marking the sections as mergeable. This of course also requires
splitting them by "element" size.

Note that due to gas restrictions, replacement code containing Jcc or
PC-relative JMP cannot be made subject to merging. Hence the original
assembly macros can't be altered, but replacement ones need to be
introduced (and then used selectively).

Note that CR4_PV32_RESTORE could in principle also benefit from getting
changed, but right now at least GNU ld doesn't merge sections with
relocations. Since this is a per-section decision, not an element-based
one, marking the items for merging could thus get in the way of merging
other same-sized entries originating from the same source files.

The sorting in the linker script additions is merely for cosmetic
reasons, easing manual inspection of the resulting binary.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Obviously the new assembler macros could be merged with the existing
ones, reducing redundancy. But I didn't want to do so right away, i.e.
before knowing whether the basic approach is deemed viable (and
worthwhile, considering it requires a relatively new gas). For the same
reason there are no C macro variants just yet (which overall would
provide larger savings: in particular all altcalls can have their
replacement insns folded).

--- a/xen/arch/x86/arch.mk
+++ b/xen/arch/x86/arch.mk
@@ -35,6 +35,11 @@ $(call as-option-add,CFLAGS,CC,\
 $(call as-option-add,CFLAGS,CC,\
     ".L1: .L2: .nops (.L2 - .L1)$$(comma)9",-DHAVE_AS_NOPS_DIRECTIVE)
 
+# Check to see whether the assembler supports insn emission to the absolute
+# section (via the .struct directive), to size them into an absolute symbol.
+$(call as-option-add,CFLAGS,CC,\
+    ".pushsection .text; .struct; lfence; .L0:; .if .L0 != 3; .error; .endif; .popsection",-DHAVE_AS_INSN_SIZE)
+
 CFLAGS += -mno-red-zone -fpic
 
 # Xen doesn't use SSE interally. If the compiler supports it, also skip the

--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -12,7 +12,7 @@
 #include <irq_vectors.h>
 
 ENTRY(entry_int82)
-        ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
         pushq $0
         movl  $HYPERCALL_VECTOR, 4(%rsp)
         SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */
@@ -248,7 +248,7 @@ ENTRY(compat_int80_direct_trap)
 compat_create_bounce_frame:
         ASSERT_INTERRUPTS_ENABLED
         mov   %fs,%edi
-        ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP
         testb $2,UREGS_cs+8(%rsp)
         jz    1f
         /* Push new frame at registered guest-OS stack base. */
@@ -295,7 +295,7 @@ compat_create_bounce_frame:
         movl  TRAPBOUNCE_error_code(%rdx),%eax
.Lft8:   movl  %eax,%fs:(%rsi)           # ERROR CODE
 1:
-        ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
         /* Rewrite our stack frame and return to guest-OS mode. */
         /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
         andl  $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\
@@ -341,7 +341,7 @@ compat_crash_page_fault_4:
         addl  $4,%esi
 compat_crash_page_fault:
.Lft14:  mov   %edi,%fs
-        ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
         movl  %esi,%edi
         call  show_page_walk
         jmp   dom_crash_sync_extable

--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -249,7 +249,7 @@ iret_exit_to_guest:
  */
 ENTRY(lstar_enter)
 #ifdef CONFIG_XEN_SHSTK
-        ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
+        MERGE_ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
 #endif
         /* sti could live here when we don't switch page tables below. */
         movq  8(%rsp),%rax /* Restore %rax. */
@@ -286,7 +286,7 @@ ENTRY(lstar_enter)
 /* See lstar_enter for entry register state. */
 ENTRY(cstar_enter)
 #ifdef CONFIG_XEN_SHSTK
-        ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
+        MERGE_ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
 #endif
         /* sti could live here when we don't switch page tables below. */
         CR4_PV32_RESTORE
@@ -324,14 +324,14 @@ ENTRY(cstar_enter)
 
 ENTRY(sysenter_entry)
 #ifdef CONFIG_XEN_SHSTK
-        ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
+        MERGE_ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
 #endif
         /* sti could live here when we don't switch page tables below. */
         pushq $FLAT_USER_SS
         pushq $0
         pushfq
 GLOBAL(sysenter_eflags_saved)
-        ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
         pushq $3 /* ring 3 null cs */
         pushq $0 /* null rip */
         pushq $0
@@ -386,7 +386,7 @@ UNLIKELY_END(sysenter_gpf)
         jmp   .Lbounce_exception
 
 ENTRY(int80_direct_trap)
-        ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
         pushq $0
         movl  $0x80, 4(%rsp)
         SAVE_ALL
@@ -514,7 +514,7 @@ __UNLIKELY_END(create_bounce_frame_bad_s
 
         subq  $7*8,%rsi
         movq  UREGS_ss+8(%rsp),%rax
-        ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP
         movq  VCPU_domain(%rbx),%rdi
         STORE_GUEST_STACK(rax,6)        # SS
         movq  UREGS_rsp+8(%rsp),%rax
@@ -552,7 +552,7 @@ __UNLIKELY_END(create_bounce_frame_bad_s
         STORE_GUEST_STACK(rax,1)        # R11
         movq  UREGS_rcx+8(%rsp),%rax
         STORE_GUEST_STACK(rax,0)        # RCX
-        ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
 
 #undef STORE_GUEST_STACK
 
@@ -594,11 +594,11 @@ domain_crash_page_fault_2x8:
 domain_crash_page_fault_1x8:
         addq  $8,%rsi
 domain_crash_page_fault_0x8:
-        ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
         movq  %rsi,%rdi
         call  show_page_walk
 ENTRY(dom_crash_sync_extable)
-        ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
         # Get out of the guest-save area of the stack.
         GET_STACK_END(ax)
         leaq  STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp
@@ -667,7 +667,7 @@ UNLIKELY_END(exit_cr3)
         iretq
 
 ENTRY(common_interrupt)
-        ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
         SAVE_ALL
 
         GET_STACK_END(14)
@@ -700,7 +700,7 @@ ENTRY(page_fault)
         movl  $TRAP_page_fault,4(%rsp)
 /* No special register assumptions. */
 GLOBAL(handle_exception)
-        ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
         SAVE_ALL
 
         GET_STACK_END(14)
@@ -728,7 +728,7 @@ handle_exception_saved:
         jz    exception_with_ints_disabled
 
 #if defined(CONFIG_PV32)
-        ALTERNATIVE_2 "jmp .Lcr4_pv32_done", \
+        MERGE_ALTERNATIVE_2 "jmp .Lcr4_pv32_done", \
             __stringify(mov VCPU_domain(%rbx), %rax), X86_FEATURE_XEN_SMEP, \
             __stringify(mov VCPU_domain(%rbx), %rax), X86_FEATURE_XEN_SMAP
 
@@ -908,7 +908,7 @@ ENTRY(entry_CP)
 ENTRY(double_fault)
         movl  $TRAP_double_fault,4(%rsp)
         /* Set AC to reduce chance of further SMAP faults */
-        ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP
         SAVE_ALL
 
         GET_STACK_END(14)
@@ -942,7 +942,7 @@ ENTRY(nmi)
         pushq $0
         movl  $TRAP_nmi,4(%rsp)
 handle_ist_exception:
-        ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+        MERGE_ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
         SAVE_ALL
 
         GET_STACK_END(14)

--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -192,6 +192,8 @@ SECTIONS
         * the address and the length of them to patch the kernel safely.
         */
        *(.altinstr_replacement)
+       *(SORT(.altinstr_replacement.?))
+       *(SORT(.altinstr_replacement.*))
 
 #ifdef EFI /* EFI wants to merge all of .init.* ELF doesn't. */
        . = ALIGN(SMP_CACHE_BYTES);

--- a/xen/include/asm-x86/alternative-asm.h
+++ b/xen/include/asm-x86/alternative-asm.h
@@ -55,6 +55,7 @@
     decl_orig(\oldinstr, repl_len(1) - orig_len)
 
     .pushsection .altinstructions, "a", @progbits
+
     altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature, \
                          orig_len, repl_len(1), pad_len
 
@@ -102,6 +103,88 @@
     .popsection
 .endm
 
+#ifdef HAVE_AS_INSN_SIZE
+
+.macro SECTION esz
+    .section .altinstr_replacement.\esz, "axM", @progbits, \esz
+.endm
+
+.macro MERGE_ALTERNATIVE oldinstr, newinstr, feature
+    decl_orig(\oldinstr, repl_len(1) - orig_len)
+
+    .pushsection .altinstructions, "a", @progbits
+
+    altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature, \
+                         orig_len, repl_len(1), pad_len
+
+    .struct
+    \newinstr
+.L\@_repl_l\(1):
+
+    .section .discard, "a", @progbits
+    /*
+     * Assembler-time checks:
+     * - total_len <= 255
+     * - \newinstr <= total_len
+     */
+    .byte total_len
+    .byte 0xff + .L\@_repl_l\(1) - total_len
+
+    .altmacro
+    SECTION %.L\@_repl_l\(1)
+    .noaltmacro
+
+    decl_repl(\newinstr, 1)
+
+    .popsection
+.endm
+
+.macro MERGE_ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+    decl_orig(\oldinstr, as_max(repl_len(1), repl_len(2)) - orig_len)
+
+    .pushsection .altinstructions, "a", @progbits
+
+    altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature1, \
+                         orig_len, repl_len(1), pad_len
+    altinstruction_entry .L\@_orig_s, .L\@_repl_s2, \feature2, \
+                         orig_len, repl_len(2), pad_len
+
+    .struct
+    \newinstr1
+.L\@_repl_l\(1):
+
+    .struct
+    \newinstr2
+.L\@_repl_l\(2):
+
+    .section .discard, "a", @progbits
+    /*
+     * Assembler-time checks:
+     * - total_len <= 255
+     * - \newinstr* <= total_len
+     */
+    .byte total_len
+    .byte 0xff + .L\@_repl_l\(1) - total_len
+    .byte 0xff + .L\@_repl_l\(2) - total_len
+
+    .altmacro
+    SECTION %.L\@_repl_l\(1)
+    .noaltmacro
+    decl_repl(\newinstr1, 1)
+
+    .altmacro
+    SECTION %.L\@_repl_l\(2)
+    .noaltmacro
+    decl_repl(\newinstr2, 2)
+
+    .popsection
+.endm
+
+#else /* !HAVE_AS_INSN_SIZE */
+# define MERGE_ALTERNATIVE   ALTERNATIVE
+# define MERGE_ALTERNATIVE_2 ALTERNATIVE_2
+#endif /* HAVE_AS_INSN_SIZE */
+
 #undef as_max
 #undef repl_len
 #undef decl_repl

--- a/xen/include/asm-x86/spec_ctrl_asm.h
+++ b/xen/include/asm-x86/spec_ctrl_asm.h
@@ -228,19 +228,19 @@
 /* Use after a VMEXIT from an HVM guest. */
 #define SPEC_CTRL_ENTRY_FROM_HVM                                        \
     ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM;           \
-    ALTERNATIVE "", DO_SPEC_CTRL_ENTRY_FROM_HVM,                        \
+    MERGE_ALTERNATIVE "", DO_SPEC_CTRL_ENTRY_FROM_HVM,                  \
         X86_FEATURE_SC_MSR_HVM
 
 /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
 #define SPEC_CTRL_ENTRY_FROM_PV                                         \
     ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV;            \
-    ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0),         \
+    MERGE_ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0),   \
         X86_FEATURE_SC_MSR_PV
 
 /* Use in interrupt/exception context. May interrupt Xen or PV context. */
 #define SPEC_CTRL_ENTRY_FROM_INTR                                       \
     ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV;            \
-    ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1),         \
+    MERGE_ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1),   \
         X86_FEATURE_SC_MSR_PV
 
 /* Use when exiting to Xen context. */
@@ -250,16 +250,16 @@
 /* Use when exiting to PV guest context. */
 #define SPEC_CTRL_EXIT_TO_PV                                            \
-    ALTERNATIVE "",                                                     \
+    MERGE_ALTERNATIVE "",                                               \
         DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV;              \
-    ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)),           \
+    MERGE_ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)),     \
         X86_FEATURE_SC_VERW_PV
 
 /* Use when exiting to HVM guest context. */
 #define SPEC_CTRL_EXIT_TO_HVM                                           \
-    ALTERNATIVE "",                                                     \
+    MERGE_ALTERNATIVE "",                                               \
         DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM;             \
-    ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)),           \
+    MERGE_ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)),     \
         X86_FEATURE_SC_VERW_HVM
 
 /*
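As a stand-alone illustration (not part of the patch; the section suffix "3",
the label name and the use of CLAC are made up for the example, and insn
emission to the absolute section requires a gas new enough to pass the
HAVE_AS_INSN_SIZE probe added to arch.mk above), the two assembler features
relied on combine like this:

    # Sizing an insn via the absolute section: after .struct the location
    # counter is absolute, so a label placed after the insn yields its
    # encoded length (3 bytes for CLAC) as an absolute symbol.
    .struct
    clac
.L_clac_len:

    # Mergeable sections: the "M" flag plus a matching entry size lets the
    # linker fold identical 3-byte entries, whether from this object file
    # or from others, into a single copy.
    .section .altinstr_replacement.3, "axM", @progbits, 3
    clac

The MERGE_ALTERNATIVE macros do exactly this per replacement: the .struct
block measures \newinstr, and the resulting absolute label is then used (via
.altmacro substitution) both as the name suffix and as the entry size of the
section the replacement is finally emitted into.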