x86: reduce code size of struct cpu_info member accesses

Instead of addressing these fields via the base of the stack (which
uniformly requires 4-byte displacements), address them from the end
(which for everything other than guest_cpu_user_regs requires just
1-byte ones). This yields a code size reduction somewhere between 8k
and 12k in my builds.

Signed-off-by: Jan Beulich
---
Note that just like patch 4 of the series this also isn't directly
related to the SMEP/SMAP issue, but is again just a result of things
realized while doing that work, and again depends on the earlier
patches to apply cleanly.
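Purely for illustration (not part of the patch), here is roughly what
the old and new GET_CURRENT() expand to. The constants below are made
up for the example; the real CPUINFO_* values are generated at build
time. guest_cpu_user_regs lies more than 128 bytes from the end of
struct cpu_info, so it still needs a 4-byte displacement, whereas the
remaining members sit within the last 128 bytes of the stack and their
end-relative offsets fit in a sign-extended byte:

        # Illustrative sketch only -- hypothetical layout: an 8k per-CPU
        # stack with a 200-byte struct cpu_info at its top, and current_vcpu
        # at a made-up offset 0x68 within cpu_info.
        .set STACK_SIZE,           0x2000
        .set CPUINFO_sizeof,       200
        .set CPUINFO_current_vcpu, 0x68

old:                                    # base-relative (old GET_CURRENT)
        movq $~(STACK_SIZE-1), %rbx
        andq %rsp, %rbx                 # %rbx = stack base
        movq STACK_SIZE-CPUINFO_sizeof+CPUINFO_current_vcpu(%rbx), %rbx
                                        # disp = 0x1fa0 -> 4-byte displacement

new:                                    # end-relative (new GET_CURRENT)
        movl $STACK_SIZE-1, %ebx
        orq  %rsp, %rbx                 # %rbx = last byte of the stack
        movq 1-CPUINFO_sizeof+CPUINFO_current_vcpu(%rbx), %rbx
                                        # disp = -95 -> 1-byte displacement
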
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -31,7 +31,7 @@
 #define CLGI   .byte 0x0F,0x01,0xDD
 
 ENTRY(svm_asm_do_resume)
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
 .Lsvm_do_resume:
         call svm_intr_assist
         mov  %rsp,%rdi
@@ -97,7 +97,7 @@ UNLIKELY_END(svm_trace)
 
         VMRUN
 
-        GET_CURRENT(%rax)
+        GET_CURRENT(ax)
         push %rdi
         push %rsi
         push %rdx
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -40,7 +40,7 @@ ENTRY(vmx_asm_vmexit_handler)
         push %r10
         push %r11
         push %rbx
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         push %rbp
         push %r12
         push %r13
@@ -113,7 +113,7 @@ UNLIKELY_END(realmode)
         BUG  /* vmx_vmentry_failure() shouldn't return. */
 
 ENTRY(vmx_asm_do_vmentry)
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         jmp  .Lvmx_do_vmentry
 
 .Lvmx_goto_emulator:
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -26,7 +26,7 @@ UNLIKELY_START(ne, msi_check)
 UNLIKELY_END(msi_check)
 
         movl  UREGS_rax(%rsp),%eax
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
 
         cmpl  $NR_hypercalls,%eax
         jae   compat_bad_hypercall
@@ -202,7 +202,7 @@ ENTRY(compat_restore_all_guest)
 /* This mustn't modify registers other than %rax. */
 ENTRY(cr4_pv32_restore)
         push  %rdx
-        GET_CPUINFO_FIELD(cr4, %rdx)
+        GET_CPUINFO_FIELD(cr4, dx)
         mov   (%rdx), %rax
         test  $X86_CR4_SMEP|X86_CR4_SMAP,%eax
         jnz   0f
@@ -245,7 +245,7 @@ ENTRY(cstar_enter)
         pushq %rcx
         pushq $0
         SAVE_VOLATILE TRAP_syscall
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         movq  VCPU_domain(%rbx),%rcx
         cmpb  $0,DOMAIN_is_32bit_pv(%rcx)
         je    switch_to_kernel
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -97,7 +97,7 @@ ENTRY(lstar_enter)
         pushq %rcx
         pushq $0
         SAVE_VOLATILE TRAP_syscall
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
         jz    switch_to_kernel
 
@@ -246,7 +246,7 @@ GLOBAL(sysenter_eflags_saved)
         pushq $0 /* null rip */
         pushq $0
         SAVE_VOLATILE TRAP_syscall
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         cmpb  $0,VCPU_sysenter_disables_events(%rbx)
         movq  VCPU_sysenter_addr(%rbx),%rax
         setne %cl
@@ -288,7 +288,7 @@ UNLIKELY_START(ne, msi_check)
         call  check_for_unexpected_msi
 UNLIKELY_END(msi_check)
 
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
 
         /* Check that the callback is non-null. */
         leaq  VCPU_int80_bounce(%rbx),%rdx
@@ -420,10 +420,10 @@ domain_crash_page_fault:
         call  show_page_walk
 ENTRY(dom_crash_sync_extable)
         # Get out of the guest-save area of the stack.
-        GET_STACK_BASE(%rax)
+        GET_STACK_END(ax)
         leaq  STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp
         # create_bounce_frame() temporarily clobbers CS.RPL. Fix up.
-        __GET_CURRENT(%rax)
+        __GET_CURRENT(ax)
         movq  VCPU_domain(%rax),%rax
         testb $1,DOMAIN_is_32bit_pv(%rax)
         setz  %al
@@ -441,7 +441,7 @@ ENTRY(common_interrupt)
 
 /* No special register assumptions. */
 ENTRY(ret_from_intr)
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         testb $3,UREGS_cs(%rsp)
         jz    restore_all_xen
         movq  VCPU_domain(%rbx),%rax
@@ -455,7 +455,7 @@ ENTRY(page_fault)
 GLOBAL(handle_exception)
         SAVE_ALL CLAC
 handle_exception_saved:
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp)
         jz    exception_with_ints_disabled
 
@@ -649,7 +649,7 @@ handle_ist_exception:
         testb $3,UREGS_cs(%rsp)
         jz    1f
         /* Interrupted guest context. Copy the context to stack bottom. */
-        GET_CPUINFO_FIELD(guest_cpu_user_regs,%rdi)
+        GET_CPUINFO_FIELD(guest_cpu_user_regs,di)
         movq  %rsp,%rsi
         movl  $UREGS_kernel_sizeof/8,%ecx
         movq  %rdi,%rsp
@@ -664,7 +664,7 @@ handle_ist_exception:
         /* We want to get straight to the IRET on the NMI exit path. */
         testb $3,UREGS_cs(%rsp)
         jz    restore_all_xen
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         /* Send an IPI to ourselves to cover for the lack of event checking. */
         movl  VCPU_processor(%rbx),%eax
         shll  $IRQSTAT_shift,%eax
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -127,19 +127,19 @@ void ret_from_intr(void);
         UNLIKELY_DONE(mp, tag);                 \
         __UNLIKELY_END(tag)
 
-#define STACK_CPUINFO_FIELD(field) (STACK_SIZE-CPUINFO_sizeof+CPUINFO_##field)
-#define GET_STACK_BASE(reg)                       \
-        movq $~(STACK_SIZE-1),reg;                \
-        andq %rsp,reg
+#define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field)
+#define GET_STACK_END(reg)                        \
+        movl $STACK_SIZE-1, %e##reg;              \
+        orq  %rsp, %r##reg
 
 #define GET_CPUINFO_FIELD(field, reg)             \
-        GET_STACK_BASE(reg);                      \
-        addq $STACK_CPUINFO_FIELD(field),reg
+        GET_STACK_END(reg);                       \
+        addq $STACK_CPUINFO_FIELD(field), %r##reg
 
 #define __GET_CURRENT(reg)                        \
-        movq STACK_CPUINFO_FIELD(current_vcpu)(reg),reg
+        movq STACK_CPUINFO_FIELD(current_vcpu)(%r##reg), %r##reg
 
 #define GET_CURRENT(reg)                          \
-        GET_STACK_BASE(reg);                      \
+        GET_STACK_END(reg);                       \
         __GET_CURRENT(reg)
 
 #ifndef NDEBUG
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -55,7 +55,7 @@ static inline struct cpu_info *get_cpu_i
     register unsigned long sp asm("rsp");
 #endif
 
-    return (struct cpu_info *)((sp & ~(STACK_SIZE-1)) + STACK_SIZE) - 1;
+    return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
 }
 
 #define get_current()       (get_cpu_info()->current_vcpu)