diff --git a/extras/mini-os/arch/x86/x86_64.S b/extras/mini-os/arch/x86/x86_64.S
index a65e5d5..b4f351c 100644
--- a/extras/mini-os/arch/x86/x86_64.S
+++ b/extras/mini-os/arch/x86/x86_64.S
@@ -42,8 +42,10 @@ hypercall_page:
 
 NMI_MASK = 0x80000000
 
+#define RBX 40
 #define RDI 112
 #define ORIG_RAX 120 /* + error_code */
+#define RIP 128
 #define EFLAGS 144
 
 .macro RESTORE_ALL
@@ -147,7 +149,43 @@ error_call_handler:
 
 
 ENTRY(hypervisor_callback)
-        zeroentry hypervisor_callback2
+        movq (%rsp),%rcx
+        movq 8(%rsp),%r11
+        addq $0x10,%rsp /* skip rcx and r11 */
+        pushq $0 /* push error code/oldrax */
+        pushq %rax /* push real oldrax to the rdi slot */
+        leaq hypervisor_callback2(%rip), %rax
+
+        /* rdi slot contains rax, oldrax contains error code */
+        cld
+        subq $14*8,%rsp
+        movq %rsi,13*8(%rsp)
+        movq 14*8(%rsp),%rsi /* load rax from rdi slot */
+        movq %rdx,12*8(%rsp)
+        movq %rcx,11*8(%rsp)
+        movq %rsi,10*8(%rsp) /* store rax */
+        movq %r8, 9*8(%rsp)
+        movq %r9, 8*8(%rsp)
+        movq %r10,7*8(%rsp)
+        movq %r11,6*8(%rsp)
+        movq %rbx,5*8(%rsp)
+        movq %rbp,4*8(%rsp)
+        movq %r12,3*8(%rsp)
+        movq %r13,2*8(%rsp)
+        movq %r14,1*8(%rsp)
+        movq %r15,(%rsp)
+        movq %rdi, RDI(%rsp)
+
+        # check against re-entrance
+        movq RIP(%rsp),%rbx
+        cmpq $scrit,%rbx
+        jb 10f
+        cmpq $ecrit,%rbx
+        jb critical_region_fixup
+
+10:     movq RBX(%rsp),%rbx # restore rbx
+        movq %rsp,%rdi
+        call *%rax
 
 ENTRY(hypervisor_callback2)
         movq %rdi, %rsp
@@ -172,17 +210,40 @@ scrit: /**** START OF CRITICAL REGION ****/
 
 14:     XEN_LOCKED_BLOCK_EVENTS(%rsi)
         XEN_PUT_VCPU_INFO(%rsi)
-        subq $6*8,%rsp
-        movq %rbx,5*8(%rsp)
-        movq %rbp,4*8(%rsp)
-        movq %r12,3*8(%rsp)
-        movq %r13,2*8(%rsp)
-        movq %r14,1*8(%rsp)
-        movq %r15,(%rsp)
-        movq %rsp,%rdi # set the argument again
+        pushq %rbx
+        pushq %rbp
+        pushq %r12
+        pushq %r13
+        pushq %r14
+        pushq %r15
+        movq %rsp,%rdi # set the argument again
         jmp 11b
 ecrit:  /**** END OF CRITICAL REGION ****/
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame.
+critical_region_fixup:
+        addq $critical_fixup_table - scrit, %rbx
+        movzbq (%rbx),%rbx # %rbx contains num bytes popped
+        mov %rsp,%rsi
+        add %rbx,%rsi # %esi points at end of src region
+        mov %rsp,%rdi
+        add $0xa8,%rdi # %edi points at end of dst region
+        mov %rbx,%rcx
+        shr $3,%rcx # convert bytes into count of 64-bit entities
+        je 16f # skip loop if nothing to copy
+15:     subq $8,%rsi # pre-decrementing copy loop
+        subq $8,%rdi
+        movq (%rsi),%rbx
+        movq %rbx,(%rdi)
+        loop 15b
+16:     movq %rdi,%rsp # final %rdi is top of merged stack
+        jmp 10b
 
 retint_kernel:
 retint_restore_args:
 
@@ -210,6 +271,42 @@
 
 error_exit:
         jmp retint_kernel
 
+critical_fixup_table:
+        .byte 0x30,0x30,0x30 # testb $0xff,(%rsi)
+        .byte 0x30,0x30 # jne 14f
+        .byte 0x30,0x30,0x30,0x30 # mov (%rsp),%r11
+        .byte 0x30,0x30,0x30,0x30,0x30 # mov 0x8(%rsp),%r10
+        .byte 0x30,0x30,0x30,0x30,0x30 # mov 0x10(%rsp),%r9
+        .byte 0x30,0x30,0x30,0x30,0x30 # mov 0x18(%rsp),%r8
+        .byte 0x30,0x30,0x30,0x30,0x30 # mov 0x20(%rsp),%rax
+        .byte 0x30,0x30,0x30,0x30,0x30 # mov 0x28(%rsp),%rcx
+        .byte 0x30,0x30,0x30,0x30,0x30 # mov 0x30(%rsp),%rdx
+        .byte 0x30,0x30,0x30,0x30,0x30 # mov 0x38(%rsp),%rsi
+        .byte 0x30,0x30,0x30,0x30,0x30 # mov 0x40(%rsp),%rdi
+        .byte 0x30,0x30,0x30,0x30 # add $0x50,%rsp
+        .byte 0x80,0x80,0x80,0x80 # testl $NMI_MASK,2*8(%rsp)
+        .byte 0x80,0x80,0x80,0x80
+        .byte 0x80,0x80 # jne 2f
+        .byte 0x80,0x80,0x80,0x80 # testb $1,(xen_features+XENFEAT_supervisor_mode_kernel)
+        .byte 0x80,0x80,0x80,0x80
+        .byte 0x80,0x80 # jne 1f
+        .byte 0x80,0x80,0x80,0x80,0x80 # orb $0x3,0x8(%rsp)
+        .byte 0x80,0x80,0x80,0x80,0x80 # orb $0x3,0x20(%rsp)
+        .byte 0x80,0x80 # iretq
+        .byte 0x80,0x80,0x80,0x80 # andl $~NMI_MASK,0x10(%rsp)
+        .byte 0x80,0x80,0x80,0x80
+        .byte 0x80,0x80 # pushq $0x0
+        .byte 0x78,0x78,0x78,0x78,0x78 # jmp hypercall_page + (__HYPERVISOR_iret * 32)
+        .byte 0x30,0x30,0x30,0x30 # movb $0x1,0x1(%rsi)
+        .byte 0x30 # push %rbx
+        .byte 0x28 # push %rbp
+        .byte 0x20,0x20 # push %r12
+        .byte 0x18,0x18 # push %r13
+        .byte 0x10,0x10 # push %r14
+        .byte 0x08,0x08 # push %r15
+        .byte 0x00,0x00,0x00 # mov %rsp,%rdi
+        .byte 0x00,0x00,0x00,0x00,0x00 # jmpq 11b
+
 ENTRY(failsafe_callback)
         popq %rcx