[Xen-devel] [PATCH] x86-64: syscall/sysenter support for 32-bit apps

This adds syscall/sysenter support for 32-bit applications, both in 64-bit PV guests and in 32on64 guests. I implemented both a hypercall interface and MSR emulation, though I'm not really sure both mechanisms need to stay. Note that correct save/restore/migrate now depends on more than just guest_context being preserved as guest state (namely on the new fields holding the callback addresses); that part isn't implemented yet (and I likely won't do it). Since the 32-bit kernel doesn't make use of syscall (it would now be possible to do so when running on a 64-bit hypervisor), the compat-mode guest code path for syscall wasn't tested. A sketch of the intended guest-side registration follows below.
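For illustration, this is roughly how a 64-bit PV guest kernel might use the new hypercall interface — a minimal sketch only, assuming hypothetical entry point symbols (entry_syscall32 etc.) and the guest's usual hypercall wrappers; struct callback_register, CALLBACKOP_register and CALLBACKF_mask_events are the existing public/callback.h interfaces as extended by this patch:

    /*
     * Minimal sketch: a 64-bit PV guest registering entry points for its
     * 32-bit userspace via the new callback types.  The entry_* and
     * sysexit_stub symbols are hypothetical guest names.
     */
    #include <xen/interface/callback.h>  /* guest wrapper headers assumed */

    extern void entry_syscall32(void);   /* SYSCALL from 32-bit userspace  */
    extern void entry_sysenter32(void);  /* SYSENTER from 32-bit userspace */
    extern void sysexit_stub(void);      /* return address for the sysenter frame */

    static int register_compat_callbacks(void)
    {
        struct callback_register cbs[] = {
            { .type = CALLBACKTYPE_syscall32,
              .flags = CALLBACKF_mask_events,  /* enter with events masked */
              .address = (unsigned long)entry_syscall32 },
            { .type = CALLBACKTYPE_sysenter,
              .flags = 0,                      /* events remain enabled */
              .address = (unsigned long)entry_sysenter32 },
            { .type = CALLBACKTYPE_sysexit,
              .flags = 0,                      /* clear: sysenter callers are 32-bit */
              .address = (unsigned long)sysexit_stub },
        };
        unsigned int i;
        int rc = 0;

        for ( i = 0; rc == 0 && i < ARRAY_SIZE(cbs); i++ )
            /* Older hypervisors reject the new types; callers must cope. */
            rc = HYPERVISOR_callback_op(CALLBACKOP_register, &cbs[i]);
        return rc;
    }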
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2007-07-03/xen/arch/x86/domain.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/domain.c	2007-06-22 16:57:45.000000000 +0200
+++ 2007-07-03/xen/arch/x86/domain.c	2007-07-03 10:39:13.000000000 +0200
@@ -395,6 +395,12 @@ int vcpu_initialise(struct vcpu *v)
     v->arch.perdomain_ptes =
         d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
 
+#ifdef __x86_64__
+    v->arch.sysexit_cs = 3;
+    v->arch.syscall_eflags_mask = X86_EFLAGS_DF|X86_EFLAGS_TF|X86_EFLAGS_NT|
+                                  X86_EFLAGS_RF|X86_EFLAGS_VM;
+#endif
+
     return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
 }
 
@@ -607,7 +613,18 @@ int arch_set_info_guest(
         v->arch.flags |= TF_kernel_mode;
 
     if ( !compat )
+    {
         memcpy(&v->arch.guest_context, c.nat, sizeof(*c.nat));
+#ifdef __x86_64__
+        /*
+         * Despite not being correct, be backwards compatible - most
+         * importantly in order to prevent the guest from being crashed
+         * due to use of syscall from compatibility mode when the kernel
+         * didn't set the compatibility mode callback.
+         */
+        v->arch.syscall32_callback_eip = c.nat->syscall_callback_eip;
+#endif
+    }
 #ifdef CONFIG_COMPAT
     else
     {
@@ -1274,7 +1291,9 @@ void context_switch(struct vcpu *prev, s
             local_flush_tlb_one(GDT_VIRT_START(next) +
                                 FIRST_RESERVED_GDT_BYTE);
 
-        if ( !is_pv_32on64_vcpu(next) == !(efer & EFER_SCE) )
+        if ( (!is_pv_32on64_vcpu(next)
+              || (next->arch.syscall32_callback_cs & ~3)) ==
+             !(efer & EFER_SCE) )
             write_efer(efer ^ EFER_SCE);
     }
 #endif
Index: 2007-07-03/xen/arch/x86/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/traps.c	2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/traps.c	2007-07-04 13:21:20.000000000 +0200
@@ -609,16 +609,21 @@ static int emulate_forced_invalid_op(str
         clear_bit(X86_FEATURE_DE,  &d);
         clear_bit(X86_FEATURE_PSE, &d);
         clear_bit(X86_FEATURE_PGE, &d);
+        if ( !cpu_has_sep )
+            clear_bit(X86_FEATURE_SEP, &d);
+#ifdef __i386__
         if ( !supervisor_mode_kernel )
             clear_bit(X86_FEATURE_SEP, &d);
+#endif
         if ( !IS_PRIV(current->domain) )
             clear_bit(X86_FEATURE_MTRR, &d);
     }
     else if ( regs->eax == 0x80000001 )
     {
         /* Modify Feature Information. */
-        if ( is_pv_32bit_vcpu(current) )
-            clear_bit(X86_FEATURE_SYSCALL % 32, &d);
+#ifdef __i386__
+        clear_bit(X86_FEATURE_SYSCALL % 32, &d);
+#endif
         clear_bit(X86_FEATURE_RDTSCP % 32, &d);
     }
     else
@@ -1695,6 +1700,8 @@ static int emulate_privileged_op(struct 
         break;
 
     case 0x30: /* WRMSR */
+        data = regs->eax;
+        res = ((u64)regs->edx << 32) | data;
         switch ( regs->ecx )
         {
 #ifdef CONFIG_X86_64
@@ -1703,24 +1710,87 @@ static int emulate_privileged_op(struct 
                 goto fail;
             if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) )
                 goto fail;
-            v->arch.guest_context.fs_base =
-                ((u64)regs->edx << 32) | regs->eax;
+            v->arch.guest_context.fs_base = res;
             break;
         case MSR_GS_BASE:
             if ( is_pv_32on64_vcpu(v) )
                 goto fail;
            if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) )
                 goto fail;
-            v->arch.guest_context.gs_base_kernel =
-                ((u64)regs->edx << 32) | regs->eax;
+            v->arch.guest_context.gs_base_kernel = res;
             break;
         case MSR_SHADOW_GS_BASE:
             if ( is_pv_32on64_vcpu(v) )
                 goto fail;
             if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) )
                 goto fail;
-            v->arch.guest_context.gs_base_user =
-                ((u64)regs->edx << 32) | regs->eax;
+            v->arch.guest_context.gs_base_user = res;
+            break;
+        case MSR_STAR:
+            if ( is_pv_32on64_vcpu(v) )
+            {
+                v->arch.syscall32_callback_eip = data;
+                v->arch.syscall32_callback_cs = (uint16_t)regs->edx;
+                fixup_guest_code_selector(v->domain,
+                                          v->arch.syscall32_callback_cs);
+            }
+            break;
+        case MSR_LSTAR:
+            if ( is_pv_32on64_vcpu(v) || !is_canonical_address(res) )
+                goto fail;
+            v->arch.guest_context.syscall_callback_eip = res;
+            break;
+        case MSR_CSTAR:
+            if ( is_pv_32on64_vcpu(v) || !is_canonical_address(res) )
+                goto fail;
+            v->arch.syscall32_callback_eip = res;
+            break;
+        case MSR_SYSCALL_MASK:
+            if ( is_pv_32on64_vcpu(v) || (uint32_t)regs->edx != 0 )
+                goto fail;
+            v->arch.syscall_eflags_mask = data &
+                ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
+            if ( data & X86_EFLAGS_IF )
+            {
+                set_bit(_VGCF_syscall_disables_events,
+                        &v->arch.guest_context.flags);
+                v->arch.syscall32_disables_events = 1;
+            }
+            else
+            {
+                clear_bit(_VGCF_syscall_disables_events,
+                          &v->arch.guest_context.flags);
+                v->arch.syscall32_disables_events = 0;
+            }
+            break;
+        case MSR_IA32_SYSENTER_CS:
+            if ( is_pv_32on64_vcpu(v) )
+            {
+                v->arch.sysenter_callback_cs = data;
+                fixup_guest_code_selector(v->domain,
+                                          v->arch.sysenter_callback_cs);
+                /*
+                 * While this doesn't match real SYSENTER behavior, the guest
+                 * generally doesn't have a need to switch stacks (or anything
+                 * else that needs to keep interrupts disabled). If the guest
+                 * really needs interrupts disabled on entry, it can still use
+                 * the corresponding hypercall.
+                 */
+                v->arch.sysenter_disables_events = 0;
+            }
+            v->arch.sysexit_cs = (data + 16) | 3;
+            break;
+        case MSR_IA32_SYSENTER_EIP:
+            if ( !is_pv_32on64_vcpu(v) && !is_canonical_address(res) )
+                goto fail;
+            v->arch.sysenter_callback_eip = is_pv_32on64_vcpu(v) ? data : res;
+            if ( !is_pv_32on64_vcpu(v) )
+                /* See comment above. */
+                v->arch.sysenter_disables_events = 0;
+            break;
+        case MSR_IA32_SYSENTER_ESP:
+            if ( !is_pv_32on64_vcpu(v) && !is_canonical_address(res) )
+                goto fail;
             break;
 #endif
         default:
@@ -1758,6 +1828,53 @@ static int emulate_privileged_op(struct 
             regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL;
             regs->edx = v->arch.guest_context.gs_base_user >> 32;
             break;
+        case MSR_STAR:
+            if ( is_pv_32on64_vcpu(v) )
+            {
+                regs->eax = v->arch.syscall32_callback_eip;
+                regs->edx = v->arch.syscall32_callback_cs |
+                            (FLAT_COMPAT_USER_CS << 16);
+            }
+            else
+                regs->edx = FLAT_KERNEL_CS64 | (FLAT_USER_CS64 << 16);
+            break;
+        case MSR_LSTAR:
+            if ( is_pv_32on64_vcpu(v) )
+                goto fail;
+            regs->eax = (uint32_t)v->arch.guest_context.syscall_callback_eip;
+            regs->edx = v->arch.guest_context.syscall_callback_eip >> 32;
+            break;
+        case MSR_CSTAR:
+            if ( is_pv_32on64_vcpu(v) )
+                goto fail;
+            regs->eax = (uint32_t)v->arch.syscall32_callback_eip;
+            regs->edx = v->arch.syscall32_callback_eip >> 32;
+            break;
+        case MSR_SYSCALL_MASK:
+            if ( is_pv_32on64_vcpu(v) )
+                goto fail;
+            data = v->arch.syscall_eflags_mask;
+            if ( test_bit(_VGCF_syscall_disables_events,
+                          &v->arch.guest_context.flags) )
+                data |= X86_EFLAGS_IF;
+            regs->eax = data;
+            regs->edx = 0;
+            break;
+        case MSR_IA32_SYSENTER_CS:
+            if ( is_pv_32on64_vcpu(v) )
+                regs->eax = v->arch.sysenter_callback_cs;
+            else
+                regs->eax = FLAT_KERNEL_CS64;
+            regs->edx = 0;
+            break;
+        case MSR_IA32_SYSENTER_EIP:
+            regs->eax = (uint32_t)v->arch.sysenter_callback_eip;
+            regs->edx = v->arch.sysenter_callback_eip >> 32;
+            break;
+        case MSR_IA32_SYSENTER_ESP:
+            regs->eax = (uint32_t)v->arch.guest_context.kernel_sp;
+            regs->edx = v->arch.guest_context.kernel_sp >> 32;
+            break;
 #endif
         case MSR_EFER:
             if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
@@ -2026,6 +2143,13 @@ asmlinkage int do_debug(struct cpu_user_
 
     if ( !guest_mode(regs) )
     {
+#ifdef __x86_64__
+        /*
+         * Single stepping across sysenter must not result in the single step
+         * flag being lost: record it here for create_bounce_frame to pick up.
+         */
+        v->arch.eflags_mask |= (regs->eflags & EF_TF);
+#endif
         /* Clear TF just for absolute sanity. */
         regs->eflags &= ~EF_TF;
         /*
Index: 2007-07-03/xen/arch/x86/x86_32/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_32/traps.c	2007-06-22 16:57:45.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_32/traps.c	2007-07-03 10:39:14.000000000 +0200
@@ -329,12 +329,19 @@ static long register_guest_callback(stru
         break;
 
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
-    case CALLBACKTYPE_sysenter:
+    case CALLBACKTYPE_sysenter_deprecated:
         if ( !cpu_has_sep )
             ret = -EINVAL;
         else if ( on_each_cpu(do_update_sysenter, &reg->address, 1, 1) != 0 )
             ret = -EIO;
         break;
+
+    case CALLBACKTYPE_sysenter:
+        if ( !cpu_has_sep )
+            ret = -EINVAL;
+        else
+            do_update_sysenter(&reg->address);
+        break;
 #endif
 
     case CALLBACKTYPE_nmi:
@@ -358,6 +365,7 @@ static long unregister_guest_callback(st
     case CALLBACKTYPE_event:
     case CALLBACKTYPE_failsafe:
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
+    case CALLBACKTYPE_sysenter_deprecated:
     case CALLBACKTYPE_sysenter:
 #endif
         ret = -EINVAL;
Index: 2007-07-03/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/asm-offsets.c	2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/asm-offsets.c	2007-07-04 12:51:10.000000000 +0200
@@ -71,6 +71,22 @@ void __dummy__(void)
            arch.guest_context.failsafe_callback_cs);
     OFFSET(VCPU_syscall_addr, struct vcpu,
            arch.guest_context.syscall_callback_eip);
+    OFFSET(VCPU_syscall32_addr, struct vcpu, arch.syscall32_callback_eip);
+    OFFSET(VCPU_syscall32_sel, struct vcpu, arch.syscall32_callback_cs);
+    OFFSET(VCPU_syscall32_disables_events, struct vcpu,
+           arch.syscall32_disables_events);
+    OFFSET(VCPU_syscall_eflags_mask, struct vcpu, arch.syscall_eflags_mask);
+    OFFSET(VCPU_sysenter_addr, struct vcpu, arch.sysenter_callback_eip);
+    OFFSET(VCPU_sysenter_sel, struct vcpu, arch.sysenter_callback_cs);
+    OFFSET(VCPU_sysenter_disables_events, struct vcpu,
+           arch.sysenter_disables_events);
+    OFFSET(VCPU_sysexit_addr, struct vcpu, arch.sysexit_eip);
+    OFFSET(VCPU_sysexit_sel, struct vcpu, arch.sysexit_cs);
+    OFFSET(VCPU_eflags_mask, struct vcpu, arch.eflags_mask);
+    OFFSET(VCPU_gp_fault_addr, struct vcpu,
+           arch.guest_context.trap_ctxt[TRAP_gp_fault].address);
+    OFFSET(VCPU_gp_fault_sel, struct vcpu,
+           arch.guest_context.trap_ctxt[TRAP_gp_fault].cs);
     OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp);
     OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
     OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
Index: 2007-07-03/xen/arch/x86/x86_64/compat/entry.S
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/compat/entry.S	2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/compat/entry.S	2007-07-04 13:26:46.000000000 +0200
@@ -188,6 +188,39 @@ ENTRY(compat_post_handle_exception)
         movb  $0,TRAPBOUNCE_flags(%rdx)
         jmp   compat_test_all_events
 
+ENTRY(compat_syscall)
+        cmpb  $0,VCPU_syscall32_disables_events(%rbx)
+        movzwl VCPU_syscall32_sel(%rbx),%esi
+        movq  VCPU_syscall32_addr(%rbx),%rax
+        setne %cl
+        leaq  VCPU_trap_bounce(%rbx),%rdx
+        testl $~3,%esi
+        leal  (,%rcx,TBF_INTERRUPT),%ecx
+        jz    2f
+1:      movq  %rax,TRAPBOUNCE_eip(%rdx)
+        movw  %si,TRAPBOUNCE_cs(%rdx)
+        movb  %cl,TRAPBOUNCE_flags(%rdx)
+        call  compat_create_bounce_frame
+        jmp   compat_test_all_events
+2:      movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+        movq  VCPU_gp_fault_addr(%rbx),%rax
+        movzwl VCPU_gp_fault_sel(%rbx),%esi
+        movb  $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+        movl  $0,TRAPBOUNCE_error_code(%rdx)
+        jmp   1b
+
+ENTRY(compat_sysenter)
+        cmpl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+        movzwl VCPU_sysenter_sel(%rbx),%eax
+        movzwl VCPU_gp_fault_sel(%rbx),%ecx
+        cmovel %ecx,%eax
+        testl $~3,%eax
+        movl  $FLAT_COMPAT_USER_SS,UREGS_ss(%rsp)
+        cmovzl %ecx,%eax
+        movw  %ax,TRAPBOUNCE_cs(%rdx)
+        call  compat_create_bounce_frame
+        jmp   compat_test_all_events
+
 ENTRY(compat_int80_direct_trap)
         call  compat_create_bounce_frame
         jmp   compat_test_all_events
@@ -230,7 +263,9 @@ compat_create_bounce_frame:
         setz  %ch                       # %ch == !saved_upcall_mask
         movl  UREGS_eflags+8(%rsp),%eax
         andl  $~X86_EFLAGS_IF,%eax
-        shlb  $1,%ch                    # Bit 9 (EFLAGS.IF)
+        addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
+        orl   VCPU_eflags_mask(%rbx),%eax
+        movl  $0,VCPU_eflags_mask(%rbx)
         orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
.Lft6:  movl  %eax,%fs:2*4(%rsi)        # EFLAGS
         movl  UREGS_rip+8(%rsp),%eax
Index: 2007-07-03/xen/arch/x86/x86_64/compat/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/compat/traps.c	2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/compat/traps.c	2007-07-03 11:54:46.000000000 +0200
@@ -160,12 +160,35 @@ static long compat_register_guest_callba
                   &v->arch.guest_context.flags);
         break;
 
+    case CALLBACKTYPE_syscall:
+        v->arch.syscall32_callback_cs = reg->address.cs;
+        v->arch.syscall32_callback_eip = reg->address.eip;
+        v->arch.syscall32_disables_events =
+            (reg->flags & CALLBACKF_mask_events) != 0;
+        if ( v->arch.syscall32_callback_cs & ~3 )
+            write_efer(read_efer() | EFER_SCE);
+        else
+            write_efer(read_efer() & ~EFER_SCE);
+        break;
+
+    case CALLBACKTYPE_sysenter:
+        v->arch.sysenter_callback_cs = reg->address.cs;
+        v->arch.sysenter_callback_eip = reg->address.eip;
+        v->arch.sysenter_disables_events =
+            (reg->flags & CALLBACKF_mask_events) != 0;
+        break;
+
+    case CALLBACKTYPE_sysexit:
+        v->arch.sysexit_cs = reg->address.cs | 3;
+        v->arch.sysexit_eip = reg->address.eip;
+        break;
+
     case CALLBACKTYPE_nmi:
         ret = register_guest_nmi_callback(reg->address.eip);
         break;
 
     default:
-        ret = -EINVAL;
+        ret = -ENOSYS;
         break;
     }
 
@@ -178,12 +201,20 @@ static long compat_unregister_guest_call
 
     switch ( unreg->type )
     {
+    case CALLBACKTYPE_event:
+    case CALLBACKTYPE_failsafe:
+    case CALLBACKTYPE_syscall:
+    case CALLBACKTYPE_sysenter:
+    case CALLBACKTYPE_sysexit:
+        ret = -EINVAL;
+        break;
+
     case CALLBACKTYPE_nmi:
         ret = unregister_guest_nmi_callback();
         break;
 
     default:
-        ret = -EINVAL;
+        ret = -ENOSYS;
         break;
     }
Index: 2007-07-03/xen/arch/x86/x86_64/entry.S
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/entry.S	2007-07-03 10:35:37.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/entry.S	2007-07-04 12:48:33.000000000 +0200
@@ -26,15 +26,19 @@
         ALIGN
 /* %rbx: struct vcpu */
 switch_to_kernel:
-        leaq  VCPU_trap_bounce(%rbx),%rdx
+        cmpw  $FLAT_USER_CS32,UREGS_cs(%rsp)
         movq  VCPU_syscall_addr(%rbx),%rax
+        leaq  VCPU_trap_bounce(%rbx),%rdx
+        cmoveq VCPU_syscall32_addr(%rbx),%rax
+        btl   $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
         movq  %rax,TRAPBOUNCE_eip(%rdx)
-        movb  $0,TRAPBOUNCE_flags(%rdx)
-        bt    $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
-        jnc   1f
-        movb  $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
-1:      call  create_bounce_frame
-        andl  $~X86_EFLAGS_DF,UREGS_eflags(%rsp)
+        setc  %cl
+        leal  (,%rcx,TBF_INTERRUPT),%ecx
+        movb  %cl,TRAPBOUNCE_flags(%rdx)
+        call  create_bounce_frame
+        movl  VCPU_syscall_eflags_mask(%rbx),%eax
+        notl  %eax
+        andl  %eax,UREGS_eflags(%rsp)
         jmp   test_all_events
 
 /* %rbx: struct vcpu, interrupts disabled */
@@ -47,7 +51,7 @@ restore_all_guest:
         addq  $8,%rsp
         popq  %rcx                    # RIP
         popq  %r11                    # CS
-        cmpw  $FLAT_KERNEL_CS32,%r11
+        cmpw  $FLAT_USER_CS32,%r11
         popq  %r11                    # RFLAGS
         popq  %rsp                    # RSP
         je    1f
@@ -127,6 +131,9 @@ ENTRY(syscall_enter)
         movl  $TRAP_syscall,4(%rsp)
         SAVE_ALL
         GET_CURRENT(%rbx)
+        movq  VCPU_domain(%rbx),%rcx
+        testb $1,DOMAIN_is_32bit_pv(%rcx)
+        jnz   compat_syscall
         testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
         jz    switch_to_kernel
 
@@ -224,6 +231,41 @@ bad_hypercall:
         movq  $-ENOSYS,UREGS_rax(%rsp)
         jmp   test_all_events
 
+ENTRY(sysenter_entry)
+        sti
+        pushq $FLAT_USER_SS
+        pushq $0
+        pushfq
+        pushq $0
+        pushq $0
+        pushq $0
+        movl  $TRAP_syscall,4(%rsp)
+        SAVE_ALL
+        GET_CURRENT(%rbx)
+        movq  VCPU_sysexit_addr(%rbx),%rax
+        movzwl VCPU_sysexit_sel(%rbx),%edx
+        cmpb  $0,VCPU_sysenter_disables_events(%rbx)
+        movq  %rax,UREGS_rip(%rsp)
+        movl  %edx,UREGS_cs(%rsp)
+        movq  VCPU_sysenter_addr(%rbx),%rax
+        setne %cl
+        leaq  VCPU_trap_bounce(%rbx),%rdx
+        testq %rax,%rax
+        leal  (,%rcx,TBF_INTERRUPT),%ecx
+        jz    2f
+1:      movq  VCPU_domain(%rbx),%rdi
+        movq  %rax,TRAPBOUNCE_eip(%rdx)
+        movb  %cl,TRAPBOUNCE_flags(%rdx)
+        testb $1,DOMAIN_is_32bit_pv(%rdi)
+        jnz   compat_sysenter
+        call  create_bounce_frame
+        jmp   test_all_events
+2:      movl  %eax,TRAPBOUNCE_error_code(%rdx)
+        movq  VCPU_gp_fault_addr(%rbx),%rax
+        movb  $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+        movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+        jmp   1b
+
 ENTRY(int80_direct_trap)
         pushq $0
         SAVE_ALL
@@ -296,9 +338,11 @@ create_bounce_frame:
         shrq  $32,%rax
         testb $0xFF,%al               # Bits 0-7: saved_upcall_mask
         setz  %ch                     # %ch == !saved_upcall_mask
-        movq  UREGS_eflags+8(%rsp),%rax
-        andq  $~X86_EFLAGS_IF,%rax
-        shlb  $1,%ch                  # Bit 9 (EFLAGS.IF)
+        movl  UREGS_eflags+8(%rsp),%eax
+        andl  $~X86_EFLAGS_IF,%eax
+        addb  %ch,%ch                 # Bit 9 (EFLAGS.IF)
+        orl   VCPU_eflags_mask(%rbx),%eax
+        movl  $0,VCPU_eflags_mask(%rbx)
         orb   %ch,%ah                 # Fold EFLAGS.IF into %eax
.Lft5:  movq  %rax,16(%rsi)           # RFLAGS
         movq  UREGS_rip+8(%rsp),%rax
Index: 2007-07-03/xen/arch/x86/x86_64/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/traps.c	2007-07-03 10:34:30.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/traps.c	2007-07-03 12:06:05.000000000 +0200
@@ -22,6 +22,7 @@
 #include <public/callback.h>
 
 asmlinkage void syscall_enter(void);
+asmlinkage void sysenter_entry(void);
 asmlinkage void compat_hypercall(void);
 asmlinkage void int80_direct_trap(void);
 
@@ -323,12 +324,26 @@ void __init percpu_traps_init(void)
 
     /* Trampoline for SYSCALL entry from long mode. */
     stack = &stack[IST_MAX * PAGE_SIZE]; /* Skip the IST stacks. */
-    wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
+    wrmsrl(MSR_LSTAR, (unsigned long)stack);
     stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS64);
 
-    /* Trampoline for SYSCALL entry from compatibility mode. */
-    wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
-    stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS32);
+    switch ( boot_cpu_data.x86_vendor )
+    {
+    case X86_VENDOR_INTEL:
+        /* SYSENTER entry. */
+        wrmsrl(MSR_IA32_SYSENTER_ESP, (unsigned long)stack_bottom);
+        wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)sysenter_entry);
+        wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
+        break;
+    case X86_VENDOR_AMD:
+        /* Trampoline for SYSCALL entry from compatibility mode. */
+        stack = (char *)L1_CACHE_ALIGN((unsigned long)stack);
+        wrmsrl(MSR_CSTAR, (unsigned long)stack);
+        stack += write_stack_trampoline(stack, stack_bottom, FLAT_USER_CS32);
+        break;
+    default:
+        BUG();
+    }
 
     /* Common SYSCALL parameters. */
     wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
@@ -353,6 +368,9 @@ static long register_guest_callback(stru
     long ret = 0;
     struct vcpu *v = current;
 
+    if ( !is_canonical_address(reg->address) )
+        return -EINVAL;
+
     switch ( reg->type )
     {
     case CALLBACKTYPE_event:
@@ -370,6 +388,14 @@ static long register_guest_callback(stru
         break;
 
     case CALLBACKTYPE_syscall:
+        /* See arch_set_info_guest() for why this is being done. */
+        if ( v->arch.syscall32_callback_eip ==
+             v->arch.guest_context.syscall_callback_eip )
+        {
+            v->arch.syscall32_callback_eip = reg->address;
+            v->arch.syscall32_disables_events =
+                (reg->flags & CALLBACKF_mask_events) != 0;
+        }
         v->arch.guest_context.syscall_callback_eip = reg->address;
         if ( reg->flags & CALLBACKF_mask_events )
             set_bit(_VGCF_syscall_disables_events,
@@ -379,6 +405,43 @@ static long register_guest_callback(stru
                       &v->arch.guest_context.flags);
         break;
 
+    case CALLBACKTYPE_syscall32:
+        v->arch.syscall32_callback_eip = reg->address;
+        v->arch.syscall32_disables_events =
+            (reg->flags & CALLBACKF_mask_events) != 0;
+        break;
+
+    case CALLBACKTYPE_sfmask:
+        v->arch.syscall_eflags_mask = reg->address &
+            ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
+        if ( reg->address & X86_EFLAGS_IF )
+        {
+            set_bit(_VGCF_syscall_disables_events,
+                    &v->arch.guest_context.flags);
+            v->arch.syscall32_disables_events = 1;
+        }
+        else
+        {
+            clear_bit(_VGCF_syscall_disables_events,
+                      &v->arch.guest_context.flags);
+            v->arch.syscall32_disables_events = 0;
+        }
+        break;
+
+    case CALLBACKTYPE_sysenter:
+        v->arch.sysenter_callback_eip = reg->address;
+        v->arch.sysenter_disables_events =
+            (reg->flags & CALLBACKF_mask_events) != 0;
+        break;
+
+    case CALLBACKTYPE_sysexit:
+        v->arch.sysexit_eip = reg->address;
+        if ( reg->flags & CALLBACKF_mask_events )
+            v->arch.sysexit_cs = FLAT_USER_CS32;
+        else
+            v->arch.sysexit_cs = FLAT_USER_CS64;
+        break;
+
     case CALLBACKTYPE_nmi:
         ret = register_guest_nmi_callback(reg->address);
         break;
@@ -400,6 +463,10 @@ static long unregister_guest_callback(st
     case CALLBACKTYPE_event:
     case CALLBACKTYPE_failsafe:
     case CALLBACKTYPE_syscall:
+    case CALLBACKTYPE_syscall32:
+    case CALLBACKTYPE_sfmask:
+    case CALLBACKTYPE_sysenter:
+    case CALLBACKTYPE_sysexit:
         ret = -EINVAL;
         break;
 
Index: 2007-07-03/xen/include/asm-x86/cpufeature.h
===================================================================
--- 2007-07-03.orig/xen/include/asm-x86/cpufeature.h	2007-07-03 10:35:30.000000000 +0200
+++ 2007-07-03/xen/include/asm-x86/cpufeature.h	2007-07-03 10:39:14.000000000 +0200
@@ -130,7 +130,7 @@
 #define cpu_has_pae     1
 #define cpu_has_pge     1
 #define cpu_has_apic    boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_sep     0
+#define cpu_has_sep     boot_cpu_has(X86_FEATURE_SEP)
 #define cpu_has_mtrr    1
 #define cpu_has_mmx     1
 #define cpu_has_fxsr    1
Index: 2007-07-03/xen/include/asm-x86/domain.h
===================================================================
--- 2007-07-03.orig/xen/include/asm-x86/domain.h	2007-06-15 14:05:46.000000000 +0200
+++ 2007-07-03/xen/include/asm-x86/domain.h	2007-07-04 12:51:40.000000000 +0200
@@ -281,6 +281,16 @@ struct arch_vcpu
 #endif
 #ifdef CONFIG_X86_64
     struct trap_bounce int80_bounce;
+    unsigned long syscall32_callback_eip;
+    unsigned long sysenter_callback_eip;
+    unsigned long sysexit_eip;
+    unsigned short syscall32_callback_cs;
+    unsigned short sysenter_callback_cs;
+    unsigned short sysexit_cs;
+    bool_t syscall32_disables_events;
+    bool_t sysenter_disables_events;
+    unsigned int syscall_eflags_mask;
+    unsigned int eflags_mask;
 #endif
 
     /* Virtual Machine Extensions */
Index: 2007-07-03/xen/include/public/callback.h
===================================================================
--- 2007-07-03.orig/xen/include/public/callback.h	2006-11-08 10:37:31.000000000 +0100
+++ 2007-07-03/xen/include/public/callback.h	2007-07-03 10:39:14.000000000 +0200
@@ -38,13 +38,34 @@
 #define CALLBACKTYPE_event                 0
 #define CALLBACKTYPE_failsafe              1
-#define CALLBACKTYPE_syscall               2 /* x86_64 only */
+#define CALLBACKTYPE_syscall               2 /* x86_64 hv only */
 /*
- * sysenter is only available on x86_32 with the
- * supervisor_mode_kernel option enabled.
+ * sysenter_deprecated is only available on x86_32 with the
+ * supervisor_mode_kernel option enabled, and should not be used in new code.
  */
-#define CALLBACKTYPE_sysenter              3
+#define CALLBACKTYPE_sysenter_deprecated   3
 #define CALLBACKTYPE_nmi                   4
+#if __XEN_INTERFACE_VERSION__ < 0x00030206
+#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated
+#else
+/*
+ * sysenter is only available
+ * - on x86_32 with the supervisor_mode_kernel option enabled,
+ * - on x86_64 hv for x86_32 pv or 32-bit guest support in x86_64 pv.
+ */
+#define CALLBACKTYPE_sysenter              5
+/*
+ * sysexit is only available on x86_64 hv, and is only used to fill a
+ * sysenter frame's return address (if the guest desires to have a non-NULL
+ * value there). Additionally, since CALLBACKF_mask_events is meaningless
+ * here, it is being (mis-)used for 64-bit guests to distinguish sysenter
+ * callers expected to be in 64-bit mode (flag set) from 32-bit ones (flag
+ * clear).
+ */
+#define CALLBACKTYPE_sysexit               6
+#define CALLBACKTYPE_syscall32             7 /* x86_64 only */
+#define CALLBACKTYPE_sfmask                8 /* x86_64 only */
+#endif
 
 /*
  * Disable event deliver during callback? This flag is ignored for event and
Index: 2007-07-03/xen/include/public/xen-compat.h
===================================================================
--- 2007-07-03.orig/xen/include/public/xen-compat.h	2006-11-16 14:06:41.000000000 +0100
+++ 2007-07-03/xen/include/public/xen-compat.h	2007-07-03 10:39:14.000000000 +0200
@@ -27,7 +27,7 @@
 #ifndef __XEN_PUBLIC_XEN_COMPAT_H__
 #define __XEN_PUBLIC_XEN_COMPAT_H__
 
-#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030205
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030206
 
 #if defined(__XEN__) || defined(__XEN_TOOLS__)
 /* Xen is built with matching headers and implements the latest interface. */
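For comparison, the MSR emulation path amounts to the guest doing what it would do on native hardware — a minimal sketch, assuming hypothetical entry_* symbols and the usual kernel MSR helpers (header locations vary by kernel version):

    /*
     * Minimal sketch of the MSR-based alternative: the guest programs the
     * SYSCALL/SYSENTER MSRs as on native hardware, and the WRMSRs trap into
     * emulate_privileged_op() above, which only records the values in
     * struct arch_vcpu rather than touching the real MSRs.
     */
    #include <asm/msr.h>        /* wrmsrl(), MSR_* numbers */
    #include <asm/processor.h>  /* X86_EFLAGS_* constants (location varies) */

    extern void entry_syscall64(void);   /* SYSCALL from 64-bit userspace */
    extern void entry_syscall32(void);   /* SYSCALL from 32-bit userspace */
    extern void entry_sysenter32(void);  /* SYSENTER from 32-bit userspace */

    static void setup_syscall_msrs(void)
    {
        wrmsrl(MSR_LSTAR, (unsigned long)entry_syscall64);
        wrmsrl(MSR_CSTAR, (unsigned long)entry_syscall32);
        /*
         * EFLAGS bits to clear on SYSCALL.  Xen strips IF/IOPL from the
         * stored mask itself; a set IF bit is instead translated into
         * "mask event delivery on entry" (_VGCF_syscall_disables_events).
         */
        wrmsrl(MSR_SYSCALL_MASK, X86_EFLAGS_IF|X86_EFLAGS_DF|X86_EFLAGS_TF);
        /* SYSENTER from 32-bit userspace; the stack comes from kernel_sp. */
        wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_sysenter32);
    }

Either mechanism alone would suffice for a given guest, which is why I'm unsure both need to stay.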