This is accomplished by splitting the guest_context member, which by itself is larger than a page on x86-64. Quite a number of fields of this structure are completely meaningless for HVM guests, and thus a new struct pv_vcpu gets introduced, which is being overlaid with struct hvm_vcpu in struct arch_vcpu. The one member that is mostly responsible for the large size is trap_ctxt, which now gets allocated separately (unless fitting on the same page as struct arch_vcpu, as is currently the case for x86-32), and only for non-HVM, non-idle domains. This change pointed out a latent problem in arch_set_info_guest(), which is permitted to be called on already initialized vCPUs, but so far copied the new state into struct arch_vcpu without (in this case) actually going through all the necessary accounting/validation steps. The logic gets changed so that the pieces that bypass accounting will at least be verified to be no different from the currently active bits, and the whole change will fail in case they are. The logic does *not* get adjusted here to do full error recovery, that is, partially modified state continues not to be rolled back in case of failure. Signed-off-by: Jan Beulich --- a/xen/arch/x86/acpi/suspend.c +++ b/xen/arch/x86/acpi/suspend.c @@ -87,14 +87,14 @@ void restore_rest_processor_state(void) /* Maybe load the debug registers. 
*/ BUG_ON(is_hvm_vcpu(curr)); - if ( !is_idle_vcpu(curr) && curr->arch.guest_context.debugreg[7] ) + if ( !is_idle_vcpu(curr) && curr->arch.debugreg[7] ) { - write_debugreg(0, curr->arch.guest_context.debugreg[0]); - write_debugreg(1, curr->arch.guest_context.debugreg[1]); - write_debugreg(2, curr->arch.guest_context.debugreg[2]); - write_debugreg(3, curr->arch.guest_context.debugreg[3]); - write_debugreg(6, curr->arch.guest_context.debugreg[6]); - write_debugreg(7, curr->arch.guest_context.debugreg[7]); + write_debugreg(0, curr->arch.debugreg[0]); + write_debugreg(1, curr->arch.debugreg[1]); + write_debugreg(2, curr->arch.debugreg[2]); + write_debugreg(3, curr->arch.debugreg[3]); + write_debugreg(6, curr->arch.debugreg[6]); + write_debugreg(7, curr->arch.debugreg[7]); } /* Reload FPU state on next FPU use. */ --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -208,15 +208,16 @@ struct vcpu *alloc_vcpu_struct(void) * may require that the shadow CR3 points below 4GB, and hence the whole * structure must satisfy this restriction. Thus we specify MEMF_bits(32). 
*/ - v = alloc_xenheap_pages(get_order_from_bytes(sizeof(*v)), MEMF_bits(32)); + BUILD_BUG_ON(sizeof(*v) > PAGE_SIZE); + v = alloc_xenheap_pages(0, MEMF_bits(32)); if ( v != NULL ) - memset(v, 0, sizeof(*v)); + clear_page(v); return v; } void free_vcpu_struct(struct vcpu *v) { - free_xenheap_pages(v, get_order_from_bytes(sizeof(*v))); + free_xenheap_page(v); } #ifdef __x86_64__ @@ -330,6 +331,12 @@ int switch_compat(struct domain *d) #define release_compat_l4(v) ((void)0) #endif +static inline bool_t standalone_trap_ctxt(struct vcpu *v) +{ + BUILD_BUG_ON(256 * sizeof(*v->arch.pv_vcpu.trap_ctxt) > PAGE_SIZE); + return 256 * sizeof(*v->arch.pv_vcpu.trap_ctxt) + sizeof(*v) > PAGE_SIZE; +} + int vcpu_initialise(struct vcpu *v) { struct domain *d = v->domain; @@ -369,21 +376,48 @@ int vcpu_initialise(struct vcpu *v) if ( (rc = xsave_alloc_save_area(v)) != 0 ) return rc; + if ( v->arch.xsave_area ) + v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse; + else if ( !is_idle_domain(d) ) + { + v->arch.fpu_ctxt = _xmalloc(sizeof(v->arch.xsave_area->fpu_sse), 16); + if ( !v->arch.fpu_ctxt ) + { + rc = -ENOMEM; + goto done; + } + memset(v->arch.fpu_ctxt, 0, sizeof(v->arch.xsave_area->fpu_sse)); + } if ( is_hvm_domain(d) ) { - if ( (rc = hvm_vcpu_initialise(v)) != 0 ) - xsave_free_save_area(v); - return rc; + rc = hvm_vcpu_initialise(v); + goto done; } - /* PV guests by default have a 100Hz ticker. */ if ( !is_idle_domain(d) ) + { + if ( standalone_trap_ctxt(v) ) + { + v->arch.pv_vcpu.trap_ctxt = alloc_xenheap_page(); + if ( !v->arch.pv_vcpu.trap_ctxt ) + { + rc = -ENOMEM; + goto done; + } + clear_page(v->arch.pv_vcpu.trap_ctxt); + } + else + v->arch.pv_vcpu.trap_ctxt = (void *)v + PAGE_SIZE - + 256 * sizeof(*v->arch.pv_vcpu.trap_ctxt); + + /* PV guests by default have a 100Hz ticker. */ v->periodic_period = MILLISECS(10); - /* PV guests get an emulated PIT too for video BIOSes to use. 
*/ - if ( !is_idle_domain(d) && (v->vcpu_id == 0) ) - pit_init(v, cpu_khz); + /* PV guests get an emulated PIT too for video BIOSes to use. */ + if ( v->vcpu_id == 0 ) + pit_init(v, cpu_khz); + } v->arch.schedule_tail = continue_nonidle_domain; v->arch.ctxt_switch_from = paravirt_ctxt_switch_from; @@ -395,12 +429,19 @@ int vcpu_initialise(struct vcpu *v) v->arch.cr3 = __pa(idle_pg_table); } - v->arch.guest_context.ctrlreg[4] = - real_cr4_to_pv_guest_cr4(mmu_cr4_features); + v->arch.pv_vcpu.ctrlreg[4] = real_cr4_to_pv_guest_cr4(mmu_cr4_features); rc = is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0; + done: if ( rc ) - xsave_free_save_area(v); + { + if ( v->arch.xsave_area ) + xsave_free_save_area(v); + else + xfree(v->arch.fpu_ctxt); + if ( !is_hvm_domain(d) && standalone_trap_ctxt(v) ) + free_xenheap_page(v->arch.pv_vcpu.trap_ctxt); + } return rc; } @@ -410,10 +451,15 @@ void vcpu_destroy(struct vcpu *v) if ( is_pv_32on64_vcpu(v) ) release_compat_l4(v); - xsave_free_save_area(v); + if ( v->arch.xsave_area ) + xsave_free_save_area(v); + else + xfree(v->arch.fpu_ctxt); if ( is_hvm_vcpu(v) ) hvm_vcpu_destroy(v); + else if ( standalone_trap_ctxt(v) ) + free_xenheap_page(v->arch.pv_vcpu.trap_ctxt); } int arch_domain_create(struct domain *d, unsigned int domcr_flags) @@ -648,14 +694,29 @@ unsigned long pv_guest_cr4_fixup(const s return (hv_cr4 & hv_cr4_mask) | (guest_cr4 & ~hv_cr4_mask); } -/* This is called by arch_final_setup_guest and do_boot_vcpu */ +#ifdef CONFIG_COMPAT +#define xen_vcpu_guest_context vcpu_guest_context +#define fpu_ctxt fpu_ctxt.x +CHECK_FIELD_(struct, vcpu_guest_context, fpu_ctxt); +#undef fpu_ctxt +#undef xen_vcpu_guest_context +#endif + +/* + * This is called by do_domctl(XEN_DOMCTL_setvcpucontext, ...), boot_vcpu(), + * and hvm_load_cpu_ctxt(). + * + * Note that for a HVM guest NULL may be passed for the context pointer, + * meaning "use current values". 
+ */ int arch_set_info_guest( struct vcpu *v, vcpu_guest_context_u c) { struct domain *d = v->domain; unsigned long cr3_pfn = INVALID_MFN; unsigned long flags, cr4; - int i, rc = 0, compat; + unsigned int i; + int rc = 0, compat; /* The context is a compat-mode one if the target domain is compat-mode; * we expect the tools to DTRT even in compat-mode callers. */ @@ -666,7 +727,7 @@ int arch_set_info_guest( #else #define c(fld) (c.nat->fld) #endif - flags = c(flags); + flags = c.nat ? c(flags) : v->arch.vgc_flags; if ( !is_hvm_vcpu(v) ) { @@ -720,14 +781,32 @@ int arch_set_info_guest( if ( (flags & VGCF_in_kernel) || is_hvm_vcpu(v)/*???*/ ) v->arch.flags |= TF_kernel_mode; - if ( !compat ) - memcpy(&v->arch.guest_context, c.nat, sizeof(*c.nat)); + v->arch.vgc_flags = flags; + + if ( c.nat ) + { + memcpy(v->arch.fpu_ctxt, &c.nat->fpu_ctxt, sizeof(c.nat->fpu_ctxt)); + if ( !compat ) + { + memcpy(&v->arch.user_regs, &c.nat->user_regs, sizeof(c.nat->user_regs)); + if ( !is_hvm_vcpu(v) ) + memcpy(v->arch.pv_vcpu.trap_ctxt, c.nat->trap_ctxt, + sizeof(c.nat->trap_ctxt)); + } #ifdef CONFIG_COMPAT - else - XLAT_vcpu_guest_context(&v->arch.guest_context, c.cmp); + else + { + XLAT_cpu_user_regs(&v->arch.user_regs, &c.cmp->user_regs); + for ( i = 0; i < ARRAY_SIZE(c.cmp->trap_ctxt); ++i ) + XLAT_trap_info(v->arch.pv_vcpu.trap_ctxt + i, + c.cmp->trap_ctxt + i); + } #endif + for ( i = 0; i < ARRAY_SIZE(v->arch.debugreg); ++i ) + v->arch.debugreg[i] = c(debugreg[i]); + } - v->arch.guest_context.user_regs.eflags |= 2; + v->arch.user_regs.eflags |= 2; if ( is_hvm_vcpu(v) ) { @@ -735,25 +814,71 @@ int arch_set_info_guest( goto out; } + if ( !v->is_initialised ) + { + v->arch.pv_vcpu.ldt_base = c(ldt_base); + v->arch.pv_vcpu.ldt_ents = c(ldt_ents); + } + else + { + bool_t fail = v->arch.pv_vcpu.ctrlreg[3] != c(ctrlreg[3]); + +#ifdef CONFIG_X86_64 + fail |= v->arch.pv_vcpu.ctrlreg[1] != c(ctrlreg[1]); +#endif + + for ( i = 0; i < ARRAY_SIZE(v->arch.pv_vcpu.gdt_frames); ++i ) + fail |= 
v->arch.pv_vcpu.gdt_frames[i] != c(gdt_frames[i]); + fail |= v->arch.pv_vcpu.gdt_ents != c(gdt_ents); + + fail |= v->arch.pv_vcpu.ldt_base != c(ldt_base); + fail |= v->arch.pv_vcpu.ldt_ents != c(ldt_ents); + + if ( fail ) + return -EOPNOTSUPP; + } + + v->arch.pv_vcpu.kernel_ss = c(kernel_ss); + v->arch.pv_vcpu.kernel_sp = c(kernel_sp); + for ( i = 0; i < ARRAY_SIZE(v->arch.pv_vcpu.ctrlreg); ++i ) + v->arch.pv_vcpu.ctrlreg[i] = c(ctrlreg[i]); + + v->arch.pv_vcpu.event_callback_eip = c(event_callback_eip); + v->arch.pv_vcpu.failsafe_callback_eip = c(failsafe_callback_eip); +#ifdef CONFIG_X86_64 + if ( !compat ) + { + v->arch.pv_vcpu.syscall_callback_eip = c.nat->syscall_callback_eip; + v->arch.pv_vcpu.fs_base = c.nat->fs_base; + v->arch.pv_vcpu.gs_base_kernel = c.nat->gs_base_kernel; + v->arch.pv_vcpu.gs_base_user = c.nat->gs_base_user; + } + else +#endif + { + v->arch.pv_vcpu.event_callback_cs = c(event_callback_cs); + v->arch.pv_vcpu.failsafe_callback_cs = c(failsafe_callback_cs); + } + v->arch.pv_vcpu.vm_assist = c(vm_assist); + /* Only CR0.TS is modifiable by guest or admin. */ - v->arch.guest_context.ctrlreg[0] &= X86_CR0_TS; - v->arch.guest_context.ctrlreg[0] |= read_cr0() & ~X86_CR0_TS; + v->arch.pv_vcpu.ctrlreg[0] &= X86_CR0_TS; + v->arch.pv_vcpu.ctrlreg[0] |= read_cr0() & ~X86_CR0_TS; init_int80_direct_trap(v); /* IOPL privileges are virtualised. */ - v->arch.iopl = (v->arch.guest_context.user_regs.eflags >> 12) & 3; - v->arch.guest_context.user_regs.eflags &= ~X86_EFLAGS_IOPL; + v->arch.iopl = (v->arch.user_regs.eflags >> 12) & 3; + v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL; /* Ensure real hardware interrupts are enabled. */ - v->arch.guest_context.user_regs.eflags |= X86_EFLAGS_IF; + v->arch.user_regs.eflags |= X86_EFLAGS_IF; - cr4 = v->arch.guest_context.ctrlreg[4]; - v->arch.guest_context.ctrlreg[4] = cr4 ? pv_guest_cr4_fixup(v, cr4) : + cr4 = v->arch.pv_vcpu.ctrlreg[4]; + v->arch.pv_vcpu.ctrlreg[4] = cr4 ? 
pv_guest_cr4_fixup(v, cr4) : real_cr4_to_pv_guest_cr4(mmu_cr4_features); - memset(v->arch.guest_context.debugreg, 0, - sizeof(v->arch.guest_context.debugreg)); + memset(v->arch.debugreg, 0, sizeof(v->arch.debugreg)); for ( i = 0; i < 8; i++ ) (void)set_debugreg(v, i, c(debugreg[i])); @@ -768,10 +893,10 @@ int arch_set_info_guest( #ifdef CONFIG_COMPAT else { - unsigned long gdt_frames[ARRAY_SIZE(c.cmp->gdt_frames)]; - unsigned int i, n = (c.cmp->gdt_ents + 511) / 512; + unsigned long gdt_frames[ARRAY_SIZE(v->arch.pv_vcpu.gdt_frames)]; + unsigned int n = (c.cmp->gdt_ents + 511) / 512; - if ( n > ARRAY_SIZE(c.cmp->gdt_frames) ) + if ( n > ARRAY_SIZE(v->arch.pv_vcpu.gdt_frames) ) return -EINVAL; for ( i = 0; i < n; ++i ) gdt_frames[i] = c.cmp->gdt_frames[i]; @@ -1103,7 +1228,7 @@ static DEFINE_PER_CPU(unsigned int, dirt static void load_segments(struct vcpu *n) { - struct vcpu_guest_context *nctxt = &n->arch.guest_context; + struct cpu_user_regs *uregs = &n->arch.user_regs; int all_segs_okay = 1; unsigned int dirty_segment_mask, cpu = smp_processor_id(); @@ -1112,46 +1237,46 @@ static void load_segments(struct vcpu *n per_cpu(dirty_segment_mask, cpu) = 0; /* Either selector != 0 ==> reload. */ - if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) ) - all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds); + if ( unlikely((dirty_segment_mask & DIRTY_DS) | uregs->ds) ) + all_segs_okay &= loadsegment(ds, uregs->ds); /* Either selector != 0 ==> reload. */ - if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) ) - all_segs_okay &= loadsegment(es, nctxt->user_regs.es); + if ( unlikely((dirty_segment_mask & DIRTY_ES) | uregs->es) ) + all_segs_okay &= loadsegment(es, uregs->es); /* * Either selector != 0 ==> reload. * Also reload to reset FS_BASE if it was non-zero. 
*/ if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) | - nctxt->user_regs.fs) ) - all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs); + uregs->fs) ) + all_segs_okay &= loadsegment(fs, uregs->fs); /* * Either selector != 0 ==> reload. * Also reload to reset GS_BASE if it was non-zero. */ if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) | - nctxt->user_regs.gs) ) + uregs->gs) ) { /* Reset GS_BASE with user %gs? */ - if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user ) - all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs); + if ( (dirty_segment_mask & DIRTY_GS) || !n->arch.pv_vcpu.gs_base_user ) + all_segs_okay &= loadsegment(gs, uregs->gs); } if ( !is_pv_32on64_domain(n->domain) ) { /* This can only be non-zero if selector is NULL. */ - if ( nctxt->fs_base ) - wrmsrl(MSR_FS_BASE, nctxt->fs_base); + if ( n->arch.pv_vcpu.fs_base ) + wrmsrl(MSR_FS_BASE, n->arch.pv_vcpu.fs_base); /* Most kernels have non-zero GS base, so don't bother testing. */ /* (This is also a serialising instruction, avoiding AMD erratum #88.) */ - wrmsrl(MSR_SHADOW_GS_BASE, nctxt->gs_base_kernel); + wrmsrl(MSR_SHADOW_GS_BASE, n->arch.pv_vcpu.gs_base_kernel); /* This can only be non-zero if selector is NULL. */ - if ( nctxt->gs_base_user ) - wrmsrl(MSR_GS_BASE, nctxt->gs_base_user); + if ( n->arch.pv_vcpu.gs_base_user ) + wrmsrl(MSR_GS_BASE, n->arch.pv_vcpu.gs_base_user); /* If in kernel mode then switch the GS bases around. */ if ( (n->arch.flags & TF_kernel_mode) ) @@ -1160,18 +1285,19 @@ static void load_segments(struct vcpu *n if ( unlikely(!all_segs_okay) ) { + struct pv_vcpu *pv = &n->arch.pv_vcpu; struct cpu_user_regs *regs = guest_cpu_user_regs(); unsigned long *rsp = (n->arch.flags & TF_kernel_mode) ? (unsigned long *)regs->rsp : - (unsigned long *)nctxt->kernel_sp; + (unsigned long *)pv->kernel_sp; unsigned long cs_and_mask, rflags; if ( is_pv_32on64_domain(n->domain) ) { unsigned int *esp = ring_1(regs) ? 
(unsigned int *)regs->rsp : - (unsigned int *)nctxt->kernel_sp; + (unsigned int *)pv->kernel_sp; unsigned int cs_and_mask, eflags; int ret = 0; @@ -1193,18 +1319,17 @@ static void load_segments(struct vcpu *n put_user(eflags, esp-1) | put_user(cs_and_mask, esp-2) | put_user(regs->_eip, esp-3) | - put_user(nctxt->user_regs.gs, esp-4) | - put_user(nctxt->user_regs.fs, esp-5) | - put_user(nctxt->user_regs.es, esp-6) | - put_user(nctxt->user_regs.ds, esp-7) ) + put_user(uregs->gs, esp-4) | + put_user(uregs->fs, esp-5) | + put_user(uregs->es, esp-6) | + put_user(uregs->ds, esp-7) ) { gdprintk(XENLOG_ERR, "Error while creating compat " "failsafe callback frame.\n"); domain_crash(n->domain); } - if ( test_bit(_VGCF_failsafe_disables_events, - &n->arch.guest_context.flags) ) + if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) ) vcpu_info(n, evtchn_upcall_mask) = 1; regs->entry_vector = TRAP_syscall; @@ -1212,7 +1337,7 @@ static void load_segments(struct vcpu *n regs->ss = FLAT_COMPAT_KERNEL_SS; regs->_esp = (unsigned long)(esp-7); regs->cs = FLAT_COMPAT_KERNEL_CS; - regs->_eip = nctxt->failsafe_callback_eip; + regs->_eip = pv->failsafe_callback_eip; return; } @@ -1234,10 +1359,10 @@ static void load_segments(struct vcpu *n put_user(rflags, rsp- 3) | put_user(cs_and_mask, rsp- 4) | put_user(regs->rip, rsp- 5) | - put_user(nctxt->user_regs.gs, rsp- 6) | - put_user(nctxt->user_regs.fs, rsp- 7) | - put_user(nctxt->user_regs.es, rsp- 8) | - put_user(nctxt->user_regs.ds, rsp- 9) | + put_user(uregs->gs, rsp- 6) | + put_user(uregs->fs, rsp- 7) | + put_user(uregs->es, rsp- 8) | + put_user(uregs->ds, rsp- 9) | put_user(regs->r11, rsp-10) | put_user(regs->rcx, rsp-11) ) { @@ -1246,8 +1371,7 @@ static void load_segments(struct vcpu *n domain_crash(n->domain); } - if ( test_bit(_VGCF_failsafe_disables_events, - &n->arch.guest_context.flags) ) + if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) ) vcpu_info(n, evtchn_upcall_mask) = 1; regs->entry_vector = 
TRAP_syscall; @@ -1256,14 +1380,13 @@ static void load_segments(struct vcpu *n regs->ss = FLAT_KERNEL_SS; regs->rsp = (unsigned long)(rsp-11); regs->cs = FLAT_KERNEL_CS; - regs->rip = nctxt->failsafe_callback_eip; + regs->rip = pv->failsafe_callback_eip; } } static void save_segments(struct vcpu *v) { - struct vcpu_guest_context *ctxt = &v->arch.guest_context; - struct cpu_user_regs *regs = &ctxt->user_regs; + struct cpu_user_regs *regs = &v->arch.user_regs; unsigned int dirty_segment_mask = 0; regs->ds = read_segment_register(ds); @@ -1280,9 +1403,9 @@ static void save_segments(struct vcpu *v if ( regs->fs || is_pv_32on64_domain(v->domain) ) { dirty_segment_mask |= DIRTY_FS; - ctxt->fs_base = 0; /* != 0 selector kills fs_base */ + v->arch.pv_vcpu.fs_base = 0; /* != 0 selector kills fs_base */ } - else if ( ctxt->fs_base ) + else if ( v->arch.pv_vcpu.fs_base ) { dirty_segment_mask |= DIRTY_FS_BASE; } @@ -1290,9 +1413,9 @@ static void save_segments(struct vcpu *v if ( regs->gs || is_pv_32on64_domain(v->domain) ) { dirty_segment_mask |= DIRTY_GS; - ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */ + v->arch.pv_vcpu.gs_base_user = 0; /* != 0 selector kills gs_base_user */ } - else if ( ctxt->gs_base_user ) + else if ( v->arch.pv_vcpu.gs_base_user ) { dirty_segment_mask |= DIRTY_GS_BASE_USER; } @@ -1310,8 +1433,8 @@ static void save_segments(struct vcpu *v static inline void switch_kernel_stack(struct vcpu *v) { struct tss_struct *tss = &this_cpu(init_tss); - tss->esp1 = v->arch.guest_context.kernel_sp; - tss->ss1 = v->arch.guest_context.kernel_ss; + tss->esp1 = v->arch.pv_vcpu.kernel_sp; + tss->ss1 = v->arch.pv_vcpu.kernel_ss; } #endif /* __i386__ */ @@ -1326,7 +1449,7 @@ static void paravirt_ctxt_switch_from(st * inside Xen, before we get a chance to reload DR7, and this cannot always * safely be handled. 
*/ - if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) ) + if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) ) write_debugreg(7, 0); } @@ -1341,14 +1464,14 @@ static void paravirt_ctxt_switch_to(stru if ( unlikely(cr4 != read_cr4()) ) write_cr4(cr4); - if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) ) + if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) ) { - write_debugreg(0, v->arch.guest_context.debugreg[0]); - write_debugreg(1, v->arch.guest_context.debugreg[1]); - write_debugreg(2, v->arch.guest_context.debugreg[2]); - write_debugreg(3, v->arch.guest_context.debugreg[3]); - write_debugreg(6, v->arch.guest_context.debugreg[6]); - write_debugreg(7, v->arch.guest_context.debugreg[7]); + write_debugreg(0, v->arch.debugreg[0]); + write_debugreg(1, v->arch.debugreg[1]); + write_debugreg(2, v->arch.debugreg[2]); + write_debugreg(3, v->arch.debugreg[3]); + write_debugreg(6, v->arch.debugreg[6]); + write_debugreg(7, v->arch.debugreg[7]); } if ( (v->domain->arch.tsc_mode == TSC_MODE_PVRDTSCP) && @@ -1395,9 +1518,7 @@ static void __context_switch(void) if ( !is_idle_vcpu(p) ) { - memcpy(&p->arch.guest_context.user_regs, - stack_regs, - CTXT_SWITCH_STACK_BYTES); + memcpy(&p->arch.user_regs, stack_regs, CTXT_SWITCH_STACK_BYTES); save_init_fpu(p); p->arch.ctxt_switch_from(p); } @@ -1413,9 +1534,7 @@ static void __context_switch(void) if ( !is_idle_vcpu(n) ) { - memcpy(stack_regs, - &n->arch.guest_context.user_regs, - CTXT_SWITCH_STACK_BYTES); + memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES); if ( xsave_enabled(n) && n->arch.xcr0 != get_xcr0() ) set_xcr0(n->arch.xcr0); n->arch.ctxt_switch_to(n); @@ -1931,25 +2050,29 @@ int domain_relinquish_resources(struct d /* Tear down paging-assistance stuff. */ paging_teardown(d); - for_each_vcpu ( d, v ) + if ( !is_hvm_domain(d) ) { - /* Drop the in-use references to page-table bases. 
*/ - vcpu_destroy_pagetables(v); + for_each_vcpu ( d, v ) + { + /* Drop the in-use references to page-table bases. */ + vcpu_destroy_pagetables(v); - /* - * Relinquish GDT mappings. No need for explicit unmapping of the - * LDT as it automatically gets squashed with the guest mappings. - */ - destroy_gdt(v); + /* + * Relinquish GDT mappings. No need for explicit unmapping of + * the LDT as it automatically gets squashed with the guest + * mappings. + */ + destroy_gdt(v); - unmap_vcpu_info(v); - } + unmap_vcpu_info(v); + } - if ( d->arch.pirq_eoi_map != NULL ) - { - unmap_domain_page_global(d->arch.pirq_eoi_map); - put_page_and_type(mfn_to_page(d->arch.pirq_eoi_map_mfn)); - d->arch.pirq_eoi_map = NULL; + if ( d->arch.pirq_eoi_map != NULL ) + { + unmap_domain_page_global(d->arch.pirq_eoi_map); + put_page_and_type(mfn_to_page(d->arch.pirq_eoi_map_mfn)); + d->arch.pirq_eoi_map = NULL; + } } d->arch.relmem = RELMEM_xen; --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -731,8 +731,8 @@ int __init construct_dom0( if ( is_pv_32on64_domain(d) ) { - v->arch.guest_context.failsafe_callback_cs = FLAT_COMPAT_KERNEL_CS; - v->arch.guest_context.event_callback_cs = FLAT_COMPAT_KERNEL_CS; + v->arch.pv_vcpu.failsafe_callback_cs = FLAT_COMPAT_KERNEL_CS; + v->arch.pv_vcpu.event_callback_cs = FLAT_COMPAT_KERNEL_CS; } /* WARNING: The new domain must have its 'processor' field filled in! */ @@ -1160,7 +1160,7 @@ int __init construct_dom0( * ESI = start_info * [EAX,EBX,ECX,EDX,EDI,EBP are zero] */ - regs = &v->arch.guest_context.user_regs; + regs = &v->arch.user_regs; regs->ds = regs->es = regs->fs = regs->gs = !is_pv_32on64_domain(d) ? FLAT_KERNEL_DS : FLAT_COMPAT_KERNEL_DS; regs->ss = (!is_pv_32on64_domain(d) ? 
@@ -1178,12 +1178,12 @@ int __init construct_dom0( if ( supervisor_mode_kernel ) { - v->arch.guest_context.kernel_ss &= ~3; - v->arch.guest_context.user_regs.ss &= ~3; - v->arch.guest_context.user_regs.es &= ~3; - v->arch.guest_context.user_regs.ds &= ~3; - v->arch.guest_context.user_regs.fs &= ~3; - v->arch.guest_context.user_regs.gs &= ~3; + v->arch.pv_vcpu.kernel_ss &= ~3; + v->arch.user_regs.ss &= ~3; + v->arch.user_regs.es &= ~3; + v->arch.user_regs.ds &= ~3; + v->arch.user_regs.fs &= ~3; + v->arch.user_regs.gs &= ~3; printk("Dom0 runs in ring 0 (supervisor mode)\n"); if ( !test_bit(XENFEAT_supervisor_mode_kernel, parms.f_supported) ) --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -1469,7 +1469,7 @@ long arch_do_domctl( } offset += sizeof(v->arch.xcr0_accum); if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer, - offset, v->arch.xsave_area, + offset, (void *)v->arch.xsave_area, xsave_cntxt_size) ) { ret = -EFAULT; @@ -1594,36 +1594,56 @@ long arch_do_domctl( return ret; } +#ifdef CONFIG_COMPAT +#define xen_vcpu_guest_context vcpu_guest_context +#define fpu_ctxt fpu_ctxt.x +CHECK_FIELD_(struct, vcpu_guest_context, fpu_ctxt); +#undef fpu_ctxt +#undef xen_vcpu_guest_context +#endif + void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c) { + unsigned int i; + bool_t compat = is_pv_32on64_domain(v->domain); #ifdef CONFIG_COMPAT -#define c(fld) (!is_pv_32on64_domain(v->domain) ? (c.nat->fld) : (c.cmp->fld)) +#define c(fld) (!compat ? 
(c.nat->fld) : (c.cmp->fld)) #else #define c(fld) (c.nat->fld) #endif - /* Fill legacy context from xsave area first */ - if ( xsave_enabled(v) ) - memcpy(v->arch.xsave_area, &v->arch.guest_context.fpu_ctxt, - sizeof(v->arch.guest_context.fpu_ctxt)); - - if ( !is_pv_32on64_domain(v->domain) ) - memcpy(c.nat, &v->arch.guest_context, sizeof(*c.nat)); -#ifdef CONFIG_COMPAT - else - XLAT_vcpu_guest_context(c.cmp, &v->arch.guest_context); -#endif - - c(flags &= ~(VGCF_i387_valid|VGCF_in_kernel)); + if ( is_hvm_vcpu(v) ) + memset(c.nat, 0, sizeof(*c.nat)); + memcpy(&c.nat->fpu_ctxt, v->arch.fpu_ctxt, sizeof(c.nat->fpu_ctxt)); + c(flags = v->arch.vgc_flags & ~(VGCF_i387_valid|VGCF_in_kernel)); if ( v->fpu_initialised ) c(flags |= VGCF_i387_valid); if ( !test_bit(_VPF_down, &v->pause_flags) ) c(flags |= VGCF_online); + if ( !compat ) + { + memcpy(&c.nat->user_regs, &v->arch.user_regs, sizeof(c.nat->user_regs)); + if ( !is_hvm_vcpu(v) ) + memcpy(c.nat->trap_ctxt, v->arch.pv_vcpu.trap_ctxt, + sizeof(c.nat->trap_ctxt)); + } +#ifdef CONFIG_COMPAT + else + { + XLAT_cpu_user_regs(&c.cmp->user_regs, &v->arch.user_regs); + for ( i = 0; i < ARRAY_SIZE(c.cmp->trap_ctxt); ++i ) + XLAT_trap_info(c.cmp->trap_ctxt + i, + v->arch.pv_vcpu.trap_ctxt + i); + } +#endif + + for ( i = 0; i < ARRAY_SIZE(v->arch.debugreg); ++i ) + c(debugreg[i] = v->arch.debugreg[i]); if ( is_hvm_vcpu(v) ) { struct segment_register sreg; - memset(c.nat->ctrlreg, 0, sizeof(c.nat->ctrlreg)); + c.nat->ctrlreg[0] = v->arch.hvm_vcpu.guest_cr[0]; c.nat->ctrlreg[2] = v->arch.hvm_vcpu.guest_cr[2]; c.nat->ctrlreg[3] = v->arch.hvm_vcpu.guest_cr[3]; @@ -1643,6 +1663,39 @@ void arch_get_info_guest(struct vcpu *v, } else { + c(ldt_base = v->arch.pv_vcpu.ldt_base); + c(ldt_ents = v->arch.pv_vcpu.ldt_ents); + for ( i = 0; i < ARRAY_SIZE(v->arch.pv_vcpu.gdt_frames); ++i ) + c(gdt_frames[i] = v->arch.pv_vcpu.gdt_frames[i]); +#ifdef CONFIG_COMPAT + BUILD_BUG_ON(ARRAY_SIZE(c.nat->gdt_frames) != + ARRAY_SIZE(c.cmp->gdt_frames)); 
+#endif + for ( ; i < ARRAY_SIZE(c.nat->gdt_frames); ++i ) + c(gdt_frames[i] = 0); + c(gdt_ents = v->arch.pv_vcpu.gdt_ents); + c(kernel_ss = v->arch.pv_vcpu.kernel_ss); + c(kernel_sp = v->arch.pv_vcpu.kernel_sp); + for ( i = 0; i < ARRAY_SIZE(v->arch.pv_vcpu.ctrlreg); ++i ) + c(ctrlreg[i] = v->arch.pv_vcpu.ctrlreg[i]); + c(event_callback_eip = v->arch.pv_vcpu.event_callback_eip); + c(failsafe_callback_eip = v->arch.pv_vcpu.failsafe_callback_eip); +#ifdef CONFIG_X86_64 + if ( !compat ) + { + c.nat->syscall_callback_eip = v->arch.pv_vcpu.syscall_callback_eip; + c.nat->fs_base = v->arch.pv_vcpu.fs_base; + c.nat->gs_base_kernel = v->arch.pv_vcpu.gs_base_kernel; + c.nat->gs_base_user = v->arch.pv_vcpu.gs_base_user; + } + else +#endif + { + c(event_callback_cs = v->arch.pv_vcpu.event_callback_cs); + c(failsafe_callback_cs = v->arch.pv_vcpu.failsafe_callback_cs); + } + c(vm_assist = v->arch.pv_vcpu.vm_assist); + /* IOPL privileges are virtualised: merge back into returned eflags. */ BUG_ON((c(user_regs.eflags) & X86_EFLAGS_IOPL) != 0); c(user_regs.eflags |= v->arch.iopl << 12); @@ -1673,7 +1726,7 @@ void arch_get_info_guest(struct vcpu *v, } #endif - if ( guest_kernel_mode(v, &v->arch.guest_context.user_regs) ) + if ( guest_kernel_mode(v, &v->arch.user_regs) ) c(flags |= VGCF_in_kernel); } --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -516,7 +516,6 @@ static int hvm_save_cpu_ctxt(struct doma struct vcpu *v; struct hvm_hw_cpu ctxt; struct segment_register seg; - struct vcpu_guest_context *vc; for_each_vcpu ( d, v ) { @@ -586,39 +585,37 @@ static int hvm_save_cpu_ctxt(struct doma ctxt.ldtr_base = seg.base; ctxt.ldtr_arbytes = seg.attr.bytes; - vc = &v->arch.guest_context; - if ( v->fpu_initialised ) - memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs)); + memcpy(ctxt.fpu_regs, v->arch.fpu_ctxt, sizeof(ctxt.fpu_regs)); else memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs)); - ctxt.rax = vc->user_regs.eax; - ctxt.rbx = vc->user_regs.ebx; - ctxt.rcx = 
vc->user_regs.ecx; - ctxt.rdx = vc->user_regs.edx; - ctxt.rbp = vc->user_regs.ebp; - ctxt.rsi = vc->user_regs.esi; - ctxt.rdi = vc->user_regs.edi; - ctxt.rsp = vc->user_regs.esp; - ctxt.rip = vc->user_regs.eip; - ctxt.rflags = vc->user_regs.eflags; + ctxt.rax = v->arch.user_regs.eax; + ctxt.rbx = v->arch.user_regs.ebx; + ctxt.rcx = v->arch.user_regs.ecx; + ctxt.rdx = v->arch.user_regs.edx; + ctxt.rbp = v->arch.user_regs.ebp; + ctxt.rsi = v->arch.user_regs.esi; + ctxt.rdi = v->arch.user_regs.edi; + ctxt.rsp = v->arch.user_regs.esp; + ctxt.rip = v->arch.user_regs.eip; + ctxt.rflags = v->arch.user_regs.eflags; #ifdef __x86_64__ - ctxt.r8 = vc->user_regs.r8; - ctxt.r9 = vc->user_regs.r9; - ctxt.r10 = vc->user_regs.r10; - ctxt.r11 = vc->user_regs.r11; - ctxt.r12 = vc->user_regs.r12; - ctxt.r13 = vc->user_regs.r13; - ctxt.r14 = vc->user_regs.r14; - ctxt.r15 = vc->user_regs.r15; + ctxt.r8 = v->arch.user_regs.r8; + ctxt.r9 = v->arch.user_regs.r9; + ctxt.r10 = v->arch.user_regs.r10; + ctxt.r11 = v->arch.user_regs.r11; + ctxt.r12 = v->arch.user_regs.r12; + ctxt.r13 = v->arch.user_regs.r13; + ctxt.r14 = v->arch.user_regs.r14; + ctxt.r15 = v->arch.user_regs.r15; #endif - ctxt.dr0 = vc->debugreg[0]; - ctxt.dr1 = vc->debugreg[1]; - ctxt.dr2 = vc->debugreg[2]; - ctxt.dr3 = vc->debugreg[3]; - ctxt.dr6 = vc->debugreg[6]; - ctxt.dr7 = vc->debugreg[7]; + ctxt.dr0 = v->arch.debugreg[0]; + ctxt.dr1 = v->arch.debugreg[1]; + ctxt.dr2 = v->arch.debugreg[2]; + ctxt.dr3 = v->arch.debugreg[3]; + ctxt.dr6 = v->arch.debugreg[6]; + ctxt.dr7 = v->arch.debugreg[7]; if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 ) return 1; @@ -643,7 +640,6 @@ static int hvm_load_cpu_ctxt(struct doma struct vcpu *v; struct hvm_hw_cpu ctxt; struct segment_register seg; - struct vcpu_guest_context *vc; /* Which vcpu is this? 
*/ vcpuid = hvm_load_instance(h); @@ -652,13 +648,12 @@ static int hvm_load_cpu_ctxt(struct doma gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid); return -EINVAL; } - vc = &v->arch.guest_context; /* Need to init this vcpu before loading its contents */ rc = 0; domain_lock(d); if ( !v->is_initialised ) - rc = boot_vcpu(d, vcpuid, vc); + rc = boot_vcpu(d, vcpuid, NULL); domain_unlock(d); if ( rc != 0 ) return rc; @@ -770,8 +765,6 @@ static int hvm_load_cpu_ctxt(struct doma seg.attr.bytes = ctxt.ldtr_arbytes; hvm_set_segment_register(v, x86_seg_ldtr, &seg); - memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs)); - /* In case xsave-absent save file is restored on a xsave-capable host */ if ( xsave_enabled(v) ) { @@ -782,35 +775,37 @@ static int hvm_load_cpu_ctxt(struct doma v->arch.xcr0_accum = XSTATE_FP_SSE; v->arch.xcr0 = XSTATE_FP_SSE; } + else + memcpy(v->arch.fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs)); - vc->user_regs.eax = ctxt.rax; - vc->user_regs.ebx = ctxt.rbx; - vc->user_regs.ecx = ctxt.rcx; - vc->user_regs.edx = ctxt.rdx; - vc->user_regs.ebp = ctxt.rbp; - vc->user_regs.esi = ctxt.rsi; - vc->user_regs.edi = ctxt.rdi; - vc->user_regs.esp = ctxt.rsp; - vc->user_regs.eip = ctxt.rip; - vc->user_regs.eflags = ctxt.rflags | 2; + v->arch.user_regs.eax = ctxt.rax; + v->arch.user_regs.ebx = ctxt.rbx; + v->arch.user_regs.ecx = ctxt.rcx; + v->arch.user_regs.edx = ctxt.rdx; + v->arch.user_regs.ebp = ctxt.rbp; + v->arch.user_regs.esi = ctxt.rsi; + v->arch.user_regs.edi = ctxt.rdi; + v->arch.user_regs.esp = ctxt.rsp; + v->arch.user_regs.eip = ctxt.rip; + v->arch.user_regs.eflags = ctxt.rflags | 2; #ifdef __x86_64__ - vc->user_regs.r8 = ctxt.r8; - vc->user_regs.r9 = ctxt.r9; - vc->user_regs.r10 = ctxt.r10; - vc->user_regs.r11 = ctxt.r11; - vc->user_regs.r12 = ctxt.r12; - vc->user_regs.r13 = ctxt.r13; - vc->user_regs.r14 = ctxt.r14; - vc->user_regs.r15 = ctxt.r15; + v->arch.user_regs.r8 = ctxt.r8; + v->arch.user_regs.r9 = ctxt.r9; + 
v->arch.user_regs.r10 = ctxt.r10; + v->arch.user_regs.r11 = ctxt.r11; + v->arch.user_regs.r12 = ctxt.r12; + v->arch.user_regs.r13 = ctxt.r13; + v->arch.user_regs.r14 = ctxt.r14; + v->arch.user_regs.r15 = ctxt.r15; #endif - vc->debugreg[0] = ctxt.dr0; - vc->debugreg[1] = ctxt.dr1; - vc->debugreg[2] = ctxt.dr2; - vc->debugreg[3] = ctxt.dr3; - vc->debugreg[6] = ctxt.dr6; - vc->debugreg[7] = ctxt.dr7; + v->arch.debugreg[0] = ctxt.dr0; + v->arch.debugreg[1] = ctxt.dr1; + v->arch.debugreg[2] = ctxt.dr2; + v->arch.debugreg[3] = ctxt.dr3; + v->arch.debugreg[6] = ctxt.dr6; + v->arch.debugreg[7] = ctxt.dr7; - vc->flags = VGCF_online; + v->arch.vgc_flags = VGCF_online; v->fpu_initialised = 1; /* Auxiliary processors should be woken immediately. */ @@ -975,7 +970,7 @@ int hvm_vcpu_initialise(struct vcpu *v) (void(*)(unsigned long))hvm_assert_evtchn_irq, (unsigned long)v); - v->arch.guest_context.user_regs.eflags = 2; + v->arch.user_regs.eflags = 2; if ( v->vcpu_id == 0 ) { @@ -2863,7 +2858,6 @@ static int hvmop_set_pci_intx_level( void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip) { struct domain *d = v->domain; - struct vcpu_guest_context *ctxt; struct segment_register reg; BUG_ON(vcpu_runnable(v)); @@ -2880,12 +2874,13 @@ void hvm_vcpu_reset_state(struct vcpu *v v->arch.guest_table = pagetable_null(); } - ctxt = &v->arch.guest_context; - memset(ctxt, 0, sizeof(*ctxt)); - ctxt->flags = VGCF_online; - ctxt->user_regs.eflags = 2; - ctxt->user_regs.edx = 0x00000f00; - ctxt->user_regs.eip = ip; + memset(v->arch.fpu_ctxt, 0, sizeof(v->arch.xsave_area->fpu_sse)); + v->arch.vgc_flags = VGCF_online; + memset(&v->arch.user_regs, 0, sizeof(v->arch.user_regs)); + v->arch.user_regs.eflags = 2; + v->arch.user_regs.edx = 0x00000f00; + v->arch.user_regs.eip = ip; + memset(&v->arch.debugreg, 0, sizeof(v->arch.debugreg)); v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET; hvm_update_guest_cr(v, 0); --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -117,12 +117,12 
@@ static void svm_save_dr(struct vcpu *v) v->arch.hvm_vcpu.flag_dr_dirty = 0; vmcb_set_dr_intercepts(vmcb, ~0u); - v->arch.guest_context.debugreg[0] = read_debugreg(0); - v->arch.guest_context.debugreg[1] = read_debugreg(1); - v->arch.guest_context.debugreg[2] = read_debugreg(2); - v->arch.guest_context.debugreg[3] = read_debugreg(3); - v->arch.guest_context.debugreg[6] = vmcb_get_dr6(vmcb); - v->arch.guest_context.debugreg[7] = vmcb_get_dr7(vmcb); + v->arch.debugreg[0] = read_debugreg(0); + v->arch.debugreg[1] = read_debugreg(1); + v->arch.debugreg[2] = read_debugreg(2); + v->arch.debugreg[3] = read_debugreg(3); + v->arch.debugreg[6] = vmcb_get_dr6(vmcb); + v->arch.debugreg[7] = vmcb_get_dr7(vmcb); } static void __restore_debug_registers(struct vcpu *v) @@ -135,12 +135,12 @@ static void __restore_debug_registers(st v->arch.hvm_vcpu.flag_dr_dirty = 1; vmcb_set_dr_intercepts(vmcb, 0); - write_debugreg(0, v->arch.guest_context.debugreg[0]); - write_debugreg(1, v->arch.guest_context.debugreg[1]); - write_debugreg(2, v->arch.guest_context.debugreg[2]); - write_debugreg(3, v->arch.guest_context.debugreg[3]); - vmcb_set_dr6(vmcb, v->arch.guest_context.debugreg[6]); - vmcb_set_dr7(vmcb, v->arch.guest_context.debugreg[7]); + write_debugreg(0, v->arch.debugreg[0]); + write_debugreg(1, v->arch.debugreg[1]); + write_debugreg(2, v->arch.debugreg[2]); + write_debugreg(3, v->arch.debugreg[3]); + vmcb_set_dr6(vmcb, v->arch.debugreg[6]); + vmcb_set_dr7(vmcb, v->arch.debugreg[7]); } /* @@ -151,7 +151,7 @@ static void __restore_debug_registers(st */ static void svm_restore_dr(struct vcpu *v) { - if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) ) + if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) ) __restore_debug_registers(v); } --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -1128,7 +1128,7 @@ static void vmx_dump_sel2(char *name, ui void vmcs_dump_vcpu(struct vcpu *v) { - struct cpu_user_regs *regs = &v->arch.guest_context.user_regs; + 
struct cpu_user_regs *regs = &v->arch.user_regs; unsigned long long x; if ( v == current ) --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -132,7 +132,7 @@ static int vmx_vcpu_initialise(struct vc /* %eax == 1 signals full real-mode support to the guest loader. */ if ( v->vcpu_id == 0 ) - v->arch.guest_context.user_regs.eax = 1; + v->arch.user_regs.eax = 1; return 0; } @@ -400,13 +400,13 @@ static void vmx_save_dr(struct vcpu *v) v->arch.hvm_vmx.exec_control |= CPU_BASED_MOV_DR_EXITING; vmx_update_cpu_exec_control(v); - v->arch.guest_context.debugreg[0] = read_debugreg(0); - v->arch.guest_context.debugreg[1] = read_debugreg(1); - v->arch.guest_context.debugreg[2] = read_debugreg(2); - v->arch.guest_context.debugreg[3] = read_debugreg(3); - v->arch.guest_context.debugreg[6] = read_debugreg(6); + v->arch.debugreg[0] = read_debugreg(0); + v->arch.debugreg[1] = read_debugreg(1); + v->arch.debugreg[2] = read_debugreg(2); + v->arch.debugreg[3] = read_debugreg(3); + v->arch.debugreg[6] = read_debugreg(6); /* DR7 must be saved as it is used by vmx_restore_dr(). */ - v->arch.guest_context.debugreg[7] = __vmread(GUEST_DR7); + v->arch.debugreg[7] = __vmread(GUEST_DR7); } static void __restore_debug_registers(struct vcpu *v) @@ -416,11 +416,11 @@ static void __restore_debug_registers(st v->arch.hvm_vcpu.flag_dr_dirty = 1; - write_debugreg(0, v->arch.guest_context.debugreg[0]); - write_debugreg(1, v->arch.guest_context.debugreg[1]); - write_debugreg(2, v->arch.guest_context.debugreg[2]); - write_debugreg(3, v->arch.guest_context.debugreg[3]); - write_debugreg(6, v->arch.guest_context.debugreg[6]); + write_debugreg(0, v->arch.debugreg[0]); + write_debugreg(1, v->arch.debugreg[1]); + write_debugreg(2, v->arch.debugreg[2]); + write_debugreg(3, v->arch.debugreg[3]); + write_debugreg(6, v->arch.debugreg[6]); /* DR7 is loaded from the VMCS. */ } @@ -433,7 +433,7 @@ static void __restore_debug_registers(st static void vmx_restore_dr(struct vcpu *v) { /* NB. 
__vmread() is not usable here, so we cannot read from the VMCS. */ - if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) ) + if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) ) __restore_debug_registers(v); } @@ -1352,7 +1352,7 @@ static void vmx_set_info_guest(struct vc vmx_vmcs_enter(v); - __vmwrite(GUEST_DR7, v->arch.guest_context.debugreg[7]); + __vmwrite(GUEST_DR7, v->arch.debugreg[7]); /* * If the interruptibility-state field indicates blocking by STI, @@ -1364,7 +1364,7 @@ static void vmx_set_info_guest(struct vc */ intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO); if ( v->domain->debugger_attached && - (v->arch.guest_context.user_regs.eflags & X86_EFLAGS_TF) && + (v->arch.user_regs.eflags & X86_EFLAGS_TF) && (intr_shadow & VMX_INTR_SHADOW_STI) ) { intr_shadow &= ~VMX_INTR_SHADOW_STI; --- a/xen/arch/x86/i387.c +++ b/xen/arch/x86/i387.c @@ -110,7 +110,7 @@ void save_init_fpu(struct vcpu *v) ASSERT(!is_idle_vcpu(v)); cr0 = read_cr0(); - fpu_ctxt = v->arch.guest_context.fpu_ctxt.x; + fpu_ctxt = v->arch.fpu_ctxt; /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */ if ( cr0 & X86_CR0_TS ) @@ -176,7 +176,7 @@ void save_init_fpu(struct vcpu *v) static void restore_fpu(struct vcpu *v) { - char *fpu_ctxt = v->arch.guest_context.fpu_ctxt.x; + const char *fpu_ctxt = v->arch.fpu_ctxt; /* * FXRSTOR can fault if passed a corrupted data block. 
We handle this @@ -208,7 +208,7 @@ static void restore_fpu(struct vcpu *v) _ASM_EXTABLE(1b, 2b) : : "m" (*fpu_ctxt), - "i" (sizeof(v->arch.guest_context.fpu_ctxt)/4) + "i" (sizeof(v->arch.xsave_area->fpu_sse)/4) #ifdef __x86_64__ ,"cdaSDb" (fpu_ctxt) #endif @@ -216,7 +216,7 @@ static void restore_fpu(struct vcpu *v) } else { - asm volatile ( "frstor %0" : : "m" (v->arch.guest_context.fpu_ctxt) ); + asm volatile ( "frstor %0" : : "m" (*fpu_ctxt) ); } } --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -646,7 +646,7 @@ int map_ldt_shadow_page(unsigned int off struct domain *d = v->domain; unsigned long gmfn, mfn; l1_pgentry_t l1e, nl1e; - unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT); + unsigned long gva = v->arch.pv_vcpu.ldt_base + (off << PAGE_SHIFT); int okay; BUG_ON(unlikely(in_irq())); @@ -3220,13 +3220,13 @@ int do_mmuext_op( okay = 0; MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents); } - else if ( (curr->arch.guest_context.ldt_ents != ents) || - (curr->arch.guest_context.ldt_base != ptr) ) + else if ( (curr->arch.pv_vcpu.ldt_ents != ents) || + (curr->arch.pv_vcpu.ldt_base != ptr) ) { invalidate_shadow_ldt(curr, 0); flush_tlb_local(); - curr->arch.guest_context.ldt_base = ptr; - curr->arch.guest_context.ldt_ents = ents; + curr->arch.pv_vcpu.ldt_base = ptr; + curr->arch.pv_vcpu.ldt_ents = ents; load_LDT(curr); if ( ents != 0 ) (void)map_ldt_shadow_page(0); @@ -4416,13 +4416,13 @@ void destroy_gdt(struct vcpu *v) int i; unsigned long pfn; - v->arch.guest_context.gdt_ents = 0; + v->arch.pv_vcpu.gdt_ents = 0; for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ ) { if ( (pfn = l1e_get_pfn(v->arch.perdomain_ptes[i])) != 0 ) put_page_and_type(mfn_to_page(pfn)); l1e_write(&v->arch.perdomain_ptes[i], l1e_empty()); - v->arch.guest_context.gdt_frames[i] = 0; + v->arch.pv_vcpu.gdt_frames[i] = 0; } } @@ -4452,10 +4452,10 @@ long set_gdt(struct vcpu *v, destroy_gdt(v); /* Install the new GDT. 
*/ - v->arch.guest_context.gdt_ents = entries; + v->arch.pv_vcpu.gdt_ents = entries; for ( i = 0; i < nr_pages; i++ ) { - v->arch.guest_context.gdt_frames[i] = frames[i]; + v->arch.pv_vcpu.gdt_frames[i] = frames[i]; l1e_write(&v->arch.perdomain_ptes[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR)); } --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -362,8 +362,8 @@ void vcpu_show_execution_state(struct vc vcpu_pause(v); /* acceptably dangerous */ vcpu_show_registers(v); - if ( guest_kernel_mode(v, &v->arch.guest_context.user_regs) ) - show_guest_stack(v, &v->arch.guest_context.user_regs); + if ( guest_kernel_mode(v, &v->arch.user_regs) ) + show_guest_stack(v, &v->arch.user_regs); vcpu_unpause(v); } @@ -430,7 +430,7 @@ static void do_guest_trap( trace_pv_trap(trapnr, regs->eip, use_error_code, regs->error_code); tb = &v->arch.trap_bounce; - ti = &v->arch.guest_context.trap_ctxt[trapnr]; + ti = &v->arch.pv_vcpu.trap_ctxt[trapnr]; tb->flags = TBF_EXCEPTION; tb->cs = ti->cs; @@ -458,9 +458,9 @@ static void instruction_done( regs->eflags &= ~X86_EFLAGS_RF; if ( bpmatch || (regs->eflags & X86_EFLAGS_TF) ) { - current->arch.guest_context.debugreg[6] |= bpmatch | 0xffff0ff0; + current->arch.debugreg[6] |= bpmatch | 0xffff0ff0; if ( regs->eflags & X86_EFLAGS_TF ) - current->arch.guest_context.debugreg[6] |= 0x4000; + current->arch.debugreg[6] |= 0x4000; do_guest_trap(TRAP_debug, regs, 0); } } @@ -471,20 +471,20 @@ static unsigned int check_guest_io_break unsigned int width, i, match = 0; unsigned long start; - if ( !(v->arch.guest_context.debugreg[5]) || - !(v->arch.guest_context.ctrlreg[4] & X86_CR4_DE) ) + if ( !(v->arch.debugreg[5]) || + !(v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) ) return 0; for ( i = 0; i < 4; i++ ) { - if ( !(v->arch.guest_context.debugreg[5] & + if ( !(v->arch.debugreg[5] & (3 << (i * DR_ENABLE_SIZE))) ) continue; - start = v->arch.guest_context.debugreg[i]; + start = v->arch.debugreg[i]; width = 0; - switch ( (v->arch.guest_context.debugreg[7] 
>> + switch ( (v->arch.debugreg[7] >> (DR_CONTROL_SHIFT + i * DR_CONTROL_SIZE)) & 0xc ) { case DR_LEN_1: width = 1; break; @@ -1009,7 +1009,7 @@ void propagate_page_fault(unsigned long struct vcpu *v = current; struct trap_bounce *tb = &v->arch.trap_bounce; - v->arch.guest_context.ctrlreg[2] = addr; + v->arch.pv_vcpu.ctrlreg[2] = addr; arch_set_cr2(v, addr); /* Re-set error_code.user flag appropriately for the guest. */ @@ -1019,7 +1019,7 @@ void propagate_page_fault(unsigned long trace_pv_page_fault(addr, error_code); - ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault]; + ti = &v->arch.pv_vcpu.trap_ctxt[TRAP_page_fault]; tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE; tb->error_code = error_code; tb->cs = ti->cs; @@ -1073,7 +1073,7 @@ static int handle_gdt_ldt_mapping_fault( return 0; /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */ propagate_page_fault( - curr->arch.guest_context.ldt_base + offset, + curr->arch.pv_vcpu.ldt_base + offset, regs->error_code); } } @@ -1356,12 +1356,12 @@ long do_fpu_taskswitch(int set) if ( set ) { - v->arch.guest_context.ctrlreg[0] |= X86_CR0_TS; + v->arch.pv_vcpu.ctrlreg[0] |= X86_CR0_TS; stts(); } else { - v->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS; + v->arch.pv_vcpu.ctrlreg[0] &= ~X86_CR0_TS; if ( v->fpu_dirtied ) clts(); } @@ -1843,13 +1843,13 @@ static int emulate_privileged_op(struct data_base = 0UL; break; case lm_seg_fs: - data_base = v->arch.guest_context.fs_base; + data_base = v->arch.pv_vcpu.fs_base; break; case lm_seg_gs: if ( guest_kernel_mode(v, regs) ) - data_base = v->arch.guest_context.gs_base_kernel; + data_base = v->arch.pv_vcpu.gs_base_kernel; else - data_base = v->arch.guest_context.gs_base_user; + data_base = v->arch.pv_vcpu.gs_base_user; break; } } @@ -2052,7 +2052,7 @@ static int emulate_privileged_op(struct switch ( insn_fetch(u8, code_base, eip, code_limit) ) { case 0xf9: /* RDTSCP */ - if ( (v->arch.guest_context.ctrlreg[4] & X86_CR4_TSD) && + if ( (v->arch.pv_vcpu.ctrlreg[4] & 
X86_CR4_TSD) && !guest_kernel_mode(v, regs) ) goto fail; pv_soft_rdtsc(v, regs, 1); @@ -2062,7 +2062,7 @@ static int emulate_privileged_op(struct u64 new_xfeature = (u32)regs->eax | ((u64)regs->edx << 32); if ( lock || rep_prefix || opsize_prefix - || !(v->arch.guest_context.ctrlreg[4] & X86_CR4_OSXSAVE) ) + || !(v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) ) { do_guest_trap(TRAP_invalid_op, regs, 0); goto skip; @@ -2117,11 +2117,11 @@ static int emulate_privileged_op(struct { case 0: /* Read CR0 */ *reg = (read_cr0() & ~X86_CR0_TS) | - v->arch.guest_context.ctrlreg[0]; + v->arch.pv_vcpu.ctrlreg[0]; break; case 2: /* Read CR2 */ - *reg = v->arch.guest_context.ctrlreg[2]; + *reg = v->arch.pv_vcpu.ctrlreg[2]; break; case 3: /* Read CR3 */ @@ -2148,7 +2148,7 @@ static int emulate_privileged_op(struct break; case 4: /* Read CR4 */ - *reg = v->arch.guest_context.ctrlreg[4]; + *reg = v->arch.pv_vcpu.ctrlreg[4]; break; default: @@ -2190,7 +2190,7 @@ static int emulate_privileged_op(struct break; case 2: /* Write CR2 */ - v->arch.guest_context.ctrlreg[2] = *reg; + v->arch.pv_vcpu.ctrlreg[2] = *reg; arch_set_cr2(v, *reg); break; @@ -2208,7 +2208,7 @@ static int emulate_privileged_op(struct break; case 4: /* Write CR4 */ - v->arch.guest_context.ctrlreg[4] = pv_guest_cr4_fixup(v, *reg); + v->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(v, *reg); write_cr4(pv_guest_cr4_to_real_cr4(v)); break; @@ -2240,21 +2240,21 @@ static int emulate_privileged_op(struct goto fail; if ( wrmsr_safe(MSR_FS_BASE, msr_content) ) goto fail; - v->arch.guest_context.fs_base = msr_content; + v->arch.pv_vcpu.fs_base = msr_content; break; case MSR_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; if ( wrmsr_safe(MSR_GS_BASE, msr_content) ) goto fail; - v->arch.guest_context.gs_base_kernel = msr_content; + v->arch.pv_vcpu.gs_base_kernel = msr_content; break; case MSR_SHADOW_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; if ( wrmsr_safe(MSR_SHADOW_GS_BASE, msr_content) ) goto fail; - 
v->arch.guest_context.gs_base_user = msr_content; + v->arch.pv_vcpu.gs_base_user = msr_content; break; #endif case MSR_K7_FID_VID_STATUS: @@ -2379,7 +2379,7 @@ static int emulate_privileged_op(struct } case 0x31: /* RDTSC */ - if ( (v->arch.guest_context.ctrlreg[4] & X86_CR4_TSD) && + if ( (v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_TSD) && !guest_kernel_mode(v, regs) ) goto fail; if ( v->domain->arch.vtsc ) @@ -2395,20 +2395,20 @@ static int emulate_privileged_op(struct case MSR_FS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - regs->eax = v->arch.guest_context.fs_base & 0xFFFFFFFFUL; - regs->edx = v->arch.guest_context.fs_base >> 32; + regs->eax = v->arch.pv_vcpu.fs_base & 0xFFFFFFFFUL; + regs->edx = v->arch.pv_vcpu.fs_base >> 32; break; case MSR_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - regs->eax = v->arch.guest_context.gs_base_kernel & 0xFFFFFFFFUL; - regs->edx = v->arch.guest_context.gs_base_kernel >> 32; + regs->eax = v->arch.pv_vcpu.gs_base_kernel & 0xFFFFFFFFUL; + regs->edx = v->arch.pv_vcpu.gs_base_kernel >> 32; break; case MSR_SHADOW_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL; - regs->edx = v->arch.guest_context.gs_base_user >> 32; + regs->eax = v->arch.pv_vcpu.gs_base_user & 0xFFFFFFFFUL; + regs->edx = v->arch.pv_vcpu.gs_base_user >> 32; break; #endif case MSR_K7_FID_VID_CTL: @@ -2763,8 +2763,8 @@ static void emulate_gate_op(struct cpu_u do_guest_trap(TRAP_gp_fault, regs, 1); return; } - esp = v->arch.guest_context.kernel_sp; - ss = v->arch.guest_context.kernel_ss; + esp = v->arch.pv_vcpu.kernel_sp; + ss = v->arch.pv_vcpu.kernel_ss; if ( (ss & 3) != (sel & 3) || !read_descriptor(ss, v, regs, &base, &limit, &ar, 0) || ((ar >> 13) & 3) != (sel & 3) || @@ -2899,7 +2899,7 @@ asmlinkage void do_general_protection(st /* This fault must be due to instruction. 
*/ const struct trap_info *ti; unsigned char vector = regs->error_code >> 3; - ti = &v->arch.guest_context.trap_ctxt[vector]; + ti = &v->arch.pv_vcpu.trap_ctxt[vector]; if ( permit_softint(TI_GET_DPL(ti), v, regs) ) { regs->eip += 2; @@ -3169,10 +3169,10 @@ asmlinkage void do_device_not_available( setup_fpu(curr); - if ( curr->arch.guest_context.ctrlreg[0] & X86_CR0_TS ) + if ( curr->arch.pv_vcpu.ctrlreg[0] & X86_CR0_TS ) { do_guest_trap(TRAP_no_device, regs, 0); - curr->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS; + curr->arch.pv_vcpu.ctrlreg[0] &= ~X86_CR0_TS; } else TRACE_0D(TRC_PV_MATH_STATE_RESTORE); @@ -3244,7 +3244,7 @@ asmlinkage void do_debug(struct cpu_user } /* Save debug status register where guest OS can peek at it */ - v->arch.guest_context.debugreg[6] = read_debugreg(6); + v->arch.debugreg[6] = read_debugreg(6); ler_enable(); do_guest_trap(TRAP_debug, regs, 0); @@ -3389,7 +3389,7 @@ long register_guest_nmi_callback(unsigne { struct vcpu *v = current; struct domain *d = v->domain; - struct trap_info *t = &v->arch.guest_context.trap_ctxt[TRAP_nmi]; + struct trap_info *t = &v->arch.pv_vcpu.trap_ctxt[TRAP_nmi]; t->vector = TRAP_nmi; t->flags = 0; @@ -3411,7 +3411,7 @@ long register_guest_nmi_callback(unsigne long unregister_guest_nmi_callback(void) { struct vcpu *v = current; - struct trap_info *t = &v->arch.guest_context.trap_ctxt[TRAP_nmi]; + struct trap_info *t = &v->arch.pv_vcpu.trap_ctxt[TRAP_nmi]; memset(t, 0, sizeof(*t)); @@ -3430,7 +3430,7 @@ int guest_has_trap_callback(struct domai BUG_ON(trap_nr > TRAP_syscall); v = d->vcpu[vcpuid]; - t = &v->arch.guest_context.trap_ctxt[trap_nr]; + t = &v->arch.pv_vcpu.trap_ctxt[trap_nr]; return (t->address != 0); } @@ -3489,7 +3489,7 @@ long do_set_trap_table(XEN_GUEST_HANDLE( { struct trap_info cur; struct vcpu *curr = current; - struct trap_info *dst = curr->arch.guest_context.trap_ctxt; + struct trap_info *dst = curr->arch.pv_vcpu.trap_ctxt; long rc = 0; /* If no table is presented then clear the entire 
virtual IDT. */ @@ -3594,7 +3594,7 @@ long set_debugreg(struct vcpu *v, int re { if ( ((value >> i) & 3) == DR_IO ) { - if ( !(v->arch.guest_context.ctrlreg[4] & X86_CR4_DE) ) + if ( !(v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) ) return -EPERM; io_enable |= value & (3 << ((i - 16) >> 1)); } @@ -3607,7 +3607,7 @@ long set_debugreg(struct vcpu *v, int re } /* Guest DR5 is a handy stash for I/O intercept information. */ - v->arch.guest_context.debugreg[5] = io_enable; + v->arch.debugreg[5] = io_enable; value &= ~io_enable; /* @@ -3616,13 +3616,13 @@ long set_debugreg(struct vcpu *v, int re * context switch. */ if ( (v == curr) && - !(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) ) + !(v->arch.debugreg[7] & DR7_ACTIVE_MASK) ) { - write_debugreg(0, v->arch.guest_context.debugreg[0]); - write_debugreg(1, v->arch.guest_context.debugreg[1]); - write_debugreg(2, v->arch.guest_context.debugreg[2]); - write_debugreg(3, v->arch.guest_context.debugreg[3]); - write_debugreg(6, v->arch.guest_context.debugreg[6]); + write_debugreg(0, v->arch.debugreg[0]); + write_debugreg(1, v->arch.debugreg[1]); + write_debugreg(2, v->arch.debugreg[2]); + write_debugreg(3, v->arch.debugreg[3]); + write_debugreg(6, v->arch.debugreg[6]); } } if ( v == curr ) @@ -3632,7 +3632,7 @@ long set_debugreg(struct vcpu *v, int re return -EINVAL; } - v->arch.guest_context.debugreg[reg] = value; + v->arch.debugreg[reg] = value; return 0; } @@ -3649,13 +3649,13 @@ unsigned long do_get_debugreg(int reg) { case 0 ... 3: case 6: - return curr->arch.guest_context.debugreg[reg]; + return curr->arch.debugreg[reg]; case 7: - return (curr->arch.guest_context.debugreg[7] | - curr->arch.guest_context.debugreg[5]); + return (curr->arch.debugreg[7] | + curr->arch.debugreg[5]); case 4 ... 5: - return ((curr->arch.guest_context.ctrlreg[4] & X86_CR4_DE) ? - curr->arch.guest_context.debugreg[reg + 2] : 0); + return ((curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) ? 
+ curr->arch.debugreg[reg + 2] : 0); } return -EINVAL; --- a/xen/arch/x86/x86_32/asm-offsets.c +++ b/xen/arch/x86/x86_32/asm-offsets.c @@ -55,19 +55,15 @@ void __dummy__(void) OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info); OFFSET(VCPU_trap_bounce, struct vcpu, arch.trap_bounce); OFFSET(VCPU_thread_flags, struct vcpu, arch.flags); - OFFSET(VCPU_event_sel, struct vcpu, - arch.guest_context.event_callback_cs); - OFFSET(VCPU_event_addr, struct vcpu, - arch.guest_context.event_callback_eip); + OFFSET(VCPU_event_sel, struct vcpu, arch.pv_vcpu.event_callback_cs); + OFFSET(VCPU_event_addr, struct vcpu, arch.pv_vcpu.event_callback_eip); OFFSET(VCPU_failsafe_sel, struct vcpu, - arch.guest_context.failsafe_callback_cs); + arch.pv_vcpu.failsafe_callback_cs); OFFSET(VCPU_failsafe_addr, struct vcpu, - arch.guest_context.failsafe_callback_eip); - OFFSET(VCPU_kernel_ss, struct vcpu, - arch.guest_context.kernel_ss); - OFFSET(VCPU_kernel_sp, struct vcpu, - arch.guest_context.kernel_sp); - OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags); + arch.pv_vcpu.failsafe_callback_eip); + OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss); + OFFSET(VCPU_kernel_sp, struct vcpu, arch.pv_vcpu.kernel_sp); + OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags); OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending); OFFSET(VCPU_mce_pending, struct vcpu, mce_pending); OFFSET(VCPU_nmi_old_mask, struct vcpu, nmi_state.old_mask); --- a/xen/arch/x86/x86_32/mm.c +++ b/xen/arch/x86/x86_32/mm.c @@ -252,8 +252,8 @@ long do_stack_switch(unsigned long ss, u fixup_guest_stack_selector(current->domain, ss); - current->arch.guest_context.kernel_ss = ss; - current->arch.guest_context.kernel_sp = esp; + current->arch.pv_vcpu.kernel_ss = ss; + current->arch.pv_vcpu.kernel_sp = esp; t->ss1 = ss; t->esp1 = esp; --- a/xen/arch/x86/x86_32/seg_fixup.c +++ b/xen/arch/x86/x86_32/seg_fixup.c @@ -175,13 +175,13 @@ static int get_baselimit(u16 seg, unsign if ( ldt ) { table = (uint32_t 
*)LDT_VIRT_START(curr); - if ( idx >= curr->arch.guest_context.ldt_ents ) + if ( idx >= curr->arch.pv_vcpu.ldt_ents ) goto fail; } else /* gdt */ { table = (uint32_t *)GDT_VIRT_START(curr); - if ( idx >= curr->arch.guest_context.gdt_ents ) + if ( idx >= curr->arch.pv_vcpu.gdt_ents ) goto fail; } @@ -241,20 +241,20 @@ static int fixup_seg(u16 seg, unsigned l if ( ldt ) { table = (uint32_t *)LDT_VIRT_START(curr); - if ( idx >= curr->arch.guest_context.ldt_ents ) + if ( idx >= curr->arch.pv_vcpu.ldt_ents ) { - dprintk(XENLOG_DEBUG, "Segment %04x out of LDT range (%ld)\n", - seg, curr->arch.guest_context.ldt_ents); + dprintk(XENLOG_DEBUG, "Segment %04x out of LDT range (%u)\n", + seg, curr->arch.pv_vcpu.ldt_ents); goto fail; } } else /* gdt */ { table = (uint32_t *)GDT_VIRT_START(curr); - if ( idx >= curr->arch.guest_context.gdt_ents ) + if ( idx >= curr->arch.pv_vcpu.gdt_ents ) { - dprintk(XENLOG_DEBUG, "Segment %04x out of GDT range (%ld)\n", - seg, curr->arch.guest_context.gdt_ents); + dprintk(XENLOG_DEBUG, "Segment %04x out of GDT range (%u)\n", + seg, curr->arch.pv_vcpu.gdt_ents); goto fail; } } @@ -545,7 +545,7 @@ int gpf_emulate_4gb(struct cpu_user_regs /* If requested, give a callback on otherwise unused vector 15. 
*/ if ( VM_ASSIST(curr->domain, VMASST_TYPE_4gb_segments_notify) ) { - struct trap_info *ti = &curr->arch.guest_context.trap_ctxt[15]; + struct trap_info *ti = &curr->arch.pv_vcpu.trap_ctxt[15]; struct trap_bounce *tb = &curr->arch.trap_bounce; tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE; --- a/xen/arch/x86/x86_32/traps.c +++ b/xen/arch/x86/x86_32/traps.c @@ -141,12 +141,12 @@ void vcpu_show_registers(const struct vc if ( is_hvm_vcpu(v) ) return; - crs[0] = v->arch.guest_context.ctrlreg[0]; + crs[0] = v->arch.pv_vcpu.ctrlreg[0]; crs[2] = v->vcpu_info->arch.cr2; crs[3] = pagetable_get_paddr(v->arch.guest_table); - crs[4] = v->arch.guest_context.ctrlreg[4]; + crs[4] = v->arch.pv_vcpu.ctrlreg[4]; - _show_registers(&v->arch.guest_context.user_regs, crs, CTXT_pv_guest, v); + _show_registers(&v->arch.user_regs, crs, CTXT_pv_guest, v); } void show_page_walk(unsigned long addr) @@ -372,7 +372,7 @@ void __devinit subarch_percpu_traps_init void init_int80_direct_trap(struct vcpu *v) { - struct trap_info *ti = &v->arch.guest_context.trap_ctxt[0x80]; + struct trap_info *ti = &v->arch.pv_vcpu.trap_ctxt[0x80]; /* * We can't virtualise interrupt gates, as there's no way to get @@ -419,19 +419,19 @@ static long register_guest_callback(stru switch ( reg->type ) { case CALLBACKTYPE_event: - v->arch.guest_context.event_callback_cs = reg->address.cs; - v->arch.guest_context.event_callback_eip = reg->address.eip; + v->arch.pv_vcpu.event_callback_cs = reg->address.cs; + v->arch.pv_vcpu.event_callback_eip = reg->address.eip; break; case CALLBACKTYPE_failsafe: - v->arch.guest_context.failsafe_callback_cs = reg->address.cs; - v->arch.guest_context.failsafe_callback_eip = reg->address.eip; + v->arch.pv_vcpu.failsafe_callback_cs = reg->address.cs; + v->arch.pv_vcpu.failsafe_callback_eip = reg->address.eip; if ( reg->flags & CALLBACKF_mask_events ) set_bit(_VGCF_failsafe_disables_events, - &v->arch.guest_context.flags); + &v->arch.vgc_flags); else 
clear_bit(_VGCF_failsafe_disables_events, - &v->arch.guest_context.flags); + &v->arch.vgc_flags); break; #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL --- a/xen/arch/x86/x86_64/asm-offsets.c +++ b/xen/arch/x86/x86_64/asm-offsets.c @@ -68,16 +68,14 @@ void __dummy__(void) OFFSET(VCPU_trap_bounce, struct vcpu, arch.trap_bounce); OFFSET(VCPU_int80_bounce, struct vcpu, arch.int80_bounce); OFFSET(VCPU_thread_flags, struct vcpu, arch.flags); - OFFSET(VCPU_event_addr, struct vcpu, - arch.guest_context.event_callback_eip); - OFFSET(VCPU_event_sel, struct vcpu, - arch.guest_context.event_callback_cs); + OFFSET(VCPU_event_addr, struct vcpu, arch.pv_vcpu.event_callback_eip); + OFFSET(VCPU_event_sel, struct vcpu, arch.pv_vcpu.event_callback_cs); OFFSET(VCPU_failsafe_addr, struct vcpu, - arch.guest_context.failsafe_callback_eip); + arch.pv_vcpu.failsafe_callback_eip); OFFSET(VCPU_failsafe_sel, struct vcpu, - arch.guest_context.failsafe_callback_cs); + arch.pv_vcpu.failsafe_callback_cs); OFFSET(VCPU_syscall_addr, struct vcpu, - arch.guest_context.syscall_callback_eip); + arch.pv_vcpu.syscall_callback_eip); OFFSET(VCPU_syscall32_addr, struct vcpu, arch.syscall32_callback_eip); OFFSET(VCPU_syscall32_sel, struct vcpu, arch.syscall32_callback_cs); OFFSET(VCPU_syscall32_disables_events, struct vcpu, @@ -86,13 +84,10 @@ void __dummy__(void) OFFSET(VCPU_sysenter_sel, struct vcpu, arch.sysenter_callback_cs); OFFSET(VCPU_sysenter_disables_events, struct vcpu, arch.sysenter_disables_events); - OFFSET(VCPU_gp_fault_addr, struct vcpu, - arch.guest_context.trap_ctxt[TRAP_gp_fault].address); - OFFSET(VCPU_gp_fault_sel, struct vcpu, - arch.guest_context.trap_ctxt[TRAP_gp_fault].cs); - OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp); - OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss); - OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags); + OFFSET(VCPU_trap_ctxt, struct vcpu, arch.pv_vcpu.trap_ctxt); + OFFSET(VCPU_kernel_sp, struct vcpu, 
arch.pv_vcpu.kernel_sp); + OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss); + OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags); OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending); OFFSET(VCPU_mce_pending, struct vcpu, mce_pending); OFFSET(VCPU_nmi_old_mask, struct vcpu, nmi_state.old_mask); @@ -139,6 +134,11 @@ void __dummy__(void) DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); BLANK(); + OFFSET(TRAPINFO_eip, struct trap_info, address); + OFFSET(TRAPINFO_cs, struct trap_info, cs); + DEFINE(TRAPINFO_sizeof, sizeof(struct trap_info)); + BLANK(); + OFFSET(TRAPBOUNCE_error_code, struct trap_bounce, error_code); OFFSET(TRAPBOUNCE_flags, struct trap_bounce, flags); OFFSET(TRAPBOUNCE_cs, struct trap_bounce, cs); --- a/xen/arch/x86/x86_64/compat/entry.S +++ b/xen/arch/x86/x86_64/compat/entry.S @@ -222,18 +222,20 @@ ENTRY(compat_syscall) movb %cl,TRAPBOUNCE_flags(%rdx) call compat_create_bounce_frame jmp compat_test_all_events -2: movl $TRAP_gp_fault,UREGS_entry_vector(%rsp) +2: movq VCPU_trap_ctxt(%rbx),%rsi + movl $TRAP_gp_fault,UREGS_entry_vector(%rsp) subl $2,UREGS_rip(%rsp) - movq VCPU_gp_fault_addr(%rbx),%rax - movzwl VCPU_gp_fault_sel(%rbx),%esi + movl TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%eax + movzwl TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_cs(%rsi),%esi movb $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl movl $0,TRAPBOUNCE_error_code(%rdx) jmp 1b ENTRY(compat_sysenter) + movq VCPU_trap_ctxt(%rbx),%rcx cmpl $TRAP_gp_fault,UREGS_entry_vector(%rsp) movzwl VCPU_sysenter_sel(%rbx),%eax - movzwl VCPU_gp_fault_sel(%rbx),%ecx + movzwl TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_cs(%rcx),%ecx cmovel %ecx,%eax testl $~3,%eax movl $FLAT_COMPAT_USER_SS,UREGS_ss(%rsp) --- a/xen/arch/x86/x86_64/compat/traps.c +++ b/xen/arch/x86/x86_64/compat/traps.c @@ -97,7 +97,7 @@ unsigned int compat_iret(void) * mode frames). 
*/ const struct trap_info *ti; - u32 x, ksp = v->arch.guest_context.kernel_sp - 40; + u32 x, ksp = v->arch.pv_vcpu.kernel_sp - 40; unsigned int i; int rc = 0; @@ -122,9 +122,9 @@ unsigned int compat_iret(void) if ( rc ) goto exit_and_crash; regs->_esp = ksp; - regs->ss = v->arch.guest_context.kernel_ss; + regs->ss = v->arch.pv_vcpu.kernel_ss; - ti = &v->arch.guest_context.trap_ctxt[13]; + ti = &v->arch.pv_vcpu.trap_ctxt[TRAP_gp_fault]; if ( TI_GET_IF(ti) ) eflags &= ~X86_EFLAGS_IF; regs->_eflags &= ~(X86_EFLAGS_VM|X86_EFLAGS_RF| @@ -174,19 +174,19 @@ static long compat_register_guest_callba switch ( reg->type ) { case CALLBACKTYPE_event: - v->arch.guest_context.event_callback_cs = reg->address.cs; - v->arch.guest_context.event_callback_eip = reg->address.eip; + v->arch.pv_vcpu.event_callback_cs = reg->address.cs; + v->arch.pv_vcpu.event_callback_eip = reg->address.eip; break; case CALLBACKTYPE_failsafe: - v->arch.guest_context.failsafe_callback_cs = reg->address.cs; - v->arch.guest_context.failsafe_callback_eip = reg->address.eip; + v->arch.pv_vcpu.failsafe_callback_cs = reg->address.cs; + v->arch.pv_vcpu.failsafe_callback_eip = reg->address.eip; if ( reg->flags & CALLBACKF_mask_events ) set_bit(_VGCF_failsafe_disables_events, - &v->arch.guest_context.flags); + &v->arch.vgc_flags); else clear_bit(_VGCF_failsafe_disables_events, - &v->arch.guest_context.flags); + &v->arch.vgc_flags); break; case CALLBACKTYPE_syscall32: @@ -311,7 +311,7 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_compat int compat_set_trap_table(XEN_GUEST_HANDLE(trap_info_compat_t) traps) { struct compat_trap_info cur; - struct trap_info *dst = current->arch.guest_context.trap_ctxt; + struct trap_info *dst = current->arch.pv_vcpu.trap_ctxt; long rc = 0; /* If no table is presented then clear the entire virtual IDT. 
*/ --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -286,8 +286,9 @@ sysenter_eflags_saved: jnz compat_sysenter call create_bounce_frame jmp test_all_events -2: movl %eax,TRAPBOUNCE_error_code(%rdx) - movq VCPU_gp_fault_addr(%rbx),%rax +2: movq VCPU_trap_ctxt(%rbx),%rcx + movl %eax,TRAPBOUNCE_error_code(%rdx) + movq TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rcx),%rax movb $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl movl $TRAP_gp_fault,UREGS_entry_vector(%rsp) jmp 1b --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -1100,8 +1100,8 @@ long subarch_memory_op(int op, XEN_GUEST long do_stack_switch(unsigned long ss, unsigned long esp) { fixup_guest_stack_selector(current->domain, ss); - current->arch.guest_context.kernel_ss = ss; - current->arch.guest_context.kernel_sp = esp; + current->arch.pv_vcpu.kernel_ss = ss; + current->arch.pv_vcpu.kernel_sp = esp; return 0; } @@ -1116,21 +1116,21 @@ long do_set_segment_base(unsigned int wh if ( wrmsr_safe(MSR_FS_BASE, base) ) ret = -EFAULT; else - v->arch.guest_context.fs_base = base; + v->arch.pv_vcpu.fs_base = base; break; case SEGBASE_GS_USER: if ( wrmsr_safe(MSR_SHADOW_GS_BASE, base) ) ret = -EFAULT; else - v->arch.guest_context.gs_base_user = base; + v->arch.pv_vcpu.gs_base_user = base; break; case SEGBASE_GS_KERNEL: if ( wrmsr_safe(MSR_GS_BASE, base) ) ret = -EFAULT; else - v->arch.guest_context.gs_base_kernel = base; + v->arch.pv_vcpu.gs_base_kernel = base; break; case SEGBASE_GS_USER_SEL: --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c @@ -146,19 +146,19 @@ void show_registers(struct cpu_user_regs void vcpu_show_registers(const struct vcpu *v) { - const struct cpu_user_regs *regs = &v->arch.guest_context.user_regs; + const struct cpu_user_regs *regs = &v->arch.user_regs; unsigned long crs[8]; /* No need to handle HVM for now. 
*/ if ( is_hvm_vcpu(v) ) return; - crs[0] = v->arch.guest_context.ctrlreg[0]; + crs[0] = v->arch.pv_vcpu.ctrlreg[0]; crs[2] = arch_get_cr2(v); crs[3] = pagetable_get_paddr(guest_kernel_mode(v, regs) ? v->arch.guest_table : v->arch.guest_table_user); - crs[4] = v->arch.guest_context.ctrlreg[4]; + crs[4] = v->arch.pv_vcpu.ctrlreg[4]; _show_registers(regs, crs, CTXT_pv_guest, v); } @@ -421,7 +421,7 @@ void __devinit subarch_percpu_traps_init void init_int80_direct_trap(struct vcpu *v) { - struct trap_info *ti = &v->arch.guest_context.trap_ctxt[0x80]; + struct trap_info *ti = &v->arch.pv_vcpu.trap_ctxt[0x80]; struct trap_bounce *tb = &v->arch.int80_bounce; tb->flags = TBF_EXCEPTION; @@ -443,27 +443,27 @@ static long register_guest_callback(stru switch ( reg->type ) { case CALLBACKTYPE_event: - v->arch.guest_context.event_callback_eip = reg->address; + v->arch.pv_vcpu.event_callback_eip = reg->address; break; case CALLBACKTYPE_failsafe: - v->arch.guest_context.failsafe_callback_eip = reg->address; + v->arch.pv_vcpu.failsafe_callback_eip = reg->address; if ( reg->flags & CALLBACKF_mask_events ) set_bit(_VGCF_failsafe_disables_events, - &v->arch.guest_context.flags); + &v->arch.vgc_flags); else clear_bit(_VGCF_failsafe_disables_events, - &v->arch.guest_context.flags); + &v->arch.vgc_flags); break; case CALLBACKTYPE_syscall: - v->arch.guest_context.syscall_callback_eip = reg->address; + v->arch.pv_vcpu.syscall_callback_eip = reg->address; if ( reg->flags & CALLBACKF_mask_events ) set_bit(_VGCF_syscall_disables_events, - &v->arch.guest_context.flags); + &v->arch.vgc_flags); else clear_bit(_VGCF_syscall_disables_events, - &v->arch.guest_context.flags); + &v->arch.vgc_flags); break; case CALLBACKTYPE_syscall32: --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -352,11 +352,52 @@ struct pae_l3_cache { }; #define pae_l3_cache_init(c) ((void)0) #endif +struct pv_vcpu +{ + struct trap_info *trap_ctxt; + + unsigned long gdt_frames[FIRST_RESERVED_GDT_PAGE]; + 
unsigned long ldt_base; + unsigned int gdt_ents, ldt_ents; + + unsigned long kernel_ss, kernel_sp; + unsigned long ctrlreg[8]; + + unsigned long event_callback_eip; + unsigned long failsafe_callback_eip; + union { +#ifdef CONFIG_X86_64 + unsigned long syscall_callback_eip; +#endif + struct { + unsigned int event_callback_cs; + unsigned int failsafe_callback_cs; + }; + }; + + unsigned long vm_assist; + +#ifdef CONFIG_X86_64 + /* Segment base addresses. */ + unsigned long fs_base; + unsigned long gs_base_kernel; + unsigned long gs_base_user; +#endif +}; + struct arch_vcpu { - /* Needs 16-byte aligment for FXSAVE/FXRSTOR. */ - struct vcpu_guest_context guest_context - __attribute__((__aligned__(16))); + /* + * guest context (mirroring struct vcpu_guest_context) common + * between pv and hvm guests + */ + + void *fpu_ctxt; + unsigned long vgc_flags; + struct cpu_user_regs user_regs; + unsigned long debugreg[8]; + + /* other state */ struct pae_l3_cache pae_l3_cache; @@ -389,7 +430,10 @@ struct arch_vcpu #endif /* Virtual Machine Extensions */ - struct hvm_vcpu hvm_vcpu; + union { + struct pv_vcpu pv_vcpu; + struct hvm_vcpu hvm_vcpu; + }; /* * Every domain has a L1 pagetable of its own. Per-domain mappings @@ -413,7 +457,7 @@ struct arch_vcpu * dirtied FPU/SSE) is scheduled out we XSAVE the states here; 2) in * #NM handler, we XRSTOR the states we XSAVE-ed; */ - void *xsave_area; + struct xsave_struct *xsave_area; uint64_t xcr0; /* Accumulated eXtended features mask for using XSAVE/XRESTORE by Xen * itself, as we can never know whether guest OS depends on content @@ -461,7 +505,7 @@ unsigned long pv_guest_cr4_fixup(const s /* Convert between guest-visible and real CR4 values. */ #define pv_guest_cr4_to_real_cr4(v) \ - (((v)->arch.guest_context.ctrlreg[4] \ + (((v)->arch.pv_vcpu.ctrlreg[4] \ | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE)) \ | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0) \ | ((xsave_enabled(v))? 
X86_CR4_OSXSAVE : 0)) \ --- a/xen/include/asm-x86/ldt.h +++ b/xen/include/asm-x86/ldt.h @@ -9,7 +9,7 @@ static inline void load_LDT(struct vcpu struct desc_struct *desc; unsigned long ents; - if ( (ents = v->arch.guest_context.ldt_ents) == 0 ) + if ( (ents = v->arch.pv_vcpu.ldt_ents) == 0 ) { __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) ); } --- a/xen/include/asm-x86/regs.h +++ b/xen/include/asm-x86/regs.h @@ -19,6 +19,6 @@ (diff == 0); \ }) -#define return_reg(v) ((v)->arch.guest_context.user_regs.eax) +#define return_reg(v) ((v)->arch.user_regs.eax) #endif /* __X86_REGS_H__ */ --- a/xen/include/xlat.lst +++ b/xen/include/xlat.lst @@ -10,7 +10,6 @@ ? vcpu_time_info xen.h ! cpu_user_regs arch-x86/xen-@arch@.h ! trap_info arch-x86/xen.h -! vcpu_guest_context arch-x86/xen.h ? cpu_offline_action arch-x86/xen-mca.h ? mc arch-x86/xen-mca.h ? mcinfo_bank arch-x86/xen-mca.h