x86/PV: support data breakpoint extension registers

Introducing an extension to XEN_DOMCTL_[gs]et_ext_vcpucontext similar to
the generic MSR save/restore logic recently added for HVM.

This also moves some debug register related declarations/definitions to
the header intended for these.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: libxc adjustment still missing (want to get some basic feedback on
     the domctl extension first)

--- a/xen/arch/x86/acpi/suspend.c
+++ b/xen/arch/x86/acpi/suspend.c
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include <asm/debugreg.h>
 #include
 #include
 #include
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1316,14 +1316,7 @@ static void paravirt_ctxt_switch_to(stru
     write_cr4(cr4);
 
     if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) )
-    {
-        write_debugreg(0, v->arch.debugreg[0]);
-        write_debugreg(1, v->arch.debugreg[1]);
-        write_debugreg(2, v->arch.debugreg[2]);
-        write_debugreg(3, v->arch.debugreg[3]);
-        write_debugreg(6, v->arch.debugreg[6]);
-        write_debugreg(7, v->arch.debugreg[7]);
-    }
+        activate_debugregs(v);
 
     if ( (v->domain->arch.tsc_mode == TSC_MODE_PVRDTSCP) &&
          boot_cpu_has(X86_FEATURE_RDTSCP) )
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -52,6 +52,7 @@ long arch_do_domctl(
 {
     long ret = 0;
     bool_t copyback = 0;
+    unsigned long i;
 
     switch ( domctl->cmd )
     {
@@ -319,7 +320,6 @@ long arch_do_domctl(
 
     case XEN_DOMCTL_getmemlist:
     {
-        int i;
         unsigned long max_pfns = domctl->u.getmemlist.max_pfns;
         uint64_t mfn;
         struct page_info *page;
@@ -645,7 +645,6 @@ long arch_do_domctl(
         unsigned long mfn = domctl->u.memory_mapping.first_mfn;
         unsigned long nr_mfns = domctl->u.memory_mapping.nr_mfns;
         int add = domctl->u.memory_mapping.add_mapping;
-        unsigned long i;
 
         ret = -EINVAL;
         if ( (mfn + nr_mfns - 1) < mfn || /* wrap? */
@@ -809,6 +808,7 @@ long arch_do_domctl(
     {
         struct xen_domctl_ext_vcpucontext *evc;
         struct vcpu *v;
+        struct xen_domctl_ext_vcpu_msr msr;
 
         evc = &domctl->u.ext_vcpucontext;
 
@@ -854,7 +854,42 @@ long arch_do_domctl(
             evc->vmce.mci_ctl2_bank0 = v->arch.vmce.bank[0].mci_ctl2;
             evc->vmce.mci_ctl2_bank1 = v->arch.vmce.bank[1].mci_ctl2;
 
-            ret = 0;
+            i = ret = 0;
+            if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+            {
+                unsigned int j;
+
+                if ( v->arch.pv_vcpu.dr_mask[0] )
+                {
+                    if ( i < evc->msr_count && !ret )
+                    {
+                        msr.index = MSR_AMD64_DR0_ADDRESS_MASK;
+                        msr.reserved = 0;
+                        msr.value = v->arch.pv_vcpu.dr_mask[0];
+                        if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )
+                            ret = -EFAULT;
+                    }
+                    ++i;
+                }
+                for ( j = 0; j < 3; ++j )
+                {
+                    if ( !v->arch.pv_vcpu.dr_mask[1 + j] )
+                        continue;
+                    if ( i < evc->msr_count && !ret )
+                    {
+                        msr.index = MSR_AMD64_DR1_ADDRESS_MASK + j;
+                        msr.reserved = 0;
+                        msr.value = v->arch.pv_vcpu.dr_mask[1 + j];
+                        if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )
+                            ret = -EFAULT;
+                    }
+                    ++i;
+                }
+            }
+            if ( i > evc->msr_count && !ret )
+                ret = -ENOBUFS;
+            evc->msr_count = i;
+
             vcpu_unpause(v);
             copyback = 1;
         }
@@ -909,9 +944,49 @@ long arch_do_domctl(
                 ret = vmce_restore_vcpu(v, &vmce);
             }
+            else if ( evc->size > offsetof(typeof(*evc), vmce) )
+                ret = -EINVAL;
             else
                 ret = 0;
 
+            if ( ret || evc->size <= offsetof(typeof(*evc), msrs) )
+                /* nothing */;
+            else if ( evc->size < offsetof(typeof(*evc), msrs) +
+                                  sizeof(evc->msrs) )
+                ret = -EINVAL;
+            else
+            {
+                for ( i = 0; i < evc->msr_count; ++i )
+                {
+                    ret = -EFAULT;
+                    if ( copy_from_guest_offset(&msr, evc->msrs, i, 1) )
+                        break;
+                    ret = -EINVAL;
+                    if ( msr.reserved )
+                        break;
+                    switch ( msr.index )
+                    {
+                    case MSR_AMD64_DR0_ADDRESS_MASK:
+                        if ( !boot_cpu_has(X86_FEATURE_DBEXT) ||
+                             (msr.value >> 32) )
+                            break;
+                        v->arch.pv_vcpu.dr_mask[0] = msr.value;
+                        continue;
+                    case MSR_AMD64_DR1_ADDRESS_MASK ...
+                         MSR_AMD64_DR3_ADDRESS_MASK:
+                        if ( !boot_cpu_has(X86_FEATURE_DBEXT) ||
+                             (msr.value >> 32) )
+                            break;
+                        msr.index -= MSR_AMD64_DR1_ADDRESS_MASK - 1;
+                        v->arch.pv_vcpu.dr_mask[msr.index] = msr.value;
+                        continue;
+                    }
+                    break;
+                }
+                if ( i == evc->msr_count )
+                    ret = 0;
+            }
+
             domain_unpause(d);
         }
     }
@@ -921,7 +996,6 @@ long arch_do_domctl(
     {
         xen_domctl_cpuid_t *ctl = &domctl->u.cpuid;
         cpuid_input_t *cpuid = NULL;
-        int i;
 
         for ( i = 0; i < MAX_CPUID_INPUT; i++ )
         {
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2498,6 +2498,23 @@ static int emulate_privileged_op(struct
             if ( wrmsr_safe(regs->ecx, msr_content) != 0 )
                 goto fail;
             break;
+
+        case MSR_AMD64_DR0_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) || (msr_content >> 32) )
+                goto fail;
+            v->arch.pv_vcpu.dr_mask[0] = msr_content;
+            if ( v->arch.debugreg[7] & DR7_ACTIVE_MASK )
+                wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, msr_content);
+            break;
+        case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) || (msr_content >> 32) )
+                goto fail;
+            v->arch.pv_vcpu.dr_mask
+                [regs->_ecx - MSR_AMD64_DR1_ADDRESS_MASK + 1] = msr_content;
+            if ( v->arch.debugreg[7] & DR7_ACTIVE_MASK )
+                wrmsrl(regs->_ecx, msr_content);
+            break;
+
         default:
             if ( wrmsr_hypervisor_regs(regs->ecx, msr_content) == 1 )
                 break;
@@ -2585,6 +2602,21 @@ static int emulate_privileged_op(struct
             regs->eax = (uint32_t)msr_content;
             regs->edx = (uint32_t)(msr_content >> 32);
             break;
+
+        case MSR_AMD64_DR0_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
+                goto fail;
+            regs->eax = v->arch.pv_vcpu.dr_mask[0];
+            regs->edx = 0;
+            break;
+        case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
+                goto fail;
+            regs->eax = v->arch.pv_vcpu.dr_mask
+                        [regs->_ecx - MSR_AMD64_DR1_ADDRESS_MASK + 1];
+            regs->edx = 0;
+            break;
+
         default:
             if ( rdmsr_hypervisor_regs(regs->ecx, &val) )
             {
@@ -3628,7 +3660,27 @@ long do_set_trap_table(XEN_GUEST_HANDLE_
     return rc;
 }
 
-long set_debugreg(struct vcpu *v, int reg, unsigned long value)
+void activate_debugregs(const struct vcpu *curr)
+{
+    ASSERT(curr == current);
+
+    write_debugreg(0, curr->arch.debugreg[0]);
+    write_debugreg(1, curr->arch.debugreg[1]);
+    write_debugreg(2, curr->arch.debugreg[2]);
+    write_debugreg(3, curr->arch.debugreg[3]);
+    write_debugreg(6, curr->arch.debugreg[6]);
+    write_debugreg(7, curr->arch.debugreg[7]);
+
+    if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+    {
+        wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[0]);
+        wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[1]);
+        wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[2]);
+        wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[3]);
+    }
+}
+
+long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
 {
     int i;
     struct vcpu *curr = current;
@@ -3709,11 +3761,8 @@ long set_debugreg(struct vcpu *v, int re
         if ( (v == curr) &&
              !(v->arch.debugreg[7] & DR7_ACTIVE_MASK) )
         {
-            write_debugreg(0, v->arch.debugreg[0]);
-            write_debugreg(1, v->arch.debugreg[1]);
-            write_debugreg(2, v->arch.debugreg[2]);
-            write_debugreg(3, v->arch.debugreg[3]);
-            write_debugreg(6, v->arch.debugreg[6]);
+            activate_debugregs(curr);
+            break;
         }
     }
     if ( v == curr )
--- a/xen/include/asm-x86/debugreg.h
+++ b/xen/include/asm-x86/debugreg.h
@@ -64,4 +64,16 @@
 #define DR_GLOBAL_EXACT_ENABLE (0x00000200ul) /* Global exact enable */
 #define DR_GENERAL_DETECT      (0x00002000ul) /* General detect enable */
 
+#define write_debugreg(reg, val) do {                       \
+    unsigned long __val = val;                              \
+    asm volatile ( "mov %0,%%db" #reg : : "r" (__val) );    \
+} while (0)
+#define read_debugreg(reg) ({                               \
+    unsigned long __val;                                    \
+    asm volatile ( "mov %%db" #reg ",%0" : "=r" (__val) );  \
+    __val;                                                  \
+})
+long set_debugreg(struct vcpu *, unsigned int reg, unsigned long value);
+void activate_debugregs(const struct vcpu *);
+
 #endif /* _X86_DEBUGREG_H */
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -374,6 +374,9 @@ struct pv_vcpu
     unsigned long shadow_ldt_mapcnt;
     spinlock_t shadow_ldt_lock;
 
+    /* data breakpoint extension MSRs */
+    uint32_t dr_mask[4];
+
     /* Deferred VA-based update state. */
     bool_t need_update_runstate_area;
     struct vcpu_time_info pending_system_time;
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -462,17 +462,6 @@ long set_gdt(struct vcpu *d,
              unsigned long *frames,
              unsigned int entries);
 
-#define write_debugreg(reg, val) do {                       \
-    unsigned long __val = val;                              \
-    asm volatile ( "mov %0,%%db" #reg : : "r" (__val) );    \
-} while (0)
-#define read_debugreg(reg) ({                               \
-    unsigned long __val;                                    \
-    asm volatile ( "mov %%db" #reg ",%0" : "=r" (__val) );  \
-    __val;                                                  \
-})
-long set_debugreg(struct vcpu *p, int reg, unsigned long value);
-
 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
 static always_inline void rep_nop(void)
 {
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -36,7 +36,7 @@
 #include "grant_table.h"
 #include "hvm/save.h"
 
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
+#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
 
 /*
  * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
@@ -563,6 +563,16 @@ typedef struct xen_domctl_pin_mem_cachea
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t);
 
 
+#if defined(__i386__) || defined(__x86_64__)
+struct xen_domctl_ext_vcpu_msr {
+    uint32_t         index;
+    uint32_t         reserved;
+    uint64_aligned_t value;
+};
+typedef struct xen_domctl_ext_vcpu_msr xen_domctl_ext_vcpu_msr_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpu_msr_t);
+#endif
+
 /* XEN_DOMCTL_set_ext_vcpucontext */
 /* XEN_DOMCTL_get_ext_vcpucontext */
 struct xen_domctl_ext_vcpucontext {
@@ -582,6 +592,7 @@ struct xen_domctl_ext_vcpucontext {
     uint16_t         sysenter_callback_cs;
     uint8_t          syscall32_disables_events;
     uint8_t          sysenter_disables_events;
+    uint16_t         msr_count;
 #if defined(__GNUC__)
     union {
         uint64_aligned_t mcg_cap;
@@ -590,6 +601,7 @@ struct xen_domctl_ext_vcpucontext {
 #else
     struct hvm_vmce_vcpu vmce;
 #endif
+    XEN_GUEST_HANDLE_64(xen_domctl_ext_vcpu_msr_t) msrs;
 #endif
 };
 typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t;
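
P.S.: To illustrate for reviewers how a toolstack-side caller could drive the
new msr_count/msrs fields (this is only a rough sketch, not the still-missing
libxc adjustment): get_dbext_msrs() and do_domctl() below are made-up
stand-ins for whatever helper and hypercall wrapper the caller already has,
and a real implementation would additionally need to bounce/lock the MSR
buffer for hypercall access.

/* Illustrative sketch only -- not the libxc change mentioned above. */
#include <string.h>
#include <xen/domctl.h>

/* do_domctl() is assumed to issue the hypercall and return Xen's rc. */
static int get_dbext_msrs(domid_t dom, uint32_t vcpu,
                          xen_domctl_ext_vcpu_msr_t *buf, uint16_t nr)
{
    struct xen_domctl domctl;
    int rc;

    memset(&domctl, 0, sizeof(domctl));
    domctl.cmd = XEN_DOMCTL_get_ext_vcpucontext;
    domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
    domctl.domain = dom;
    domctl.u.ext_vcpucontext.vcpu = vcpu;
    domctl.u.ext_vcpucontext.msr_count = nr;   /* capacity of buf */
    set_xen_guest_handle(domctl.u.ext_vcpucontext.msrs, buf);

    rc = do_domctl(&domctl);
    if ( rc && rc != -ENOBUFS )
        return rc;

    /* On exit msr_count holds the number of populated address mask MSRs. */
    return rc ? rc : domctl.u.ext_vcpucontext.msr_count;
}

Judging from the get path in the hunk above, msr_count is written back even
when -ENOBUFS is returned, so a first call with a zero-capacity buffer can be
used to size the array, followed by a second call to fetch the entries.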