[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v1 13/13] x86/PMU: Move vpmu files up from hvm directory
Since VPMU is now used by both HVM and PV we should move it up from HVM subtree: xen/arch/x86/hvm/vpmu.c => xen/arch/x86/vpmu.c xen/arch/x86/hvm/vmx/vpmu_core2.c => xen/arch/x86/vpmu_intel.c xen/arch/x86/hvm/svm/vpmu.c => xen/arch/x86/vpmu_amd.c xen/include/asm-x86/hvm/vpmu.h => xen/include/asm-x86/vpmu.h No code changes (except for adjusting Makefiles and paths for #includes). Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx> --- xen/arch/x86/Makefile | 1 + xen/arch/x86/hvm/Makefile | 1 - xen/arch/x86/hvm/svm/Makefile | 1 - xen/arch/x86/hvm/svm/vpmu.c | 486 ------------------ xen/arch/x86/hvm/vmx/Makefile | 1 - xen/arch/x86/hvm/vmx/vpmu_core2.c | 938 ---------------------------------- xen/arch/x86/hvm/vpmu.c | 545 -------------------- xen/arch/x86/oprofile/op_model_ppro.c | 2 +- xen/arch/x86/traps.c | 2 +- xen/arch/x86/vpmu.c | 545 ++++++++++++++++++++ xen/arch/x86/vpmu_amd.c | 486 ++++++++++++++++++ xen/arch/x86/vpmu_intel.c | 938 ++++++++++++++++++++++++++++++++++ xen/include/asm-x86/domain.h | 1 + xen/include/asm-x86/hvm/vmx/vmcs.h | 1 - xen/include/asm-x86/hvm/vpmu.h | 97 ---- xen/include/asm-x86/vpmu.h | 97 ++++ 16 files changed, 2070 insertions(+), 2072 deletions(-) delete mode 100644 xen/arch/x86/hvm/svm/vpmu.c delete mode 100644 xen/arch/x86/hvm/vmx/vpmu_core2.c delete mode 100644 xen/arch/x86/hvm/vpmu.c create mode 100644 xen/arch/x86/vpmu.c create mode 100644 xen/arch/x86/vpmu_amd.c create mode 100644 xen/arch/x86/vpmu_intel.c delete mode 100644 xen/include/asm-x86/hvm/vpmu.h create mode 100644 xen/include/asm-x86/vpmu.h diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile index a27ac44..47d067d 100644 --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile @@ -58,6 +58,7 @@ obj-y += crash.o obj-y += tboot.o obj-y += hpet.o obj-y += xstate.o +obj-y += vpmu.o vpmu_intel.o vpmu_amd.o obj-$(crash_debug) += gdbstub.o diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile index eea5555..742b83b 100644 --- 
a/xen/arch/x86/hvm/Makefile +++ b/xen/arch/x86/hvm/Makefile @@ -22,4 +22,3 @@ obj-y += vlapic.o obj-y += vmsi.o obj-y += vpic.o obj-y += vpt.o -obj-y += vpmu.o \ No newline at end of file diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile index a10a55e..760d295 100644 --- a/xen/arch/x86/hvm/svm/Makefile +++ b/xen/arch/x86/hvm/svm/Makefile @@ -6,4 +6,3 @@ obj-y += nestedsvm.o obj-y += svm.o obj-y += svmdebug.o obj-y += vmcb.o -obj-y += vpmu.o diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c deleted file mode 100644 index 1815674..0000000 --- a/xen/arch/x86/hvm/svm/vpmu.c +++ /dev/null @@ -1,486 +0,0 @@ -/* - * vpmu.c: PMU virtualization for HVM domain. - * - * Copyright (c) 2010, Advanced Micro Devices, Inc. - * Parts of this code are Copyright (c) 2007, Intel Corporation - * - * Author: Wei Wang <wei.wang2@xxxxxxx> - * Tested by: Suravee Suthikulpanit <Suravee.Suthikulpanit@xxxxxxx> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - -#include <xen/config.h> -#include <xen/xenoprof.h> -#include <xen/hvm/save.h> -#include <xen/sched.h> -#include <xen/irq.h> -#include <asm/apic.h> -#include <asm/hvm/vlapic.h> -#include <asm/hvm/vpmu.h> -#include <public/xenpmu.h> - -#define MSR_F10H_EVNTSEL_GO_SHIFT 40 -#define MSR_F10H_EVNTSEL_EN_SHIFT 22 -#define MSR_F10H_COUNTER_LENGTH 48 - -#define is_guest_mode(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) -#define is_pmu_enabled(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_EN_SHIFT)) -#define set_guest_mode(msr) (msr |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) -#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1)))) - -static unsigned int __read_mostly num_counters; -static const u32 __read_mostly *counters; -static const u32 __read_mostly *ctrls; -static bool_t __read_mostly k7_counters_mirrored; - -/* PMU Counter MSRs. */ -static const u32 AMD_F10H_COUNTERS[] = { - MSR_K7_PERFCTR0, - MSR_K7_PERFCTR1, - MSR_K7_PERFCTR2, - MSR_K7_PERFCTR3 -}; - -/* PMU Control MSRs. 
*/ -static const u32 AMD_F10H_CTRLS[] = { - MSR_K7_EVNTSEL0, - MSR_K7_EVNTSEL1, - MSR_K7_EVNTSEL2, - MSR_K7_EVNTSEL3 -}; - -static const u32 AMD_F15H_COUNTERS[] = { - MSR_AMD_FAM15H_PERFCTR0, - MSR_AMD_FAM15H_PERFCTR1, - MSR_AMD_FAM15H_PERFCTR2, - MSR_AMD_FAM15H_PERFCTR3, - MSR_AMD_FAM15H_PERFCTR4, - MSR_AMD_FAM15H_PERFCTR5 -}; - -static const u32 AMD_F15H_CTRLS[] = { - MSR_AMD_FAM15H_EVNTSEL0, - MSR_AMD_FAM15H_EVNTSEL1, - MSR_AMD_FAM15H_EVNTSEL2, - MSR_AMD_FAM15H_EVNTSEL3, - MSR_AMD_FAM15H_EVNTSEL4, - MSR_AMD_FAM15H_EVNTSEL5 -}; - -static inline int get_pmu_reg_type(u32 addr) -{ - if ( (addr >= MSR_K7_EVNTSEL0) && (addr <= MSR_K7_EVNTSEL3) ) - return MSR_TYPE_CTRL; - - if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) ) - return MSR_TYPE_COUNTER; - - if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) && - (addr <= MSR_AMD_FAM15H_PERFCTR5 ) ) - { - if (addr & 1) - return MSR_TYPE_COUNTER; - else - return MSR_TYPE_CTRL; - } - - /* unsupported registers */ - return -1; -} - -static inline u32 get_fam15h_addr(u32 addr) -{ - switch ( addr ) - { - case MSR_K7_PERFCTR0: - return MSR_AMD_FAM15H_PERFCTR0; - case MSR_K7_PERFCTR1: - return MSR_AMD_FAM15H_PERFCTR1; - case MSR_K7_PERFCTR2: - return MSR_AMD_FAM15H_PERFCTR2; - case MSR_K7_PERFCTR3: - return MSR_AMD_FAM15H_PERFCTR3; - case MSR_K7_EVNTSEL0: - return MSR_AMD_FAM15H_EVNTSEL0; - case MSR_K7_EVNTSEL1: - return MSR_AMD_FAM15H_EVNTSEL1; - case MSR_K7_EVNTSEL2: - return MSR_AMD_FAM15H_EVNTSEL2; - case MSR_K7_EVNTSEL3: - return MSR_AMD_FAM15H_EVNTSEL3; - default: - break; - } - - return addr; -} - -static void amd_vpmu_set_msr_bitmap(struct vcpu *v) -{ - unsigned int i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct amd_vpmu_context *ctxt = vpmu->context; - - for ( i = 0; i < num_counters; i++ ) - { - svm_intercept_msr(v, counters[i], MSR_INTERCEPT_NONE); - svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_WRITE); - } - - ctxt->msr_bitmap_set = 1; -} - -static void amd_vpmu_unset_msr_bitmap(struct vcpu *v) -{ - unsigned int 
i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct amd_vpmu_context *ctxt = vpmu->context; - - for ( i = 0; i < num_counters; i++ ) - { - svm_intercept_msr(v, counters[i], MSR_INTERCEPT_RW); - svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_RW); - } - - ctxt->msr_bitmap_set = 0; -} - -static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs) -{ - return 1; -} - -static inline void context_load(struct vcpu *v) -{ - unsigned int i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct amd_vpmu_context *ctxt = vpmu->context; - - for ( i = 0; i < num_counters; i++ ) - { - wrmsrl(counters[i], ctxt->counters[i]); - wrmsrl(ctrls[i], ctxt->ctrls[i]); - } -} - -static void amd_vpmu_load(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct amd_vpmu_context *ctxt = vpmu->context; - - vpmu_reset(vpmu, VPMU_FROZEN); - - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - { - unsigned int i; - - for ( i = 0; i < num_counters; i++ ) - wrmsrl(ctrls[i], ctxt->ctrls[i]); - - return; - } - - context_load(v); -} - -static inline void context_save(struct vcpu *v) -{ - unsigned int i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct amd_vpmu_context *ctxt = vpmu->context; - - /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */ - for ( i = 0; i < num_counters; i++ ) - rdmsrl(counters[i], ctxt->counters[i]); -} - -static int amd_vpmu_save(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct amd_vpmu_context *ctx = vpmu->context; - unsigned int i; - - if ( !vpmu_is_set(vpmu, VPMU_FROZEN) ) - { - for ( i = 0; i < num_counters; i++ ) - wrmsrl(ctrls[i], 0); - - vpmu_set(vpmu, VPMU_FROZEN); - } - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) - return 0; - - context_save(v); - - if ( is_hvm_domain(v->domain) && - !vpmu_is_set(vpmu, VPMU_RUNNING) && ctx->msr_bitmap_set ) - amd_vpmu_unset_msr_bitmap(v); - - return 1; -} - -static void context_update(unsigned int msr, u64 msr_content) -{ - unsigned int i; - struct vcpu *v = current; - struct 
vpmu_struct *vpmu = vcpu_vpmu(v); - struct amd_vpmu_context *ctxt = vpmu->context; - - if ( k7_counters_mirrored && - ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) ) - { - msr = get_fam15h_addr(msr); - } - - for ( i = 0; i < num_counters; i++ ) - { - if ( msr == ctrls[i] ) - { - ctxt->ctrls[i] = msr_content; - return; - } - else if (msr == counters[i] ) - { - ctxt->counters[i] = msr_content; - return; - } - } -} - -static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) -{ - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - /* For all counters, enable guest only mode for HVM guest */ - if ( is_hvm_domain(v->domain) && (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && - !(is_guest_mode(msr_content)) ) - { - set_guest_mode(msr_content); - } - - /* check if the first counter is enabled */ - if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && - is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) ) - { - if ( !acquire_pmu_ownership(PMU_OWNER_HVM) ) - return 1; - vpmu_set(vpmu, VPMU_RUNNING); - apic_write(APIC_LVTPC, APIC_DM_NMI); - vpmu->hw_lapic_lvtpc = APIC_DM_NMI; - - if ( is_hvm_domain(v->domain) && - !((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set ) - amd_vpmu_set_msr_bitmap(v); - } - - /* stop saving & restore if guest stops first counter */ - if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && - (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) ) - { - apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED); - vpmu->hw_lapic_lvtpc = APIC_DM_NMI | APIC_LVT_MASKED; - vpmu_reset(vpmu, VPMU_RUNNING); - if ( is_hvm_domain(v->domain) && - ((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set ) - amd_vpmu_unset_msr_bitmap(v); - release_pmu_ownship(PMU_OWNER_HVM); - } - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) - || vpmu_is_set(vpmu, VPMU_FROZEN) ) - { - context_load(v); - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - vpmu_reset(vpmu, VPMU_FROZEN); - } - - /* Update vpmu context immediately */ 
- context_update(msr, msr_content); - - /* Write to hw counters */ - wrmsrl(msr, msr_content); - return 1; -} - -static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) - || vpmu_is_set(vpmu, VPMU_FROZEN) ) - { - context_load(v); - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - vpmu_reset(vpmu, VPMU_FROZEN); - } - - rdmsrl(msr, *msr_content); - - return 1; -} - -static int amd_vpmu_initialise(struct vcpu *v) -{ - struct amd_vpmu_context *ctxt; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - uint8_t family = current_cpu_data.x86; - - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return 0; - - if ( counters == NULL ) - { - switch ( family ) - { - case 0x15: - num_counters = F15H_NUM_COUNTERS; - counters = AMD_F15H_COUNTERS; - ctrls = AMD_F15H_CTRLS; - k7_counters_mirrored = 1; - break; - case 0x10: - case 0x12: - case 0x14: - case 0x16: - default: - num_counters = F10H_NUM_COUNTERS; - counters = AMD_F10H_COUNTERS; - ctrls = AMD_F10H_CTRLS; - k7_counters_mirrored = 0; - break; - } - } - - if ( is_hvm_domain(v->domain) ) - { - ctxt = xzalloc(struct amd_vpmu_context); - if ( !ctxt ) - { - gdprintk(XENLOG_WARNING, "Insufficient memory for PMU, " - " PMU feature is unavailable on domain %d vcpu %d.\n", - v->vcpu_id, v->domain->domain_id); - return -ENOMEM; - } - } - else - ctxt = &v->arch.vpmu.xenpmu_data->pmu.amd; - - vpmu->context = ctxt; - vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); - return 0; -} - -static void amd_vpmu_destroy(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - - if ( is_hvm_domain(v->domain) ) - { - if ( ((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set ) - amd_vpmu_unset_msr_bitmap(v); - - xfree(vpmu->context); - release_pmu_ownship(PMU_OWNER_HVM); - } - - vpmu->context = NULL; - vpmu_clear(vpmu); -} - -/* VPMU part of the 'q' keyhandler */ 
-static void amd_vpmu_dump(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct amd_vpmu_context *ctxt = vpmu->context; - unsigned int i; - - printk(" VPMU state: 0x%x ", vpmu->flags); - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - { - printk("\n"); - return; - } - - printk("("); - if ( vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED) ) - printk("PASSIVE_DOMAIN_ALLOCATED, "); - if ( vpmu_is_set(vpmu, VPMU_FROZEN) ) - printk("FROZEN, "); - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) - printk("SAVE, "); - if ( vpmu_is_set(vpmu, VPMU_RUNNING) ) - printk("RUNNING, "); - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - printk("LOADED, "); - printk("ALLOCATED)\n"); - - for ( i = 0; i < num_counters; i++ ) - { - uint64_t ctrl, cntr; - - rdmsrl(ctrls[i], ctrl); - rdmsrl(counters[i], cntr); - printk(" 0x%08x: 0x%lx (0x%lx in HW) 0x%08x: 0x%lx (0x%lx in HW)\n", - ctrls[i], ctxt->ctrls[i], ctrl, - counters[i], ctxt->counters[i], cntr); - } -} - -struct arch_vpmu_ops amd_vpmu_ops = { - .do_wrmsr = amd_vpmu_do_wrmsr, - .do_rdmsr = amd_vpmu_do_rdmsr, - .do_interrupt = amd_vpmu_do_interrupt, - .arch_vpmu_destroy = amd_vpmu_destroy, - .arch_vpmu_save = amd_vpmu_save, - .arch_vpmu_load = amd_vpmu_load, - .arch_vpmu_dump = amd_vpmu_dump -}; - -int svm_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - uint8_t family = current_cpu_data.x86; - int ret = 0; - - /* vpmu enabled? */ - if ( vpmu_flags == VPMU_OFF ) - return 0; - - switch ( family ) - { - case 0x10: - case 0x12: - case 0x14: - case 0x15: - case 0x16: - ret = amd_vpmu_initialise(v); - if ( !ret ) - vpmu->arch_vpmu_ops = &amd_vpmu_ops; - return ret; - } - - printk("VPMU: Initialization failed. 
" - "AMD processor family %d has not " - "been supported\n", family); - return -EINVAL; -} - diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile index 373b3d9..04a29ce 100644 --- a/xen/arch/x86/hvm/vmx/Makefile +++ b/xen/arch/x86/hvm/vmx/Makefile @@ -3,5 +3,4 @@ obj-y += intr.o obj-y += realmode.o obj-y += vmcs.o obj-y += vmx.o -obj-y += vpmu_core2.o obj-y += vvmx.o diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c deleted file mode 100644 index 3f5941a..0000000 --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c +++ /dev/null @@ -1,938 +0,0 @@ -/* - * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain. - * - * Copyright (c) 2007, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - * Author: Haitao Shan <haitao.shan@xxxxxxxxx> - */ - -#include <xen/config.h> -#include <xen/sched.h> -#include <xen/xenoprof.h> -#include <xen/irq.h> -#include <asm/system.h> -#include <asm/regs.h> -#include <asm/types.h> -#include <asm/apic.h> -#include <asm/traps.h> -#include <asm/msr.h> -#include <asm/msr-index.h> -#include <asm/hvm/support.h> -#include <asm/hvm/vlapic.h> -#include <asm/hvm/vmx/vmx.h> -#include <asm/hvm/vmx/vmcs.h> -#include <public/sched.h> -#include <public/hvm/save.h> -#include <public/xenpmu.h> -#include <asm/hvm/vpmu.h> - -/* - * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID - * instruction. - * cpuid 0xa - Architectural Performance Monitoring Leaf - * Register eax - */ -#define PMU_VERSION_SHIFT 0 /* Version ID */ -#define PMU_VERSION_BITS 8 /* 8 bits 0..7 */ -#define PMU_VERSION_MASK (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT) - -#define PMU_GENERAL_NR_SHIFT 8 /* Number of general pmu registers */ -#define PMU_GENERAL_NR_BITS 8 /* 8 bits 8..15 */ -#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT) - -#define PMU_GENERAL_WIDTH_SHIFT 16 /* Width of general pmu registers */ -#define PMU_GENERAL_WIDTH_BITS 8 /* 8 bits 16..23 */ -#define PMU_GENERAL_WIDTH_MASK (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT) -/* Register edx */ -#define PMU_FIXED_NR_SHIFT 0 /* Number of fixed pmu registers */ -#define PMU_FIXED_NR_BITS 5 /* 5 bits 0..4 */ -#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT) - -#define PMU_FIXED_WIDTH_SHIFT 5 /* Width of fixed pmu registers */ -#define PMU_FIXED_WIDTH_BITS 8 /* 8 bits 5..12 */ -#define PMU_FIXED_WIDTH_MASK (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT) - - -/* Intel-specific VPMU features */ -#define VPMU_CPU_HAS_DS 0x100 /* Has Debug Store */ -#define VPMU_CPU_HAS_BTS 0x200 /* Has Branch Trace Store */ - -/* - * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed - 
* counters. 4 bits for every counter. - */ -#define FIXED_CTR_CTRL_BITS 4 -#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1) - -/* The index into the core2_ctrls_msr[] of this MSR used in core2_vpmu_dump() */ -#define MSR_CORE_PERF_FIXED_CTR_CTRL_IDX 0 - -static int arch_pmc_cnt; /* Number of general-purpose performance counters */ - -/* - * QUIRK to workaround an issue on various family 6 cpus. - * The issue leads to endless PMC interrupt loops on the processor. - * If the interrupt handler is running and a pmc reaches the value 0, this - * value remains forever and it triggers immediately a new interrupt after - * finishing the handler. - * A workaround is to read all flagged counters and if the value is 0 write - * 1 (or another value != 0) into it. - * There exist no errata and the real cause of this behaviour is unknown. - */ -bool_t __read_mostly is_pmc_quirk; - -static void check_pmc_quirk(void) -{ - if ( current_cpu_data.x86 == 6 ) - is_pmc_quirk = 1; - else - is_pmc_quirk = 0; -} - -static void handle_pmc_quirk(u64 msr_content) -{ - int i; - u64 val; - - if ( !is_pmc_quirk ) - return; - - val = msr_content; - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - if ( val & 0x1 ) - { - u64 cnt; - rdmsrl(MSR_P6_PERFCTR0 + i, cnt); - if ( cnt == 0 ) - wrmsrl(MSR_P6_PERFCTR0 + i, 1); - } - val >>= 1; - } - val = msr_content >> 32; - for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) - { - if ( val & 0x1 ) - { - u64 cnt; - rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt); - if ( cnt == 0 ) - wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1); - } - val >>= 1; - } -} - -/* - * Read the number of general counters via CPUID.EAX[0xa].EAX[8..15] - */ -static int core2_get_pmc_count(void) -{ - u32 eax, ebx, ecx, edx; - - cpuid(0xa, &eax, &ebx, &ecx, &edx); - return ( (eax & PMU_GENERAL_NR_MASK) >> PMU_GENERAL_NR_SHIFT ); -} - -static u64 core2_calc_intial_glb_ctrl_msr(void) -{ - int arch_pmc_bits = (1 << arch_pmc_cnt) - 1; - u64 fix_pmc_bits = (1 << VPMU_CORE2_NUM_FIXED) - 1; - return 
((fix_pmc_bits << 32) | arch_pmc_bits); -} - -/* edx bits 5-12: Bit width of fixed-function performance counters */ -static int core2_get_bitwidth_fix_count(void) -{ - u32 eax, ebx, ecx, edx; - - cpuid(0xa, &eax, &ebx, &ecx, &edx); - return ((edx & PMU_FIXED_WIDTH_MASK) >> PMU_FIXED_WIDTH_SHIFT); -} - -static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index) -{ - int i; - - for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) - { - if ( core2_fix_counters_msr[i] == msr_index ) - { - *type = MSR_TYPE_COUNTER; - *index = i; - return 1; - } - } - - for ( i = 0; i < VPMU_CORE2_NUM_CTRLS; i++ ) - { - if ( core2_ctrls_msr[i] == msr_index ) - { - *type = MSR_TYPE_CTRL; - *index = i; - return 1; - } - } - - if ( (msr_index == MSR_CORE_PERF_GLOBAL_CTRL) || - (msr_index == MSR_CORE_PERF_GLOBAL_STATUS) || - (msr_index == MSR_CORE_PERF_GLOBAL_OVF_CTRL) ) - { - *type = MSR_TYPE_GLOBAL; - return 1; - } - - if ( (msr_index >= MSR_IA32_PERFCTR0) && - (msr_index < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) ) - { - *type = MSR_TYPE_ARCH_COUNTER; - *index = msr_index - MSR_IA32_PERFCTR0; - return 1; - } - - if ( (msr_index >= MSR_P6_EVNTSEL0) && - (msr_index < (MSR_P6_EVNTSEL0 + arch_pmc_cnt)) ) - { - *type = MSR_TYPE_ARCH_CTRL; - *index = msr_index - MSR_P6_EVNTSEL0; - return 1; - } - - return 0; -} - -#define msraddr_to_bitpos(x) (((x)&0xffff) + ((x)>>31)*0x2000) -static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap) -{ - int i; - - /* Allow Read/Write PMU Counters MSR Directly. */ - for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) - { - clear_bit(msraddr_to_bitpos(core2_fix_counters_msr[i]), msr_bitmap); - clear_bit(msraddr_to_bitpos(core2_fix_counters_msr[i]), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - - /* Allow Read PMU Non-global Controls Directly. 
*/ - for ( i = 0; i < VPMU_CORE2_NUM_CTRLS; i++ ) - clear_bit(msraddr_to_bitpos(core2_ctrls_msr[i]), msr_bitmap); - for ( i = 0; i < arch_pmc_cnt; i++ ) - clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0+i), msr_bitmap); -} - -static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap) -{ - int i; - - for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) - { - set_bit(msraddr_to_bitpos(core2_fix_counters_msr[i]), msr_bitmap); - set_bit(msraddr_to_bitpos(core2_fix_counters_msr[i]), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - for ( i = 0; i < VPMU_CORE2_NUM_CTRLS; i++ ) - set_bit(msraddr_to_bitpos(core2_ctrls_msr[i]), msr_bitmap); - for ( i = 0; i < arch_pmc_cnt; i++ ) - set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0+i), msr_bitmap); -} - -static inline void __core2_vpmu_save(struct vcpu *v) -{ - int i; - struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context; - - for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) - rdmsrl(core2_fix_counters_msr[i], core2_vpmu_cxt->fix_counters[i]); - for ( i = 0; i < arch_pmc_cnt; i++ ) - rdmsrl(MSR_IA32_PERFCTR0+i, core2_vpmu_cxt->arch_msr_pair[i].counter); - - if ( !is_hvm_domain(v->domain) ) - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status); -} - -static int core2_vpmu_save(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set_all(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) ) - return 0; - - if ( !is_hvm_domain(v->domain) ) - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - __core2_vpmu_save(v); - - /* Unset PMU MSR bitmap to trap lazy load. 
*/ - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && cpu_has_vmx_msr_bitmap - && is_hvm_domain(v->domain) ) - core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); - - return 1; -} - -static inline void __core2_vpmu_load(struct vcpu *v) -{ - int i; - struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context; - - for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) - wrmsrl(core2_fix_counters_msr[i], core2_vpmu_cxt->fix_counters[i]); - for ( i = 0; i < arch_pmc_cnt; i++ ) - wrmsrl(MSR_IA32_PERFCTR0+i, core2_vpmu_cxt->arch_msr_pair[i].counter); - - for ( i = 0; i < VPMU_CORE2_NUM_CTRLS; i++ ) - wrmsrl(core2_ctrls_msr[i], core2_vpmu_cxt->ctrls[i]); - for ( i = 0; i < arch_pmc_cnt; i++ ) - wrmsrl(MSR_P6_EVNTSEL0+i, core2_vpmu_cxt->arch_msr_pair[i].control); - - if ( !is_hvm_domain(v->domain) ) - { - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl); - } -} - -static void core2_vpmu_load(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - return; - - __core2_vpmu_load(v); -} - -static int core2_vpmu_alloc_resource(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct core2_vpmu_context *core2_vpmu_cxt; - struct core2_pmu_enable *pmu_enable = NULL; - - pmu_enable = xzalloc_bytes(sizeof(struct core2_pmu_enable)); - if ( !pmu_enable ) - return 0; - - if ( is_hvm_domain(v->domain) ) - { - if ( !acquire_pmu_ownership(PMU_OWNER_HVM) ) - goto out_err; - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) - goto out_err; - - if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) - goto out_err; - vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, - core2_calc_intial_glb_ctrl_msr()); - - core2_vpmu_cxt = xzalloc_bytes(sizeof(struct core2_vpmu_context)); - if ( !core2_vpmu_cxt ) - goto out_err; - } - else - { - core2_vpmu_cxt = &v->arch.vpmu.xenpmu_data->pmu.intel; - vpmu_set(vpmu, 
VPMU_CONTEXT_ALLOCATED); - } - - core2_vpmu_cxt->pmu_enable = pmu_enable; - vpmu->context = (void *)core2_vpmu_cxt; - - vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); - - return 1; - -out_err: - xfree(pmu_enable); - vmx_rm_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL); - vmx_rm_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL); - release_pmu_ownship(PMU_OWNER_HVM); - - printk("Failed to allocate VPMU resources for domain %u vcpu %u\n", - v->vcpu_id, v->domain->domain_id); - - return 0; -} - -static void core2_vpmu_save_msr_context(struct vcpu *v, int type, - int index, u64 msr_data) -{ - struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context; - - switch ( type ) - { - case MSR_TYPE_CTRL: - core2_vpmu_cxt->ctrls[index] = msr_data; - break; - case MSR_TYPE_ARCH_CTRL: - core2_vpmu_cxt->arch_msr_pair[index].control = msr_data; - break; - } -} - -static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( !is_core2_vpmu_msr(msr_index, type, index) ) - return 0; - - if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) && - !core2_vpmu_alloc_resource(current) ) - return 0; - - /* Do the lazy load staff. 
*/ - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - { - __core2_vpmu_load(current); - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - if ( cpu_has_vmx_msr_bitmap && is_hvm_domain(current->domain) ) - core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap); - } - return 1; -} - -static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) -{ - u64 global_ctrl, non_global_ctrl; - char pmu_enable = 0; - int i, tmp; - int type = -1, index = -1; - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct core2_vpmu_context *core2_vpmu_cxt = NULL; - - if ( !core2_vpmu_msr_common_check(msr, &type, &index) ) - { - /* Special handling for BTS */ - if ( msr == MSR_IA32_DEBUGCTLMSR ) - { - uint64_t supported = IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS | - IA32_DEBUGCTLMSR_BTINT; - - if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) ) - supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS | - IA32_DEBUGCTLMSR_BTS_OFF_USR; - if ( msr_content & supported ) - { - if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) - return 1; - gdprintk(XENLOG_WARNING, "Debug Store is not supported on this cpu\n"); - - if ( is_hvm_domain(v->domain) ) - hvm_inject_hw_exception(TRAP_gp_fault, 0); - else - send_guest_trap(v->domain, v->vcpu_id, TRAP_gp_fault); - - return 0; - } - } - return 0; - } - - core2_vpmu_cxt = vpmu->context; - switch ( msr ) - { - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - core2_vpmu_cxt->global_ovf_status &= ~msr_content; - core2_vpmu_cxt->global_ovf_ctrl = msr_content; - return 1; - case MSR_CORE_PERF_GLOBAL_STATUS: - gdprintk(XENLOG_INFO, "Can not write readonly MSR: " - "MSR_PERF_GLOBAL_STATUS(0x38E)!\n"); - if ( is_hvm_domain(v->domain) ) - hvm_inject_hw_exception(TRAP_gp_fault, 0); - else - send_guest_trap(v->domain, v->vcpu_id, TRAP_gp_fault); - return 1; - case MSR_IA32_PEBS_ENABLE: - if ( msr_content & 1 ) - gdprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, " - "which is not supported.\n"); - return 1; - case MSR_IA32_DS_AREA: - if ( vpmu_is_set(vpmu, 
VPMU_CPU_HAS_DS) ) - { - if ( !is_canonical_address(msr_content) ) - { - gdprintk(XENLOG_WARNING, - "Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n", - msr_content); - if ( is_hvm_domain(v->domain) ) - hvm_inject_hw_exception(TRAP_gp_fault, 0); - else - send_guest_trap(v->domain, v->vcpu_id, TRAP_gp_fault); - return 1; - } - core2_vpmu_cxt->pmu_enable->ds_area_enable = msr_content ? 1 : 0; - break; - } - gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n"); - return 1; - case MSR_CORE_PERF_GLOBAL_CTRL: - global_ctrl = msr_content; - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - rdmsrl(MSR_P6_EVNTSEL0+i, non_global_ctrl); - core2_vpmu_cxt->pmu_enable->arch_pmc_enable[i] = - global_ctrl & (non_global_ctrl >> 22) & 1; - global_ctrl >>= 1; - } - - rdmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, non_global_ctrl); - global_ctrl = msr_content >> 32; - for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) - { - core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] = - (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 1: 0); - non_global_ctrl >>= FIXED_CTR_CTRL_BITS; - global_ctrl >>= 1; - } - break; - case MSR_CORE_PERF_FIXED_CTR_CTRL: - non_global_ctrl = msr_content; - if ( is_hvm_domain(v->domain) ) - vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl); - else - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); - global_ctrl >>= 32; - for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) - { - core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] = - (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 
1: 0); - non_global_ctrl >>= 4; - global_ctrl >>= 1; - } - break; - default: - tmp = msr - MSR_P6_EVNTSEL0; - if ( is_hvm_domain(v->domain) ) - vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl); - else - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); - if ( tmp >= 0 && tmp < arch_pmc_cnt ) - core2_vpmu_cxt->pmu_enable->arch_pmc_enable[tmp] = - (global_ctrl >> tmp) & (msr_content >> 22) & 1; - } - - for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) - pmu_enable |= core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i]; - for ( i = 0; i < arch_pmc_cnt; i++ ) - pmu_enable |= core2_vpmu_cxt->pmu_enable->arch_pmc_enable[i]; - pmu_enable |= core2_vpmu_cxt->pmu_enable->ds_area_enable; - if ( pmu_enable ) - vpmu_set(vpmu, VPMU_RUNNING); - else - vpmu_reset(vpmu, VPMU_RUNNING); - - if ( is_hvm_domain(v->domain) ) - { - /* Setup LVTPC in local apic */ - if ( vpmu_is_set(vpmu, VPMU_RUNNING) && - is_vlapic_lvtpc_enabled(vcpu_vlapic(v)) ) - { - apic_write_around(APIC_LVTPC, APIC_DM_NMI); - vpmu->hw_lapic_lvtpc = APIC_DM_NMI; - } - else - { - apic_write_around(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED); - vpmu->hw_lapic_lvtpc = APIC_DM_NMI | APIC_LVT_MASKED; - } - } - - core2_vpmu_save_msr_context(v, type, index, msr_content); - if ( type != MSR_TYPE_GLOBAL ) - { - u64 mask; - int inject_gp = 0; - switch ( type ) - { - case MSR_TYPE_ARCH_CTRL: /* MSR_P6_EVNTSEL[0,...] */ - mask = ~((1ull << 32) - 1); - if (msr_content & mask) - inject_gp = 1; - break; - case MSR_TYPE_CTRL: /* IA32_FIXED_CTR_CTRL */ - if ( msr == MSR_IA32_DS_AREA ) - break; - /* 4 bits per counter, currently 3 fixed counters implemented. 
*/ - mask = ~((1ull << (VPMU_CORE2_NUM_FIXED * FIXED_CTR_CTRL_BITS)) - 1); - if (msr_content & mask) - inject_gp = 1; - break; - case MSR_TYPE_COUNTER: /* IA32_FIXED_CTR[0-2] */ - mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1); - if (msr_content & mask) - inject_gp = 1; - break; - } - - if (inject_gp) - { - if ( is_hvm_domain(v->domain) ) - hvm_inject_hw_exception(TRAP_gp_fault, 0); - else - send_guest_trap(v->domain, v->vcpu_id, TRAP_gp_fault); - } - else - wrmsrl(msr, msr_content); - } - else - { - if ( is_hvm_domain(v->domain) ) - vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); - else - { - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); - core2_vpmu_cxt->global_ctrl = msr_content; - } - } - - return 1; -} - -static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - int type = -1, index = -1; - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct core2_vpmu_context *core2_vpmu_cxt = NULL; - - if ( core2_vpmu_msr_common_check(msr, &type, &index) ) - { - core2_vpmu_cxt = vpmu->context; - switch ( msr ) - { - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - *msr_content = 0; - break; - case MSR_CORE_PERF_GLOBAL_STATUS: - *msr_content = core2_vpmu_cxt->global_ovf_status; - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - if ( is_hvm_domain(v->domain) ) - vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); - else - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content); - break; - default: - rdmsrl(msr, *msr_content); - } - } - else - { - /* Extension for BTS */ - if ( msr == MSR_IA32_MISC_ENABLE ) - { - if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) - *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; - } - else - return 0; - } - - return 1; -} - -static void core2_vpmu_do_cpuid(unsigned int input, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - if (input == 0x1) - { - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) - { - /* Switch on the 
'Debug Store' feature in CPUID.EAX[1]:EDX[21] */ - *edx |= cpufeat_mask(X86_FEATURE_DS); - if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) ) - *ecx |= cpufeat_mask(X86_FEATURE_DTES64); - if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) ) - *ecx |= cpufeat_mask(X86_FEATURE_DSCPL); - } - } -} - -/* Dump vpmu info on console, called in the context of keyhandler 'q'. */ -static void core2_vpmu_dump(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - int i; - struct core2_vpmu_context *core2_vpmu_cxt = NULL; - u64 val; - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) - { - if ( vpmu_set(vpmu, VPMU_CONTEXT_LOADED) ) - printk(" vPMU loaded\n"); - else - printk(" vPMU allocated\n"); - return; - } - - printk(" vPMU running\n"); - core2_vpmu_cxt = vpmu->context; - - /* Print the contents of the counter and its configuration msr. */ - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - struct arch_msr_pair* msr_pair = core2_vpmu_cxt->arch_msr_pair; - if ( core2_vpmu_cxt->pmu_enable->arch_pmc_enable[i] ) - printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n", - i, msr_pair[i].counter, msr_pair[i].control); - } - /* - * The configuration of the fixed counter is 4 bits each in the - * MSR_CORE_PERF_FIXED_CTR_CTRL. 
- */ - val = core2_vpmu_cxt->ctrls[MSR_CORE_PERF_FIXED_CTR_CTRL_IDX]; - for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) - { - if ( core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] ) - printk(" fixed_%d: 0x%016lx ctrl: 0x%lx\n", - i, core2_vpmu_cxt->fix_counters[i], - val & FIXED_CTR_CTRL_MASK); - val >>= FIXED_CTR_CTRL_BITS; - } -} - -static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs) -{ - struct vcpu *v = current; - u64 msr_content; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct core2_vpmu_context *core2_vpmu_cxt = vpmu->context; - - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content); - if ( msr_content ) - { - if ( is_pmc_quirk ) - handle_pmc_quirk(msr_content); - core2_vpmu_cxt->global_ovf_status |= msr_content; - msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1); - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); - } - else - { - /* No PMC overflow but perhaps a Trace Message interrupt. */ - msr_content = __vmread(GUEST_IA32_DEBUGCTL); - if ( !(msr_content & IA32_DEBUGCTLMSR_TR) ) - return 0; - } - - /* HW sets the MASK bit when performance counter interrupt occurs*/ - vpmu->hw_lapic_lvtpc = apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED; - apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc); - - return 1; -} - -static int core2_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - u64 msr_content; - struct cpuinfo_x86 *c = &current_cpu_data; - - if ( !(vpmu_flags & VPMU_INTEL_BTS) ) - goto func_out; - /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */ - if ( cpu_has(c, X86_FEATURE_DS) ) - { - if ( !cpu_has(c, X86_FEATURE_DTES64) ) - { - printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area" - " - Debug Store disabled for d%d:v%d\n", - v->domain->domain_id, v->vcpu_id); - goto func_out; - } - vpmu_set(vpmu, VPMU_CPU_HAS_DS); - rdmsrl(MSR_IA32_MISC_ENABLE, msr_content); - if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL ) - { - /* If BTS_UNAVAIL is set reset the DS feature. 
*/ - vpmu_reset(vpmu, VPMU_CPU_HAS_DS); - printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL" - " - Debug Store disabled for d%d:v%d\n", - v->domain->domain_id, v->vcpu_id); - } - else - { - vpmu_set(vpmu, VPMU_CPU_HAS_BTS); - if ( !cpu_has(c, X86_FEATURE_DSCPL) ) - printk(XENLOG_G_INFO - "vpmu: CPU doesn't support CPL-Qualified BTS\n"); - printk("******************************************************\n"); - printk("** WARNING: Emulation of BTS Feature is switched on **\n"); - printk("** Using this processor feature in a virtualized **\n"); - printk("** environment is not 100%% safe. **\n"); - printk("** Setting the DS buffer address with wrong values **\n"); - printk("** may lead to hypervisor hangs or crashes. **\n"); - printk("** It is NOT recommended for production use! **\n"); - printk("******************************************************\n"); - } - } -func_out: - - arch_pmc_cnt = core2_get_pmc_count(); - check_pmc_quirk(); - - /* PV domains can allocate resources immediately */ - if ( !is_hvm_domain(v->domain) ) - if ( !core2_vpmu_alloc_resource(v) ) - return 1; - - return 0; -} - -static void core2_vpmu_destroy(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct core2_vpmu_context *core2_vpmu_cxt = vpmu->context; - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - - if ( is_hvm_domain(v->domain) ) - { - xfree(core2_vpmu_cxt->pmu_enable); - xfree(vpmu->context); - if ( cpu_has_vmx_msr_bitmap ) - core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); - } - - release_pmu_ownship(PMU_OWNER_HVM); - vpmu_clear(vpmu); -} - -struct arch_vpmu_ops core2_vpmu_ops = { - .do_wrmsr = core2_vpmu_do_wrmsr, - .do_rdmsr = core2_vpmu_do_rdmsr, - .do_interrupt = core2_vpmu_do_interrupt, - .do_cpuid = core2_vpmu_do_cpuid, - .arch_vpmu_destroy = core2_vpmu_destroy, - .arch_vpmu_save = core2_vpmu_save, - .arch_vpmu_load = core2_vpmu_load, - .arch_vpmu_dump = core2_vpmu_dump -}; - -static void core2_no_vpmu_do_cpuid(unsigned int input, - unsigned 
int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* - * As in this case the vpmu is not enabled reset some bits in the - * architectural performance monitoring related part. - */ - if ( input == 0xa ) - { - *eax &= ~PMU_VERSION_MASK; - *eax &= ~PMU_GENERAL_NR_MASK; - *eax &= ~PMU_GENERAL_WIDTH_MASK; - - *edx &= ~PMU_FIXED_NR_MASK; - *edx &= ~PMU_FIXED_WIDTH_MASK; - } -} - -/* - * If its a vpmu msr set it to 0. - */ -static int core2_no_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - int type = -1, index = -1; - if ( !is_core2_vpmu_msr(msr, &type, &index) ) - return 0; - *msr_content = 0; - return 1; -} - -/* - * These functions are used in case vpmu is not enabled. - */ -struct arch_vpmu_ops core2_no_vpmu_ops = { - .do_rdmsr = core2_no_vpmu_do_rdmsr, - .do_cpuid = core2_no_vpmu_do_cpuid, -}; - -int vmx_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - uint8_t family = current_cpu_data.x86; - uint8_t cpu_model = current_cpu_data.x86_model; - int ret = 0; - - vpmu->arch_vpmu_ops = &core2_no_vpmu_ops; - if ( vpmu_flags == VPMU_OFF ) - return 0; - - if ( family == 6 ) - { - switch ( cpu_model ) - { - /* Core2: */ - case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 0x17: /* 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 0x1d: /* six-core 45 nm xeon "Dunnington" */ - - case 0x2a: /* SandyBridge */ - case 0x2d: /* SandyBridge, "Romley-EP" */ - - /* Nehalem: */ - case 0x1a: /* 45 nm nehalem, "Bloomfield" */ - case 0x1e: /* 45 nm nehalem, "Lynnfield", "Clarksfield", "Jasper Forest" */ - case 0x2e: /* 45 nm nehalem-ex, "Beckton" */ - - /* Westmere: */ - case 0x25: /* 32 nm nehalem, "Clarkdale", "Arrandale" */ - case 0x2c: /* 32 nm nehalem, "Gulftown", "Westmere-EP" */ - case 0x27: /* 32 nm Westmere-EX */ - - case 0x3a: /* IvyBridge */ - case 0x3e: /* IvyBridge EP */ - 
case 0x3c: /* Haswell */ - ret = core2_vpmu_initialise(v, vpmu_flags); - if ( !ret ) - vpmu->arch_vpmu_ops = &core2_vpmu_ops; - return ret; - } - } - - printk("VPMU: Initialization failed. " - "Intel processor family %d model %d has not " - "been supported\n", family, cpu_model); - return -EINVAL; -} - diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c deleted file mode 100644 index f28b7af..0000000 --- a/xen/arch/x86/hvm/vpmu.c +++ /dev/null @@ -1,545 +0,0 @@ -/* - * vpmu.c: PMU virtualization for HVM domain. - * - * Copyright (c) 2007, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Author: Haitao Shan <haitao.shan@xxxxxxxxx> - */ -#include <xen/config.h> -#include <xen/sched.h> -#include <xen/xenoprof.h> -#include <xen/event.h> -#include <xen/softirq.h> -#include <xen/hypercall.h> -#include <xen/guest_access.h> -#include <asm/regs.h> -#include <asm/types.h> -#include <asm/msr.h> -#include <asm/hvm/support.h> -#include <asm/hvm/vmx/vmx.h> -#include <asm/hvm/vmx/vmcs.h> -#include <asm/hvm/vpmu.h> -#include <asm/hvm/svm/svm.h> -#include <asm/hvm/svm/vmcb.h> -#include <asm/apic.h> -#include <asm/nmi.h> -#include <public/xenpmu.h> - -/* - * "vpmu" : vpmu generally enabled - * "vpmu=off" : vpmu generally disabled - * "vpmu=bts" : vpmu enabled and Intel BTS feature switched on. 
- */ -uint32_t __read_mostly vpmu_mode = VPMU_OFF; -static void parse_vpmu_param(char *s); -custom_param("vpmu", parse_vpmu_param); - -static void vpmu_save_force(void *arg); -static DEFINE_PER_CPU(struct vcpu *, last_vcpu); - -static void __init parse_vpmu_param(char *s) -{ - switch ( parse_bool(s) ) - { - case 0: - break; - default: - if ( !strcmp(s, "bts") ) - vpmu_mode |= VPMU_INTEL_BTS; - else if ( *s ) - { - printk("VPMU: unknown flag: %s - vpmu disabled!\n", s); - break; - } - /* fall through */ - case 1: - vpmu_mode |= VPMU_ON; - break; - } -} - -static void vpmu_lvtpc_update(uint32_t val) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - vpmu->hw_lapic_lvtpc = APIC_DM_NMI | (val & APIC_LVT_MASKED); - apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); -} - -int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( (vpmu_mode & VPMU_PRIV) && (current->domain != dom0) ) - return 0; - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr ) - return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content); - return 0; -} - -int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( vpmu_mode & VPMU_PRIV && current->domain != dom0 ) - return 0; - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr ) - return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content); - return 0; -} - -int vpmu_do_interrupt(struct cpu_user_regs *regs) -{ - struct vcpu *v = current; - struct vpmu_struct *vpmu; - - - /* dom0 will handle this interrupt */ - if ( (vpmu_mode & VPMU_PRIV) || - (v->domain->domain_id >= DOMID_FIRST_RESERVED) ) - { - if ( smp_processor_id() >= dom0->max_vcpus ) - return 0; - v = dom0->vcpu[smp_processor_id()]; - } - - vpmu = vcpu_vpmu(v); - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return 0; - - if ( !is_hvm_domain(v->domain) || vpmu_mode & VPMU_PRIV ) - { - /* PV guest or dom0 is doing system profiling */ - void *p; - struct 
cpu_user_regs *gregs; - - p = v->arch.vpmu.xenpmu_data; - - /* PV guest will be reading PMU MSRs from xenpmu_data */ - vpmu_save_force(v); - - /* Store appropriate registers in xenpmu_data - * - * Note: '!current->is_running' is possible when 'set_current(next)' - * for the (HVM) guest has been called but 'reset_stack_and_jump()' - * has not (i.e. the guest is not actually running yet). - */ - if ( !is_hvm_domain(current->domain) || - ((vpmu_mode & VPMU_PRIV) && !current->is_running) ) - { - /* - * 32-bit dom0 cannot process Xen's addresses (which are 64 bit) - * and therefore we treat it the same way as a non-priviledged - * PV 32-bit domain. - */ - if ( is_pv_32bit_domain(current->domain) ) - { - struct compat_cpu_user_regs cmp; - - gregs = guest_cpu_user_regs(); - XLAT_cpu_user_regs(&cmp, gregs); - memcpy(p, &cmp, sizeof(struct compat_cpu_user_regs)); - } - else if ( (current->domain != dom0) && !is_idle_vcpu(current) && - !(vpmu_mode & VPMU_PRIV) ) - { - /* PV guest */ - gregs = guest_cpu_user_regs(); - memcpy(p, gregs, sizeof(struct cpu_user_regs)); - } - else - memcpy(p, regs, sizeof(struct cpu_user_regs)); - } - else - { - /* HVM guest */ - struct segment_register cs; - - gregs = guest_cpu_user_regs(); - hvm_get_segment_register(current, x86_seg_cs, &cs); - gregs->cs = cs.attr.fields.dpl; - - memcpy(p, gregs, sizeof(struct cpu_user_regs)); - } - - v->arch.vpmu.xenpmu_data->domain_id = current->domain->domain_id; - v->arch.vpmu.xenpmu_data->vcpu_id = current->vcpu_id; - - raise_softirq(PMU_SOFTIRQ); - vpmu_set(vpmu, VPMU_WAIT_FOR_FLUSH); - - return 1; - } - else if ( vpmu->arch_vpmu_ops ) - { - /* HVM guest */ - struct vlapic *vlapic; - u32 vlapic_lvtpc; - unsigned char int_vec; - - if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ) - return 0; - - vlapic = vcpu_vlapic(v); - if ( !is_vlapic_lvtpc_enabled(vlapic) ) - return 1; - - vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC); - int_vec = vlapic_lvtpc & APIC_VECTOR_MASK; - - if ( 
GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED ) - vlapic_set_irq(vcpu_vlapic(v), int_vec, 0); - else - v->nmi_pending = 1; - return 1; - } - - return 0; -} - -void vpmu_do_cpuid(unsigned int input, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_cpuid ) - vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx); -} - -static void vpmu_save_force(void *arg) -{ - struct vcpu *v = (struct vcpu *)arg; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set_all(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) ) - return; - - vpmu_set(vpmu, VPMU_CONTEXT_SAVE); - - if ( vpmu->arch_vpmu_ops ) - (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v); - - vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); - - per_cpu(last_vcpu, smp_processor_id()) = NULL; -} - -void vpmu_save(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - int pcpu = smp_processor_id(); - - if ( !vpmu_is_set_all(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) ) - return; - - vpmu->last_pcpu = pcpu; - per_cpu(last_vcpu, pcpu) = v; - - if ( vpmu->arch_vpmu_ops ) - if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) ) - vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); - - apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED); -} - -void vpmu_load(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - int pcpu = smp_processor_id(); - struct vcpu *prev = NULL; - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - - /* First time this VCPU is running here */ - if ( vpmu->last_pcpu != pcpu ) - { - /* - * Get the context from last pcpu that we ran on. Note that if another - * VCPU is running there it must have saved this VPCU's context before - * startig to run (see below). - * There should be no race since remote pcpu will disable interrupts - * before saving the context. 
- */ - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - on_selected_cpus(cpumask_of(vpmu->last_pcpu), - vpmu_save_force, (void *)v, 1); - } - - /* Prevent forced context save from remote CPU */ - local_irq_disable(); - - prev = per_cpu(last_vcpu, pcpu); - - if ( prev != v && prev ) - { - vpmu = vcpu_vpmu(prev); - - /* Someone ran here before us */ - vpmu_save_force(prev); - - vpmu = vcpu_vpmu(v); - } - - local_irq_enable(); - - /* Only when PMU is counting, we load PMU context immediately. */ - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) - return; - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load ) - { - apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc); - vpmu->arch_vpmu_ops->arch_vpmu_load(v); - } - - /* - * PMU interrupt may happen while loading the context above. That - * may cause vpmu_save_force() in the handler so we we don't - * want to mark the context as loaded. - */ - if ( !vpmu_is_set(vpmu, VPMU_WAIT_FOR_FLUSH) ) - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); -} - -void vpmu_initialise(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - uint8_t vendor = current_cpu_data.x86_vendor; - - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - vpmu_destroy(v); - vpmu_clear(vpmu); - vpmu->context = NULL; - - switch ( vendor ) - { - case X86_VENDOR_AMD: - if ( svm_vpmu_initialise(v, vpmu_mode) != 0 ) - vpmu_mode = VPMU_OFF; - break; - - case X86_VENDOR_INTEL: - if ( vmx_vpmu_initialise(v, vpmu_mode) != 0 ) - vpmu_mode = VPMU_OFF; - break; - - default: - printk("VPMU: Initialization failed. " - "Unknown CPU vendor %d\n", vendor); - vpmu_mode = VPMU_OFF; - break; - } -} - -void vpmu_destroy(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy ) - { - /* Unload VPMU first. 
This will stop counters from running */ - on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu), - vpmu_save_force, (void *)v, 1); - - vpmu->arch_vpmu_ops->arch_vpmu_destroy(v); - } -} - -/* Dump some vpmu informations on console. Used in keyhandler dump_domains(). */ -void vpmu_dump(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump ) - vpmu->arch_vpmu_ops->arch_vpmu_dump(v); -} - -int pmu_nmi_interrupt(struct cpu_user_regs *regs, int cpu) -{ - return vpmu_do_interrupt(regs); -} - -/* Process the softirq set by PMU NMI handler */ -void pmu_virq(void) -{ - struct vcpu *v = current; - - if ( (vpmu_mode & VPMU_PRIV) || - (v->domain->domain_id >= DOMID_FIRST_RESERVED) ) - { - if ( smp_processor_id() >= dom0->max_vcpus ) - { - printk(KERN_WARNING "PMU softirq on unexpected processor %d\n", - smp_processor_id()); - return; - } - v = dom0->vcpu[smp_processor_id()]; - } - - send_guest_vcpu_virq(v, VIRQ_XENPMU); -} - -static int pvpmu_init(struct domain *d, xenpmu_params_t *params) -{ - struct vcpu *v; - static int pvpmu_initted = 0; - - if ( params->vcpu < 0 || params->vcpu >= d->max_vcpus ) - return -EINVAL; - - if ( !pvpmu_initted ) - { - if (reserve_lapic_nmi() == 0) - set_nmi_callback(pmu_nmi_interrupt); - else - { - printk("Failed to reserve PMU NMI\n"); - return -EBUSY; - } - open_softirq(PMU_SOFTIRQ, pmu_virq); - pvpmu_initted = 1; - } - - if ( !mfn_valid(params->mfn) || - !get_page_and_type(mfn_to_page(params->mfn), d, PGT_writable_page) ) - return -EINVAL; - - v = d->vcpu[params->vcpu]; - v->arch.vpmu.xenpmu_data = map_domain_page_global(params->mfn); - memset(v->arch.vpmu.xenpmu_data, 0, PAGE_SIZE); - - vpmu_initialise(v); - - return 0; -} - -static void pvpmu_finish(struct domain *d, xenpmu_params_t *params) -{ - struct vcpu *v; - uint64_t mfn; - - if ( params->vcpu < 0 || params->vcpu >= d->max_vcpus ) - return; - - v = d->vcpu[params->vcpu]; - if (v != current) - vcpu_pause(v); - - 
if ( v->arch.vpmu.xenpmu_data ) - { - mfn = domain_page_map_to_mfn(v->arch.vpmu.xenpmu_data); - if ( mfn_valid(mfn) ) - { - unmap_domain_page_global(v->arch.vpmu.xenpmu_data); - put_page_and_type(mfn_to_page(mfn)); - } - } - vpmu_destroy(v); - - if (v != current) - vcpu_unpause(v); -} - -long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) -{ - int ret = -EINVAL; - xenpmu_params_t pmu_params; - uint32_t mode, flags; - - switch ( op ) - { - case XENPMU_mode_set: - if ( !is_control_domain(current->domain) ) - return -EPERM; - - if ( copy_from_guest(&pmu_params, arg, 1) ) - return -EFAULT; - - mode = (uint32_t)pmu_params.control & VPMU_MODE_MASK; - if ( (mode & ~(VPMU_ON | VPMU_PRIV)) || - ((mode & VPMU_ON) && (mode & VPMU_PRIV)) ) - return -EINVAL; - - vpmu_mode &= ~VPMU_MODE_MASK; - vpmu_mode |= mode; - - ret = 0; - break; - - case XENPMU_mode_get: - pmu_params.control = vpmu_mode & VPMU_MODE_MASK; - if ( copy_to_guest(arg, &pmu_params, 1) ) - return -EFAULT; - ret = 0; - break; - - case XENPMU_flags_set: - if ( !is_control_domain(current->domain) ) - return -EPERM; - - if ( copy_from_guest(&pmu_params, arg, 1) ) - return -EFAULT; - - flags = (uint64_t)pmu_params.control & VPMU_FLAGS_MASK; - if ( flags & ~VPMU_INTEL_BTS ) - return -EINVAL; - - vpmu_mode &= ~VPMU_FLAGS_MASK; - vpmu_mode |= flags; - - ret = 0; - break; - - case XENPMU_flags_get: - pmu_params.control = vpmu_mode & VPMU_FLAGS_MASK; - if ( copy_to_guest(arg, &pmu_params, 1) ) - return -EFAULT; - ret = 0; - break; - - case XENPMU_init: - if ( copy_from_guest(&pmu_params, arg, 1) ) - return -EFAULT; - ret = pvpmu_init(current->domain, &pmu_params); - break; - - case XENPMU_finish: - if ( copy_from_guest(&pmu_params, arg, 1) ) - return -EFAULT; - pvpmu_finish(current->domain, &pmu_params); - break; - - case XENPMU_lvtpc_set: - if ( copy_from_guest(&pmu_params, arg, 1) ) - return -EFAULT; - - vpmu_lvtpc_update((uint32_t)pmu_params.lvtpc); - ret = 0; - break; - - case XENPMU_flush: - 
vpmu_reset(vcpu_vpmu(current), VPMU_WAIT_FOR_FLUSH); - vpmu_load(current); - ret = 0; - break; - } - - return ret; -} diff --git a/xen/arch/x86/oprofile/op_model_ppro.c b/xen/arch/x86/oprofile/op_model_ppro.c index 2939a40..9135801 100644 --- a/xen/arch/x86/oprofile/op_model_ppro.c +++ b/xen/arch/x86/oprofile/op_model_ppro.c @@ -19,7 +19,7 @@ #include <asm/processor.h> #include <asm/regs.h> #include <asm/current.h> -#include <asm/hvm/vpmu.h> +#include <asm/vpmu.h> #include "op_x86_model.h" #include "op_counter.h" diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 64c9c25..bca0a37 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -71,7 +71,7 @@ #include <asm/apic.h> #include <asm/mc146818rtc.h> #include <asm/hpet.h> -#include <asm/hvm/vpmu.h> +#include <asm/vpmu.h> #include <public/arch-x86/cpuid.h> #include <xsm/xsm.h> diff --git a/xen/arch/x86/vpmu.c b/xen/arch/x86/vpmu.c new file mode 100644 index 0000000..3f24903 --- /dev/null +++ b/xen/arch/x86/vpmu.c @@ -0,0 +1,545 @@ +/* + * vpmu.c: PMU virtualization for HVM domain. + * + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Author: Haitao Shan <haitao.shan@xxxxxxxxx> + */ +#include <xen/config.h> +#include <xen/sched.h> +#include <xen/xenoprof.h> +#include <xen/event.h> +#include <xen/softirq.h> +#include <xen/hypercall.h> +#include <xen/guest_access.h> +#include <asm/regs.h> +#include <asm/types.h> +#include <asm/msr.h> +#include <asm/hvm/support.h> +#include <asm/hvm/vmx/vmx.h> +#include <asm/hvm/vmx/vmcs.h> +#include <asm/vpmu.h> +#include <asm/hvm/svm/svm.h> +#include <asm/hvm/svm/vmcb.h> +#include <asm/apic.h> +#include <asm/nmi.h> +#include <public/xenpmu.h> + +/* + * "vpmu" : vpmu generally enabled + * "vpmu=off" : vpmu generally disabled + * "vpmu=bts" : vpmu enabled and Intel BTS feature switched on. + */ +uint32_t __read_mostly vpmu_mode = VPMU_OFF; +static void parse_vpmu_param(char *s); +custom_param("vpmu", parse_vpmu_param); + +static void vpmu_save_force(void *arg); +static DEFINE_PER_CPU(struct vcpu *, last_vcpu); + +static void __init parse_vpmu_param(char *s) +{ + switch ( parse_bool(s) ) + { + case 0: + break; + default: + if ( !strcmp(s, "bts") ) + vpmu_mode |= VPMU_INTEL_BTS; + else if ( *s ) + { + printk("VPMU: unknown flag: %s - vpmu disabled!\n", s); + break; + } + /* fall through */ + case 1: + vpmu_mode |= VPMU_ON; + break; + } +} + +static void vpmu_lvtpc_update(uint32_t val) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + vpmu->hw_lapic_lvtpc = APIC_DM_NMI | (val & APIC_LVT_MASKED); + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); +} + +int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( (vpmu_mode & VPMU_PRIV) && (current->domain != dom0) ) + return 0; + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr ) + return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content); + return 0; +} + +int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( vpmu_mode & VPMU_PRIV && current->domain != dom0 ) + return 0; + + if 
( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr ) + return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content); + return 0; +} + +int vpmu_do_interrupt(struct cpu_user_regs *regs) +{ + struct vcpu *v = current; + struct vpmu_struct *vpmu; + + + /* dom0 will handle this interrupt */ + if ( (vpmu_mode & VPMU_PRIV) || + (v->domain->domain_id >= DOMID_FIRST_RESERVED) ) + { + if ( smp_processor_id() >= dom0->max_vcpus ) + return 0; + v = dom0->vcpu[smp_processor_id()]; + } + + vpmu = vcpu_vpmu(v); + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return 0; + + if ( !is_hvm_domain(v->domain) || vpmu_mode & VPMU_PRIV ) + { + /* PV guest or dom0 is doing system profiling */ + void *p; + struct cpu_user_regs *gregs; + + p = v->arch.vpmu.xenpmu_data; + + /* PV guest will be reading PMU MSRs from xenpmu_data */ + vpmu_save_force(v); + + /* Store appropriate registers in xenpmu_data + * + * Note: '!current->is_running' is possible when 'set_current(next)' + * for the (HVM) guest has been called but 'reset_stack_and_jump()' + * has not (i.e. the guest is not actually running yet). + */ + if ( !is_hvm_domain(current->domain) || + ((vpmu_mode & VPMU_PRIV) && !current->is_running) ) + { + /* + * 32-bit dom0 cannot process Xen's addresses (which are 64 bit) + * and therefore we treat it the same way as a non-priviledged + * PV 32-bit domain. 
+ */ + if ( is_pv_32bit_domain(current->domain) ) + { + struct compat_cpu_user_regs cmp; + + gregs = guest_cpu_user_regs(); + XLAT_cpu_user_regs(&cmp, gregs); + memcpy(p, &cmp, sizeof(struct compat_cpu_user_regs)); + } + else if ( (current->domain != dom0) && !is_idle_vcpu(current) && + !(vpmu_mode & VPMU_PRIV) ) + { + /* PV guest */ + gregs = guest_cpu_user_regs(); + memcpy(p, gregs, sizeof(struct cpu_user_regs)); + } + else + memcpy(p, regs, sizeof(struct cpu_user_regs)); + } + else + { + /* HVM guest */ + struct segment_register cs; + + gregs = guest_cpu_user_regs(); + hvm_get_segment_register(current, x86_seg_cs, &cs); + gregs->cs = cs.attr.fields.dpl; + + memcpy(p, gregs, sizeof(struct cpu_user_regs)); + } + + v->arch.vpmu.xenpmu_data->domain_id = current->domain->domain_id; + v->arch.vpmu.xenpmu_data->vcpu_id = current->vcpu_id; + + raise_softirq(PMU_SOFTIRQ); + vpmu_set(vpmu, VPMU_WAIT_FOR_FLUSH); + + return 1; + } + else if ( vpmu->arch_vpmu_ops ) + { + /* HVM guest */ + struct vlapic *vlapic; + u32 vlapic_lvtpc; + unsigned char int_vec; + + if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ) + return 0; + + vlapic = vcpu_vlapic(v); + if ( !is_vlapic_lvtpc_enabled(vlapic) ) + return 1; + + vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC); + int_vec = vlapic_lvtpc & APIC_VECTOR_MASK; + + if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED ) + vlapic_set_irq(vcpu_vlapic(v), int_vec, 0); + else + v->nmi_pending = 1; + return 1; + } + + return 0; +} + +void vpmu_do_cpuid(unsigned int input, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_cpuid ) + vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx); +} + +static void vpmu_save_force(void *arg) +{ + struct vcpu *v = (struct vcpu *)arg; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set_all(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) ) + 
return; + + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + + if ( vpmu->arch_vpmu_ops ) + (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v); + + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + + per_cpu(last_vcpu, smp_processor_id()) = NULL; +} + +void vpmu_save(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + int pcpu = smp_processor_id(); + + if ( !vpmu_is_set_all(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) ) + return; + + vpmu->last_pcpu = pcpu; + per_cpu(last_vcpu, pcpu) = v; + + if ( vpmu->arch_vpmu_ops ) + if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) ) + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); + + apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED); +} + +void vpmu_load(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + int pcpu = smp_processor_id(); + struct vcpu *prev = NULL; + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + /* First time this VCPU is running here */ + if ( vpmu->last_pcpu != pcpu ) + { + /* + * Get the context from last pcpu that we ran on. Note that if another + * VCPU is running there it must have saved this VPCU's context before + * startig to run (see below). + * There should be no race since remote pcpu will disable interrupts + * before saving the context. + */ + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + on_selected_cpus(cpumask_of(vpmu->last_pcpu), + vpmu_save_force, (void *)v, 1); + } + + /* Prevent forced context save from remote CPU */ + local_irq_disable(); + + prev = per_cpu(last_vcpu, pcpu); + + if ( prev != v && prev ) + { + vpmu = vcpu_vpmu(prev); + + /* Someone ran here before us */ + vpmu_save_force(prev); + + vpmu = vcpu_vpmu(v); + } + + local_irq_enable(); + + /* Only when PMU is counting, we load PMU context immediately. 
*/ + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) + return; + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load ) + { + apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc); + vpmu->arch_vpmu_ops->arch_vpmu_load(v); + } + + /* + * PMU interrupt may happen while loading the context above. That + * may cause vpmu_save_force() in the handler so we we don't + * want to mark the context as loaded. + */ + if ( !vpmu_is_set(vpmu, VPMU_WAIT_FOR_FLUSH) ) + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); +} + +void vpmu_initialise(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + uint8_t vendor = current_cpu_data.x86_vendor; + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + vpmu_destroy(v); + vpmu_clear(vpmu); + vpmu->context = NULL; + + switch ( vendor ) + { + case X86_VENDOR_AMD: + if ( svm_vpmu_initialise(v, vpmu_mode) != 0 ) + vpmu_mode = VPMU_OFF; + break; + + case X86_VENDOR_INTEL: + if ( vmx_vpmu_initialise(v, vpmu_mode) != 0 ) + vpmu_mode = VPMU_OFF; + break; + + default: + printk("VPMU: Initialization failed. " + "Unknown CPU vendor %d\n", vendor); + vpmu_mode = VPMU_OFF; + break; + } +} + +void vpmu_destroy(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy ) + { + /* Unload VPMU first. This will stop counters from running */ + on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu), + vpmu_save_force, (void *)v, 1); + + vpmu->arch_vpmu_ops->arch_vpmu_destroy(v); + } +} + +/* Dump some vpmu informations on console. Used in keyhandler dump_domains(). 
*/ +void vpmu_dump(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump ) + vpmu->arch_vpmu_ops->arch_vpmu_dump(v); +} + +int pmu_nmi_interrupt(struct cpu_user_regs *regs, int cpu) +{ + return vpmu_do_interrupt(regs); +} + +/* Process the softirq set by PMU NMI handler */ +void pmu_virq(void) +{ + struct vcpu *v = current; + + if ( (vpmu_mode & VPMU_PRIV) || + (v->domain->domain_id >= DOMID_FIRST_RESERVED) ) + { + if ( smp_processor_id() >= dom0->max_vcpus ) + { + printk(KERN_WARNING "PMU softirq on unexpected processor %d\n", + smp_processor_id()); + return; + } + v = dom0->vcpu[smp_processor_id()]; + } + + send_guest_vcpu_virq(v, VIRQ_XENPMU); +} + +static int pvpmu_init(struct domain *d, xenpmu_params_t *params) +{ + struct vcpu *v; + static int pvpmu_initted = 0; + + if ( params->vcpu < 0 || params->vcpu >= d->max_vcpus ) + return -EINVAL; + + if ( !pvpmu_initted ) + { + if (reserve_lapic_nmi() == 0) + set_nmi_callback(pmu_nmi_interrupt); + else + { + printk("Failed to reserve PMU NMI\n"); + return -EBUSY; + } + open_softirq(PMU_SOFTIRQ, pmu_virq); + pvpmu_initted = 1; + } + + if ( !mfn_valid(params->mfn) || + !get_page_and_type(mfn_to_page(params->mfn), d, PGT_writable_page) ) + return -EINVAL; + + v = d->vcpu[params->vcpu]; + v->arch.vpmu.xenpmu_data = map_domain_page_global(params->mfn); + memset(v->arch.vpmu.xenpmu_data, 0, PAGE_SIZE); + + vpmu_initialise(v); + + return 0; +} + +static void pvpmu_finish(struct domain *d, xenpmu_params_t *params) +{ + struct vcpu *v; + uint64_t mfn; + + if ( params->vcpu < 0 || params->vcpu >= d->max_vcpus ) + return; + + v = d->vcpu[params->vcpu]; + if (v != current) + vcpu_pause(v); + + if ( v->arch.vpmu.xenpmu_data ) + { + mfn = domain_page_map_to_mfn(v->arch.vpmu.xenpmu_data); + if ( mfn_valid(mfn) ) + { + unmap_domain_page_global(v->arch.vpmu.xenpmu_data); + put_page_and_type(mfn_to_page(mfn)); + } + } + vpmu_destroy(v); + + if (v != current) + 
vcpu_unpause(v); +} + +long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) +{ + int ret = -EINVAL; + xenpmu_params_t pmu_params; + uint32_t mode, flags; + + switch ( op ) + { + case XENPMU_mode_set: + if ( !is_control_domain(current->domain) ) + return -EPERM; + + if ( copy_from_guest(&pmu_params, arg, 1) ) + return -EFAULT; + + mode = (uint32_t)pmu_params.control & VPMU_MODE_MASK; + if ( (mode & ~(VPMU_ON | VPMU_PRIV)) || + ((mode & VPMU_ON) && (mode & VPMU_PRIV)) ) + return -EINVAL; + + vpmu_mode &= ~VPMU_MODE_MASK; + vpmu_mode |= mode; + + ret = 0; + break; + + case XENPMU_mode_get: + pmu_params.control = vpmu_mode & VPMU_MODE_MASK; + if ( copy_to_guest(arg, &pmu_params, 1) ) + return -EFAULT; + ret = 0; + break; + + case XENPMU_flags_set: + if ( !is_control_domain(current->domain) ) + return -EPERM; + + if ( copy_from_guest(&pmu_params, arg, 1) ) + return -EFAULT; + + flags = (uint64_t)pmu_params.control & VPMU_FLAGS_MASK; + if ( flags & ~VPMU_INTEL_BTS ) + return -EINVAL; + + vpmu_mode &= ~VPMU_FLAGS_MASK; + vpmu_mode |= flags; + + ret = 0; + break; + + case XENPMU_flags_get: + pmu_params.control = vpmu_mode & VPMU_FLAGS_MASK; + if ( copy_to_guest(arg, &pmu_params, 1) ) + return -EFAULT; + ret = 0; + break; + + case XENPMU_init: + if ( copy_from_guest(&pmu_params, arg, 1) ) + return -EFAULT; + ret = pvpmu_init(current->domain, &pmu_params); + break; + + case XENPMU_finish: + if ( copy_from_guest(&pmu_params, arg, 1) ) + return -EFAULT; + pvpmu_finish(current->domain, &pmu_params); + break; + + case XENPMU_lvtpc_set: + if ( copy_from_guest(&pmu_params, arg, 1) ) + return -EFAULT; + + vpmu_lvtpc_update((uint32_t)pmu_params.lvtpc); + ret = 0; + break; + + case XENPMU_flush: + vpmu_reset(vcpu_vpmu(current), VPMU_WAIT_FOR_FLUSH); + vpmu_load(current); + ret = 0; + break; + } + + return ret; +} diff --git a/xen/arch/x86/vpmu_amd.c b/xen/arch/x86/vpmu_amd.c new file mode 100644 index 0000000..f64ffc0 --- /dev/null +++ b/xen/arch/x86/vpmu_amd.c @@ -0,0 +1,486 
@@ +/* + * vpmu.c: PMU virtualization for HVM domain. + * + * Copyright (c) 2010, Advanced Micro Devices, Inc. + * Parts of this code are Copyright (c) 2007, Intel Corporation + * + * Author: Wei Wang <wei.wang2@xxxxxxx> + * Tested by: Suravee Suthikulpanit <Suravee.Suthikulpanit@xxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <xen/config.h> +#include <xen/xenoprof.h> +#include <xen/hvm/save.h> +#include <xen/sched.h> +#include <xen/irq.h> +#include <asm/apic.h> +#include <asm/hvm/vlapic.h> +#include <asm/vpmu.h> +#include <public/xenpmu.h> + +#define MSR_F10H_EVNTSEL_GO_SHIFT 40 +#define MSR_F10H_EVNTSEL_EN_SHIFT 22 +#define MSR_F10H_COUNTER_LENGTH 48 + +#define is_guest_mode(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) +#define is_pmu_enabled(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_EN_SHIFT)) +#define set_guest_mode(msr) (msr |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) +#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1)))) + +static unsigned int __read_mostly num_counters; +static const u32 __read_mostly *counters; +static const u32 __read_mostly *ctrls; +static bool_t __read_mostly k7_counters_mirrored; + +/* PMU Counter MSRs. */ +static const u32 AMD_F10H_COUNTERS[] = { + MSR_K7_PERFCTR0, + MSR_K7_PERFCTR1, + MSR_K7_PERFCTR2, + MSR_K7_PERFCTR3 +}; + +/* PMU Control MSRs. 
*/ +static const u32 AMD_F10H_CTRLS[] = { + MSR_K7_EVNTSEL0, + MSR_K7_EVNTSEL1, + MSR_K7_EVNTSEL2, + MSR_K7_EVNTSEL3 +}; + +static const u32 AMD_F15H_COUNTERS[] = { + MSR_AMD_FAM15H_PERFCTR0, + MSR_AMD_FAM15H_PERFCTR1, + MSR_AMD_FAM15H_PERFCTR2, + MSR_AMD_FAM15H_PERFCTR3, + MSR_AMD_FAM15H_PERFCTR4, + MSR_AMD_FAM15H_PERFCTR5 +}; + +static const u32 AMD_F15H_CTRLS[] = { + MSR_AMD_FAM15H_EVNTSEL0, + MSR_AMD_FAM15H_EVNTSEL1, + MSR_AMD_FAM15H_EVNTSEL2, + MSR_AMD_FAM15H_EVNTSEL3, + MSR_AMD_FAM15H_EVNTSEL4, + MSR_AMD_FAM15H_EVNTSEL5 +}; + +static inline int get_pmu_reg_type(u32 addr) +{ + if ( (addr >= MSR_K7_EVNTSEL0) && (addr <= MSR_K7_EVNTSEL3) ) + return MSR_TYPE_CTRL; + + if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) ) + return MSR_TYPE_COUNTER; + + if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) && + (addr <= MSR_AMD_FAM15H_PERFCTR5 ) ) + { + if (addr & 1) + return MSR_TYPE_COUNTER; + else + return MSR_TYPE_CTRL; + } + + /* unsupported registers */ + return -1; +} + +static inline u32 get_fam15h_addr(u32 addr) +{ + switch ( addr ) + { + case MSR_K7_PERFCTR0: + return MSR_AMD_FAM15H_PERFCTR0; + case MSR_K7_PERFCTR1: + return MSR_AMD_FAM15H_PERFCTR1; + case MSR_K7_PERFCTR2: + return MSR_AMD_FAM15H_PERFCTR2; + case MSR_K7_PERFCTR3: + return MSR_AMD_FAM15H_PERFCTR3; + case MSR_K7_EVNTSEL0: + return MSR_AMD_FAM15H_EVNTSEL0; + case MSR_K7_EVNTSEL1: + return MSR_AMD_FAM15H_EVNTSEL1; + case MSR_K7_EVNTSEL2: + return MSR_AMD_FAM15H_EVNTSEL2; + case MSR_K7_EVNTSEL3: + return MSR_AMD_FAM15H_EVNTSEL3; + default: + break; + } + + return addr; +} + +static void amd_vpmu_set_msr_bitmap(struct vcpu *v) +{ + unsigned int i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct amd_vpmu_context *ctxt = vpmu->context; + + for ( i = 0; i < num_counters; i++ ) + { + svm_intercept_msr(v, counters[i], MSR_INTERCEPT_NONE); + svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_WRITE); + } + + ctxt->msr_bitmap_set = 1; +} + +static void amd_vpmu_unset_msr_bitmap(struct vcpu *v) +{ + unsigned int 
i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct amd_vpmu_context *ctxt = vpmu->context; + + for ( i = 0; i < num_counters; i++ ) + { + svm_intercept_msr(v, counters[i], MSR_INTERCEPT_RW); + svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_RW); + } + + ctxt->msr_bitmap_set = 0; +} + +static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs) +{ + return 1; +} + +static inline void context_load(struct vcpu *v) +{ + unsigned int i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct amd_vpmu_context *ctxt = vpmu->context; + + for ( i = 0; i < num_counters; i++ ) + { + wrmsrl(counters[i], ctxt->counters[i]); + wrmsrl(ctrls[i], ctxt->ctrls[i]); + } +} + +static void amd_vpmu_load(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct amd_vpmu_context *ctxt = vpmu->context; + + vpmu_reset(vpmu, VPMU_FROZEN); + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + { + unsigned int i; + + for ( i = 0; i < num_counters; i++ ) + wrmsrl(ctrls[i], ctxt->ctrls[i]); + + return; + } + + context_load(v); +} + +static inline void context_save(struct vcpu *v) +{ + unsigned int i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct amd_vpmu_context *ctxt = vpmu->context; + + /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */ + for ( i = 0; i < num_counters; i++ ) + rdmsrl(counters[i], ctxt->counters[i]); +} + +static int amd_vpmu_save(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct amd_vpmu_context *ctx = vpmu->context; + unsigned int i; + + if ( !vpmu_is_set(vpmu, VPMU_FROZEN) ) + { + for ( i = 0; i < num_counters; i++ ) + wrmsrl(ctrls[i], 0); + + vpmu_set(vpmu, VPMU_FROZEN); + } + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) + return 0; + + context_save(v); + + if ( is_hvm_domain(v->domain) && + !vpmu_is_set(vpmu, VPMU_RUNNING) && ctx->msr_bitmap_set ) + amd_vpmu_unset_msr_bitmap(v); + + return 1; +} + +static void context_update(unsigned int msr, u64 msr_content) +{ + unsigned int i; + struct vcpu *v = current; + struct 
vpmu_struct *vpmu = vcpu_vpmu(v); + struct amd_vpmu_context *ctxt = vpmu->context; + + if ( k7_counters_mirrored && + ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) ) + { + msr = get_fam15h_addr(msr); + } + + for ( i = 0; i < num_counters; i++ ) + { + if ( msr == ctrls[i] ) + { + ctxt->ctrls[i] = msr_content; + return; + } + else if (msr == counters[i] ) + { + ctxt->counters[i] = msr_content; + return; + } + } +} + +static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) +{ + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + /* For all counters, enable guest only mode for HVM guest */ + if ( is_hvm_domain(v->domain) && (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && + !(is_guest_mode(msr_content)) ) + { + set_guest_mode(msr_content); + } + + /* check if the first counter is enabled */ + if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && + is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) ) + { + if ( !acquire_pmu_ownership(PMU_OWNER_HVM) ) + return 1; + vpmu_set(vpmu, VPMU_RUNNING); + apic_write(APIC_LVTPC, APIC_DM_NMI); + vpmu->hw_lapic_lvtpc = APIC_DM_NMI; + + if ( is_hvm_domain(v->domain) && + !((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set ) + amd_vpmu_set_msr_bitmap(v); + } + + /* stop saving & restore if guest stops first counter */ + if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && + (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) ) + { + apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED); + vpmu->hw_lapic_lvtpc = APIC_DM_NMI | APIC_LVT_MASKED; + vpmu_reset(vpmu, VPMU_RUNNING); + if ( is_hvm_domain(v->domain) && + ((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set ) + amd_vpmu_unset_msr_bitmap(v); + release_pmu_ownship(PMU_OWNER_HVM); + } + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) + || vpmu_is_set(vpmu, VPMU_FROZEN) ) + { + context_load(v); + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + vpmu_reset(vpmu, VPMU_FROZEN); + } + + /* Update vpmu context immediately */ 
+ context_update(msr, msr_content); + + /* Write to hw counters */ + wrmsrl(msr, msr_content); + return 1; +} + +static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) + || vpmu_is_set(vpmu, VPMU_FROZEN) ) + { + context_load(v); + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + vpmu_reset(vpmu, VPMU_FROZEN); + } + + rdmsrl(msr, *msr_content); + + return 1; +} + +static int amd_vpmu_initialise(struct vcpu *v) +{ + struct amd_vpmu_context *ctxt; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + uint8_t family = current_cpu_data.x86; + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return 0; + + if ( counters == NULL ) + { + switch ( family ) + { + case 0x15: + num_counters = F15H_NUM_COUNTERS; + counters = AMD_F15H_COUNTERS; + ctrls = AMD_F15H_CTRLS; + k7_counters_mirrored = 1; + break; + case 0x10: + case 0x12: + case 0x14: + case 0x16: + default: + num_counters = F10H_NUM_COUNTERS; + counters = AMD_F10H_COUNTERS; + ctrls = AMD_F10H_CTRLS; + k7_counters_mirrored = 0; + break; + } + } + + if ( is_hvm_domain(v->domain) ) + { + ctxt = xzalloc(struct amd_vpmu_context); + if ( !ctxt ) + { + gdprintk(XENLOG_WARNING, "Insufficient memory for PMU, " + " PMU feature is unavailable on domain %d vcpu %d.\n", + v->vcpu_id, v->domain->domain_id); + return -ENOMEM; + } + } + else + ctxt = &v->arch.vpmu.xenpmu_data->pmu.amd; + + vpmu->context = ctxt; + vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); + return 0; +} + +static void amd_vpmu_destroy(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + if ( is_hvm_domain(v->domain) ) + { + if ( ((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set ) + amd_vpmu_unset_msr_bitmap(v); + + xfree(vpmu->context); + release_pmu_ownship(PMU_OWNER_HVM); + } + + vpmu->context = NULL; + vpmu_clear(vpmu); +} + +/* VPMU part of the 'q' keyhandler */ 
+static void amd_vpmu_dump(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct amd_vpmu_context *ctxt = vpmu->context; + unsigned int i; + + printk(" VPMU state: 0x%x ", vpmu->flags); + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + { + printk("\n"); + return; + } + + printk("("); + if ( vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED) ) + printk("PASSIVE_DOMAIN_ALLOCATED, "); + if ( vpmu_is_set(vpmu, VPMU_FROZEN) ) + printk("FROZEN, "); + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) + printk("SAVE, "); + if ( vpmu_is_set(vpmu, VPMU_RUNNING) ) + printk("RUNNING, "); + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + printk("LOADED, "); + printk("ALLOCATED)\n"); + + for ( i = 0; i < num_counters; i++ ) + { + uint64_t ctrl, cntr; + + rdmsrl(ctrls[i], ctrl); + rdmsrl(counters[i], cntr); + printk(" 0x%08x: 0x%lx (0x%lx in HW) 0x%08x: 0x%lx (0x%lx in HW)\n", + ctrls[i], ctxt->ctrls[i], ctrl, + counters[i], ctxt->counters[i], cntr); + } +} + +struct arch_vpmu_ops amd_vpmu_ops = { + .do_wrmsr = amd_vpmu_do_wrmsr, + .do_rdmsr = amd_vpmu_do_rdmsr, + .do_interrupt = amd_vpmu_do_interrupt, + .arch_vpmu_destroy = amd_vpmu_destroy, + .arch_vpmu_save = amd_vpmu_save, + .arch_vpmu_load = amd_vpmu_load, + .arch_vpmu_dump = amd_vpmu_dump +}; + +int svm_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + uint8_t family = current_cpu_data.x86; + int ret = 0; + + /* vpmu enabled? */ + if ( vpmu_flags == VPMU_OFF ) + return 0; + + switch ( family ) + { + case 0x10: + case 0x12: + case 0x14: + case 0x15: + case 0x16: + ret = amd_vpmu_initialise(v); + if ( !ret ) + vpmu->arch_vpmu_ops = &amd_vpmu_ops; + return ret; + } + + printk("VPMU: Initialization failed. 
" + "AMD processor family %d has not " + "been supported\n", family); + return -EINVAL; +} + diff --git a/xen/arch/x86/vpmu_intel.c b/xen/arch/x86/vpmu_intel.c new file mode 100644 index 0000000..d7570c5 --- /dev/null +++ b/xen/arch/x86/vpmu_intel.c @@ -0,0 +1,938 @@ +/* + * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain. + * + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Haitao Shan <haitao.shan@xxxxxxxxx> + */ + +#include <xen/config.h> +#include <xen/sched.h> +#include <xen/xenoprof.h> +#include <xen/irq.h> +#include <asm/system.h> +#include <asm/regs.h> +#include <asm/types.h> +#include <asm/apic.h> +#include <asm/traps.h> +#include <asm/msr.h> +#include <asm/msr-index.h> +#include <asm/hvm/support.h> +#include <asm/hvm/vlapic.h> +#include <asm/hvm/vmx/vmx.h> +#include <asm/hvm/vmx/vmcs.h> +#include <public/sched.h> +#include <public/hvm/save.h> +#include <public/xenpmu.h> +#include <asm/vpmu.h> + +/* + * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID + * instruction. 
+ * cpuid 0xa - Architectural Performance Monitoring Leaf + * Register eax + */ +#define PMU_VERSION_SHIFT 0 /* Version ID */ +#define PMU_VERSION_BITS 8 /* 8 bits 0..7 */ +#define PMU_VERSION_MASK (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT) + +#define PMU_GENERAL_NR_SHIFT 8 /* Number of general pmu registers */ +#define PMU_GENERAL_NR_BITS 8 /* 8 bits 8..15 */ +#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT) + +#define PMU_GENERAL_WIDTH_SHIFT 16 /* Width of general pmu registers */ +#define PMU_GENERAL_WIDTH_BITS 8 /* 8 bits 16..23 */ +#define PMU_GENERAL_WIDTH_MASK (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT) +/* Register edx */ +#define PMU_FIXED_NR_SHIFT 0 /* Number of fixed pmu registers */ +#define PMU_FIXED_NR_BITS 5 /* 5 bits 0..4 */ +#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT) + +#define PMU_FIXED_WIDTH_SHIFT 5 /* Width of fixed pmu registers */ +#define PMU_FIXED_WIDTH_BITS 8 /* 8 bits 5..12 */ +#define PMU_FIXED_WIDTH_MASK (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT) + + +/* Intel-specific VPMU features */ +#define VPMU_CPU_HAS_DS 0x100 /* Has Debug Store */ +#define VPMU_CPU_HAS_BTS 0x200 /* Has Branch Trace Store */ + +/* + * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed + * counters. 4 bits for every counter. + */ +#define FIXED_CTR_CTRL_BITS 4 +#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1) + +/* The index into the core2_ctrls_msr[] of this MSR used in core2_vpmu_dump() */ +#define MSR_CORE_PERF_FIXED_CTR_CTRL_IDX 0 + +static int arch_pmc_cnt; /* Number of general-purpose performance counters */ + +/* + * QUIRK to workaround an issue on various family 6 cpus. + * The issue leads to endless PMC interrupt loops on the processor. 
+ * If the interrupt handler is running and a pmc reaches the value 0, this + * value remains forever and it triggers immediately a new interrupt after + * finishing the handler. + * A workaround is to read all flagged counters and if the value is 0 write + * 1 (or another value != 0) into it. + * There exist no errata and the real cause of this behaviour is unknown. + */ +bool_t __read_mostly is_pmc_quirk; + +static void check_pmc_quirk(void) +{ + if ( current_cpu_data.x86 == 6 ) + is_pmc_quirk = 1; + else + is_pmc_quirk = 0; +} + +static void handle_pmc_quirk(u64 msr_content) +{ + int i; + u64 val; + + if ( !is_pmc_quirk ) + return; + + val = msr_content; + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + if ( val & 0x1 ) + { + u64 cnt; + rdmsrl(MSR_P6_PERFCTR0 + i, cnt); + if ( cnt == 0 ) + wrmsrl(MSR_P6_PERFCTR0 + i, 1); + } + val >>= 1; + } + val = msr_content >> 32; + for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) + { + if ( val & 0x1 ) + { + u64 cnt; + rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt); + if ( cnt == 0 ) + wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1); + } + val >>= 1; + } +} + +/* + * Read the number of general counters via CPUID.EAX[0xa].EAX[8..15] + */ +static int core2_get_pmc_count(void) +{ + u32 eax, ebx, ecx, edx; + + cpuid(0xa, &eax, &ebx, &ecx, &edx); + return ( (eax & PMU_GENERAL_NR_MASK) >> PMU_GENERAL_NR_SHIFT ); +} + +static u64 core2_calc_intial_glb_ctrl_msr(void) +{ + int arch_pmc_bits = (1 << arch_pmc_cnt) - 1; + u64 fix_pmc_bits = (1 << VPMU_CORE2_NUM_FIXED) - 1; + return ((fix_pmc_bits << 32) | arch_pmc_bits); +} + +/* edx bits 5-12: Bit width of fixed-function performance counters */ +static int core2_get_bitwidth_fix_count(void) +{ + u32 eax, ebx, ecx, edx; + + cpuid(0xa, &eax, &ebx, &ecx, &edx); + return ((edx & PMU_FIXED_WIDTH_MASK) >> PMU_FIXED_WIDTH_SHIFT); +} + +static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index) +{ + int i; + + for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) + { + if ( core2_fix_counters_msr[i] == msr_index ) 
+ { + *type = MSR_TYPE_COUNTER; + *index = i; + return 1; + } + } + + for ( i = 0; i < VPMU_CORE2_NUM_CTRLS; i++ ) + { + if ( core2_ctrls_msr[i] == msr_index ) + { + *type = MSR_TYPE_CTRL; + *index = i; + return 1; + } + } + + if ( (msr_index == MSR_CORE_PERF_GLOBAL_CTRL) || + (msr_index == MSR_CORE_PERF_GLOBAL_STATUS) || + (msr_index == MSR_CORE_PERF_GLOBAL_OVF_CTRL) ) + { + *type = MSR_TYPE_GLOBAL; + return 1; + } + + if ( (msr_index >= MSR_IA32_PERFCTR0) && + (msr_index < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) ) + { + *type = MSR_TYPE_ARCH_COUNTER; + *index = msr_index - MSR_IA32_PERFCTR0; + return 1; + } + + if ( (msr_index >= MSR_P6_EVNTSEL0) && + (msr_index < (MSR_P6_EVNTSEL0 + arch_pmc_cnt)) ) + { + *type = MSR_TYPE_ARCH_CTRL; + *index = msr_index - MSR_P6_EVNTSEL0; + return 1; + } + + return 0; +} + +#define msraddr_to_bitpos(x) (((x)&0xffff) + ((x)>>31)*0x2000) +static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap) +{ + int i; + + /* Allow Read/Write PMU Counters MSR Directly. */ + for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) + { + clear_bit(msraddr_to_bitpos(core2_fix_counters_msr[i]), msr_bitmap); + clear_bit(msraddr_to_bitpos(core2_fix_counters_msr[i]), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap); + clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + + /* Allow Read PMU Non-global Controls Directly. 
*/ + for ( i = 0; i < VPMU_CORE2_NUM_CTRLS; i++ ) + clear_bit(msraddr_to_bitpos(core2_ctrls_msr[i]), msr_bitmap); + for ( i = 0; i < arch_pmc_cnt; i++ ) + clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0+i), msr_bitmap); +} + +static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap) +{ + int i; + + for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) + { + set_bit(msraddr_to_bitpos(core2_fix_counters_msr[i]), msr_bitmap); + set_bit(msraddr_to_bitpos(core2_fix_counters_msr[i]), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap); + set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + for ( i = 0; i < VPMU_CORE2_NUM_CTRLS; i++ ) + set_bit(msraddr_to_bitpos(core2_ctrls_msr[i]), msr_bitmap); + for ( i = 0; i < arch_pmc_cnt; i++ ) + set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0+i), msr_bitmap); +} + +static inline void __core2_vpmu_save(struct vcpu *v) +{ + int i; + struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context; + + for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) + rdmsrl(core2_fix_counters_msr[i], core2_vpmu_cxt->fix_counters[i]); + for ( i = 0; i < arch_pmc_cnt; i++ ) + rdmsrl(MSR_IA32_PERFCTR0+i, core2_vpmu_cxt->arch_msr_pair[i].counter); + + if ( !is_hvm_domain(v->domain) ) + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status); +} + +static int core2_vpmu_save(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set_all(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) ) + return 0; + + if ( !is_hvm_domain(v->domain) ) + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + __core2_vpmu_save(v); + + /* Unset PMU MSR bitmap to trap lazy load. 
*/ + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && cpu_has_vmx_msr_bitmap + && is_hvm_domain(v->domain) ) + core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); + + return 1; +} + +static inline void __core2_vpmu_load(struct vcpu *v) +{ + int i; + struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context; + + for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) + wrmsrl(core2_fix_counters_msr[i], core2_vpmu_cxt->fix_counters[i]); + for ( i = 0; i < arch_pmc_cnt; i++ ) + wrmsrl(MSR_IA32_PERFCTR0+i, core2_vpmu_cxt->arch_msr_pair[i].counter); + + for ( i = 0; i < VPMU_CORE2_NUM_CTRLS; i++ ) + wrmsrl(core2_ctrls_msr[i], core2_vpmu_cxt->ctrls[i]); + for ( i = 0; i < arch_pmc_cnt; i++ ) + wrmsrl(MSR_P6_EVNTSEL0+i, core2_vpmu_cxt->arch_msr_pair[i].control); + + if ( !is_hvm_domain(v->domain) ) + { + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl); + } +} + +static void core2_vpmu_load(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + return; + + __core2_vpmu_load(v); +} + +static int core2_vpmu_alloc_resource(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct core2_vpmu_context *core2_vpmu_cxt; + struct core2_pmu_enable *pmu_enable = NULL; + + pmu_enable = xzalloc_bytes(sizeof(struct core2_pmu_enable)); + if ( !pmu_enable ) + return 0; + + if ( is_hvm_domain(v->domain) ) + { + if ( !acquire_pmu_ownership(PMU_OWNER_HVM) ) + goto out_err; + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) + goto out_err; + + if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) + goto out_err; + vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, + core2_calc_intial_glb_ctrl_msr()); + + core2_vpmu_cxt = xzalloc_bytes(sizeof(struct core2_vpmu_context)); + if ( !core2_vpmu_cxt ) + goto out_err; + } + else + { + core2_vpmu_cxt = &v->arch.vpmu.xenpmu_data->pmu.intel; + vpmu_set(vpmu, 
VPMU_CONTEXT_ALLOCATED); + } + + core2_vpmu_cxt->pmu_enable = pmu_enable; + vpmu->context = (void *)core2_vpmu_cxt; + + vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); + + return 1; + +out_err: + xfree(pmu_enable); + vmx_rm_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL); + vmx_rm_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL); + release_pmu_ownship(PMU_OWNER_HVM); + + printk("Failed to allocate VPMU resources for domain %u vcpu %u\n", + v->vcpu_id, v->domain->domain_id); + + return 0; +} + +static void core2_vpmu_save_msr_context(struct vcpu *v, int type, + int index, u64 msr_data) +{ + struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context; + + switch ( type ) + { + case MSR_TYPE_CTRL: + core2_vpmu_cxt->ctrls[index] = msr_data; + break; + case MSR_TYPE_ARCH_CTRL: + core2_vpmu_cxt->arch_msr_pair[index].control = msr_data; + break; + } +} + +static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( !is_core2_vpmu_msr(msr_index, type, index) ) + return 0; + + if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) && + !core2_vpmu_alloc_resource(current) ) + return 0; + + /* Do the lazy load staff. 
*/ + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + { + __core2_vpmu_load(current); + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + if ( cpu_has_vmx_msr_bitmap && is_hvm_domain(current->domain) ) + core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap); + } + return 1; +} + +static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) +{ + u64 global_ctrl, non_global_ctrl; + char pmu_enable = 0; + int i, tmp; + int type = -1, index = -1; + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct core2_vpmu_context *core2_vpmu_cxt = NULL; + + if ( !core2_vpmu_msr_common_check(msr, &type, &index) ) + { + /* Special handling for BTS */ + if ( msr == MSR_IA32_DEBUGCTLMSR ) + { + uint64_t supported = IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS | + IA32_DEBUGCTLMSR_BTINT; + + if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) ) + supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS | + IA32_DEBUGCTLMSR_BTS_OFF_USR; + if ( msr_content & supported ) + { + if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) + return 1; + gdprintk(XENLOG_WARNING, "Debug Store is not supported on this cpu\n"); + + if ( is_hvm_domain(v->domain) ) + hvm_inject_hw_exception(TRAP_gp_fault, 0); + else + send_guest_trap(v->domain, v->vcpu_id, TRAP_gp_fault); + + return 0; + } + } + return 0; + } + + core2_vpmu_cxt = vpmu->context; + switch ( msr ) + { + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + core2_vpmu_cxt->global_ovf_status &= ~msr_content; + core2_vpmu_cxt->global_ovf_ctrl = msr_content; + return 1; + case MSR_CORE_PERF_GLOBAL_STATUS: + gdprintk(XENLOG_INFO, "Can not write readonly MSR: " + "MSR_PERF_GLOBAL_STATUS(0x38E)!\n"); + if ( is_hvm_domain(v->domain) ) + hvm_inject_hw_exception(TRAP_gp_fault, 0); + else + send_guest_trap(v->domain, v->vcpu_id, TRAP_gp_fault); + return 1; + case MSR_IA32_PEBS_ENABLE: + if ( msr_content & 1 ) + gdprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, " + "which is not supported.\n"); + return 1; + case MSR_IA32_DS_AREA: + if ( vpmu_is_set(vpmu, 
VPMU_CPU_HAS_DS) ) + { + if ( !is_canonical_address(msr_content) ) + { + gdprintk(XENLOG_WARNING, + "Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n", + msr_content); + if ( is_hvm_domain(v->domain) ) + hvm_inject_hw_exception(TRAP_gp_fault, 0); + else + send_guest_trap(v->domain, v->vcpu_id, TRAP_gp_fault); + return 1; + } + core2_vpmu_cxt->pmu_enable->ds_area_enable = msr_content ? 1 : 0; + break; + } + gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n"); + return 1; + case MSR_CORE_PERF_GLOBAL_CTRL: + global_ctrl = msr_content; + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + rdmsrl(MSR_P6_EVNTSEL0+i, non_global_ctrl); + core2_vpmu_cxt->pmu_enable->arch_pmc_enable[i] = + global_ctrl & (non_global_ctrl >> 22) & 1; + global_ctrl >>= 1; + } + + rdmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, non_global_ctrl); + global_ctrl = msr_content >> 32; + for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) + { + core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] = + (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 1: 0); + non_global_ctrl >>= FIXED_CTR_CTRL_BITS; + global_ctrl >>= 1; + } + break; + case MSR_CORE_PERF_FIXED_CTR_CTRL: + non_global_ctrl = msr_content; + if ( is_hvm_domain(v->domain) ) + vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl); + else + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); + global_ctrl >>= 32; + for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) + { + core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] = + (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 
1: 0); + non_global_ctrl >>= 4; + global_ctrl >>= 1; + } + break; + default: + tmp = msr - MSR_P6_EVNTSEL0; + if ( is_hvm_domain(v->domain) ) + vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl); + else + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); + if ( tmp >= 0 && tmp < arch_pmc_cnt ) + core2_vpmu_cxt->pmu_enable->arch_pmc_enable[tmp] = + (global_ctrl >> tmp) & (msr_content >> 22) & 1; + } + + for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) + pmu_enable |= core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i]; + for ( i = 0; i < arch_pmc_cnt; i++ ) + pmu_enable |= core2_vpmu_cxt->pmu_enable->arch_pmc_enable[i]; + pmu_enable |= core2_vpmu_cxt->pmu_enable->ds_area_enable; + if ( pmu_enable ) + vpmu_set(vpmu, VPMU_RUNNING); + else + vpmu_reset(vpmu, VPMU_RUNNING); + + if ( is_hvm_domain(v->domain) ) + { + /* Setup LVTPC in local apic */ + if ( vpmu_is_set(vpmu, VPMU_RUNNING) && + is_vlapic_lvtpc_enabled(vcpu_vlapic(v)) ) + { + apic_write_around(APIC_LVTPC, APIC_DM_NMI); + vpmu->hw_lapic_lvtpc = APIC_DM_NMI; + } + else + { + apic_write_around(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED); + vpmu->hw_lapic_lvtpc = APIC_DM_NMI | APIC_LVT_MASKED; + } + } + + core2_vpmu_save_msr_context(v, type, index, msr_content); + if ( type != MSR_TYPE_GLOBAL ) + { + u64 mask; + int inject_gp = 0; + switch ( type ) + { + case MSR_TYPE_ARCH_CTRL: /* MSR_P6_EVNTSEL[0,...] */ + mask = ~((1ull << 32) - 1); + if (msr_content & mask) + inject_gp = 1; + break; + case MSR_TYPE_CTRL: /* IA32_FIXED_CTR_CTRL */ + if ( msr == MSR_IA32_DS_AREA ) + break; + /* 4 bits per counter, currently 3 fixed counters implemented. 
*/ + mask = ~((1ull << (VPMU_CORE2_NUM_FIXED * FIXED_CTR_CTRL_BITS)) - 1); + if (msr_content & mask) + inject_gp = 1; + break; + case MSR_TYPE_COUNTER: /* IA32_FIXED_CTR[0-2] */ + mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1); + if (msr_content & mask) + inject_gp = 1; + break; + } + + if (inject_gp) + { + if ( is_hvm_domain(v->domain) ) + hvm_inject_hw_exception(TRAP_gp_fault, 0); + else + send_guest_trap(v->domain, v->vcpu_id, TRAP_gp_fault); + } + else + wrmsrl(msr, msr_content); + } + else + { + if ( is_hvm_domain(v->domain) ) + vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); + else + { + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); + core2_vpmu_cxt->global_ctrl = msr_content; + } + } + + return 1; +} + +static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + int type = -1, index = -1; + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct core2_vpmu_context *core2_vpmu_cxt = NULL; + + if ( core2_vpmu_msr_common_check(msr, &type, &index) ) + { + core2_vpmu_cxt = vpmu->context; + switch ( msr ) + { + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + *msr_content = 0; + break; + case MSR_CORE_PERF_GLOBAL_STATUS: + *msr_content = core2_vpmu_cxt->global_ovf_status; + break; + case MSR_CORE_PERF_GLOBAL_CTRL: + if ( is_hvm_domain(v->domain) ) + vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); + else + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content); + break; + default: + rdmsrl(msr, *msr_content); + } + } + else + { + /* Extension for BTS */ + if ( msr == MSR_IA32_MISC_ENABLE ) + { + if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) + *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; + } + else + return 0; + } + + return 1; +} + +static void core2_vpmu_do_cpuid(unsigned int input, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + if (input == 0x1) + { + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) + { + /* Switch on the 
'Debug Store' feature in CPUID.EAX[1]:EDX[21] */ + *edx |= cpufeat_mask(X86_FEATURE_DS); + if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) ) + *ecx |= cpufeat_mask(X86_FEATURE_DTES64); + if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) ) + *ecx |= cpufeat_mask(X86_FEATURE_DSCPL); + } + } +} + +/* Dump vpmu info on console, called in the context of keyhandler 'q'. */ +static void core2_vpmu_dump(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + int i; + struct core2_vpmu_context *core2_vpmu_cxt = NULL; + u64 val; + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) + { + if ( vpmu_set(vpmu, VPMU_CONTEXT_LOADED) ) + printk(" vPMU loaded\n"); + else + printk(" vPMU allocated\n"); + return; + } + + printk(" vPMU running\n"); + core2_vpmu_cxt = vpmu->context; + + /* Print the contents of the counter and its configuration msr. */ + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + struct arch_msr_pair* msr_pair = core2_vpmu_cxt->arch_msr_pair; + if ( core2_vpmu_cxt->pmu_enable->arch_pmc_enable[i] ) + printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n", + i, msr_pair[i].counter, msr_pair[i].control); + } + /* + * The configuration of the fixed counter is 4 bits each in the + * MSR_CORE_PERF_FIXED_CTR_CTRL. 
+ */ + val = core2_vpmu_cxt->ctrls[MSR_CORE_PERF_FIXED_CTR_CTRL_IDX]; + for ( i = 0; i < VPMU_CORE2_NUM_FIXED; i++ ) + { + if ( core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] ) + printk(" fixed_%d: 0x%016lx ctrl: 0x%lx\n", + i, core2_vpmu_cxt->fix_counters[i], + val & FIXED_CTR_CTRL_MASK); + val >>= FIXED_CTR_CTRL_BITS; + } +} + +static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs) +{ + struct vcpu *v = current; + u64 msr_content; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct core2_vpmu_context *core2_vpmu_cxt = vpmu->context; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content); + if ( msr_content ) + { + if ( is_pmc_quirk ) + handle_pmc_quirk(msr_content); + core2_vpmu_cxt->global_ovf_status |= msr_content; + msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1); + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); + } + else + { + /* No PMC overflow but perhaps a Trace Message interrupt. */ + msr_content = __vmread(GUEST_IA32_DEBUGCTL); + if ( !(msr_content & IA32_DEBUGCTLMSR_TR) ) + return 0; + } + + /* HW sets the MASK bit when performance counter interrupt occurs*/ + vpmu->hw_lapic_lvtpc = apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED; + apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc); + + return 1; +} + +static int core2_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + u64 msr_content; + struct cpuinfo_x86 *c = &current_cpu_data; + + if ( !(vpmu_flags & VPMU_INTEL_BTS) ) + goto func_out; + /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */ + if ( cpu_has(c, X86_FEATURE_DS) ) + { + if ( !cpu_has(c, X86_FEATURE_DTES64) ) + { + printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area" + " - Debug Store disabled for d%d:v%d\n", + v->domain->domain_id, v->vcpu_id); + goto func_out; + } + vpmu_set(vpmu, VPMU_CPU_HAS_DS); + rdmsrl(MSR_IA32_MISC_ENABLE, msr_content); + if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL ) + { + /* If BTS_UNAVAIL is set reset the DS feature. 
*/ + vpmu_reset(vpmu, VPMU_CPU_HAS_DS); + printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL" + " - Debug Store disabled for d%d:v%d\n", + v->domain->domain_id, v->vcpu_id); + } + else + { + vpmu_set(vpmu, VPMU_CPU_HAS_BTS); + if ( !cpu_has(c, X86_FEATURE_DSCPL) ) + printk(XENLOG_G_INFO + "vpmu: CPU doesn't support CPL-Qualified BTS\n"); + printk("******************************************************\n"); + printk("** WARNING: Emulation of BTS Feature is switched on **\n"); + printk("** Using this processor feature in a virtualized **\n"); + printk("** environment is not 100%% safe. **\n"); + printk("** Setting the DS buffer address with wrong values **\n"); + printk("** may lead to hypervisor hangs or crashes. **\n"); + printk("** It is NOT recommended for production use! **\n"); + printk("******************************************************\n"); + } + } +func_out: + + arch_pmc_cnt = core2_get_pmc_count(); + check_pmc_quirk(); + + /* PV domains can allocate resources immediately */ + if ( !is_hvm_domain(v->domain) ) + if ( !core2_vpmu_alloc_resource(v) ) + return 1; + + return 0; +} + +static void core2_vpmu_destroy(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct core2_vpmu_context *core2_vpmu_cxt = vpmu->context; + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + if ( is_hvm_domain(v->domain) ) + { + xfree(core2_vpmu_cxt->pmu_enable); + xfree(vpmu->context); + if ( cpu_has_vmx_msr_bitmap ) + core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); + } + + release_pmu_ownship(PMU_OWNER_HVM); + vpmu_clear(vpmu); +} + +struct arch_vpmu_ops core2_vpmu_ops = { + .do_wrmsr = core2_vpmu_do_wrmsr, + .do_rdmsr = core2_vpmu_do_rdmsr, + .do_interrupt = core2_vpmu_do_interrupt, + .do_cpuid = core2_vpmu_do_cpuid, + .arch_vpmu_destroy = core2_vpmu_destroy, + .arch_vpmu_save = core2_vpmu_save, + .arch_vpmu_load = core2_vpmu_load, + .arch_vpmu_dump = core2_vpmu_dump +}; + +static void core2_no_vpmu_do_cpuid(unsigned int input, + unsigned 
int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* + * As in this case the vpmu is not enabled reset some bits in the + * architectural performance monitoring related part. + */ + if ( input == 0xa ) + { + *eax &= ~PMU_VERSION_MASK; + *eax &= ~PMU_GENERAL_NR_MASK; + *eax &= ~PMU_GENERAL_WIDTH_MASK; + + *edx &= ~PMU_FIXED_NR_MASK; + *edx &= ~PMU_FIXED_WIDTH_MASK; + } +} + +/* + * If its a vpmu msr set it to 0. + */ +static int core2_no_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + int type = -1, index = -1; + if ( !is_core2_vpmu_msr(msr, &type, &index) ) + return 0; + *msr_content = 0; + return 1; +} + +/* + * These functions are used in case vpmu is not enabled. + */ +struct arch_vpmu_ops core2_no_vpmu_ops = { + .do_rdmsr = core2_no_vpmu_do_rdmsr, + .do_cpuid = core2_no_vpmu_do_cpuid, +}; + +int vmx_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + uint8_t family = current_cpu_data.x86; + uint8_t cpu_model = current_cpu_data.x86_model; + int ret = 0; + + vpmu->arch_vpmu_ops = &core2_no_vpmu_ops; + if ( vpmu_flags == VPMU_OFF ) + return 0; + + if ( family == 6 ) + { + switch ( cpu_model ) + { + /* Core2: */ + case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ + case 0x17: /* 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ + case 0x1d: /* six-core 45 nm xeon "Dunnington" */ + + case 0x2a: /* SandyBridge */ + case 0x2d: /* SandyBridge, "Romley-EP" */ + + /* Nehalem: */ + case 0x1a: /* 45 nm nehalem, "Bloomfield" */ + case 0x1e: /* 45 nm nehalem, "Lynnfield", "Clarksfield", "Jasper Forest" */ + case 0x2e: /* 45 nm nehalem-ex, "Beckton" */ + + /* Westmere: */ + case 0x25: /* 32 nm nehalem, "Clarkdale", "Arrandale" */ + case 0x2c: /* 32 nm nehalem, "Gulftown", "Westmere-EP" */ + case 0x27: /* 32 nm Westmere-EX */ + + case 0x3a: /* IvyBridge */ + case 0x3e: /* IvyBridge EP */ + 
case 0x3c: /* Haswell */ + ret = core2_vpmu_initialise(v, vpmu_flags); + if ( !ret ) + vpmu->arch_vpmu_ops = &core2_vpmu_ops; + return ret; + } + } + + printk("VPMU: Initialization failed. " + "Intel processor family %d model %d has not " + "been supported\n", family, cpu_model); + return -EINVAL; +} + diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h index 4f2247e..0b79d39 100644 --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -8,6 +8,7 @@ #include <asm/hvm/domain.h> #include <asm/e820.h> #include <asm/mce.h> +#include <asm/vpmu.h> #include <public/vcpu.h> #define has_32bit_shinfo(d) ((d)->arch.has_32bit_shinfo) diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h index 5971613..beb959f 100644 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -20,7 +20,6 @@ #define __ASM_X86_HVM_VMX_VMCS_H__ #include <asm/hvm/io.h> -#include <asm/hvm/vpmu.h> #include <irq_vectors.h> extern void vmcs_dump_vcpu(struct vcpu *v); diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h deleted file mode 100644 index f7f507f..0000000 --- a/xen/include/asm-x86/hvm/vpmu.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * vpmu.h: PMU virtualization for HVM domain. - * - * Copyright (c) 2007, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Author: Haitao Shan <haitao.shan@xxxxxxxxx> - */ - -#ifndef __ASM_X86_HVM_VPMU_H_ -#define __ASM_X86_HVM_VPMU_H_ - -#include <public/xenpmu.h> - - -#define vcpu_vpmu(vcpu) (&((vcpu)->arch.vpmu)) -#define vpmu_vcpu(vpmu) (container_of((vpmu), struct vcpu, \ - arch.vpmu)) - -#define MSR_TYPE_COUNTER 0 -#define MSR_TYPE_CTRL 1 -#define MSR_TYPE_GLOBAL 2 -#define MSR_TYPE_ARCH_COUNTER 3 -#define MSR_TYPE_ARCH_CTRL 4 - - -/* Arch specific operations shared by all vpmus */ -struct arch_vpmu_ops { - int (*do_wrmsr)(unsigned int msr, uint64_t msr_content); - int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content); - int (*do_interrupt)(struct cpu_user_regs *regs); - void (*do_cpuid)(unsigned int input, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx); - void (*arch_vpmu_destroy)(struct vcpu *v); - int (*arch_vpmu_save)(struct vcpu *v); - void (*arch_vpmu_load)(struct vcpu *v); - void (*arch_vpmu_dump)(struct vcpu *v); -}; - -int vmx_vpmu_initialise(struct vcpu *, unsigned int flags); -int svm_vpmu_initialise(struct vcpu *, unsigned int flags); - -struct vpmu_struct { - u32 flags; - u32 last_pcpu; - u32 hw_lapic_lvtpc; - void *context; - struct arch_vpmu_ops *arch_vpmu_ops; - xenpmu_data_t *xenpmu_data; -}; - -/* VPMU states */ -#define VPMU_CONTEXT_ALLOCATED 0x1 -#define VPMU_CONTEXT_LOADED 0x2 -#define VPMU_RUNNING 0x4 -#define VPMU_CONTEXT_SAVE 0x8 /* Force context save */ -#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */ -#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20 -#define VPMU_WAIT_FOR_FLUSH 0x40 /* PV guest waits for XENPMU_flush */ - -#define vpmu_set(_vpmu, _x) ((_vpmu)->flags |= (_x)) -#define vpmu_reset(_vpmu, _x) ((_vpmu)->flags &= ~(_x)) -#define vpmu_is_set(_vpmu, _x) ((_vpmu)->flags & 
(_x)) -#define vpmu_is_set_all(_vpmu, _x) (((_vpmu)->flags & (_x)) == (_x)) -#define vpmu_clear(_vpmu) ((_vpmu)->flags = 0) - -int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content); -int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content); -int vpmu_do_interrupt(struct cpu_user_regs *regs); -void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx); -void vpmu_initialise(struct vcpu *v); -void vpmu_destroy(struct vcpu *v); -void vpmu_save(struct vcpu *v); -void vpmu_load(struct vcpu *v); -void vpmu_dump(struct vcpu *v); - -extern int acquire_pmu_ownership(int pmu_ownership); -extern void release_pmu_ownership(int pmu_ownership); - -extern uint32_t vpmu_mode; - -#endif /* __ASM_X86_HVM_VPMU_H_*/ - diff --git a/xen/include/asm-x86/vpmu.h b/xen/include/asm-x86/vpmu.h new file mode 100644 index 0000000..f7f507f --- /dev/null +++ b/xen/include/asm-x86/vpmu.h @@ -0,0 +1,97 @@ +/* + * vpmu.h: PMU virtualization for HVM domain. + * + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Author: Haitao Shan <haitao.shan@xxxxxxxxx> + */ + +#ifndef __ASM_X86_HVM_VPMU_H_ +#define __ASM_X86_HVM_VPMU_H_ + +#include <public/xenpmu.h> + + +#define vcpu_vpmu(vcpu) (&((vcpu)->arch.vpmu)) +#define vpmu_vcpu(vpmu) (container_of((vpmu), struct vcpu, \ + arch.vpmu)) + +#define MSR_TYPE_COUNTER 0 +#define MSR_TYPE_CTRL 1 +#define MSR_TYPE_GLOBAL 2 +#define MSR_TYPE_ARCH_COUNTER 3 +#define MSR_TYPE_ARCH_CTRL 4 + + +/* Arch specific operations shared by all vpmus */ +struct arch_vpmu_ops { + int (*do_wrmsr)(unsigned int msr, uint64_t msr_content); + int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content); + int (*do_interrupt)(struct cpu_user_regs *regs); + void (*do_cpuid)(unsigned int input, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); + void (*arch_vpmu_destroy)(struct vcpu *v); + int (*arch_vpmu_save)(struct vcpu *v); + void (*arch_vpmu_load)(struct vcpu *v); + void (*arch_vpmu_dump)(struct vcpu *v); +}; + +int vmx_vpmu_initialise(struct vcpu *, unsigned int flags); +int svm_vpmu_initialise(struct vcpu *, unsigned int flags); + +struct vpmu_struct { + u32 flags; + u32 last_pcpu; + u32 hw_lapic_lvtpc; + void *context; + struct arch_vpmu_ops *arch_vpmu_ops; + xenpmu_data_t *xenpmu_data; +}; + +/* VPMU states */ +#define VPMU_CONTEXT_ALLOCATED 0x1 +#define VPMU_CONTEXT_LOADED 0x2 +#define VPMU_RUNNING 0x4 +#define VPMU_CONTEXT_SAVE 0x8 /* Force context save */ +#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */ +#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20 +#define VPMU_WAIT_FOR_FLUSH 0x40 /* PV guest waits for XENPMU_flush */ + +#define vpmu_set(_vpmu, _x) ((_vpmu)->flags |= (_x)) +#define vpmu_reset(_vpmu, _x) ((_vpmu)->flags &= ~(_x)) +#define vpmu_is_set(_vpmu, _x) ((_vpmu)->flags & (_x)) +#define vpmu_is_set_all(_vpmu, _x) (((_vpmu)->flags & (_x)) == (_x)) +#define vpmu_clear(_vpmu) ((_vpmu)->flags = 0) + +int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content); +int vpmu_do_rdmsr(unsigned int 
msr, uint64_t *msr_content); +int vpmu_do_interrupt(struct cpu_user_regs *regs); +void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); +void vpmu_initialise(struct vcpu *v); +void vpmu_destroy(struct vcpu *v); +void vpmu_save(struct vcpu *v); +void vpmu_load(struct vcpu *v); +void vpmu_dump(struct vcpu *v); + +extern int acquire_pmu_ownership(int pmu_ownership); +extern void release_pmu_ownership(int pmu_ownership); + +extern uint32_t vpmu_mode; + +#endif /* __ASM_X86_HVM_VPMU_H_*/ + -- 1.8.1.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |