Re: [Xen-devel] [PATCH v21 11/14] x86/VPMU: Handle PMU interrupts for PV(H) guests
On Friday, 08 May 2015, at 17:06:11, Boris Ostrovsky wrote:
> Add support for handling PMU interrupts for PV(H) guests.
I have only some minor nits below.
Reviewed-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>
>
> VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
> hypercall. This allows the guest to access PMU MSR values that are stored in
> VPMU context which is shared between hypervisor and domain, thus avoiding
> traps to hypervisor.
>
> Since the interrupt handler may now force VPMU context save (i.e. set
> VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
> until now expected this flag to be set only when the counters were stopped.
>
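(Just to spell out the intended flow from the guest's side -- the sketch below
is only an illustration of how I read the above, not part of the patch. The
shared-page pointer, hypercall_xenpmu_op() and consume_sample() are made-up
names.)

/* Illustrative sketch only: a PV(H) guest's VIRQ_XENPMU handler.
 * xenpmu_shared is the per-vcpu page registered via XENPMU_init;
 * hypercall_xenpmu_op() and consume_sample() are assumed helpers. */
static struct xen_pmu_data *xenpmu_shared;

static void guest_handle_pmu_virq(void)
{
    struct xen_pmu_data *pd = xenpmu_shared;

    /* Xen copies the VPMU context into the shared page and sets
     * PMU_CACHED before sending VIRQ_XENPMU, so the sample and the
     * PMU MSR values can be read here without trapping. */
    if ( !(pd->pmu.pmu_flags & PMU_CACHED) )
        return;

    consume_sample(&pd->pmu.r.regs, pd->domain_id, pd->vcpu_id);

    /* Optionally adjust counters/LVTPC in the cached context, then ask
     * Xen to verify it, load it back into HW and clear PMU_CACHED. */
    if ( hypercall_xenpmu_op(XENPMU_flush, NULL) != 0 )
        return;  /* context rejected by Xen, PMU stays cached */
}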
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> Acked-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
> ---
> Changes in v21:
> * Copy hypervisor-private VPMU context to shared page during interrupt and
>   copy it back during XENPMU_flush (see also changes to patch 6). Verify
>   user-provided VPMU context before loading it into hypervisor-private one
>   (and then to HW). Specifically, the changes are:
>   - Change definitions of save/load ops to take a flag that specifies whether
>     a copy and verification is required and, for the load op, to return error
>     if verification fails.
>   - Both load ops: update VPMU_RUNNING flag based on user-provided context,
>     copy VPMU context
>   - Both save ops: copy VPMU context
>   - core2_vpmu_load(): add core2_vpmu_verify() call to do context verification
>   - precompute VPMU context size into ctxt_sz to use in memcpy
>   - Return an error in XENPMU_flush (vpmu.c) if verification fails.
> * Non-privileged domains should not be provided with physical CPUID in
> vpmu_do_interrupt(), set it to vcpu_id instead.
>
> xen/arch/x86/hvm/svm/vpmu.c | 63 +++++++---
> xen/arch/x86/hvm/vmx/vpmu_core2.c | 87 ++++++++++++--
> xen/arch/x86/hvm/vpmu.c | 237 +++++++++++++++++++++++++++++++++++---
> xen/include/asm-x86/hvm/vpmu.h | 8 +-
> xen/include/public/arch-x86/pmu.h | 3 +
> xen/include/public/pmu.h | 2 +
> xen/include/xsm/dummy.h | 4 +-
> xen/xsm/flask/hooks.c | 2 +
> 8 files changed, 359 insertions(+), 47 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 74d03a5..efe5573 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -45,6 +45,7 @@ static unsigned int __read_mostly num_counters;
> static const u32 __read_mostly *counters;
> static const u32 __read_mostly *ctrls;
> static bool_t __read_mostly k7_counters_mirrored;
> +static unsigned long __read_mostly ctxt_sz;
>
> #define F10H_NUM_COUNTERS 4
> #define F15H_NUM_COUNTERS 6
> @@ -188,27 +189,52 @@ static inline void context_load(struct vcpu *v)
> }
> }
>
> -static void amd_vpmu_load(struct vcpu *v)
> +static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
> {
> struct vpmu_struct *vpmu = vcpu_vpmu(v);
> - struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
> - uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
> + struct xen_pmu_amd_ctxt *ctxt;
> + uint64_t *ctrl_regs;
> + unsigned int i;
>
> vpmu_reset(vpmu, VPMU_FROZEN);
>
> - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> + if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> {
> - unsigned int i;
> + ctxt = vpmu->context;
> + ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
>
> for ( i = 0; i < num_counters; i++ )
> wrmsrl(ctrls[i], ctrl_regs[i]);
>
> - return;
> + return 0;
> + }
> +
> + if ( from_guest )
> + {
> + ASSERT(!is_hvm_vcpu(v));
> +
> + ctxt = &vpmu->xenpmu_data->pmu.c.amd;
> + ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
> + for ( i = 0; i < num_counters; i++ )
> + {
> + if ( is_pmu_enabled(ctrl_regs[i]) )
> + {
> + vpmu_set(vpmu, VPMU_RUNNING);
> + break;
> + }
> + }
> +
> + if ( i == num_counters )
> + vpmu_reset(vpmu, VPMU_RUNNING);
> +
> + memcpy(vpmu->context, &vpmu->xenpmu_data->pmu.c.amd, ctxt_sz);
> }
>
> vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
>
> context_load(v);
> +
> + return 0;
> }
>
> static inline void context_save(struct vcpu *v)
> @@ -223,22 +249,17 @@ static inline void context_save(struct vcpu *v)
> rdmsrl(counters[i], counter_regs[i]);
> }
>
> -static int amd_vpmu_save(struct vcpu *v)
> +static int amd_vpmu_save(struct vcpu *v, bool_t to_guest)
> {
> struct vpmu_struct *vpmu = vcpu_vpmu(v);
> unsigned int i;
>
> - /*
> - * Stop the counters. If we came here via vpmu_save_force (i.e.
> - * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
> - */
> + for ( i = 0; i < num_counters; i++ )
> + wrmsrl(ctrls[i], 0);
> +
> if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
> {
> vpmu_set(vpmu, VPMU_FROZEN);
> -
> - for ( i = 0; i < num_counters; i++ )
> - wrmsrl(ctrls[i], 0);
> -
> return 0;
> }
>
> @@ -251,6 +272,12 @@ static int amd_vpmu_save(struct vcpu *v)
> has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
> amd_vpmu_unset_msr_bitmap(v);
>
> + if ( to_guest )
> + {
> + ASSERT(!is_hvm_vcpu(v));
> + memcpy(&vpmu->xenpmu_data->pmu.c.amd, vpmu->context, ctxt_sz);
> + }
> +
> return 1;
> }
>
> @@ -433,8 +460,7 @@ int svm_vpmu_initialise(struct vcpu *v)
> if ( !counters )
> return -EINVAL;
>
> - ctxt = xzalloc_bytes(sizeof(*ctxt) +
> - 2 * sizeof(uint64_t) * num_counters);
> + ctxt = xzalloc_bytes(ctxt_sz);
> if ( !ctxt )
> {
> printk(XENLOG_G_WARNING "Insufficient memory for PMU, "
> @@ -490,6 +516,9 @@ int __init amd_vpmu_init(void)
> return -ENOSPC;
> }
>
> + ctxt_sz = sizeof(struct xen_pmu_amd_ctxt) +
> + 2 * sizeof(uint64_t) * num_counters;
> +
> return 0;
> }
>
> diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> index 49f6771..a8df3a0 100644
> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -88,6 +88,9 @@ static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
> static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
> static uint64_t __read_mostly global_ovf_ctrl_mask;
>
> +/* VPMU context size */
> +static unsigned long __read_mostly ctxt_sz;
> +
> /*
> * QUIRK to workaround an issue on various family 6 cpus.
> * The issue leads to endless PMC interrupt loops on the processor.
> @@ -310,7 +313,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
> rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
> }
>
> -static int core2_vpmu_save(struct vcpu *v)
> +static int core2_vpmu_save(struct vcpu *v, bool_t to_user)
The parameter name differs between core2_vpmu_save(..., to_user) and
amd_vpmu_save(..., to_guest), although both implement the same interface.
> {
> struct vpmu_struct *vpmu = vcpu_vpmu(v);
>
> @@ -327,6 +330,12 @@ static int core2_vpmu_save(struct vcpu *v)
> has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
> core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
>
> + if ( to_user )
> + {
> + ASSERT(!is_hvm_vcpu(v));
> + memcpy(&vpmu->xenpmu_data->pmu.c.intel, vpmu->context, ctxt_sz);
> + }
> +
> return 1;
> }
>
> @@ -363,16 +372,79 @@ static inline void __core2_vpmu_load(struct vcpu *v)
> }
> }
>
> -static void core2_vpmu_load(struct vcpu *v)
> +static int core2_vpmu_verify(struct vcpu *v)
> +{
> + unsigned int i;
> + struct vpmu_struct *vpmu = vcpu_vpmu(v);
> + struct xen_pmu_intel_ctxt *core2_vpmu_cxt =
> + &v->arch.vpmu.xenpmu_data->pmu.c.intel;
> + uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
> + struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
> + vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
> + uint64_t fixed_ctrl;
> + uint64_t enabled_cntrs = 0;
> +
> + if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
> + return 1;
> +
> + fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
> + if ( fixed_ctrl & fixed_ctrl_mask )
> + return 1;
> +
> + for ( i = 0; i < fixed_pmc_cnt; i++ )
> + {
> + if ( fixed_counters[i] & fixed_counters_mask )
> + return 1;
> + if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
> + enabled_cntrs |= (1ULL << i);
> + }
> + enabled_cntrs <<= 32;
> +
> + for ( i = 0; i < arch_pmc_cnt; i++ )
> + {
> + uint64_t control = xen_pmu_cntr_pair[i].control;
> +
> + if ( control & ARCH_CTRL_MASK )
> + return 1;
> + if ( control & (1ULL << 22) )
As I think that's the ENABLE bit (bit 22), a #define would be better, or at
least a comment?
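Something along these lines, for example (the name is only a suggestion, not
taken from the patch):

/* EN bit of IA32_PERFEVTSELx (architectural performance monitoring) */
#define ARCH_CNTR_ENABLED   (1ULL << 22)

        ...
        if ( control & ARCH_CNTR_ENABLED )
            enabled_cntrs |= (1ULL << i);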
> + enabled_cntrs |= (1ULL << i);
> + }
> +
> + if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) &&
> + !is_canonical_address(core2_vpmu_cxt->ds_area) )
> + return 1;
> +
> + if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
> + (core2_vpmu_cxt->ds_area != 0) )
> + vpmu_set(vpmu, VPMU_RUNNING);
> + else
> + vpmu_reset(vpmu, VPMU_RUNNING);
> +
> + *(uint64_t *)vpmu->priv_context = enabled_cntrs;
> +
> + return 0;
> +}
> +
> +static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
> {
> struct vpmu_struct *vpmu = vcpu_vpmu(v);
>
> if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> - return;
> + return 0;
> +
> + if ( from_guest )
> + {
> + ASSERT(!is_hvm_vcpu(v));
> + if ( core2_vpmu_verify(v) )
> + return 1;
> + memcpy(vpmu->context, &v->arch.vpmu.xenpmu_data->pmu.c.intel, ctxt_sz);
> + }
>
> vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
>
> __core2_vpmu_load(v);
> +
> + return 0;
> }
>
> static int core2_vpmu_alloc_resource(struct vcpu *v)
> @@ -395,10 +467,7 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
> vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> }
>
> - core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
> - sizeof(uint64_t) * fixed_pmc_cnt +
> - sizeof(struct xen_pmu_cntr_pair) *
> - arch_pmc_cnt);
> + core2_vpmu_cxt = xzalloc_bytes(ctxt_sz);
> p = xzalloc(uint64_t);
> if ( !core2_vpmu_cxt || !p )
> goto out_err;
> @@ -921,6 +990,10 @@ int __init core2_vpmu_init(void)
> (((1ULL << fixed_pmc_cnt) - 1) << 32) |
> ((1ULL << arch_pmc_cnt) - 1));
>
> + ctxt_sz = sizeof(struct xen_pmu_intel_ctxt) +
> + sizeof(uint64_t) * fixed_pmc_cnt +
> + sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
> +
> check_pmc_quirk();
>
> if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index 07fa368..a30f02a 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -85,31 +85,56 @@ static void __init parse_vpmu_param(char *s)
> void vpmu_lvtpc_update(uint32_t val)
> {
> struct vpmu_struct *vpmu;
> + struct vcpu *curr = current;
>
> - if ( vpmu_mode == XENPMU_MODE_OFF )
> + if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
> return;
>
> - vpmu = vcpu_vpmu(current);
> + vpmu = vcpu_vpmu(curr);
>
> vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
> - apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +
> + /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
> + if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
> + !(vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED) )
> + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> }
>
> int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
> {
> - struct vpmu_struct *vpmu = vcpu_vpmu(current);
> + struct vcpu *curr = current;
> + struct vpmu_struct *vpmu;
>
> if ( vpmu_mode == XENPMU_MODE_OFF )
> return 0;
>
> + vpmu = vcpu_vpmu(curr);
> if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
> - return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> + {
> + int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> +
> + /*
> + * We may have received a PMU interrupt during WRMSR handling
> + * and since do_wrmsr may load VPMU context we should save
> + * (and unload) it again.
> + */
> + if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
> + (vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED) )
> + {
> + vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> + vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
> + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> + }
> + return ret;
> + }
> +
> return 0;
> }
>
> int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
> {
> - struct vpmu_struct *vpmu = vcpu_vpmu(current);
> + struct vcpu *curr = current;
> + struct vpmu_struct *vpmu;
>
> if ( vpmu_mode == XENPMU_MODE_OFF )
> {
> @@ -117,24 +142,166 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
> return 0;
> }
>
> + vpmu = vcpu_vpmu(curr);
> if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
> - return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> + {
> + int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +
> + if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
> + (vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED) )
> + {
> + vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> + vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
> + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> + }
> + return ret;
> + }
> else
> *msr_content = 0;
>
> return 0;
> }
>
> +static inline struct vcpu *choose_hwdom_vcpu(void)
> +{
> + unsigned idx;
> +
> + if ( hardware_domain->max_vcpus == 0 )
> + return NULL;
> +
> + idx = smp_processor_id() % hardware_domain->max_vcpus;
> +
> + return hardware_domain->vcpu[idx];
> +}
> +
> void vpmu_do_interrupt(struct cpu_user_regs *regs)
> {
> - struct vcpu *v = current;
> - struct vpmu_struct *vpmu = vcpu_vpmu(v);
> + struct vcpu *sampled = current, *sampling;
> + struct vpmu_struct *vpmu;
> +
> + /* dom0 will handle interrupt for special domains (e.g. idle domain) */
> + if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED )
> + {
> + sampling = choose_hwdom_vcpu();
> + if ( !sampling )
> + return;
> + }
> + else
> + sampling = sampled;
> +
> + vpmu = vcpu_vpmu(sampling);
> + if ( !is_hvm_vcpu(sampling) )
> + {
> + /* PV(H) guest */
> + const struct cpu_user_regs *cur_regs;
> + uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
> + domid_t domid = DOMID_SELF;
> +
> + if ( !vpmu->xenpmu_data )
> + return;
> +
> + if ( is_pvh_vcpu(sampling) &&
> + !vpmu->arch_vpmu_ops->do_interrupt(regs) )
Here you expect vpmu->arch_vpmu_ops != NULL but ...
> + return;
> +
> + if ( *flags & PMU_CACHED )
> + return;
> +
> + /* PV guest will be reading PMU MSRs from xenpmu_data */
> + vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> + vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
> + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +
> + if ( has_hvm_container_vcpu(sampled) )
> + *flags = 0;
> + else
> + *flags = PMU_SAMPLE_PV;
> +
> + /* Store appropriate registers in xenpmu_data */
> + /* FIXME: 32-bit PVH should go here as well */
> + if ( is_pv_32bit_vcpu(sampling) )
> + {
> + /*
> + * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
> + * and therefore we treat it the same way as a non-privileged
> + * PV 32-bit domain.
> + */
> + struct compat_pmu_regs *cmp;
> +
> + cur_regs = guest_cpu_user_regs();
> +
> + cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
> + cmp->ip = cur_regs->rip;
> + cmp->sp = cur_regs->rsp;
> + cmp->flags = cur_regs->eflags;
> + cmp->ss = cur_regs->ss;
> + cmp->cs = cur_regs->cs;
> + if ( (cmp->cs & 3) > 1 )
> + *flags |= PMU_SAMPLE_USER;
> + }
> + else
> + {
> + struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
> +
> + if ( (vpmu_mode & XENPMU_MODE_SELF) )
> + cur_regs = guest_cpu_user_regs();
> + else if ( !guest_mode(regs) && is_hardware_domain(sampling->domain) )
> + {
> + cur_regs = regs;
> + domid = DOMID_XEN;
> + }
> + else
> + cur_regs = guest_cpu_user_regs();
> +
> + r->ip = cur_regs->rip;
> + r->sp = cur_regs->rsp;
> + r->flags = cur_regs->eflags;
> +
> + if ( !has_hvm_container_vcpu(sampled) )
> + {
> + r->ss = cur_regs->ss;
> + r->cs = cur_regs->cs;
> + if ( !(sampled->arch.flags & TF_kernel_mode) )
> + *flags |= PMU_SAMPLE_USER;
> + }
> + else
> + {
> + struct segment_register seg;
> +
> + hvm_get_segment_register(sampled, x86_seg_cs, &seg);
> + r->cs = seg.sel;
> + hvm_get_segment_register(sampled, x86_seg_ss, &seg);
> + r->ss = seg.sel;
> + r->cpl = seg.attr.fields.dpl;
> + if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
> + *flags |= PMU_SAMPLE_REAL;
> + }
> + }
> +
> + vpmu->xenpmu_data->domain_id = domid;
> + vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
> + if ( is_hardware_domain(sampling->domain) )
> + vpmu->xenpmu_data->pcpu_id = smp_processor_id();
> + else
> + vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;
> +
> + vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
> + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> + *flags |= PMU_CACHED;
> +
> + send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
> +
> + return;
> + }
>
> if ( vpmu->arch_vpmu_ops )
... here is a check.
Maybe the check here is unnecessary, because you would never get this interrupt
without arch_vpmu_ops != NULL to switch the machinery on?
There are other locations too, some with a check before calling
vpmu->arch_vpmu_ops->... and some without. Maybe it would make sense to always
require a complete set of arch_vpmu_ops functions to avoid this?
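For instance, a one-time check at initialisation could enforce that (only a
sketch of the idea -- the helper name is made up and I haven't double-checked
all members of struct arch_vpmu_ops):

/* Sketch: refuse a partially filled ops table up front, so the
 * NULL checks at the individual call sites could go away. */
static bool_t vpmu_ops_complete(const struct arch_vpmu_ops *ops)
{
    return ops && ops->do_wrmsr && ops->do_rdmsr && ops->do_interrupt &&
           ops->do_cpuid && ops->arch_vpmu_destroy &&
           ops->arch_vpmu_save && ops->arch_vpmu_load && ops->arch_vpmu_dump;
}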
> {
> - struct vlapic *vlapic = vcpu_vlapic(v);
> + struct vlapic *vlapic = vcpu_vlapic(sampling);
> u32 vlapic_lvtpc;
>
> + /* We don't support (yet) HVM dom0 */
> + ASSERT(sampling == sampled);
> +
> if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
> !is_vlapic_lvtpc_enabled(vlapic) )
> return;
> @@ -147,7 +314,7 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
> vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
> break;
> case APIC_MODE_NMI:
> - v->nmi_pending = 1;
> + sampling->nmi_pending = 1;
> break;
> }
> }
> @@ -174,7 +341,7 @@ static void vpmu_save_force(void *arg)
> vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
>
> if ( vpmu->arch_vpmu_ops )
> - (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v);
> + (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);
>
> vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
>
> @@ -193,20 +360,20 @@ void vpmu_save(struct vcpu *v)
> per_cpu(last_vcpu, pcpu) = v;
>
> if ( vpmu->arch_vpmu_ops )
> - if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
> + if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
> vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>
> apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
> }
>
> -void vpmu_load(struct vcpu *v)
> +int vpmu_load(struct vcpu *v, bool_t verify)
vpmu_load() uses "verify", but within the arch_vpmu_load functions
(core2_vpmu_load() and amd_vpmu_load()) you use "from_guest" for the same
meaning, which is a little confusing.
Always using "verify" would be clearer, I think.
Dietmar.
> {
> struct vpmu_struct *vpmu = vcpu_vpmu(v);
> int pcpu = smp_processor_id();
> struct vcpu *prev = NULL;
>
> if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
> - return;
> + return 0;
>
> /* First time this VCPU is running here */
> if ( vpmu->last_pcpu != pcpu )
> @@ -245,15 +412,24 @@ void vpmu_load(struct vcpu *v)
> local_irq_enable();
>
> /* Only when PMU is counting, we load PMU context immediately. */
> - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
> - return;
> + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
> + (!is_hvm_vcpu(vpmu_vcpu(vpmu)) &&
> + (vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED)) )
> + return 0;
>
> if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
> {
> apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> /* Arch code needs to set VPMU_CONTEXT_LOADED */
> - vpmu->arch_vpmu_ops->arch_vpmu_load(v);
> + if ( vpmu->arch_vpmu_ops->arch_vpmu_load(v, verify) )
> + {
> + apic_write_around(APIC_LVTPC,
> + vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
> + return 1;
> + }
> }
> +
> + return 0;
> }
>
> void vpmu_initialise(struct vcpu *v)
> @@ -265,6 +441,8 @@ void vpmu_initialise(struct vcpu *v)
>
> BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
> BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
> + BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
> + BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
>
> ASSERT(!vpmu->flags && !vpmu->context);
>
> @@ -449,7 +627,9 @@ void vpmu_dump(struct vcpu *v)
> long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
> {
> int ret;
> + struct vcpu *curr;
> struct xen_pmu_params pmu_params = {.val = 0};
> + struct xen_pmu_data *xenpmu_data;
>
> if ( !opt_vpmu_enabled )
> return -EOPNOTSUPP;
> @@ -552,6 +732,27 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
> pvpmu_finish(current->domain, &pmu_params);
> break;
>
> + case XENPMU_lvtpc_set:
> + xenpmu_data = current->arch.vpmu.xenpmu_data;
> + if ( xenpmu_data == NULL )
> + return -EINVAL;
> + vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
> + break;
> +
> + case XENPMU_flush:
> + curr = current;
> + xenpmu_data = curr->arch.vpmu.xenpmu_data;
> + if ( xenpmu_data == NULL )
> + return -EINVAL;
> + xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
> + vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
> + if ( vpmu_load(curr, 1) )
> + {
> + xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
> + return -EIO;
> + }
> + break ;
> +
> default:
> ret = -EINVAL;
> }
> diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
> index 642a4b7..564d28d 100644
> --- a/xen/include/asm-x86/hvm/vpmu.h
> +++ b/xen/include/asm-x86/hvm/vpmu.h
> @@ -47,8 +47,8 @@ struct arch_vpmu_ops {
> unsigned int *eax, unsigned int *ebx,
> unsigned int *ecx, unsigned int *edx);
> void (*arch_vpmu_destroy)(struct vcpu *v);
> - int (*arch_vpmu_save)(struct vcpu *v);
> - void (*arch_vpmu_load)(struct vcpu *v);
> + int (*arch_vpmu_save)(struct vcpu *v, bool_t to_guest);
> + int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest);
> void (*arch_vpmu_dump)(const struct vcpu *);
> };
>
> @@ -107,7 +107,7 @@ void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
> void vpmu_initialise(struct vcpu *v);
> void vpmu_destroy(struct vcpu *v);
> void vpmu_save(struct vcpu *v);
> -void vpmu_load(struct vcpu *v);
> +int vpmu_load(struct vcpu *v, bool_t verify);
> void vpmu_dump(struct vcpu *v);
>
> extern int acquire_pmu_ownership(int pmu_ownership);
> @@ -126,7 +126,7 @@ static inline void vpmu_switch_from(struct vcpu *prev)
> static inline void vpmu_switch_to(struct vcpu *next)
> {
> if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
> - vpmu_load(next);
> + vpmu_load(next, 0);
> }
>
> #endif /* __ASM_X86_HVM_VPMU_H_*/
> diff --git a/xen/include/public/arch-x86/pmu.h b/xen/include/public/arch-x86/pmu.h
> index c0068f1..de60199 100644
> --- a/xen/include/public/arch-x86/pmu.h
> +++ b/xen/include/public/arch-x86/pmu.h
> @@ -53,6 +53,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_regs_t);
>
> /* PMU flags */
> #define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */
> +#define PMU_SAMPLE_USER (1<<1) /* Sample is from user or kernel mode */
> +#define PMU_SAMPLE_REAL (1<<2) /* Sample is from realmode */
> +#define PMU_SAMPLE_PV (1<<3) /* Sample from a PV guest */
>
> /*
> * Architecture-specific information describing state of the processor at
> diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
> index 57b130b..005a2ef 100644
> --- a/xen/include/public/pmu.h
> +++ b/xen/include/public/pmu.h
> @@ -27,6 +27,8 @@
> #define XENPMU_feature_set 3
> #define XENPMU_init 4
> #define XENPMU_finish 5
> +#define XENPMU_lvtpc_set 6
> +#define XENPMU_flush 7 /* Write cached MSR values to HW */
> /* ` } */
>
> /* Parameters structure for HYPERVISOR_xenpmu_op call */
> diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
> index 6ec942c..e234349 100644
> --- a/xen/include/xsm/dummy.h
> +++ b/xen/include/xsm/dummy.h
> @@ -682,7 +682,9 @@ static XSM_INLINE int xsm_pmu_op (XSM_DEFAULT_ARG struct domain *d, int op)
> case XENPMU_feature_get:
> return xsm_default_action(XSM_PRIV, d, current->domain);
> case XENPMU_init:
> - case XENPMU_finish:
> + case XENPMU_finish:
> + case XENPMU_lvtpc_set:
> + case XENPMU_flush:
> return xsm_default_action(XSM_HOOK, d, current->domain);
> default:
> return -EPERM;
> diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
> index f8faba8..fc0328f 100644
> --- a/xen/xsm/flask/hooks.c
> +++ b/xen/xsm/flask/hooks.c
> @@ -1532,6 +1532,8 @@ static int flask_pmu_op (struct domain *d, unsigned int op)
> XEN2__PMU_CTRL, NULL);
> case XENPMU_init:
> case XENPMU_finish:
> + case XENPMU_lvtpc_set:
> + case XENPMU_flush:
> return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2,
> XEN2__PMU_USE, NULL);
> default:
>
--
Company details: http://ts.fujitsu.com/imprint.html