Re: [Xen-devel] [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests
> From: Boris Ostrovsky [mailto:boris.ostrovsky@xxxxxxxxxx]
> Sent: Wednesday, June 10, 2015 11:04 PM
>
> Add support for handling PMU interrupts for PV(H) guests.
>
> VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
> hypercall. This allows the guest to access PMU MSR values that are stored in
> VPMU context which is shared between hypervisor and domain, thus avoiding
> traps to hypervisor.
>
> Since the interrupt handler may now force VPMU context save (i.e. set
> VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
> until now expected this flag to be set only when the counters were stopped.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> Acked-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>

I may need more time to understand the whole interrupt handling for PV(H)
guests. But regarding the VMX-specific changes, I think they are clear:

Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>

> ---
> Changes in v24:
>  * For both AMD and Intel copy guest's MSRs first into context and then verify
>    it (to keep things as read-once by hypervisor)
>  * To make sure that guest did not alter offsets to registers don't copy these
>    values. Store them into shared area during VPMU initialization. Clarify in
>    public header file that they are RO by the guest
>  * Make vpmu_load return arch_vpmu_load()'s error code, not 1.
>
>  xen/arch/x86/hvm/svm/vpmu.c       |  90 ++++++++++---
>  xen/arch/x86/hvm/vmx/vpmu_core2.c | 108 ++++++++++++++-
>  xen/arch/x86/hvm/vpmu.c           | 268 +++++++++++++++++++++++++++++++++-----
>  xen/include/asm-x86/hvm/vpmu.h    |  10 +-
>  xen/include/public/arch-x86/pmu.h |  41 +++++-
>  xen/include/public/pmu.h          |   2 +
>  xen/include/xsm/dummy.h           |   4 +-
>  xen/xsm/flask/hooks.c             |   2 +
>  8 files changed, 464 insertions(+), 61 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 934f1b7..b93d31d 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -46,6 +46,9 @@ static const u32 __read_mostly *counters;
>  static const u32 __read_mostly *ctrls;
>  static bool_t __read_mostly k7_counters_mirrored;
>
> +/* Total size of PMU registers block (copied to/from PV(H) guest) */
> +static unsigned int __read_mostly regs_sz;
> +
>  #define F10H_NUM_COUNTERS 4
>  #define F15H_NUM_COUNTERS 6
>  #define MAX_NUM_COUNTERS F15H_NUM_COUNTERS
> @@ -158,7 +161,7 @@ static void amd_vpmu_init_regs(struct xen_pmu_amd_ctxt *ctxt)
>      unsigned i;
>      uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
>
> -    memset(&ctxt->regs[0], 0, 2 * sizeof(uint64_t) * num_counters);
> +    memset(&ctxt->regs[0], 0, regs_sz);
>      for ( i = 0; i < num_counters; i++ )
>          ctrl_regs[i] = ctrl_rsvd[i];
>  }
> @@ -211,27 +214,65 @@ static inline void context_load(struct vcpu *v)
>      }
>  }
>
> -static void amd_vpmu_load(struct vcpu *v)
> +static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> -    struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
> -    uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
> +    struct xen_pmu_amd_ctxt *ctxt;
> +    uint64_t *ctrl_regs;
> +    unsigned int i;
>
>      vpmu_reset(vpmu, VPMU_FROZEN);
>
> -    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> +    if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
>      {
> -        unsigned int i;
> +        ctxt = vpmu->context;
> +        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
>
>          for ( i = 0; i < num_counters; i++ )
>              wrmsrl(ctrls[i], ctrl_regs[i]);
>
> -        return;
> +        return 0;
> +    }
> +
> +    if ( from_guest )
> +    {
> +        unsigned int num_enabled = 0;
>
+ struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd; > + > + ASSERT(!is_hvm_vcpu(v)); > + > + ctxt = vpmu->context; > + ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); > + > + memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz); > + > + for ( i = 0; i < num_counters; i++ ) > + { > + if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] ) > + { > + /* > + * Not necessary to re-init context since we should never > load > + * it until guest provides valid values. But just to be safe. > + */ > + amd_vpmu_init_regs(ctxt); > + return -EINVAL; > + } > + > + if ( is_pmu_enabled(ctrl_regs[i]) ) > + num_enabled++; > + } > + > + if ( num_enabled ) > + vpmu_set(vpmu, VPMU_RUNNING); > + else > + vpmu_reset(vpmu, VPMU_RUNNING); > } > > vpmu_set(vpmu, VPMU_CONTEXT_LOADED); > > context_load(v); > + > + return 0; > } > > static inline void context_save(struct vcpu *v) > @@ -246,22 +287,17 @@ static inline void context_save(struct vcpu *v) > rdmsrl(counters[i], counter_regs[i]); > } > > -static int amd_vpmu_save(struct vcpu *v) > +static int amd_vpmu_save(struct vcpu *v, bool_t to_guest) > { > struct vpmu_struct *vpmu = vcpu_vpmu(v); > unsigned int i; > > - /* > - * Stop the counters. If we came here via vpmu_save_force (i.e. > - * when VPMU_CONTEXT_SAVE is set) counters are already stopped. > - */ > + for ( i = 0; i < num_counters; i++ ) > + wrmsrl(ctrls[i], 0); > + > if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) > { > vpmu_set(vpmu, VPMU_FROZEN); > - > - for ( i = 0; i < num_counters; i++ ) > - wrmsrl(ctrls[i], 0); > - > return 0; > } > > @@ -274,6 +310,16 @@ static int amd_vpmu_save(struct vcpu *v) > has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) ) > amd_vpmu_unset_msr_bitmap(v); > > + if ( to_guest ) > + { > + struct xen_pmu_amd_ctxt *guest_ctxt, *ctxt; > + > + ASSERT(!is_hvm_vcpu(v)); > + ctxt = vpmu->context; > + guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd; > + memcpy(&guest_ctxt->regs[0], &ctxt->regs[0], regs_sz); > + } > + > return 1; > } > > @@ -461,8 +507,7 @@ int svm_vpmu_initialise(struct vcpu *v) > if ( !counters ) > return -EINVAL; > > - ctxt = xmalloc_bytes(sizeof(*ctxt) + > - 2 * sizeof(uint64_t) * num_counters); > + ctxt = xmalloc_bytes(sizeof(*ctxt) + regs_sz); > if ( !ctxt ) > { > printk(XENLOG_G_WARNING "Insufficient memory for PMU, " > @@ -478,6 +523,13 @@ int svm_vpmu_initialise(struct vcpu *v) > vpmu->context = ctxt; > vpmu->priv_context = NULL; > > + if ( !is_hvm_vcpu(v) ) > + { > + /* Copy register offsets to shared area */ > + ASSERT(vpmu->xenpmu_data); > + memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt, sizeof(*ctxt)); > + } > + > vpmu->arch_vpmu_ops = &amd_vpmu_ops; > > vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); > @@ -527,6 +579,8 @@ int __init amd_vpmu_init(void) > ctrl_rsvd[i] &= CTRL_RSVD_MASK; > } > > + regs_sz = 2 * sizeof(uint64_t) * num_counters; > + > return 0; > } > > diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c > b/xen/arch/x86/hvm/vmx/vpmu_core2.c > index 166277a..1206e90 100644 > --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c > +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c > @@ -90,6 +90,13 @@ static unsigned int __read_mostly arch_pmc_cnt, > fixed_pmc_cnt; > static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask; > static uint64_t __read_mostly global_ovf_ctrl_mask; > > +/* Total size of PMU registers block (copied to/from PV(H) guest) */ > +static unsigned int __read_mostly regs_sz; > +/* Offset into context of the beginning of PMU register block */ > +static const unsigned int regs_off = > + sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) + > + 
sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters); > + > /* > * QUIRK to workaround an issue on various family 6 cpus. > * The issue leads to endless PMC interrupt loops on the processor. > @@ -312,7 +319,7 @@ static inline void __core2_vpmu_save(struct vcpu *v) > rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status); > } > > -static int core2_vpmu_save(struct vcpu *v) > +static int core2_vpmu_save(struct vcpu *v, bool_t to_guest) > { > struct vpmu_struct *vpmu = vcpu_vpmu(v); > > @@ -329,6 +336,13 @@ static int core2_vpmu_save(struct vcpu *v) > has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap ) > core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); > > + if ( to_guest ) > + { > + ASSERT(!is_hvm_vcpu(v)); > + memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off, > + vpmu->context + regs_off, regs_sz); > + } > + > return 1; > } > > @@ -365,16 +379,93 @@ static inline void __core2_vpmu_load(struct vcpu *v) > } > } > > -static void core2_vpmu_load(struct vcpu *v) > +static int core2_vpmu_verify(struct vcpu *v) > +{ > + unsigned int i; > + struct vpmu_struct *vpmu = vcpu_vpmu(v); > + struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context; > + uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, > fixed_counters); > + struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = > + vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); > + uint64_t fixed_ctrl; > + uint64_t *priv_context = vpmu->priv_context; > + uint64_t enabled_cntrs = 0; > + > + if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask ) > + return -EINVAL; > + > + fixed_ctrl = core2_vpmu_cxt->fixed_ctrl; > + if ( fixed_ctrl & fixed_ctrl_mask ) > + return -EINVAL; > + > + for ( i = 0; i < fixed_pmc_cnt; i++ ) > + { > + if ( fixed_counters[i] & fixed_counters_mask ) > + return -EINVAL; > + if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 ) > + enabled_cntrs |= (1ULL << i); > + } > + enabled_cntrs <<= 32; > + > + for ( i = 0; i < arch_pmc_cnt; i++ ) > + { > + uint64_t control = xen_pmu_cntr_pair[i].control; > + > + if ( control & ARCH_CTRL_MASK ) > + return -EINVAL; > + if ( control & ARCH_CNTR_ENABLED ) > + enabled_cntrs |= (1ULL << i); > + } > + > + if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) && > + !is_canonical_address(core2_vpmu_cxt->ds_area) ) > + return -EINVAL; > + > + if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) || > + (core2_vpmu_cxt->ds_area != 0) ) > + vpmu_set(vpmu, VPMU_RUNNING); > + else > + vpmu_reset(vpmu, VPMU_RUNNING); > + > + *priv_context = enabled_cntrs; > + > + return 0; > +} > + > +static int core2_vpmu_load(struct vcpu *v, bool_t from_guest) > { > struct vpmu_struct *vpmu = vcpu_vpmu(v); > > if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) > - return; > + return 0; > + > + if ( from_guest ) > + { > + int ret; > + > + ASSERT(!is_hvm_vcpu(v)); > + > + memcpy(vpmu->context + regs_off, > + (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off, > + regs_sz); > + > + ret = core2_vpmu_verify(v); > + if ( ret ) > + { > + /* > + * Not necessary since we should never load the context until > + * guest provides valid values. But just to be safe. 
> + */ > + memset(vpmu->context + regs_off, 0, regs_sz); > + return ret; > + } > + } > > vpmu_set(vpmu, VPMU_CONTEXT_LOADED); > > __core2_vpmu_load(v); > + > + return 0; > } > > static int core2_vpmu_alloc_resource(struct vcpu *v) > @@ -412,6 +503,13 @@ static int core2_vpmu_alloc_resource(struct vcpu *v) > vpmu->context = core2_vpmu_cxt; > vpmu->priv_context = p; > > + if ( !is_hvm_vcpu(v) ) > + { > + /* Copy fixed/arch register offsets to shared area */ > + ASSERT(vpmu->xenpmu_data); > + memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off); > + } > + > vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); > > return 1; > @@ -923,6 +1021,10 @@ int __init core2_vpmu_init(void) > (((1ULL << fixed_pmc_cnt) - 1) << 32) | > ((1ULL << arch_pmc_cnt) - 1)); > > + regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) + > + sizeof(uint64_t) * fixed_pmc_cnt + > + sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt; > + > check_pmc_quirk(); > > if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt + > diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c > index 07fa368..37e541b 100644 > --- a/xen/arch/x86/hvm/vpmu.c > +++ b/xen/arch/x86/hvm/vpmu.c > @@ -85,31 +85,56 @@ static void __init parse_vpmu_param(char *s) > void vpmu_lvtpc_update(uint32_t val) > { > struct vpmu_struct *vpmu; > + struct vcpu *curr = current; > > - if ( vpmu_mode == XENPMU_MODE_OFF ) > + if ( likely(vpmu_mode == XENPMU_MODE_OFF) ) > return; > > - vpmu = vcpu_vpmu(current); > + vpmu = vcpu_vpmu(curr); > > vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED); > - apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); > + > + /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */ > + if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data || > + !vpmu_is_set(vpmu, VPMU_CACHED) ) > + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); > } > > int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported) > { > - struct vpmu_struct *vpmu = vcpu_vpmu(current); > + struct vcpu *curr = current; > + struct vpmu_struct *vpmu; > > if ( vpmu_mode == XENPMU_MODE_OFF ) > return 0; > > + vpmu = vcpu_vpmu(curr); > if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr ) > - return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported); > + { > + int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported); > + > + /* > + * We may have received a PMU interrupt during WRMSR handling > + * and since do_wrmsr may load VPMU context we should save > + * (and unload) it again. 
> + */ > + if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data && > + vpmu_is_set(vpmu, VPMU_CACHED) ) > + { > + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); > + vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0); > + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); > + } > + return ret; > + } > + > return 0; > } > > int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) > { > - struct vpmu_struct *vpmu = vcpu_vpmu(current); > + struct vcpu *curr = current; > + struct vpmu_struct *vpmu; > > if ( vpmu_mode == XENPMU_MODE_OFF ) > { > @@ -117,39 +142,184 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t > *msr_content) > return 0; > } > > + vpmu = vcpu_vpmu(curr); > if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr ) > - return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content); > + { > + int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content); > + > + if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data && > + vpmu_is_set(vpmu, VPMU_CACHED) ) > + { > + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); > + vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0); > + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); > + } > + return ret; > + } > else > *msr_content = 0; > > return 0; > } > > +static inline struct vcpu *choose_hwdom_vcpu(void) > +{ > + unsigned idx; > + > + if ( hardware_domain->max_vcpus == 0 ) > + return NULL; > + > + idx = smp_processor_id() % hardware_domain->max_vcpus; > + > + return hardware_domain->vcpu[idx]; > +} > + > void vpmu_do_interrupt(struct cpu_user_regs *regs) > { > - struct vcpu *v = current; > - struct vpmu_struct *vpmu = vcpu_vpmu(v); > + struct vcpu *sampled = current, *sampling; > + struct vpmu_struct *vpmu; > + struct vlapic *vlapic; > + u32 vlapic_lvtpc; > > - if ( vpmu->arch_vpmu_ops ) > + /* dom0 will handle interrupt for special domains (e.g. idle domain) */ > + if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED ) > + { > + sampling = choose_hwdom_vcpu(); > + if ( !sampling ) > + return; > + } > + else > + sampling = sampled; > + > + vpmu = vcpu_vpmu(sampling); > + if ( !vpmu->arch_vpmu_ops ) > + return; > + > + /* PV(H) guest */ > + if ( !is_hvm_vcpu(sampling) ) > { > - struct vlapic *vlapic = vcpu_vlapic(v); > - u32 vlapic_lvtpc; > + const struct cpu_user_regs *cur_regs; > + uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags; > + domid_t domid = DOMID_SELF; > + > + if ( !vpmu->xenpmu_data ) > + return; > + > + if ( is_pvh_vcpu(sampling) && > + !vpmu->arch_vpmu_ops->do_interrupt(regs) ) > + return; > > - if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) || > - !is_vlapic_lvtpc_enabled(vlapic) ) > + if ( vpmu_is_set(vpmu, VPMU_CACHED) ) > return; > > - vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC); > + /* PV guest will be reading PMU MSRs from xenpmu_data */ > + vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); > + vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1); > + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); > > - switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) ) > + if ( has_hvm_container_vcpu(sampled) ) > + *flags = 0; > + else > + *flags = PMU_SAMPLE_PV; > + > + /* Store appropriate registers in xenpmu_data */ > + /* FIXME: 32-bit PVH should go here as well */ > + if ( is_pv_32bit_vcpu(sampling) ) > { > - case APIC_MODE_FIXED: > - vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0); > - break; > - case APIC_MODE_NMI: > - v->nmi_pending = 1; > - break; > + /* > + * 32-bit dom0 cannot process Xen's addresses (which are 64 bit) > + * and therefore we treat it the same way as a non-privileged > + * PV 32-bit domain. 
> + */ > + struct compat_pmu_regs *cmp; > + > + cur_regs = guest_cpu_user_regs(); > + > + cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs; > + cmp->ip = cur_regs->rip; > + cmp->sp = cur_regs->rsp; > + cmp->flags = cur_regs->eflags; > + cmp->ss = cur_regs->ss; > + cmp->cs = cur_regs->cs; > + if ( (cmp->cs & 3) > 1 ) > + *flags |= PMU_SAMPLE_USER; > + } > + else > + { > + struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs; > + > + if ( (vpmu_mode & XENPMU_MODE_SELF) ) > + cur_regs = guest_cpu_user_regs(); > + else if ( !guest_mode(regs) && > is_hardware_domain(sampling->domain) ) > + { > + cur_regs = regs; > + domid = DOMID_XEN; > + } > + else > + cur_regs = guest_cpu_user_regs(); > + > + r->ip = cur_regs->rip; > + r->sp = cur_regs->rsp; > + r->flags = cur_regs->eflags; > + > + if ( !has_hvm_container_vcpu(sampled) ) > + { > + r->ss = cur_regs->ss; > + r->cs = cur_regs->cs; > + if ( !(sampled->arch.flags & TF_kernel_mode) ) > + *flags |= PMU_SAMPLE_USER; > + } > + else > + { > + struct segment_register seg; > + > + hvm_get_segment_register(sampled, x86_seg_cs, &seg); > + r->cs = seg.sel; > + hvm_get_segment_register(sampled, x86_seg_ss, &seg); > + r->ss = seg.sel; > + r->cpl = seg.attr.fields.dpl; > + if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ) > + *flags |= PMU_SAMPLE_REAL; > + } > } > + > + vpmu->xenpmu_data->domain_id = domid; > + vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id; > + if ( is_hardware_domain(sampling->domain) ) > + vpmu->xenpmu_data->pcpu_id = smp_processor_id(); > + else > + vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id; > + > + vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED; > + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); > + *flags |= PMU_CACHED; > + vpmu_set(vpmu, VPMU_CACHED); > + > + send_guest_vcpu_virq(sampling, VIRQ_XENPMU); > + > + return; > + } > + > + /* HVM guests */ > + vlapic = vcpu_vlapic(sampling); > + > + /* We don't support (yet) HVM dom0 */ > + ASSERT(sampling == sampled); > + > + if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) || > + !is_vlapic_lvtpc_enabled(vlapic) ) > + return; > + > + vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC); > + > + switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) ) > + { > + case APIC_MODE_FIXED: > + vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0); > + break; > + case APIC_MODE_NMI: > + sampling->nmi_pending = 1; > + break; > } > } > > @@ -174,7 +344,7 @@ static void vpmu_save_force(void *arg) > vpmu_set(vpmu, VPMU_CONTEXT_SAVE); > > if ( vpmu->arch_vpmu_ops ) > - (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v); > + (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0); > > vpmu_reset(vpmu, VPMU_CONTEXT_SAVE); > > @@ -193,20 +363,20 @@ void vpmu_save(struct vcpu *v) > per_cpu(last_vcpu, pcpu) = v; > > if ( vpmu->arch_vpmu_ops ) > - if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) ) > + if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) ) > vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); > > apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED); > } > > -void vpmu_load(struct vcpu *v) > +int vpmu_load(struct vcpu *v, bool_t from_guest) > { > struct vpmu_struct *vpmu = vcpu_vpmu(v); > int pcpu = smp_processor_id(); > struct vcpu *prev = NULL; > > if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) > - return; > + return 0; > > /* First time this VCPU is running here */ > if ( vpmu->last_pcpu != pcpu ) > @@ -245,15 +415,26 @@ void vpmu_load(struct vcpu *v) > local_irq_enable(); > > /* Only when PMU is counting, we load PMU context immediately. 
*/ > - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) > - return; > + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) || > + (!is_hvm_vcpu(vpmu_vcpu(vpmu)) && vpmu_is_set(vpmu, VPMU_CACHED)) ) > + return 0; > > if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load ) > { > + int ret; > + > apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc); > /* Arch code needs to set VPMU_CONTEXT_LOADED */ > - vpmu->arch_vpmu_ops->arch_vpmu_load(v); > + ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest); > + if ( ret ) > + { > + apic_write_around(APIC_LVTPC, > + vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED); > + return ret; > + } > } > + > + return 0; > } > > void vpmu_initialise(struct vcpu *v) > @@ -265,6 +446,8 @@ void vpmu_initialise(struct vcpu *v) > > BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ); > BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ); > + BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ); > + BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ); > > ASSERT(!vpmu->flags && !vpmu->context); > > @@ -449,7 +632,10 @@ void vpmu_dump(struct vcpu *v) > long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) > arg) > { > int ret; > + struct vcpu *curr; > struct xen_pmu_params pmu_params = {.val = 0}; > + struct xen_pmu_data *xenpmu_data; > + struct vpmu_struct *vpmu; > > if ( !opt_vpmu_enabled ) > return -EOPNOTSUPP; > @@ -552,6 +738,30 @@ long do_xenpmu_op(unsigned int op, > XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) > pvpmu_finish(current->domain, &pmu_params); > break; > > + case XENPMU_lvtpc_set: > + xenpmu_data = current->arch.vpmu.xenpmu_data; > + if ( xenpmu_data == NULL ) > + return -EINVAL; > + vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc); > + break; > + > + case XENPMU_flush: > + curr = current; > + vpmu = vcpu_vpmu(curr); > + xenpmu_data = curr->arch.vpmu.xenpmu_data; > + if ( xenpmu_data == NULL ) > + return -EINVAL; > + xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED; > + vpmu_reset(vpmu, VPMU_CACHED); > + vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc); > + if ( vpmu_load(curr, 1) ) > + { > + xenpmu_data->pmu.pmu_flags |= PMU_CACHED; > + vpmu_set(vpmu, VPMU_CACHED); > + return -EIO; > + } > + break ; > + > default: > ret = -EINVAL; > } > diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h > index 642a4b7..f486d2f 100644 > --- a/xen/include/asm-x86/hvm/vpmu.h > +++ b/xen/include/asm-x86/hvm/vpmu.h > @@ -47,8 +47,8 @@ struct arch_vpmu_ops { > unsigned int *eax, unsigned int *ebx, > unsigned int *ecx, unsigned int *edx); > void (*arch_vpmu_destroy)(struct vcpu *v); > - int (*arch_vpmu_save)(struct vcpu *v); > - void (*arch_vpmu_load)(struct vcpu *v); > + int (*arch_vpmu_save)(struct vcpu *v, bool_t to_guest); > + int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest); > void (*arch_vpmu_dump)(const struct vcpu *); > }; > > @@ -75,6 +75,8 @@ struct vpmu_struct { > #define VPMU_CONTEXT_SAVE 0x8 /* Force context save */ > #define VPMU_FROZEN 0x10 /* Stop counters while > VCPU is not > running */ > #define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20 > +/* PV(H) guests: VPMU registers are accessed by guest from shared page */ > +#define VPMU_CACHED 0x40 > > static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask) > { > @@ -107,7 +109,7 @@ void vpmu_do_cpuid(unsigned int input, unsigned int *eax, > unsigned int *ebx, > void vpmu_initialise(struct vcpu *v); > void vpmu_destroy(struct vcpu *v); > void vpmu_save(struct vcpu *v); > -void vpmu_load(struct vcpu *v); > +int 
vpmu_load(struct vcpu *v, bool_t from_guest); > void vpmu_dump(struct vcpu *v); > > extern int acquire_pmu_ownership(int pmu_ownership); > @@ -126,7 +128,7 @@ static inline void vpmu_switch_from(struct vcpu *prev) > static inline void vpmu_switch_to(struct vcpu *next) > { > if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) ) > - vpmu_load(next); > + vpmu_load(next, 0); > } > > #endif /* __ASM_X86_HVM_VPMU_H_*/ > diff --git a/xen/include/public/arch-x86/pmu.h > b/xen/include/public/arch-x86/pmu.h > index 4351115..1a53888 100644 > --- a/xen/include/public/arch-x86/pmu.h > +++ b/xen/include/public/arch-x86/pmu.h > @@ -5,7 +5,10 @@ > > /* AMD PMU registers and structures */ > struct xen_pmu_amd_ctxt { > - /* Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd) > */ > + /* > + * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd). > + * For PV(H) guests these fields are RO. > + */ > uint32_t counters; > uint32_t ctrls; > > @@ -30,7 +33,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_cntr_pair_t); > struct xen_pmu_intel_ctxt { > /* > * Offsets to fixed and architectural counter MSRs (relative to > - * xen_pmu_arch.c.intel) > + * xen_pmu_arch.c.intel). > + * For PV(H) guests these fields are RO. > */ > uint32_t fixed_counters; > uint32_t arch_counters; > @@ -69,6 +73,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_regs_t); > > /* PMU flags */ > #define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */ > +#define PMU_SAMPLE_USER (1<<1) /* Sample is from user or kernel mode */ > +#define PMU_SAMPLE_REAL (1<<2) /* Sample is from realmode */ > +#define PMU_SAMPLE_PV (1<<3) /* Sample from a PV guest */ > > /* > * Architecture-specific information describing state of the processor at > @@ -93,12 +100,34 @@ struct xen_pmu_arch { > /* WO for hypervisor, RO for guest */ > uint64_t pmu_flags; > > - /* Placeholder for APIC LVTPC register */ > - uint64_t lvtpc_pad; > + /* > + * APIC LVTPC register. > + * RW for both hypervisor and guest. > + * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware > + * during XENPMU_flush or XENPMU_lvtpc_set. > + */ > + union { > + uint32_t lapic_lvtpc; > + uint64_t pad; > + } l; > + > + /* > + * Vendor-specific PMU registers. > + * RW for both hypervisor and guest (see exceptions above). 
> + * Guest's updates to this field are verified and then loaded by the > + * hypervisor into hardware during XENPMU_flush > + */ > + union { > + struct xen_pmu_amd_ctxt amd; > + struct xen_pmu_intel_ctxt intel; > > - /* Placeholder for vendor-specific PMU registers */ > + /* > + * Padding for contexts (fixed parts only, does not include MSR banks > + * that are specified by offsets) > + */ > #define XENPMU_CTXT_PAD_SZ 128 > - uint64_t pmu_regs_pad[XENPMU_CTXT_PAD_SZ / 8]; > + uint8_t pad[XENPMU_CTXT_PAD_SZ]; > + } c; > }; > typedef struct xen_pmu_arch xen_pmu_arch_t; > DEFINE_XEN_GUEST_HANDLE(xen_pmu_arch_t); > diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h > index e6307b5..7a45783 100644 > --- a/xen/include/public/pmu.h > +++ b/xen/include/public/pmu.h > @@ -27,6 +27,8 @@ > #define XENPMU_feature_set 3 > #define XENPMU_init 4 > #define XENPMU_finish 5 > +#define XENPMU_lvtpc_set 6 > +#define XENPMU_flush 7 /* Write cached MSR values to HW */ > /* ` } */ > > /* Parameters structure for HYPERVISOR_xenpmu_op call */ > diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h > index 6456f72..37e6aa3 100644 > --- a/xen/include/xsm/dummy.h > +++ b/xen/include/xsm/dummy.h > @@ -705,7 +705,9 @@ static XSM_INLINE int xsm_pmu_op (XSM_DEFAULT_ARG struct > domain *d, int op) > case XENPMU_feature_get: > return xsm_default_action(XSM_PRIV, d, current->domain); > case XENPMU_init: > - case XENPMU_finish: > + case XENPMU_finish: > + case XENPMU_lvtpc_set: > + case XENPMU_flush: > return xsm_default_action(XSM_HOOK, d, current->domain); > default: > return -EPERM; > diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c > index aefcbda..4aa3e79 100644 > --- a/xen/xsm/flask/hooks.c > +++ b/xen/xsm/flask/hooks.c > @@ -1594,6 +1594,8 @@ static int flask_pmu_op (struct domain *d, unsigned int > op) > XEN2__PMU_CTRL, NULL); > case XENPMU_init: > case XENPMU_finish: > + case XENPMU_lvtpc_set: > + case XENPMU_flush: > return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2, > XEN2__PMU_USE, NULL); > default: > -- > 1.8.1.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel