Xen project Mailing List

[Xen-devel] [PATCH v9 15/20] x86/VPMU: Handle PMU interrupts for PV guests

Add support for handling PMU interrupts for PV guests. VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush hypercall. This allows the guest to access PMU MSR values that are stored in VPMU context which is shared between hypervisor and domain, thus avoiding traps to hypervisor. Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx> Acked-by: Kevin Tian <kevin.tian@xxxxxxxxx> Reviewed-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx> Tested-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx> --- xen/arch/x86/hvm/vpmu.c | 158 +++++++++++++++++++++++++++++++++++++++++++---- xen/include/public/pmu.h | 7 +++ 2 files changed, 153 insertions(+), 12 deletions(-) diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c index 399e195..818f721 100644 --- a/xen/arch/x86/hvm/vpmu.c +++ b/xen/arch/x86/hvm/vpmu.c @@ -80,44 +80,167 @@ static void __init parse_vpmu_param(char *s) void vpmu_lvtpc_update(uint32_t val) { - struct vpmu_struct *vpmu = vcpu_vpmu(current); + struct vcpu *curr = current; + struct vpmu_struct *vpmu = vcpu_vpmu(curr); vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED); - apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); + + /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */ + if ( is_hvm_domain(curr->domain) || + !(vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) ) + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); } int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) { - struct vpmu_struct *vpmu = vcpu_vpmu(current); + struct vcpu *curr = current; + struct vpmu_struct *vpmu = vcpu_vpmu(curr); if ( !(vpmu_mode & XENPMU_MODE_SELF) ) return 0; if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr ) - return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content); + { + int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content); + + /* + * We may have received a PMU interrupt during WRMSR handling + * and since do_wrmsr may load VPMU context we should save + * (and unload) it again. 
+ */ + if ( !is_hvm_domain(curr->domain) && + vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED) ) + { + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + vpmu->arch_vpmu_ops->arch_vpmu_save(curr); + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + } + return ret; + } return 0; } int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) { - struct vpmu_struct *vpmu = vcpu_vpmu(current); + struct vcpu *curr = current; + struct vpmu_struct *vpmu = vcpu_vpmu(curr); if ( !(vpmu_mode & XENPMU_MODE_SELF) ) return 0; if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr ) - return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content); + { + int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content); + + if ( !is_hvm_domain(curr->domain) && + vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED) ) + { + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + vpmu->arch_vpmu_ops->arch_vpmu_save(curr); + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + } + return ret; + } return 0; } +static struct vcpu *choose_hwdom_vcpu(void) +{ + struct vcpu *v; + unsigned idx = smp_processor_id() % hardware_domain->max_vcpus; + + if ( hardware_domain->vcpu == NULL ) + return NULL; + + v = hardware_domain->vcpu[idx]; + + /* + * If index is not populated search downwards the vcpu array until + * a valid vcpu can be found + */ + while ( !v && idx-- ) + v = hardware_domain->vcpu[idx]; + + return v; +} + int vpmu_do_interrupt(struct cpu_user_regs *regs) { - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct vcpu *sampled = current, *sampling; + struct vpmu_struct *vpmu; + + /* dom0 will handle interrupt for special domains (e.g. 
idle domain) */ + if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED ) + { + sampling = choose_hwdom_vcpu(); + if ( !sampling ) + return 0; + } + else + sampling = sampled; + + vpmu = vcpu_vpmu(sampling); + if ( !is_hvm_domain(sampling->domain) ) + { + /* PV(H) guest or dom0 is doing system profiling */ + const struct cpu_user_regs *gregs; + + if ( !vpmu->xenpmu_data ) + return 0; + + if ( vpmu->xenpmu_data->pmu_flags & PMU_CACHED ) + return 1; + + if ( is_pvh_domain(sampled->domain) && + !vpmu->arch_vpmu_ops->do_interrupt(regs) ) + return 0; + + /* PV guest will be reading PMU MSRs from xenpmu_data */ + vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + vpmu->arch_vpmu_ops->arch_vpmu_save(sampling); + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + + /* Store appropriate registers in xenpmu_data */ + if ( is_pv_32bit_domain(sampled->domain) ) + { + /* + * 32-bit dom0 cannot process Xen's addresses (which are 64 bit) + * and therefore we treat it the same way as a non-privileged + * PV 32-bit domain.
+ */ + struct compat_cpu_user_regs *cmp; + + gregs = guest_cpu_user_regs(); + + cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs; + XLAT_cpu_user_regs(cmp, gregs); + } + else if ( !is_hardware_domain(sampled->domain) && + !is_idle_vcpu(sampled) ) + { + /* PV(H) guest */ + gregs = guest_cpu_user_regs(); + vpmu->xenpmu_data->pmu.r.regs = *gregs; + } + else + vpmu->xenpmu_data->pmu.r.regs = *regs; + + vpmu->xenpmu_data->domain_id = sampled->domain->domain_id; + vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id; + vpmu->xenpmu_data->pcpu_id = smp_processor_id(); + + vpmu->xenpmu_data->pmu_flags |= PMU_CACHED; + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED); + vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED; + + send_guest_vcpu_virq(sampling, VIRQ_XENPMU); + + return 1; + } if ( vpmu->arch_vpmu_ops ) { - struct vlapic *vlapic = vcpu_vlapic(v); + struct vlapic *vlapic = vcpu_vlapic(sampling); u32 vlapic_lvtpc; unsigned char int_vec; @@ -131,9 +254,9 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs) int_vec = vlapic_lvtpc & APIC_VECTOR_MASK; if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED ) - vlapic_set_irq(vcpu_vlapic(v), int_vec, 0); + vlapic_set_irq(vcpu_vlapic(sampling), int_vec, 0); else - v->nmi_pending = 1; + sampling->nmi_pending = 1; return 1; } @@ -232,7 +355,9 @@ void vpmu_load(struct vcpu *v) local_irq_enable(); /* Only when PMU is counting, we load PMU context immediately. 
*/ - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) || + (!is_hvm_domain(v->domain) && + vpmu->xenpmu_data->pmu_flags & PMU_CACHED) ) return; if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load ) @@ -436,6 +561,7 @@ vpmu_force_context_switch(XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) { int ret = -EINVAL; + struct vcpu *curr; xen_pmu_params_t pmu_params; switch ( op ) @@ -532,6 +658,14 @@ long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc); ret = 0; break; + + case XENPMU_flush: + curr = current; + curr->arch.vpmu.xenpmu_data->pmu_flags &= ~PMU_CACHED; + vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc); + vpmu_load(curr); + ret = 0; + break; } return ret; diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h index 92f3683..44bf43f 100644 --- a/xen/include/public/pmu.h +++ b/xen/include/public/pmu.h @@ -28,6 +28,7 @@ #define XENPMU_init 4 #define XENPMU_finish 5 #define XENPMU_lvtpc_set 6 +#define XENPMU_flush 7 /* Write cached MSR values to HW */ /* ` } */ /* Parameters structure for HYPERVISOR_xenpmu_op call */ @@ -59,6 +60,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t); */ #define XENPMU_FEATURE_INTEL_BTS 1 +/* + * PMU MSRs are cached in the context so the PV guest doesn't need to trap to + * the hypervisor + */ +#define PMU_CACHED 1 + /* Shared between hypervisor and PV domain */ struct xen_pmu_data { uint32_t domain_id; -- 1.8.1.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.