[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v5 RESEND 15/17] x86/VPMU: NMI-based VPMU support
> From: Boris Ostrovsky [mailto:boris.ostrovsky@xxxxxxxxxx] > Sent: Wednesday, April 23, 2014 8:51 PM > > Add support for using NMIs as PMU interrupts. > > Most of processing is still performed by vpmu_do_interrupt(). However, since > certain operations are not NMI-safe we defer them to a softint that > vpmu_do_interrupt() > will schedule: > * For PV guests that would be send_guest_vcpu_virq() > * For HVM guests it's VLAPIC accesses and hvm_get_segment_register() (the > latter > can be called in privileged profiling mode when the interrupted guest is an > HVM one). > > With send_guest_vcpu_virq() and hvm_get_segment_register() for PV(H) and > vlapic > accesses for HVM moved to softint, the only routines/macros that > vpmu_do_interrupt() > calls in NMI mode are: > * memcpy() > * querying domain type (is_XX_domain()) > * guest_cpu_user_regs() > * XLAT_cpu_user_regs() > * raise_softirq() > * vcpu_vpmu() > * vpmu_ops->arch_vpmu_save() > * vpmu_ops->do_interrupt() (in the future for PVH support) > > The latter two only access PMU MSRs with {rd,wr}msrl() (not the _safe > versions > which would not be NMI-safe). > > Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx> If a SCHEDULE_SOFTIRQ happens before the PMU softirq is serviced, will it cause any race condition when the softirq handler accesses the previously recorded content (e.g. the per-CPU sampled vcpu)? 
Thanks Kevin > --- > xen/arch/x86/hvm/svm/vpmu.c | 1 + > xen/arch/x86/hvm/vmx/vpmu_core2.c | 1 + > xen/arch/x86/hvm/vpmu.c | 169 > ++++++++++++++++++++++++++++++-------- > 3 files changed, 138 insertions(+), 33 deletions(-) > > diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c > index 346aa51..04d3b91 100644 > --- a/xen/arch/x86/hvm/svm/vpmu.c > +++ b/xen/arch/x86/hvm/svm/vpmu.c > @@ -182,6 +182,7 @@ static inline void context_load(struct vcpu *v) > } > } > > +/* Must be NMI-safe */ > static void amd_vpmu_load(struct vcpu *v) > { > struct vpmu_struct *vpmu = vcpu_vpmu(v); > diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c > b/xen/arch/x86/hvm/vmx/vpmu_core2.c > index 9eac418..e214f01 100644 > --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c > +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c > @@ -303,6 +303,7 @@ static inline void __core2_vpmu_save(struct vcpu *v) > rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, > core2_vpmu_cxt->global_status); > } > > +/* Must be NMI-safe */ > static int core2_vpmu_save(struct vcpu *v) > { > struct vpmu_struct *vpmu = vcpu_vpmu(v); > diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c > index dc416f9..cbe8cfd 100644 > --- a/xen/arch/x86/hvm/vpmu.c > +++ b/xen/arch/x86/hvm/vpmu.c > @@ -36,6 +36,7 @@ > #include <asm/hvm/svm/svm.h> > #include <asm/hvm/svm/vmcb.h> > #include <asm/apic.h> > +#include <asm/nmi.h> > #include <public/pmu.h> > > /* > @@ -49,33 +50,57 @@ static void parse_vpmu_param(char *s); > custom_param("vpmu", parse_vpmu_param); > > static DEFINE_PER_CPU(struct vcpu *, last_vcpu); > +static DEFINE_PER_CPU(struct vcpu *, sampled_vcpu); > + > +static uint32_t __read_mostly vpmu_interrupt_type = PMU_APIC_VECTOR; > > static void __init parse_vpmu_param(char *s) > { > - switch ( parse_bool(s) ) > - { > - case 0: > - break; > - default: > - if ( !strcmp(s, "bts") ) > - vpmu_features |= XENPMU_FEATURE_INTEL_BTS; > - else if ( *s ) > + char *ss; > + > + vpmu_mode = XENPMU_MODE_ON; > + if (*s == '\0') > + return; > + > + do { 
> + ss = strchr(s, ','); > + if ( ss ) > + *ss = '\0'; > + > + switch ( parse_bool(s) ) > { > - printk("VPMU: unknown flag: %s - vpmu disabled!\n", s); > + case 0: > + vpmu_mode = XENPMU_MODE_OFF; > + return; > + case -1: > + if ( !strcmp(s, "nmi") ) > + vpmu_interrupt_type = APIC_DM_NMI; > + else if ( !strcmp(s, "bts") ) > + vpmu_features |= XENPMU_FEATURE_INTEL_BTS; > + else if ( !strcmp(s, "priv") ) > + { > + vpmu_mode &= ~XENPMU_MODE_ON; > + vpmu_mode |= XENPMU_MODE_PRIV; > + } > + else > + { > + printk("VPMU: unknown flag: %s - vpmu disabled!\n", s); > + vpmu_mode = XENPMU_MODE_OFF; > + return; > + } > + default: > break; > } > - /* fall through */ > - case 1: > - vpmu_mode = XENPMU_MODE_ON; > - break; > - } > + > + s = ss + 1; > + } while ( ss ); > } > > void vpmu_lvtpc_update(uint32_t val) > { > struct vpmu_struct *vpmu = vcpu_vpmu(current); > > - vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & > APIC_LVT_MASKED); > + vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | (val & > APIC_LVT_MASKED); > > /* Postpone APIC updates for PV guests if PMU interrupt is pending */ > if ( !is_pv_domain(current->domain) || > @@ -84,6 +109,27 @@ void vpmu_lvtpc_update(uint32_t val) > apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); > } > > +static void vpmu_send_nmi(struct vcpu *v) > +{ > + struct vlapic *vlapic; > + u32 vlapic_lvtpc; > + unsigned char int_vec; > + > + ASSERT( is_hvm_vcpu(v) ); > + > + vlapic = vcpu_vlapic(v); > + if ( !is_vlapic_lvtpc_enabled(vlapic) ) > + return; > + > + vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC); > + int_vec = vlapic_lvtpc & APIC_VECTOR_MASK; > + > + if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED ) > + vlapic_set_irq(vcpu_vlapic(v), int_vec, 0); > + else > + v->nmi_pending = 1; > +} > + > int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) > { > struct vpmu_struct *vpmu = vcpu_vpmu(current); > @@ -137,6 +183,7 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t > *msr_content) > return 0; > } > > +/* This routine may be 
called in NMI context */ > int vpmu_do_interrupt(struct cpu_user_regs *regs) > { > struct vcpu *v = current; > @@ -217,9 +264,13 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs) > memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs, > gregs, sizeof(struct cpu_user_regs)); > > - hvm_get_segment_register(current, x86_seg_cs, &cs); > - gregs = &v->arch.vpmu.xenpmu_data->pmu.r.regs; > - gregs->cs = cs.attr.fields.dpl; > + /* This is unsafe in NMI context, we'll do it in softint handler > */ > + if ( !(vpmu_interrupt_type & APIC_DM_NMI ) ) > + { > + hvm_get_segment_register(current, x86_seg_cs, &cs); > + gregs = &v->arch.vpmu.xenpmu_data->pmu.r.regs; > + gregs->cs = cs.attr.fields.dpl; > + } > } > > v->arch.vpmu.xenpmu_data->domain_id = > current->domain->domain_id; > @@ -230,30 +281,30 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs) > apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | > APIC_LVT_MASKED); > vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED; > > - send_guest_vcpu_virq(v, VIRQ_XENPMU); > + if ( vpmu_interrupt_type & APIC_DM_NMI ) > + { > + per_cpu(sampled_vcpu, smp_processor_id()) = current; > + raise_softirq(PMU_SOFTIRQ); > + } > + else > + send_guest_vcpu_virq(v, VIRQ_XENPMU); > > return 1; > } > > if ( vpmu->arch_vpmu_ops ) > { > - struct vlapic *vlapic = vcpu_vlapic(v); > - u32 vlapic_lvtpc; > - unsigned char int_vec; > - > if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ) > return 0; > > - if ( !is_vlapic_lvtpc_enabled(vlapic) ) > - return 1; > - > - vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC); > - int_vec = vlapic_lvtpc & APIC_VECTOR_MASK; > - > - if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == > APIC_MODE_FIXED ) > - vlapic_set_irq(vcpu_vlapic(v), int_vec, 0); > + if ( vpmu_interrupt_type & APIC_DM_NMI ) > + { > + per_cpu(sampled_vcpu, smp_processor_id()) = current; > + raise_softirq(PMU_SOFTIRQ); > + } > else > - v->nmi_pending = 1; > + vpmu_send_nmi(v); > + > return 1; > } > > @@ -301,7 +352,7 @@ void vpmu_save(struct vcpu *v) > if ( 
vpmu->arch_vpmu_ops->arch_vpmu_save(v) ) > vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); > > - apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED); > + apic_write(APIC_LVTPC, vpmu_interrupt_type | APIC_LVT_MASKED); > } > > void vpmu_load(struct vcpu *v) > @@ -450,11 +501,48 @@ static void vpmu_unload_all(void) > } > } > > +/* Process the softirq set by PMU NMI handler */ > +static void pmu_softnmi(void) > +{ > + struct cpu_user_regs *regs; > + struct vcpu *v, *sampled = per_cpu(sampled_vcpu, smp_processor_id()); > + > + if ( (vpmu_mode & XENPMU_MODE_PRIV) || > + (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) ) > + v = dom0->vcpu[smp_processor_id() % dom0->max_vcpus]; > + else > + { > + if ( is_hvm_domain(sampled->domain) ) > + { > + vpmu_send_nmi(sampled); > + return; > + } > + v = sampled; > + } > + > + regs = &v->arch.vpmu.xenpmu_data->pmu.r.regs; > + if ( !is_pv_domain(sampled->domain) ) > + { > + struct segment_register cs; > + > + hvm_get_segment_register(sampled, x86_seg_cs, &cs); > + regs->cs = cs.attr.fields.dpl; > + } > + > + send_guest_vcpu_virq(v, VIRQ_XENPMU); > +} > + > +int pmu_nmi_interrupt(struct cpu_user_regs *regs, int cpu) > +{ > + return vpmu_do_interrupt(regs); > +} > + > static int pvpmu_init(struct domain *d, xen_pmu_params_t *params) > { > struct vcpu *v; > struct page_info *page; > uint64_t gfn = params->d.val; > + static bool_t __read_mostly pvpmu_initted = 0; > > if ( !is_pv_domain(d) ) > return -EINVAL; > @@ -480,6 +568,21 @@ static int pvpmu_init(struct domain *d, > xen_pmu_params_t *params) > return -EINVAL; > } > > + if ( !pvpmu_initted ) > + { > + if (reserve_lapic_nmi() == 0) > + set_nmi_callback(pmu_nmi_interrupt); > + else > + { > + printk("Failed to reserve PMU NMI\n"); > + put_page(page); > + return -EBUSY; > + } > + open_softirq(PMU_SOFTIRQ, pmu_softnmi); > + > + pvpmu_initted = 1; > + } > + > vpmu_initialise(v); > > return 0; > -- > 1.8.3.1 _______________________________________________ Xen-devel mailing list 
Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |