|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v12 for-xen-4.5 19/20] x86/VPMU: NMI-based VPMU support
Add support for using NMIs as PMU interrupts to allow profiling hypervisor
when interrupts are disabled.
Most of processing is still performed by vpmu_do_interrupt(). However, since
certain operations are not NMI-safe we defer them to a softint that
vpmu_do_interrupt()
will schedule:
* For PV guests that would be send_guest_vcpu_virq()
* For HVM guests it's VLAPIC accesses and hvm_get_segment_register() (the later
can be called in privileged profiling mode when the interrupted guest is an HVM
one).
With send_guest_vcpu_virq() and hvm_get_segment_register() for PV(H) and vlapic
accesses for HVM moved to sofint, the only routines/macros that
vpmu_do_interrupt()
calls in NMI mode are:
* memcpy()
* querying domain type (is_XX_domain())
* guest_cpu_user_regs()
* XLAT_cpu_user_regs()
* raise_softirq()
* vcpu_vpmu()
* vpmu_ops->arch_vpmu_save()
* vpmu_ops->do_interrupt()
The latter two only access PMU MSRs with {rd,wr}msrl() (not the _safe versions
which would not be NMI-safe).
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
docs/misc/xen-command-line.markdown | 8 +-
xen/arch/x86/hvm/svm/vpmu.c | 3 +-
xen/arch/x86/hvm/vmx/vpmu_core2.c | 3 +-
xen/arch/x86/hvm/vpmu.c | 188 +++++++++++++++++++++++++++++-------
xen/include/asm-x86/hvm/vpmu.h | 4 +-
xen/include/xen/softirq.h | 1 +
6 files changed, 166 insertions(+), 41 deletions(-)
diff --git a/docs/misc/xen-command-line.markdown
b/docs/misc/xen-command-line.markdown
index af93e17..7c608dd 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1227,11 +1227,11 @@ Use Virtual Processor ID support if available. This
prevents the need for TLB
flushes on VM entry and exit, increasing performance.
### vpmu
-> `= ( bts )`
+> `= ( [nmi,][bts] )`
> Default: `off`
-Switch on the virtualized performance monitoring unit for HVM guests.
+Switch on the virtualized performance monitoring unit.
If the current cpu isn't supported a message like
'VPMU: Initialization failed. ...'
@@ -1243,6 +1243,10 @@ wrong behaviour (see handle\_pmc\_quirk()).
If 'vpmu=bts' is specified the virtualisation of the Branch Trace Store (BTS)
feature is switched on on Intel processors supporting this feature.
+If 'vpmu=nmi' is specified the PMU interrupt will cause an NMI instead of a
+regular vector interrupt (which is the default). This can be useful for
sampling
+hypervisor code that is executed with interrupts disabled.
+
*Warning:*
As the BTS virtualisation is not 100% safe and because of the nehalem quirk
don't use the vpmu flag on production systems with Intel cpus!
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 055b21c..9db0559 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -169,7 +169,7 @@ static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
msr_bitmap_off(vpmu);
}
-static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
+static int amd_vpmu_do_interrupt(const struct cpu_user_regs *regs)
{
return 1;
}
@@ -224,6 +224,7 @@ static inline void context_save(struct vcpu *v)
rdmsrl(counters[i], counter_regs[i]);
}
+/* Must be NMI-safe */
static int amd_vpmu_save(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c
b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index 0f605bd..2c84194 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -300,6 +300,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
}
+/* Must be NMI-safe */
static int core2_vpmu_save(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
@@ -711,7 +712,7 @@ static void core2_vpmu_dump(const struct vcpu *v)
}
}
-static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
+static int core2_vpmu_do_interrupt(const struct cpu_user_regs *regs)
{
struct vcpu *v = current;
u64 msr_content;
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index fae778e..4b34109 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -34,6 +34,7 @@
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
#include <public/pmu.h>
#include <xen/tasklet.h>
#include <xsm/xsm.h>
@@ -54,36 +55,57 @@ uint64_t __read_mostly vpmu_features = 0;
static void parse_vpmu_param(char *s);
custom_param("vpmu", parse_vpmu_param);
+static void pmu_softnmi(void);
+
static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
+static DEFINE_PER_CPU(struct vcpu *, sampled_vcpu);
+
+static uint32_t __read_mostly vpmu_interrupt_type = PMU_APIC_VECTOR;
static void __init parse_vpmu_param(char *s)
{
- switch ( parse_bool(s) )
- {
- case 0:
- break;
- default:
- if ( !strcmp(s, "bts") )
- vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
- else if ( *s )
+ char *ss;
+
+ vpmu_mode = XENPMU_MODE_SELF;
+ if (*s == '\0')
+ return;
+
+ do {
+ ss = strchr(s, ',');
+ if ( ss )
+ *ss = '\0';
+
+ switch ( parse_bool(s) )
{
- printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
- break;
+ case 0:
+ vpmu_mode = XENPMU_MODE_OFF;
+ /* FALLTHROUGH */
+ case 1:
+ return;
+ default:
+ if ( !strcmp(s, "nmi") )
+ vpmu_interrupt_type = APIC_DM_NMI;
+ else if ( !strcmp(s, "bts") )
+ vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
+ else
+ {
+ printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
+ vpmu_mode = XENPMU_MODE_OFF;
+ return;
+ }
}
- /* fall through */
- case 1:
- /* Default VPMU mode */
- vpmu_mode = XENPMU_MODE_SELF;
- break;
- }
+
+ s = ss + 1;
+ } while ( ss );
}
+
void vpmu_lvtpc_update(uint32_t val)
{
struct vcpu *curr = current;
struct vpmu_struct *vpmu = vcpu_vpmu(curr);
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
+ vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | (val & APIC_LVT_MASKED);
/* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
if ( is_hvm_domain(curr->domain) ||
@@ -91,6 +113,24 @@ void vpmu_lvtpc_update(uint32_t val)
apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
}
+static void vpmu_send_interrupt(struct vcpu *v)
+{
+ struct vlapic *vlapic;
+ u32 vlapic_lvtpc;
+
+ ASSERT( is_hvm_vcpu(v) );
+
+ vlapic = vcpu_vlapic(v);
+ if ( !is_vlapic_lvtpc_enabled(vlapic) )
+ return;
+
+ vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+ if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
+ vlapic_set_irq(vcpu_vlapic(v), vlapic_lvtpc & APIC_VECTOR_MASK, 0);
+ else
+ v->nmi_pending = 1;
+}
+
int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
uint64_t supported, bool_t is_write)
{
@@ -151,7 +191,8 @@ static struct vcpu *choose_hwdom_vcpu(void)
return v;
}
-int vpmu_do_interrupt(struct cpu_user_regs *regs)
+/* This routine may be called in NMI context */
+int vpmu_do_interrupt(const struct cpu_user_regs *regs)
{
struct vcpu *sampled = current, *sampling;
struct vpmu_struct *vpmu;
@@ -232,8 +273,9 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
if ( sampled->arch.flags & TF_kernel_mode )
r->cs &= ~3;
}
- else
+ else if ( !(vpmu_interrupt_type & APIC_DM_NMI) )
{
+ /* Unsafe in NMI context, defer to softint later */
struct segment_register seg_cs;
hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
@@ -251,30 +293,30 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
- send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
+ if ( vpmu_interrupt_type & APIC_DM_NMI )
+ {
+ this_cpu(sampled_vcpu) = sampled;
+ raise_softirq(PMU_SOFTIRQ);
+ }
+ else
+ send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
return 1;
}
if ( vpmu->arch_vpmu_ops )
{
- struct vlapic *vlapic = vcpu_vlapic(sampling);
- u32 vlapic_lvtpc;
- unsigned char int_vec;
-
if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) )
return 0;
- if ( !is_vlapic_lvtpc_enabled(vlapic) )
- return 1;
-
- vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
- int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
-
- if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
- vlapic_set_irq(vcpu_vlapic(sampling), int_vec, 0);
+ if ( vpmu_interrupt_type & APIC_DM_NMI )
+ {
+ this_cpu(sampled_vcpu) = sampled;
+ raise_softirq(PMU_SOFTIRQ);
+ }
else
- sampling->nmi_pending = 1;
+ vpmu_send_interrupt(sampling);
+
return 1;
}
@@ -307,6 +349,9 @@ static void vpmu_save_force(void *arg)
vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
per_cpu(last_vcpu, smp_processor_id()) = NULL;
+
+ /* Make sure there are no outstanding PMU NMIs */
+ pmu_softnmi();
}
void vpmu_save(struct vcpu *v)
@@ -324,7 +369,10 @@ void vpmu_save(struct vcpu *v)
if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
- apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+ apic_write(APIC_LVTPC, vpmu_interrupt_type | APIC_LVT_MASKED);
+
+ /* Make sure there are no outstanding PMU NMIs */
+ pmu_softnmi();
}
void vpmu_load(struct vcpu *v)
@@ -378,6 +426,9 @@ void vpmu_load(struct vcpu *v)
(vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) )
return;
+ /* Make sure there are no outstanding PMU NMIs from previous vcpu */
+ pmu_softnmi();
+
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
{
apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
@@ -400,7 +451,7 @@ void vpmu_initialise(struct vcpu *v)
vpmu_destroy(v);
vpmu_clear(vpmu);
vpmu->context = NULL;
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
+ vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | APIC_LVT_MASKED;
switch ( vendor )
{
@@ -436,11 +487,57 @@ void vpmu_destroy(struct vcpu *v)
}
}
+/* Process the softirq set by PMU NMI handler */
+static void pmu_softnmi(void)
+{
+ unsigned int cpu = smp_processor_id();
+ struct vcpu *v, *sampled = per_cpu(sampled_vcpu, cpu);
+
+ if ( sampled == NULL )
+ return;
+
+ per_cpu(sampled_vcpu, cpu) = NULL;
+
+ if ( (vpmu_mode & XENPMU_MODE_ALL) ||
+ (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
+ {
+ v = choose_hwdom_vcpu();
+ if ( !v )
+ return;
+ }
+ else
+ {
+ if ( is_hvm_domain(sampled->domain) )
+ {
+ vpmu_send_interrupt(sampled);
+ return;
+ }
+ v = sampled;
+ }
+
+ if ( has_hvm_container_domain(sampled->domain) )
+ {
+ struct segment_register seg_cs;
+
+ hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
+ v->arch.vpmu.xenpmu_data->pmu.r.regs.cs = seg_cs.sel;
+ }
+
+ send_guest_vcpu_virq(v, VIRQ_XENPMU);
+}
+
+int pmu_nmi_interrupt(const struct cpu_user_regs *regs, int cpu)
+{
+ return vpmu_do_interrupt(regs);
+}
+
static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
{
struct vcpu *v;
struct page_info *page;
uint64_t gfn = params->val;
+ static bool_t __read_mostly pvpmu_init_done;
+ static DEFINE_SPINLOCK(init_lock);
if ( (params->vcpu >= d->max_vcpus) || (d->vcpu == NULL) ||
(d->vcpu[params->vcpu] == NULL) )
@@ -464,6 +561,27 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t
*params)
return -EINVAL;
}
+ spin_lock(&init_lock);
+
+ if ( !pvpmu_init_done )
+ {
+ if ( reserve_lapic_nmi() != 0 )
+ {
+ spin_unlock(&init_lock);
+ printk(XENLOG_G_ERR "Failed to reserve PMU NMI\n");
+ put_page(page);
+ return -EBUSY;
+ }
+
+ set_nmi_callback(pmu_nmi_interrupt);
+
+ open_softirq(PMU_SOFTIRQ, pmu_softnmi);
+
+ pvpmu_init_done = 1;
+ }
+
+ spin_unlock(&init_lock);
+
vpmu_initialise(v);
return 0;
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index a6e7934..7d86f64 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -42,7 +42,7 @@ struct arch_vpmu_ops {
int (*do_wrmsr)(unsigned int msr, uint64_t msr_content,
uint64_t supported);
int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
- int (*do_interrupt)(struct cpu_user_regs *regs);
+ int (*do_interrupt)(const struct cpu_user_regs *regs);
void (*do_cpuid)(unsigned int input,
unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
@@ -98,7 +98,7 @@ static inline bool_t vpmu_are_all_set(const struct
vpmu_struct *vpmu,
void vpmu_lvtpc_update(uint32_t val);
int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
uint64_t supported, bool_t is_write);
-int vpmu_do_interrupt(struct cpu_user_regs *regs);
+int vpmu_do_interrupt(const struct cpu_user_regs *regs);
void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
void vpmu_initialise(struct vcpu *v);
diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
index 0895a16..9bada14 100644
--- a/xen/include/xen/softirq.h
+++ b/xen/include/xen/softirq.h
@@ -8,6 +8,7 @@ enum {
NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ,
RCU_SOFTIRQ,
TASKLET_SOFTIRQ,
+ PMU_SOFTIRQ,
NR_COMMON_SOFTIRQS
};
--
1.8.1.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |