|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v16 22/23] x86/VPMU: NMI-based VPMU support
Add support for using NMIs as PMU interrupts to allow profiling the hypervisor
when interrupts are disabled.
Most of the processing is still performed by vpmu_do_interrupt(). However, since
certain operations are not NMI-safe, we defer them to a softirq that
vpmu_do_interrupt() will schedule:
* For PV guests that would be send_guest_vcpu_virq()
* For HVM guests it's VLAPIC accesses and hvm_get_segment_register() (the latter
can be called in privileged profiling mode when the interrupted guest is an HVM
one).
With send_guest_vcpu_virq() and hvm_get_segment_register() for PV(H) and vlapic
accesses for HVM moved to the softirq, the only routines/macros that
vpmu_do_interrupt() calls in NMI mode are:
* memcpy()
* querying domain type (is_XX_domain())
* guest_cpu_user_regs()
* XLAT_cpu_user_regs()
* raise_softirq()
* vcpu_vpmu()
* vpmu_ops->arch_vpmu_save()
* vpmu_ops->do_interrupt()
The latter two only access PMU MSRs with {rd,wr}msrl() (not the _safe versions
which would not be NMI-safe).
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
Reviewed-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>
Tested-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>
---
docs/misc/xen-command-line.markdown | 8 +-
xen/arch/x86/hvm/svm/vpmu.c | 3 +-
xen/arch/x86/hvm/vmx/vpmu_core2.c | 3 +-
xen/arch/x86/hvm/vpmu.c | 226 ++++++++++++++++++++++++++++--------
xen/include/asm-x86/hvm/vpmu.h | 4 +-
xen/include/asm-x86/softirq.h | 3 +-
6 files changed, 192 insertions(+), 55 deletions(-)
diff --git a/docs/misc/xen-command-line.markdown
b/docs/misc/xen-command-line.markdown
index 311316a..3a82080 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1311,11 +1311,11 @@ Use Virtual Processor ID support if available. This
prevents the need for TLB
flushes on VM entry and exit, increasing performance.
### vpmu
-> `= ( bts )`
+> `= ( [nmi,][bts] )`
> Default: `off`
-Switch on the virtualized performance monitoring unit for HVM guests.
+Switch on the virtualized performance monitoring unit.
If the current cpu isn't supported a message like
'VPMU: Initialization failed. ...'
@@ -1327,6 +1327,10 @@ wrong behaviour (see handle\_pmc\_quirk()).
If 'vpmu=bts' is specified the virtualisation of the Branch Trace Store (BTS)
feature is switched on on Intel processors supporting this feature.
+If 'vpmu=nmi' is specified the PMU interrupt will cause an NMI instead of a
+regular vector interrupt (which is the default). This can be useful for
sampling
+hypervisor code that is executed with interrupts disabled.
+
*Warning:*
As the BTS virtualisation is not 100% safe and because of the nehalem quirk
don't use the vpmu flag on production systems with Intel cpus!
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 9b70291..97d545c 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -168,7 +168,7 @@ static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
msr_bitmap_off(vpmu);
}
-static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
+static int amd_vpmu_do_interrupt(const struct cpu_user_regs *regs)
{
return 1;
}
@@ -220,6 +220,7 @@ static inline void context_save(struct vpmu_struct *vpmu)
rdmsrl(counters[i], counter_regs[i]);
}
+/* Must be NMI-safe */
static int amd_vpmu_save(struct vpmu_struct *vpmu)
{
struct vcpu *v;
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c
b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index 8e6386e..77f7795 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -305,6 +305,7 @@ static inline void __core2_vpmu_save(struct vpmu_struct
*vpmu)
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
}
+/* Must be NMI-safe */
static int core2_vpmu_save(struct vpmu_struct *vpmu)
{
struct vcpu *v = vpmu_vcpu(vpmu);
@@ -720,7 +721,7 @@ static void core2_vpmu_dump(const struct vcpu *v)
}
}
-static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
+static int core2_vpmu_do_interrupt(const struct cpu_user_regs *regs)
{
struct vcpu *v = current;
u64 msr_content;
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index dd3f5e0..74b30a8 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -34,6 +34,7 @@
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
#include <public/pmu.h>
#include <xsm/xsm.h>
@@ -54,36 +55,54 @@ unsigned int __read_mostly vpmu_features = 0;
static void parse_vpmu_param(char *s);
custom_param("vpmu", parse_vpmu_param);
+static void pmu_softnmi(void);
+
static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
+static DEFINE_PER_CPU(struct vcpu *, sampled_vcpu);
+
+static uint32_t __read_mostly vpmu_interrupt_type = PMU_APIC_VECTOR;
static void __init parse_vpmu_param(char *s)
{
- switch ( parse_bool(s) )
- {
- case 0:
- break;
- default:
- if ( !strcmp(s, "bts") )
- vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
- else if ( *s )
+ char *ss;
+
+ vpmu_mode = XENPMU_MODE_SELF;
+ if (*s == '\0')
+ return;
+
+ do {
+ ss = strchr(s, ',');
+ if ( ss )
+ *ss = '\0';
+
+ switch ( parse_bool(s) )
{
- printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
- break;
+ default:
+ if ( !strcmp(s, "nmi") )
+ vpmu_interrupt_type = APIC_DM_NMI;
+ else if ( !strcmp(s, "bts") )
+ vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
+ else
+ {
+ printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
+ case 0:
+ vpmu_mode = XENPMU_MODE_OFF;
+ case 1:
+ return;
+ }
}
- /* fall through */
- case 1:
- /* Default VPMU mode */
- vpmu_mode = XENPMU_MODE_SELF;
- break;
- }
+
+ s = ss + 1;
+ } while ( ss );
}
+
void vpmu_lvtpc_update(uint32_t val)
{
struct vcpu *curr = current;
struct vpmu_struct *vpmu = vcpu_vpmu(curr);
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
+ vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | (val & APIC_LVT_MASKED);
/* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
@@ -91,6 +110,30 @@ void vpmu_lvtpc_update(uint32_t val)
apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
}
+static void vpmu_send_interrupt(struct vcpu *v)
+{
+ struct vlapic *vlapic;
+ u32 vlapic_lvtpc;
+
+ ASSERT(is_hvm_vcpu(v));
+
+ vlapic = vcpu_vlapic(v);
+ if ( !is_vlapic_lvtpc_enabled(vlapic) )
+ return;
+
+ vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+
+ switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
+ {
+ case APIC_MODE_FIXED:
+ vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
+ break;
+ case APIC_MODE_NMI:
+ v->nmi_pending = 1;
+ break;
+ }
+}
+
int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
uint64_t supported, bool_t is_write)
{
@@ -140,7 +183,7 @@ static struct vcpu *choose_hwdom_vcpu(void)
return hardware_domain->vcpu[idx];
}
-void vpmu_do_interrupt(struct cpu_user_regs *regs)
+int vpmu_do_interrupt(const struct cpu_user_regs *regs)
{
struct vcpu *sampled = current, *sampling;
struct vpmu_struct *vpmu;
@@ -154,7 +197,7 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
{
sampling = choose_hwdom_vcpu();
if ( !sampling )
- return;
+ return 0;
}
else
sampling = sampled;
@@ -168,15 +211,15 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
uint32_t domid;
if ( !vpmu->xenpmu_data )
- return;
+ return 0;
if ( *flags & PMU_CACHED )
- return;
+ return 0;
if ( is_pvh_vcpu(sampling) &&
!(vpmu_mode & XENPMU_MODE_ALL) &&
!vpmu->arch_vpmu_ops->do_interrupt(regs) )
- return;
+ return 0;
/* PV guest will be reading PMU MSRs from xenpmu_data */
vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
@@ -243,15 +286,20 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
}
else
{
- struct segment_register seg;
-
- hvm_get_segment_register(sampled, x86_seg_cs, &seg);
- r->cs = seg.sel;
- hvm_get_segment_register(sampled, x86_seg_ss, &seg);
- r->ss = seg.sel;
- r->cpl = seg.attr.fields.dpl;
if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
*flags |= PMU_SAMPLE_REAL;
+
+ /* Unsafe in NMI context, defer to softint later. */
+ if ( vpmu_interrupt_type != APIC_DM_NMI )
+ {
+ struct segment_register seg;
+
+ hvm_get_segment_register(sampled, x86_seg_cs, &seg);
+ r->cs = seg.sel;
+ hvm_get_segment_register(sampled, x86_seg_ss, &seg);
+ r->ss = seg.sel;
+ r->cpl = seg.attr.fields.dpl;
+ }
}
}
@@ -263,35 +311,37 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
- send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
+ if ( vpmu_interrupt_type == APIC_DM_NMI )
+ {
+ this_cpu(sampled_vcpu) = sampled;
+ raise_softirq(PMU_SOFTIRQ);
+ }
+ else
+ send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
- return;
+ return 1;
}
if ( vpmu->arch_vpmu_ops )
{
- struct vlapic *vlapic = vcpu_vlapic(sampling);
- u32 vlapic_lvtpc;
-
/* We don't support (yet) HVM dom0 */
ASSERT(sampling == sampled);
- if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
- !is_vlapic_lvtpc_enabled(vlapic) )
- return;
+ if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) )
+ return 0;
- vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
-
- switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
+ if ( vpmu_interrupt_type == APIC_DM_NMI )
{
- case APIC_MODE_FIXED:
- vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
- break;
- case APIC_MODE_NMI:
- sampling->nmi_pending = 1;
- break;
+ this_cpu(sampled_vcpu) = sampled;
+ raise_softirq(PMU_SOFTIRQ);
}
+ else
+ vpmu_send_interrupt(sampling);
+
+ return 1;
}
+
+ return 0;
}
void vpmu_do_cpuid(unsigned int input,
@@ -319,6 +369,9 @@ static void vpmu_save_force(void *arg)
vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
per_cpu(last_vcpu, smp_processor_id()) = NULL;
+
+ /* Make sure there are no outstanding PMU NMIs */
+ pmu_softnmi();
}
void vpmu_save(struct vpmu_struct *vpmu)
@@ -335,7 +388,10 @@ void vpmu_save(struct vpmu_struct *vpmu)
if ( vpmu->arch_vpmu_ops->arch_vpmu_save(vpmu) )
vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
- apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+ apic_write(APIC_LVTPC, vpmu_interrupt_type | APIC_LVT_MASKED);
+
+ /* Make sure there are no outstanding PMU NMIs */
+ pmu_softnmi();
}
void vpmu_load(struct vpmu_struct *vpmu)
@@ -386,6 +442,9 @@ void vpmu_load(struct vpmu_struct *vpmu)
(vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED)) )
return;
+ /* Make sure there are no outstanding PMU NMIs from previous vcpu */
+ pmu_softnmi();
+
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
{
apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
@@ -409,7 +468,7 @@ void vpmu_initialise(struct vcpu *v)
vpmu_destroy(v);
vpmu_clear(vpmu);
vpmu->context = NULL;
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
+ vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | APIC_LVT_MASKED;
switch ( vendor )
{
@@ -445,6 +504,55 @@ void vpmu_destroy(struct vcpu *v)
}
}
+/* Process the softirq set by PMU NMI handler */
+static void pmu_softnmi(void)
+{
+ unsigned int cpu = smp_processor_id();
+ struct vcpu *v, *sampled = per_cpu(sampled_vcpu, cpu);
+
+ if ( sampled == NULL )
+ return;
+
+ per_cpu(sampled_vcpu, cpu) = NULL;
+
+ if ( (vpmu_mode & XENPMU_MODE_ALL) ||
+ (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
+ {
+ v = choose_hwdom_vcpu();
+ if ( !v )
+ return;
+ }
+ else
+ {
+ if ( is_hvm_vcpu(sampled) )
+ {
+ vpmu_send_interrupt(sampled);
+ return;
+ }
+ v = sampled;
+ }
+
+ if ( has_hvm_container_vcpu(sampled) )
+ {
+ struct segment_register seg;
+ struct xen_pmu_arch *pmu = &v->arch.vpmu.xenpmu_data->pmu;
+ struct xen_pmu_regs *r = &pmu->r.regs;
+
+ hvm_get_segment_register(sampled, x86_seg_cs, &seg);
+ r->cs = seg.sel;
+ hvm_get_segment_register(sampled, x86_seg_ss, &seg);
+ r->ss = seg.sel;
+ r->cpl = seg.attr.fields.dpl;
+ }
+
+ send_guest_vcpu_virq(v, VIRQ_XENPMU);
+}
+
+int pmu_nmi_interrupt(const struct cpu_user_regs *regs, int cpu)
+{
+ return vpmu_do_interrupt(regs);
+}
+
static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
{
struct vcpu *v;
@@ -740,6 +848,21 @@ static int __init vpmu_init(void)
return 0;
}
+ if ( vpmu_interrupt_type == APIC_DM_NMI )
+ {
+ if ( reserve_lapic_nmi() != 0 )
+ {
+ printk(XENLOG_WARNING "VPMU: Can't reserve NMI, will use"
+ " APIC vector 0x%x\n", PMU_APIC_VECTOR);
+ vpmu_interrupt_type = PMU_APIC_VECTOR;
+ }
+ else
+ {
+ set_nmi_callback(pmu_nmi_interrupt);
+ open_softirq(PMU_SOFTIRQ, pmu_softnmi);
+ }
+ }
+
switch ( vendor )
{
case X86_VENDOR_AMD:
@@ -756,7 +879,14 @@ static int __init vpmu_init(void)
}
if ( vpmu_mode == XENPMU_MODE_OFF )
+ {
+ if ( vpmu_interrupt_type == APIC_DM_NMI )
+ {
+ unset_nmi_callback();
+ release_lapic_nmi();
+ }
printk(XENLOG_WARNING "VPMU: Disabling due to initialization error\n");
+ }
else
printk(XENLOG_INFO "VPMU: version %d.%d\n",
XENPMU_VER_MAJ, XENPMU_VER_MIN);
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 2c888cc..ed5dc8c 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -53,7 +53,7 @@ struct arch_vpmu_ops {
int (*do_wrmsr)(unsigned int msr, uint64_t msr_content,
uint64_t supported);
int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
- int (*do_interrupt)(struct cpu_user_regs *regs);
+ int (*do_interrupt)(const struct cpu_user_regs *regs);
void (*do_cpuid)(unsigned int input,
unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
@@ -102,7 +102,7 @@ static inline bool_t vpmu_are_all_set(const struct
vpmu_struct *vpmu,
void vpmu_lvtpc_update(uint32_t val);
int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
uint64_t supported, bool_t is_write);
-void vpmu_do_interrupt(struct cpu_user_regs *regs);
+int vpmu_do_interrupt(const struct cpu_user_regs *regs);
void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
void vpmu_initialise(struct vcpu *v);
diff --git a/xen/include/asm-x86/softirq.h b/xen/include/asm-x86/softirq.h
index ec787d6..fca110f 100644
--- a/xen/include/asm-x86/softirq.h
+++ b/xen/include/asm-x86/softirq.h
@@ -8,7 +8,8 @@
#define MACHINE_CHECK_SOFTIRQ (NR_COMMON_SOFTIRQS + 3)
#define PCI_SERR_SOFTIRQ (NR_COMMON_SOFTIRQS + 4)
#define HVM_DPCI_SOFTIRQ (NR_COMMON_SOFTIRQS + 5)
-#define NR_ARCH_SOFTIRQS 6
+#define PMU_SOFTIRQ (NR_COMMON_SOFTIRQS + 6)
+#define NR_ARCH_SOFTIRQS 7
bool_t arch_skip_send_event_check(unsigned int cpu);
--
1.8.1.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |