[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v4 3/6] xen/PMU: Initialization code for Xen PMU



Map shared data structure that will hold CPU registers, VPMU context, V/PCPU IDs
of the CPU interrupted by PMU interrupt. Hypervisor fills this information in
its handler and passes it to the guest for further processing.

Set up PMU VIRQ.

Now that perf infrastructure will assume that PMU is available on a PV guest we
need to be careful and make sure that accesses via RDPMC instruction don't
cause fatal traps by the hypervisor. Provide a nop RDPMC handler.

For the same reason avoid issuing a warning on a write to APIC's LVTPC.

Both of these will be made functional in later patches.


Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
 arch/x86/include/asm/xen/interface.h |  41 ++++++++++
 arch/x86/xen/Makefile                |   2 +-
 arch/x86/xen/enlighten.c             |  10 ++-
 arch/x86/xen/pmu.c                   | 145 +++++++++++++++++++++++++++++++++++
 arch/x86/xen/pmu.h                   |  11 +++
 arch/x86/xen/smp.c                   |  31 +++++++-
 include/xen/interface/xen.h          |   1 +
 include/xen/interface/xenpmu.h       |  17 ++++
 8 files changed, 255 insertions(+), 3 deletions(-)
 create mode 100644 arch/x86/xen/pmu.c
 create mode 100644 arch/x86/xen/pmu.h

diff --git a/arch/x86/include/asm/xen/interface.h 
b/arch/x86/include/asm/xen/interface.h
index 3400dba..fa64648 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -172,6 +172,47 @@ struct vcpu_guest_context {
 #endif
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
+
+/* AMD PMU registers and structures */
+/*
+ * AMD VPMU context; it is the 'amd' member of the context union in
+ * struct xen_arch_pmu. Both fields are offsets rather than MSR values.
+ * What the offsets are relative to is not visible here (presumably the
+ * start of this context -- TODO confirm).
+ */
+struct xen_pmu_amd_ctxt {
+       uint32_t counters;       /* Offset to counter MSRs */
+       uint32_t ctrls;          /* Offset to control MSRs */
+};
+
+/* Intel PMU registers and structures */
+/*
+ * A counter value paired with a control value, going by the field names.
+ * NOTE(review): nothing visible here uses this struct; presumably it is what
+ * the arch_counters offset in struct xen_pmu_intel_ctxt points at -- confirm.
+ */
+struct xen_pmu_cntr_pair {
+       uint64_t counter;
+       uint64_t control;
+};
+
+/*
+ * Intel VPMU context; it is the 'intel' member of the context union in
+ * struct xen_arch_pmu. The 64-bit members are named after Intel PMU
+ * registers (presumably copies of the corresponding MSRs -- TODO confirm).
+ * The two trailing 32-bit members are offsets, not register values.
+ */
+struct xen_pmu_intel_ctxt {
+       uint64_t global_ctrl;
+       uint64_t global_ovf_ctrl;
+       uint64_t global_status;
+       uint64_t fixed_ctrl;
+       uint64_t ds_area;
+       uint64_t pebs_enable;
+       uint64_t debugctl;
+       uint32_t fixed_counters;  /* Offset to fixed counter MSRs */
+       uint32_t arch_counters;   /* Offset to architectural counter MSRs */
+};
+
+/*
+ * Architecture-specific part of the PMU data shared with the hypervisor
+ * (embedded as the 'pmu' member of struct xen_pmu_data).
+ *
+ * Every member sits in an anonymous union next to a pad, so each slot
+ * occupies at least the pad's size regardless of the member's own size.
+ */
+struct xen_arch_pmu {
+       /* Registers of the interrupted CPU; slot is at least 256 bytes */
+       union {
+               struct cpu_user_regs regs;
+               uint8_t pad1[256];
+       };
+       /* LVTPC value (by name; not used in this patch); 64-bit wide slot */
+       union {
+               uint32_t lapic_lvtpc;
+               uint64_t pad2;
+       };
+       /* Vendor-specific context; slot is at least XENPMU_CTXT_PAD_SZ bytes */
+       union {
+               struct xen_pmu_amd_ctxt amd;
+               struct xen_pmu_intel_ctxt intel;
+#define XENPMU_CTXT_PAD_SZ  128
+               uint8_t pad3[XENPMU_CTXT_PAD_SZ];
+       };
+};
 #endif /* !__ASSEMBLY__ */
 
 /*
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 96ab2c0..b187df5 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -13,7 +13,7 @@ CFLAGS_mmu.o                  := $(nostackp)
 obj-y          := enlighten.o setup.o multicalls.o mmu.o irq.o \
                        time.o xen-asm.o xen-asm_$(BITS).o \
                        grant-table.o suspend.o platform-pci-unplug.o \
-                       p2m.o
+                       p2m.o pmu.o
 
 obj-$(CONFIG_EVENT_TRACING) += trace.o
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index ffb101e..57764ce 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -962,8 +962,16 @@ static u32 xen_apic_read(u32 reg)
        return op.u.pcpu_info.apic_id << 24;
 }
 
+/*
+ * Placeholder installed as the read_pmc operation in xen_cpu_ops in place of
+ * native_read_pmc. It ignores @counter and always returns 0.
+ */
+unsigned long long xen_read_pmc(int counter)
+{
+       return 0;
+}
+
+/*
+ * xen_apic_write(): nothing is written for any register. A write to
+ * APIC_LVTPC returns quietly; a write to any other register raises a
+ * warning. @val is unused.
+ */
 static void xen_apic_write(u32 reg, u32 val)
 {
+       /* LVTPC writes are expected, so do not warn about them */
+       if (reg == APIC_LVTPC)
+               return;
+
        /* Warn to see if there's any stray references */
        WARN_ON(1);
 }
@@ -1248,7 +1256,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
        .write_msr = xen_write_msr_safe,
 
        .read_tsc = native_read_tsc,
-       .read_pmc = native_read_pmc,
+       .read_pmc = xen_read_pmc,
 
        .read_tscp = native_read_tscp,
 
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
new file mode 100644
index 0000000..799e229
--- /dev/null
+++ b/arch/x86/xen/pmu.c
@@ -0,0 +1,145 @@
+#include <linux/types.h>
+#include <linux/interrupt.h>
+
+#include <asm/xen/hypercall.h>
+#include <xen/page.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>
+
+#include "xen-ops.h"
+#include "pmu.h"
+
+/* x86_pmu.handle_irq definition */
+#include "../kernel/cpu/perf_event.h"
+
+
+/*
+ * Shared page between hypervisor and domain
+ *
+ * One pointer per CPU. xen_pmu_init() stores the page address here once the
+ * XENPMU_init hypercall has succeeded, and xen_pmu_finish() resets it to
+ * NULL after freeing the page.
+ */
+DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
+/* Shared-page pointer of the CPU this code is currently running on */
+#define get_xenpmu_data()    per_cpu(xenpmu_shared, smp_processor_id())
+
+/* perf callbacks*/
+
+/*
+ * Returns 1 when this is the initial domain and the domain_id recorded in
+ * the current CPU's shared page is below DOMID_SELF; returns 0 otherwise.
+ * Also returns 0, after a one-time warning, when the shared page has not
+ * been set up for this CPU.
+ */
+int xen_is_in_guest(void)
+{
+       const struct xen_pmu_data *shared = get_xenpmu_data();
+
+       if (WARN_ONCE(!shared, "%s: pmudata not initialized\n", __func__))
+               return 0;
+
+       return xen_initial_domain() && shared->domain_id < DOMID_SELF;
+}
+
+/*
+ * perf callback: returns 1 when the two low bits of the CS value saved in
+ * the current CPU's shared page are both set, 0 otherwise. Also returns 0,
+ * after a one-time warning, when the shared page has not been set up.
+ */
+static int xen_is_user_mode(void)
+{
+       const struct xen_pmu_data *shared = get_xenpmu_data();
+
+       if (WARN_ONCE(!shared, "%s: pmudata not initialized\n", __func__))
+               return 0;
+
+       return (shared->pmu.regs.cs & 3) == 3;
+}
+
+/*
+ * perf callback: returns the instruction pointer saved in the current CPU's
+ * shared page. Returns 0, after a one-time warning, when the shared page has
+ * not been set up.
+ */
+static unsigned long xen_get_guest_ip(void)
+{
+       const struct xen_pmu_data *shared = get_xenpmu_data();
+
+       if (WARN_ONCE(!shared, "%s: pmudata not initialized\n", __func__))
+               return 0;
+
+       return shared->pmu.regs.eip;
+}
+
+/*
+ * Callback table passed to perf_register_guest_info_callbacks() by
+ * xen_pmu_init() when it runs for CPU 0.
+ */
+static struct perf_guest_info_callbacks xen_guest_cbs = {
+       .is_in_guest            = xen_is_in_guest,
+       .is_user_mode           = xen_is_user_mode,
+       .get_guest_ip           = xen_get_guest_ip,
+};
+
+/*
+ * Convert registers from Xen's format to Linux'.
+ *
+ * Only the code segment and the instruction pointer are copied. Every other
+ * field of @regs keeps whatever value the caller left in it.
+ */
+static void xen_convert_regs(const struct cpu_user_regs *xen_regs,
+                            struct pt_regs *regs)
+{
+       regs->cs = xen_regs->cs;
+       regs->ip = xen_regs->eip;
+}
+
+/*
+ * Interrupt handler for the PMU interrupt.
+ *
+ * Builds a pt_regs from the register snapshot in the current CPU's shared
+ * page and passes it to the x86 perf interrupt handler.
+ *
+ * Returns IRQ_HANDLED when x86_pmu.handle_irq() returns non-zero, and
+ * IRQ_NONE otherwise. IRQ_NONE is also returned, after a one-time warning,
+ * when the shared page has not been set up for this CPU.
+ */
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
+{
+       int ret = IRQ_NONE;
+       /*
+        * xen_convert_regs() fills in only ip and cs. Zero the rest so the
+        * perf handler never reads uninitialized stack contents (flags, sp,
+        * ...) from this structure.
+        */
+       struct pt_regs regs = {0};
+       const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+       if (!xenpmu_data) {
+               WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
+               return ret;
+       }
+
+       xen_convert_regs(&xenpmu_data->pmu.regs, &regs);
+       if (x86_pmu.handle_irq(&regs))
+               ret = IRQ_HANDLED;
+
+       return ret;
+}
+
+/* Report whether a shared PMU page is currently registered for @cpu. */
+bool is_xen_pmu(int cpu)
+{
+       const struct xen_pmu_data *shared = per_cpu(xenpmu_shared, cpu);
+
+       return shared != NULL;
+}
+
+/*
+ * Set up the PMU shared page for @cpu.
+ *
+ * Allocates one zeroed page, hands its machine frame number to the
+ * hypervisor with XENPMU_init and, on success, records the page in the
+ * per-CPU xenpmu_shared pointer. When called for CPU 0 it also registers
+ * the perf guest-info callbacks.
+ *
+ * Returns 0 on success, -ENOMEM when no page could be allocated, or the
+ * non-zero result of the XENPMU_init hypercall. On failure the page is
+ * freed and the per-CPU pointer is left untouched.
+ */
+int xen_pmu_init(int cpu)
+{
+       struct xen_pmu_data *shared;
+       struct xen_pmu_params xp;
+       int err;
+
+       BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);
+
+       shared = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
+       if (!shared)
+               return -ENOMEM;
+
+       /* Describe the page to the hypervisor by machine frame number */
+       xp.val = pfn_to_mfn(virt_to_pfn(shared));
+       xp.vcpu = cpu;
+       xp.version.maj = XENPMU_VER_MAJ;
+       xp.version.min = XENPMU_VER_MIN;
+
+       err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
+       if (err) {
+               free_pages((unsigned long)shared, 0);
+               return err;
+       }
+
+       per_cpu(xenpmu_shared, cpu) = shared;
+
+       if (cpu == 0)
+               perf_register_guest_info_callbacks(&xen_guest_cbs);
+
+       return 0;
+}
+
+/*
+ * Tear down the PMU shared page for @cpu.
+ *
+ * Issues the XENPMU_finish hypercall (its result is deliberately ignored),
+ * then frees the page recorded in the per-CPU xenpmu_shared pointer and
+ * resets that pointer to NULL.
+ */
+void xen_pmu_finish(int cpu)
+{
+       struct xen_pmu_data **slot = &per_cpu(xenpmu_shared, cpu);
+       struct xen_pmu_params xp;
+
+       xp.version.maj = XENPMU_VER_MAJ;
+       xp.version.min = XENPMU_VER_MIN;
+       xp.vcpu = cpu;
+
+       (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);
+
+       free_pages((unsigned long)*slot, 0);
+       *slot = NULL;
+}
diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h
new file mode 100644
index 0000000..d52e8db
--- /dev/null
+++ b/arch/x86/xen/pmu.h
@@ -0,0 +1,11 @@
+#ifndef __XEN_PMU_H
+#define __XEN_PMU_H
+
+#include <xen/interface/xenpmu.h>
+
+/* PMU interrupt handler; returns IRQ_HANDLED or IRQ_NONE */
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
+/* Set up the PMU shared page for @cpu; returns 0 on success */
+int xen_pmu_init(int cpu);
+/* Release the PMU shared page of @cpu */
+void xen_pmu_finish(int cpu);
+/* True when a PMU shared page is registered for @cpu */
+bool is_xen_pmu(int cpu);
+
+#endif /* __XEN_PMU_H */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7005974..7ea6296 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -26,6 +26,7 @@
 
 #include <xen/interface/xen.h>
 #include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>
 
 #include <asm/xen/interface.h>
 #include <asm/xen/hypercall.h>
@@ -37,6 +38,7 @@
 #include <xen/hvc-console.h>
 #include "xen-ops.h"
 #include "mmu.h"
+#include "pmu.h"
 
 cpumask_var_t xen_cpu_initialized_map;
 
@@ -49,6 +51,7 @@ static DEFINE_PER_CPU(struct xen_common_irq, 
xen_callfunc_irq) = { .irq = -1 };
 static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq 
= -1 };
 static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
 static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -147,11 +150,18 @@ static void xen_smp_intr_free(unsigned int cpu)
                kfree(per_cpu(xen_irq_work, cpu).name);
                per_cpu(xen_irq_work, cpu).name = NULL;
        }
+
+       if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
+               unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
+               per_cpu(xen_pmu_irq, cpu).irq = -1;
+               kfree(per_cpu(xen_pmu_irq, cpu).name);
+               per_cpu(xen_pmu_irq, cpu).name = NULL;
+       }
 };
 static int xen_smp_intr_init(unsigned int cpu)
 {
        int rc;
-       char *resched_name, *callfunc_name, *debug_name;
+       char *resched_name, *callfunc_name, *debug_name, *pmu_name;
 
        resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -217,6 +227,18 @@ static int xen_smp_intr_init(unsigned int cpu)
        per_cpu(xen_irq_work, cpu).irq = rc;
        per_cpu(xen_irq_work, cpu).name = callfunc_name;
 
+       if (is_xen_pmu(cpu)) {
+               pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
+               rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
+                                            xen_pmu_irq_handler,
+                                            IRQF_PERCPU|IRQF_NOBALANCING,
+                                            pmu_name, NULL);
+               if (rc < 0)
+                       goto fail;
+               per_cpu(xen_pmu_irq, cpu).irq = rc;
+               per_cpu(xen_pmu_irq, cpu).name = pmu_name;
+       }
+
        return 0;
 
  fail:
@@ -334,6 +356,9 @@ static void __init xen_smp_prepare_cpus(unsigned int 
max_cpus)
        }
        set_cpu_sibling_map(0);
 
+       if (xen_pmu_init(0))
+               pr_err("Could not initialize VPMU for VCPU 0\n");
+
        if (xen_smp_intr_init(0))
                BUG();
 
@@ -463,6 +488,9 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct 
*idle)
                /* Just in case we booted with a single CPU. */
                alternatives_enable_smp();
 
+       if (xen_pmu_init(cpu))
+               pr_err("Could not initialize VPMU for VCPU %u\n", cpu);
+
        rc = xen_smp_intr_init(cpu);
        if (rc)
                return rc;
@@ -504,6 +532,7 @@ static void xen_cpu_die(unsigned int cpu)
        xen_smp_intr_free(cpu);
        xen_uninit_lock_cpu(cpu);
        xen_teardown_timer(cpu);
+       xen_pmu_finish(cpu);
 }
 
 static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 9ba0370..25df701 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -81,6 +81,7 @@
 #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
 #define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                   */
+#define VIRQ_XENPMU    13  /* PMC interrupt                               */
 
 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16
diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
index be5ea9d..78073dc 100644
--- a/include/xen/interface/xenpmu.h
+++ b/include/xen/interface/xenpmu.h
@@ -11,6 +11,8 @@
 #define XENPMU_mode_set        1
 #define XENPMU_feature_get     2
 #define XENPMU_feature_set     3
+#define XENPMU_init            4
+#define XENPMU_finish          5
 
 /* Parameter structure for HYPERVISOR_xenpmu_op call */
 struct xen_pmu_params {
@@ -44,5 +46,20 @@ struct xen_pmu_params {
  */
 #define XENPMU_FEATURE_INTEL_BTS  1
 
+/*
+ * PMU MSRs are cached in the context so the PV guest doesn't need to trap to
+ * the hypervisor
+ */
+#define PMU_CACHED 1
+
+/*
+ * Shared between hypervisor and PV domain
+ *
+ * Per the patch description, the hypervisor fills this in from its PMU
+ * interrupt handler with the IDs and registers of the interrupted CPU and
+ * the guest then processes it.
+ */
+struct xen_pmu_data {
+       uint32_t domain_id;     /* compared against DOMID_SELF by the guest */
+       uint32_t vcpu_id;
+       uint32_t pcpu_id;
+       uint32_t pmu_flags;     /* presumably holds PMU_CACHED -- TODO confirm */
+
+       struct xen_arch_pmu pmu; /* architecture-specific registers/context */
+};
 
 #endif /* __XEN_PUBLIC_XENPMU_H__ */
-- 
1.8.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.