[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 11/13] x86/PMU: Handle PMU interrupts for PV guests



Add support for handling PMU interrupts for PV guests. Deliver these interrupts
as NMIs instead of using the PMU_APIC_VECTOR vector. Depending on vpmu_mode,
forward the interrupts to the appropriate guest (mode is VPMU_ON) or to dom0
(VPMU_DOM0).

The VPMU for the interrupted VCPU is unloaded until the guest issues the
XENPMU_flush hypercall. This allows the guest to access PMU MSR values that are
stored in the VPMU context, which is shared between the hypervisor and the
domain, thus avoiding traps to the hypervisor.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
 xen/arch/x86/apic.c                            |  13 ---
 xen/arch/x86/hvm/svm/vpmu.c                    |   8 +-
 xen/arch/x86/hvm/vmx/vpmu_core2.c              |   8 +-
 xen/arch/x86/hvm/vpmu.c                        | 111 +++++++++++++++++++++++--
 xen/include/asm-x86/hvm/vpmu.h                 |   1 +
 xen/include/asm-x86/irq.h                      |   1 -
 xen/include/asm-x86/mach-default/irq_vectors.h |   1 -
 xen/include/public/xenpmu.h                    |   1 +
 8 files changed, 115 insertions(+), 29 deletions(-)

diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c
index a52a0e8..9675e76 100644
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -125,9 +125,6 @@ void __init apic_intr_init(void)
     /* IPI vectors for APIC spurious and error interrupts */
     set_direct_apic_vector(SPURIOUS_APIC_VECTOR, spurious_interrupt);
     set_direct_apic_vector(ERROR_APIC_VECTOR, error_interrupt);
-
-    /* Performance Counters Interrupt */
-    set_direct_apic_vector(PMU_APIC_VECTOR, pmu_apic_interrupt);
 }
 
 /* Using APIC to generate smp_local_timer_interrupt? */
@@ -1368,16 +1365,6 @@ void error_interrupt(struct cpu_user_regs *regs)
 }
 
 /*
- * This interrupt handles performance counters interrupt
- */
-
-void pmu_apic_interrupt(struct cpu_user_regs *regs)
-{
-    ack_APIC_irq();
-    vpmu_do_interrupt(regs);
-}
-
-/*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 527a1de..3993a95 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -283,8 +283,8 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
         if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
             return 1;
         vpmu_set(vpmu, VPMU_RUNNING);
-        apic_write(APIC_LVTPC, PMU_APIC_VECTOR);
-        vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR;
+        apic_write(APIC_LVTPC, APIC_DM_NMI);
+        vpmu->hw_lapic_lvtpc = APIC_DM_NMI;
 
         if ( is_hvm_domain(v->domain) &&
              !((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set )
@@ -295,8 +295,8 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
     if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) &&
         (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) )
     {
-        apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
-        vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
+        apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED);
+        vpmu->hw_lapic_lvtpc = APIC_DM_NMI | APIC_LVT_MASKED;
         vpmu_reset(vpmu, VPMU_RUNNING);
         if ( is_hvm_domain(v->domain) &&
              ((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set )
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index ebbb516..27f0807 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -548,13 +548,13 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
         if ( vpmu_is_set(vpmu, VPMU_RUNNING) &&
              is_vlapic_lvtpc_enabled(vcpu_vlapic(v)) )
         {
-            apic_write_around(APIC_LVTPC, PMU_APIC_VECTOR);
-            vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR;
+            apic_write_around(APIC_LVTPC, APIC_DM_NMI);
+            vpmu->hw_lapic_lvtpc = APIC_DM_NMI;
         }
         else
         {
-            apic_write_around(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
-            vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
+            apic_write_around(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED);
+            vpmu->hw_lapic_lvtpc = APIC_DM_NMI | APIC_LVT_MASKED;
         }
     }
 
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index 4638193..1ea3a96 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -47,6 +47,7 @@ uint32_t __read_mostly vpmu_mode = XENPMU_MODE_OFF;
 static void parse_vpmu_param(char *s);
 custom_param("vpmu", parse_vpmu_param);
 
+static void vpmu_save_force(void *arg);
 static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
 
 static void __init parse_vpmu_param(char *s)
@@ -74,7 +75,7 @@ static void vpmu_lvtpc_update(uint32_t val)
 {
      struct vpmu_struct *vpmu = vcpu_vpmu(current);
 
-     vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
+     vpmu->hw_lapic_lvtpc = APIC_DM_NMI | (val & APIC_LVT_MASKED);
      apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
 }
 
@@ -82,6 +83,9 @@ int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(current);
 
+    if ( (vpmu_mode & XENPMU_MODE_PRIV) && (current->domain != dom0) )
+        return 0;
+
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
         return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content);
     return 0;
@@ -91,6 +95,9 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(current);
 
+    if ( (vpmu_mode & XENPMU_MODE_PRIV) && (current->domain != dom0) )
+        return 0;
+
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
         return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
     return 0;
@@ -99,17 +106,97 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
 int vpmu_do_interrupt(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct vpmu_struct *vpmu;
 
-    if ( vpmu->arch_vpmu_ops )
+
+    /* dom0 will handle this interrupt */
+    if ( (vpmu_mode & XENPMU_MODE_PRIV) ||
+        (v->domain->domain_id >= DOMID_FIRST_RESERVED) )
+    {
+            if ( smp_processor_id() >= dom0->max_vcpus )
+                return 0;
+            v = dom0->vcpu[smp_processor_id()];
+    }
+
+    vpmu = vcpu_vpmu(v);
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+        return 0;
+
+    if ( !is_hvm_domain(v->domain) || (vpmu_mode & XENPMU_MODE_PRIV) )
+    {
+        /* PV guest or dom0 is doing system profiling */
+        void *p;
+        struct cpu_user_regs *gregs;
+
+        p = &v->arch.vpmu.xenpmu_data->pmu.regs;
+
+        /* PV guest will be reading PMU MSRs from xenpmu_data */
+        vpmu_save_force(v);
+
+        /* Store appropriate registers in xenpmu_data
+         *
+         * Note: '!current->is_running' is possible when 'set_current(next)'
+         * for the (HVM) guest has been called but 'reset_stack_and_jump()'
+         * has not (i.e. the guest is not actually running yet).
+         */
+        if ( !is_hvm_domain(current->domain) ||
+             ((vpmu_mode & XENPMU_MODE_PRIV) && !current->is_running) )
+        {
+            /*
+             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
+             * and therefore we treat it the same way as a non-privileged
+             * PV 32-bit domain.
+             */
+            if ( is_pv_32bit_domain(current->domain) )
+            {
+                struct compat_cpu_user_regs cmp;
+
+                gregs = guest_cpu_user_regs();
+                XLAT_cpu_user_regs(&cmp, gregs);
+                memcpy(p, &cmp, sizeof(struct compat_cpu_user_regs));
+            }
+            else if ( (current->domain != dom0) && !is_idle_vcpu(current) &&
+                !(vpmu_mode & XENPMU_MODE_PRIV) )
+            {
+                /* PV guest */
+                gregs = guest_cpu_user_regs();
+                memcpy(p, gregs, sizeof(struct cpu_user_regs));
+            }
+            else
+                memcpy(p, regs, sizeof(struct cpu_user_regs));
+        }
+        else
+        {
+            /* HVM guest */
+            struct segment_register cs;
+
+            gregs = guest_cpu_user_regs();
+            hvm_get_segment_register(current, x86_seg_cs, &cs);
+            gregs->cs = cs.attr.fields.dpl;
+
+            memcpy(p, gregs, sizeof(struct cpu_user_regs));
+        }
+
+        v->arch.vpmu.xenpmu_data->domain_id = current->domain->domain_id;
+        v->arch.vpmu.xenpmu_data->vcpu_id = current->vcpu_id;
+        v->arch.vpmu.xenpmu_data->pcpu_id = smp_processor_id();
+
+        raise_softirq(PMU_SOFTIRQ);
+        vpmu_set(vpmu, VPMU_WAIT_FOR_FLUSH);
+
+        return 1;
+    }
+    else  if ( vpmu->arch_vpmu_ops )
     {
-        struct vlapic *vlapic = vcpu_vlapic(v);
+        /* HVM guest */
+        struct vlapic *vlapic;
         u32 vlapic_lvtpc;
         unsigned char int_vec;
 
         if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) )
             return 0;
 
+        vlapic = vcpu_vlapic(v);
         if ( !is_vlapic_lvtpc_enabled(vlapic) )
             return 1;
 
@@ -169,7 +256,7 @@ void vpmu_save(struct vcpu *v)
         if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
             vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
 
-    apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+    apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED);
 }
 
 void vpmu_load(struct vcpu *v)
@@ -223,7 +310,13 @@ void vpmu_load(struct vcpu *v)
         vpmu->arch_vpmu_ops->arch_vpmu_load(v);
     }
 
-    vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+    /*
+     * A PMU interrupt may happen while loading the context above. That
+     * may trigger vpmu_save_force() in the handler, in which case we
+     * don't want to mark the context as loaded.
+     */
+    if ( !vpmu_is_set(vpmu, VPMU_WAIT_FOR_FLUSH) )
+        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
 }
 
 void vpmu_initialise(struct vcpu *v)
@@ -444,6 +537,12 @@ long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg)
         vpmu_lvtpc_update((uint32_t)pmu_params.val);
         ret = 0;
         break;
+
+    case XENPMU_flush:
+        vpmu_reset(vcpu_vpmu(current), VPMU_WAIT_FOR_FLUSH);
+        vpmu_load(current);
+        ret = 0;
+        break;
     }
 
     return ret;
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index e046afd..348fc9a 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -68,6 +68,7 @@ struct vpmu_struct {
 #define VPMU_CONTEXT_SAVE                   0x8   /* Force context save */
 #define VPMU_FROZEN                         0x10  /* Stop counters while VCPU is not running */
 #define VPMU_PASSIVE_DOMAIN_ALLOCATED       0x20
+#define VPMU_WAIT_FOR_FLUSH                 0x40  /* PV guest waits for XENPMU_flush */
 
 #define vpmu_set(_vpmu, _x)         ((_vpmu)->flags |= (_x))
 #define vpmu_reset(_vpmu, _x)       ((_vpmu)->flags &= ~(_x))
diff --git a/xen/include/asm-x86/irq.h b/xen/include/asm-x86/irq.h
index 7f5da06..e582a72 100644
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -88,7 +88,6 @@ void invalidate_interrupt(struct cpu_user_regs *regs);
 void call_function_interrupt(struct cpu_user_regs *regs);
 void apic_timer_interrupt(struct cpu_user_regs *regs);
 void error_interrupt(struct cpu_user_regs *regs);
-void pmu_apic_interrupt(struct cpu_user_regs *regs);
 void spurious_interrupt(struct cpu_user_regs *regs);
 void irq_move_cleanup_interrupt(struct cpu_user_regs *regs);
 
diff --git a/xen/include/asm-x86/mach-default/irq_vectors.h b/xen/include/asm-x86/mach-default/irq_vectors.h
index 992e00c..46dcfaf 100644
--- a/xen/include/asm-x86/mach-default/irq_vectors.h
+++ b/xen/include/asm-x86/mach-default/irq_vectors.h
@@ -8,7 +8,6 @@
 #define EVENT_CHECK_VECTOR     0xfc
 #define CALL_FUNCTION_VECTOR   0xfb
 #define LOCAL_TIMER_VECTOR     0xfa
-#define PMU_APIC_VECTOR        0xf9
 /*
  * High-priority dynamically-allocated vectors. For interrupts that
  * must be higher priority than any guest-bound interrupt.
diff --git a/xen/include/public/xenpmu.h b/xen/include/public/xenpmu.h
index 0060670..f05fdfa 100644
--- a/xen/include/public/xenpmu.h
+++ b/xen/include/public/xenpmu.h
@@ -28,6 +28,7 @@
 #define XENPMU_init            4
 #define XENPMU_finish          5
 #define XENPMU_lvtpc_set       6
+#define XENPMU_flush           7 /* Write cached MSR values to HW     */
 /* ` } */
 
 /* Parameters structure for HYPERVISOR_xenpmu_op call */
-- 
1.8.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.