[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests



Add support for handling PMU interrupts for PV(H) guests.

VPMU for the interrupted VCPU is unloaded until the guest issues the XENPMU_flush
hypercall. This allows the guest to access PMU MSR values that are stored in the
VPMU context, which is shared between the hypervisor and the domain, thus avoiding
traps to the hypervisor.

Since the interrupt handler may now force VPMU context save (i.e. set
VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
until now expected this flag to be set only when the counters were stopped.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Acked-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
---
Changes in v24:
* For both AMD and Intel, copy the guest's MSRs into the context first and then
  verify them (to keep things read-once by the hypervisor)
* To make sure that the guest did not alter the offsets to registers, don't copy
  these values. Store them into the shared area during VPMU initialization.
  Clarify in the public header file that they are RO for the guest
* Make vpmu_load return arch_vpmu_load()'s error code, not 1.

 xen/arch/x86/hvm/svm/vpmu.c       |  90 ++++++++++---
 xen/arch/x86/hvm/vmx/vpmu_core2.c | 108 ++++++++++++++-
 xen/arch/x86/hvm/vpmu.c           | 268 +++++++++++++++++++++++++++++++++-----
 xen/include/asm-x86/hvm/vpmu.h    |  10 +-
 xen/include/public/arch-x86/pmu.h |  41 +++++-
 xen/include/public/pmu.h          |   2 +
 xen/include/xsm/dummy.h           |   4 +-
 xen/xsm/flask/hooks.c             |   2 +
 8 files changed, 464 insertions(+), 61 deletions(-)

diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 934f1b7..b93d31d 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -46,6 +46,9 @@ static const u32 __read_mostly *counters;
 static const u32 __read_mostly *ctrls;
 static bool_t __read_mostly k7_counters_mirrored;
 
+/* Total size of PMU registers block (copied to/from PV(H) guest) */
+static unsigned int __read_mostly regs_sz;
+
 #define F10H_NUM_COUNTERS   4
 #define F15H_NUM_COUNTERS   6
 #define MAX_NUM_COUNTERS    F15H_NUM_COUNTERS
@@ -158,7 +161,7 @@ static void amd_vpmu_init_regs(struct xen_pmu_amd_ctxt 
*ctxt)
     unsigned i;
     uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
 
-    memset(&ctxt->regs[0], 0, 2 * sizeof(uint64_t) * num_counters);
+    memset(&ctxt->regs[0], 0, regs_sz);
     for ( i = 0; i < num_counters; i++ )
         ctrl_regs[i] = ctrl_rsvd[i];
 }
@@ -211,27 +214,65 @@ static inline void context_load(struct vcpu *v)
     }
 }
 
-static void amd_vpmu_load(struct vcpu *v)
+static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
-    uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+    struct xen_pmu_amd_ctxt *ctxt;
+    uint64_t *ctrl_regs;
+    unsigned int i;
 
     vpmu_reset(vpmu, VPMU_FROZEN);
 
-    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+    if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
     {
-        unsigned int i;
+        ctxt = vpmu->context;
+        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
 
         for ( i = 0; i < num_counters; i++ )
             wrmsrl(ctrls[i], ctrl_regs[i]);
 
-        return;
+        return 0;
+    }
+
+    if ( from_guest )
+    {
+        unsigned int num_enabled = 0;
+        struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
+
+        ASSERT(!is_hvm_vcpu(v));
+
+        ctxt = vpmu->context;
+        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+        memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
+
+        for ( i = 0; i < num_counters; i++ )
+        {
+            if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
+            {
+                /*
+                 * Not necessary to re-init context since we should never load
+                 * it until guest provides valid values. But just to be safe.
+                 */
+                amd_vpmu_init_regs(ctxt);
+                return -EINVAL;
+            }
+
+            if ( is_pmu_enabled(ctrl_regs[i]) )
+                num_enabled++;
+        }
+
+        if ( num_enabled )
+            vpmu_set(vpmu, VPMU_RUNNING);
+        else
+            vpmu_reset(vpmu, VPMU_RUNNING);
     }
 
     vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
 
     context_load(v);
+
+    return 0;
 }
 
 static inline void context_save(struct vcpu *v)
@@ -246,22 +287,17 @@ static inline void context_save(struct vcpu *v)
         rdmsrl(counters[i], counter_regs[i]);
 }
 
-static int amd_vpmu_save(struct vcpu *v)
+static int amd_vpmu_save(struct vcpu *v,  bool_t to_guest)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
     unsigned int i;
 
-    /*
-     * Stop the counters. If we came here via vpmu_save_force (i.e.
-     * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
-     */
+    for ( i = 0; i < num_counters; i++ )
+        wrmsrl(ctrls[i], 0);
+
     if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
     {
         vpmu_set(vpmu, VPMU_FROZEN);
-
-        for ( i = 0; i < num_counters; i++ )
-            wrmsrl(ctrls[i], 0);
-
         return 0;
     }
 
@@ -274,6 +310,16 @@ static int amd_vpmu_save(struct vcpu *v)
          has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
         amd_vpmu_unset_msr_bitmap(v);
 
+    if ( to_guest )
+    {
+        struct xen_pmu_amd_ctxt *guest_ctxt, *ctxt;
+
+        ASSERT(!is_hvm_vcpu(v));
+        ctxt = vpmu->context;
+        guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
+        memcpy(&guest_ctxt->regs[0], &ctxt->regs[0], regs_sz);
+    }
+
     return 1;
 }
 
@@ -461,8 +507,7 @@ int svm_vpmu_initialise(struct vcpu *v)
     if ( !counters )
         return -EINVAL;
 
-    ctxt = xmalloc_bytes(sizeof(*ctxt) +
-                         2 * sizeof(uint64_t) * num_counters);
+    ctxt = xmalloc_bytes(sizeof(*ctxt) + regs_sz);
     if ( !ctxt )
     {
         printk(XENLOG_G_WARNING "Insufficient memory for PMU, "
@@ -478,6 +523,13 @@ int svm_vpmu_initialise(struct vcpu *v)
     vpmu->context = ctxt;
     vpmu->priv_context = NULL;
 
+    if ( !is_hvm_vcpu(v) )
+    {
+        /* Copy register offsets to shared area */
+        ASSERT(vpmu->xenpmu_data);
+        memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt, sizeof(*ctxt));
+    }
+
     vpmu->arch_vpmu_ops = &amd_vpmu_ops;
 
     vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
@@ -527,6 +579,8 @@ int __init amd_vpmu_init(void)
         ctrl_rsvd[i] &= CTRL_RSVD_MASK;
     }
 
+    regs_sz = 2 * sizeof(uint64_t) * num_counters;
+
     return 0;
 }
 
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c 
b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index 166277a..1206e90 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -90,6 +90,13 @@ static unsigned int __read_mostly arch_pmc_cnt, 
fixed_pmc_cnt;
 static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
 static uint64_t __read_mostly global_ovf_ctrl_mask;
 
+/* Total size of PMU registers block (copied to/from PV(H) guest) */
+static unsigned int __read_mostly regs_sz;
+/* Offset into context of the beginning of PMU register block */
+static const unsigned int regs_off =
+        sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
+        sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
+
 /*
  * QUIRK to workaround an issue on various family 6 cpus.
  * The issue leads to endless PMC interrupt loops on the processor.
@@ -312,7 +319,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
 }
 
-static int core2_vpmu_save(struct vcpu *v)
+static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
 
@@ -329,6 +336,13 @@ static int core2_vpmu_save(struct vcpu *v)
          has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
         core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
 
+    if ( to_guest )
+    {
+        ASSERT(!is_hvm_vcpu(v));
+        memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
+               vpmu->context + regs_off, regs_sz);
+    }
+
     return 1;
 }
 
@@ -365,16 +379,93 @@ static inline void __core2_vpmu_load(struct vcpu *v)
     }
 }
 
-static void core2_vpmu_load(struct vcpu *v)
+static int core2_vpmu_verify(struct vcpu *v)
+{
+    unsigned int i;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, 
fixed_counters);
+    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+    uint64_t fixed_ctrl;
+    uint64_t *priv_context = vpmu->priv_context;
+    uint64_t enabled_cntrs = 0;
+
+    if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
+        return -EINVAL;
+
+    fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
+    if ( fixed_ctrl & fixed_ctrl_mask )
+        return -EINVAL;
+
+    for ( i = 0; i < fixed_pmc_cnt; i++ )
+    {
+        if ( fixed_counters[i] & fixed_counters_mask )
+            return -EINVAL;
+        if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
+            enabled_cntrs |= (1ULL << i);
+    }
+    enabled_cntrs <<= 32;
+
+    for ( i = 0; i < arch_pmc_cnt; i++ )
+    {
+        uint64_t control = xen_pmu_cntr_pair[i].control;
+
+        if ( control & ARCH_CTRL_MASK )
+            return -EINVAL;
+        if ( control & ARCH_CNTR_ENABLED )
+            enabled_cntrs |= (1ULL << i);
+    }
+
+    if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) &&
+         !is_canonical_address(core2_vpmu_cxt->ds_area) )
+        return -EINVAL;
+
+    if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
+         (core2_vpmu_cxt->ds_area != 0) )
+        vpmu_set(vpmu, VPMU_RUNNING);
+    else
+        vpmu_reset(vpmu, VPMU_RUNNING);
+
+    *priv_context = enabled_cntrs;
+
+    return 0;
+}
+
+static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
 
     if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
-        return;
+        return 0;
+
+    if ( from_guest )
+    {
+        int ret;
+
+        ASSERT(!is_hvm_vcpu(v));
+
+        memcpy(vpmu->context + regs_off,
+               (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
+               regs_sz);
+
+        ret = core2_vpmu_verify(v);
+        if ( ret )
+        {
+            /*
+             * Not necessary since we should never load the context until
+             * guest provides valid values. But just to be safe.
+             */
+            memset(vpmu->context + regs_off, 0, regs_sz);
+            return ret;
+        }
+    }
 
     vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
 
     __core2_vpmu_load(v);
+
+    return 0;
 }
 
 static int core2_vpmu_alloc_resource(struct vcpu *v)
@@ -412,6 +503,13 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
     vpmu->context = core2_vpmu_cxt;
     vpmu->priv_context = p;
 
+    if ( !is_hvm_vcpu(v) )
+    {
+        /* Copy fixed/arch register offsets to shared area */
+        ASSERT(vpmu->xenpmu_data);
+        memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
+    }
+
     vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
 
     return 1;
@@ -923,6 +1021,10 @@ int __init core2_vpmu_init(void)
                              (((1ULL << fixed_pmc_cnt) - 1) << 32) |
                              ((1ULL << arch_pmc_cnt) - 1));
 
+    regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
+              sizeof(uint64_t) * fixed_pmc_cnt +
+              sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
+
     check_pmc_quirk();
 
     if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index 07fa368..37e541b 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -85,31 +85,56 @@ static void __init parse_vpmu_param(char *s)
 void vpmu_lvtpc_update(uint32_t val)
 {
     struct vpmu_struct *vpmu;
+    struct vcpu *curr = current;
 
-    if ( vpmu_mode == XENPMU_MODE_OFF )
+    if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
         return;
 
-    vpmu = vcpu_vpmu(current);
+    vpmu = vcpu_vpmu(curr);
 
     vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
-    apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+
+    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
+    if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
+         !vpmu_is_set(vpmu, VPMU_CACHED) )
+        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
 }
 
 int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
 {
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+    struct vcpu *curr = current;
+    struct vpmu_struct *vpmu;
 
     if ( vpmu_mode == XENPMU_MODE_OFF )
         return 0;
 
+    vpmu = vcpu_vpmu(curr);
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
-        return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
+    {
+        int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
+
+        /*
+         * We may have received a PMU interrupt during WRMSR handling
+         * and since do_wrmsr may load VPMU context we should save
+         * (and unload) it again.
+         */
+        if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
+             vpmu_is_set(vpmu, VPMU_CACHED) )
+        {
+            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+            vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
+            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+        }
+        return ret;
+    }
+
     return 0;
 }
 
 int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
 {
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+    struct vcpu *curr = current;
+    struct vpmu_struct *vpmu;
 
     if ( vpmu_mode == XENPMU_MODE_OFF )
     {
@@ -117,39 +142,184 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t 
*msr_content)
         return 0;
     }
 
+    vpmu = vcpu_vpmu(curr);
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
-        return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
+    {
+        int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
+
+        if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
+             vpmu_is_set(vpmu, VPMU_CACHED) )
+        {
+            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+            vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
+            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+        }
+        return ret;
+    }
     else
         *msr_content = 0;
 
     return 0;
 }
 
+static inline struct vcpu *choose_hwdom_vcpu(void)
+{
+    unsigned idx;
+
+    if ( hardware_domain->max_vcpus == 0 )
+        return NULL;
+
+    idx = smp_processor_id() % hardware_domain->max_vcpus;
+
+    return hardware_domain->vcpu[idx];
+}
+
 void vpmu_do_interrupt(struct cpu_user_regs *regs)
 {
-    struct vcpu *v = current;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct vcpu *sampled = current, *sampling;
+    struct vpmu_struct *vpmu;
+    struct vlapic *vlapic;
+    u32 vlapic_lvtpc;
 
-    if ( vpmu->arch_vpmu_ops )
+    /* dom0 will handle interrupt for special domains (e.g. idle domain) */
+    if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED )
+    {
+        sampling = choose_hwdom_vcpu();
+        if ( !sampling )
+            return;
+    }
+    else
+        sampling = sampled;
+
+    vpmu = vcpu_vpmu(sampling);
+    if ( !vpmu->arch_vpmu_ops )
+        return;
+
+    /* PV(H) guest */
+    if ( !is_hvm_vcpu(sampling) )
     {
-        struct vlapic *vlapic = vcpu_vlapic(v);
-        u32 vlapic_lvtpc;
+        const struct cpu_user_regs *cur_regs;
+        uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
+        domid_t domid = DOMID_SELF;
+
+        if ( !vpmu->xenpmu_data )
+            return;
+
+        if ( is_pvh_vcpu(sampling) &&
+             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
+            return;
 
-        if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
-             !is_vlapic_lvtpc_enabled(vlapic) )
+        if ( vpmu_is_set(vpmu, VPMU_CACHED) )
             return;
 
-        vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+        /* PV guest will be reading PMU MSRs from xenpmu_data */
+        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
+        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
 
-        switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
+        if ( has_hvm_container_vcpu(sampled) )
+            *flags = 0;
+        else
+            *flags = PMU_SAMPLE_PV;
+
+        /* Store appropriate registers in xenpmu_data */
+        /* FIXME: 32-bit PVH should go here as well */
+        if ( is_pv_32bit_vcpu(sampling) )
         {
-        case APIC_MODE_FIXED:
-            vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
-            break;
-        case APIC_MODE_NMI:
-            v->nmi_pending = 1;
-            break;
+            /*
+             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
+             * and therefore we treat it the same way as a non-privileged
+             * PV 32-bit domain.
+             */
+            struct compat_pmu_regs *cmp;
+
+            cur_regs = guest_cpu_user_regs();
+
+            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
+            cmp->ip = cur_regs->rip;
+            cmp->sp = cur_regs->rsp;
+            cmp->flags = cur_regs->eflags;
+            cmp->ss = cur_regs->ss;
+            cmp->cs = cur_regs->cs;
+            if ( (cmp->cs & 3) > 1 )
+                *flags |= PMU_SAMPLE_USER;
+        }
+        else
+        {
+            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
+
+            if ( (vpmu_mode & XENPMU_MODE_SELF) )
+                cur_regs = guest_cpu_user_regs();
+            else if ( !guest_mode(regs) && 
is_hardware_domain(sampling->domain) )
+            {
+                cur_regs = regs;
+                domid = DOMID_XEN;
+            }
+            else
+                cur_regs = guest_cpu_user_regs();
+
+            r->ip = cur_regs->rip;
+            r->sp = cur_regs->rsp;
+            r->flags = cur_regs->eflags;
+
+            if ( !has_hvm_container_vcpu(sampled) )
+            {
+                r->ss = cur_regs->ss;
+                r->cs = cur_regs->cs;
+                if ( !(sampled->arch.flags & TF_kernel_mode) )
+                    *flags |= PMU_SAMPLE_USER;
+            }
+            else
+            {
+                struct segment_register seg;
+
+                hvm_get_segment_register(sampled, x86_seg_cs, &seg);
+                r->cs = seg.sel;
+                hvm_get_segment_register(sampled, x86_seg_ss, &seg);
+                r->ss = seg.sel;
+                r->cpl = seg.attr.fields.dpl;
+                if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
+                    *flags |= PMU_SAMPLE_REAL;
+            }
         }
+
+        vpmu->xenpmu_data->domain_id = domid;
+        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
+        if ( is_hardware_domain(sampling->domain) )
+            vpmu->xenpmu_data->pcpu_id = smp_processor_id();
+        else
+            vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;
+
+        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
+        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+        *flags |= PMU_CACHED;
+        vpmu_set(vpmu, VPMU_CACHED);
+
+        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
+
+        return;
+    }
+
+    /* HVM guests */
+    vlapic = vcpu_vlapic(sampling);
+
+    /* We don't support (yet) HVM dom0 */
+    ASSERT(sampling == sampled);
+
+    if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
+         !is_vlapic_lvtpc_enabled(vlapic) )
+        return;
+
+    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+
+    switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
+    {
+    case APIC_MODE_FIXED:
+        vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
+        break;
+    case APIC_MODE_NMI:
+        sampling->nmi_pending = 1;
+        break;
     }
 }
 
@@ -174,7 +344,7 @@ static void vpmu_save_force(void *arg)
     vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
 
     if ( vpmu->arch_vpmu_ops )
-        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v);
+        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);
 
     vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
 
@@ -193,20 +363,20 @@ void vpmu_save(struct vcpu *v)
     per_cpu(last_vcpu, pcpu) = v;
 
     if ( vpmu->arch_vpmu_ops )
-        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
+        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
             vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
 
     apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
 }
 
-void vpmu_load(struct vcpu *v)
+int vpmu_load(struct vcpu *v, bool_t from_guest)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
     int pcpu = smp_processor_id();
     struct vcpu *prev = NULL;
 
     if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
-        return;
+        return 0;
 
     /* First time this VCPU is running here */
     if ( vpmu->last_pcpu != pcpu )
@@ -245,15 +415,26 @@ void vpmu_load(struct vcpu *v)
     local_irq_enable();
 
     /* Only when PMU is counting, we load PMU context immediately. */
-    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
-        return;
+    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
+         (!is_hvm_vcpu(vpmu_vcpu(vpmu)) && vpmu_is_set(vpmu, VPMU_CACHED)) )
+        return 0;
 
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
     {
+        int ret;
+
         apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
         /* Arch code needs to set VPMU_CONTEXT_LOADED */
-        vpmu->arch_vpmu_ops->arch_vpmu_load(v);
+        ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
+        if ( ret )
+        {
+            apic_write_around(APIC_LVTPC,
+                              vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
+            return ret;
+        }
     }
+
+    return 0;
 }
 
 void vpmu_initialise(struct vcpu *v)
@@ -265,6 +446,8 @@ void vpmu_initialise(struct vcpu *v)
 
     BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
     BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
+    BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
+    BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
 
     ASSERT(!vpmu->flags && !vpmu->context);
 
@@ -449,7 +632,10 @@ void vpmu_dump(struct vcpu *v)
 long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) 
arg)
 {
     int ret;
+    struct vcpu *curr;
     struct xen_pmu_params pmu_params = {.val = 0};
+    struct xen_pmu_data *xenpmu_data;
+    struct vpmu_struct *vpmu;
 
     if ( !opt_vpmu_enabled )
         return -EOPNOTSUPP;
@@ -552,6 +738,30 @@ long do_xenpmu_op(unsigned int op, 
XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
         pvpmu_finish(current->domain, &pmu_params);
         break;
 
+    case XENPMU_lvtpc_set:
+        xenpmu_data = current->arch.vpmu.xenpmu_data;
+        if ( xenpmu_data == NULL )
+            return -EINVAL;
+        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
+        break;
+
+    case XENPMU_flush:
+        curr = current;
+        vpmu = vcpu_vpmu(curr);
+        xenpmu_data = curr->arch.vpmu.xenpmu_data;
+        if ( xenpmu_data == NULL )
+            return -EINVAL;
+        xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
+        vpmu_reset(vpmu, VPMU_CACHED);
+        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
+        if ( vpmu_load(curr, 1) )
+        {
+            xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
+            vpmu_set(vpmu, VPMU_CACHED);
+            return -EIO;
+        }
+        break ;
+
     default:
         ret = -EINVAL;
     }
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 642a4b7..f486d2f 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -47,8 +47,8 @@ struct arch_vpmu_ops {
                      unsigned int *eax, unsigned int *ebx,
                      unsigned int *ecx, unsigned int *edx);
     void (*arch_vpmu_destroy)(struct vcpu *v);
-    int (*arch_vpmu_save)(struct vcpu *v);
-    void (*arch_vpmu_load)(struct vcpu *v);
+    int (*arch_vpmu_save)(struct vcpu *v, bool_t to_guest);
+    int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest);
     void (*arch_vpmu_dump)(const struct vcpu *);
 };
 
@@ -75,6 +75,8 @@ struct vpmu_struct {
 #define VPMU_CONTEXT_SAVE                   0x8   /* Force context save */
 #define VPMU_FROZEN                         0x10  /* Stop counters while VCPU 
is not running */
 #define VPMU_PASSIVE_DOMAIN_ALLOCATED       0x20
+/* PV(H) guests: VPMU registers are accessed by guest from shared page */
+#define VPMU_CACHED                         0x40
 
 static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
 {
@@ -107,7 +109,7 @@ void vpmu_do_cpuid(unsigned int input, unsigned int *eax, 
unsigned int *ebx,
 void vpmu_initialise(struct vcpu *v);
 void vpmu_destroy(struct vcpu *v);
 void vpmu_save(struct vcpu *v);
-void vpmu_load(struct vcpu *v);
+int vpmu_load(struct vcpu *v, bool_t from_guest);
 void vpmu_dump(struct vcpu *v);
 
 extern int acquire_pmu_ownership(int pmu_ownership);
@@ -126,7 +128,7 @@ static inline void vpmu_switch_from(struct vcpu *prev)
 static inline void vpmu_switch_to(struct vcpu *next)
 {
     if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
-        vpmu_load(next);
+        vpmu_load(next, 0);
 }
 
 #endif /* __ASM_X86_HVM_VPMU_H_*/
diff --git a/xen/include/public/arch-x86/pmu.h 
b/xen/include/public/arch-x86/pmu.h
index 4351115..1a53888 100644
--- a/xen/include/public/arch-x86/pmu.h
+++ b/xen/include/public/arch-x86/pmu.h
@@ -5,7 +5,10 @@
 
 /* AMD PMU registers and structures */
 struct xen_pmu_amd_ctxt {
-    /* Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd) */
+    /*
+     * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
+     * For PV(H) guests these fields are RO.
+     */
     uint32_t counters;
     uint32_t ctrls;
 
@@ -30,7 +33,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_cntr_pair_t);
 struct xen_pmu_intel_ctxt {
    /*
     * Offsets to fixed and architectural counter MSRs (relative to
-    * xen_pmu_arch.c.intel)
+    * xen_pmu_arch.c.intel).
+    * For PV(H) guests these fields are RO.
     */
     uint32_t fixed_counters;
     uint32_t arch_counters;
@@ -69,6 +73,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_regs_t);
 
 /* PMU flags */
 #define PMU_CACHED         (1<<0) /* PMU MSRs are cached in the context */
+#define PMU_SAMPLE_USER    (1<<1) /* Sample is from user or kernel mode */
+#define PMU_SAMPLE_REAL    (1<<2) /* Sample is from realmode */
+#define PMU_SAMPLE_PV      (1<<3) /* Sample from a PV guest */
 
 /*
  * Architecture-specific information describing state of the processor at
@@ -93,12 +100,34 @@ struct xen_pmu_arch {
     /* WO for hypervisor, RO for guest */
     uint64_t pmu_flags;
 
-    /* Placeholder for APIC LVTPC register */
-    uint64_t lvtpc_pad;
+    /*
+     * APIC LVTPC register.
+     * RW for both hypervisor and guest.
+     * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
+     * during XENPMU_flush or XENPMU_lvtpc_set.
+     */
+    union {
+        uint32_t lapic_lvtpc;
+        uint64_t pad;
+    } l;
+
+    /*
+     * Vendor-specific PMU registers.
+     * RW for both hypervisor and guest (see exceptions above).
+     * Guest's updates to this field are verified and then loaded by the
+     * hypervisor into hardware during XENPMU_flush
+     */
+    union {
+        struct xen_pmu_amd_ctxt amd;
+        struct xen_pmu_intel_ctxt intel;
 
-    /* Placeholder for vendor-specific PMU registers */
+        /*
+         * Padding for contexts (fixed parts only, does not include MSR banks
+         * that are specified by offsets)
+         */
 #define XENPMU_CTXT_PAD_SZ  128
-    uint64_t pmu_regs_pad[XENPMU_CTXT_PAD_SZ / 8];
+        uint8_t pad[XENPMU_CTXT_PAD_SZ];
+    } c;
 };
 typedef struct xen_pmu_arch xen_pmu_arch_t;
 DEFINE_XEN_GUEST_HANDLE(xen_pmu_arch_t);
diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
index e6307b5..7a45783 100644
--- a/xen/include/public/pmu.h
+++ b/xen/include/public/pmu.h
@@ -27,6 +27,8 @@
 #define XENPMU_feature_set     3
 #define XENPMU_init            4
 #define XENPMU_finish          5
+#define XENPMU_lvtpc_set       6
+#define XENPMU_flush           7 /* Write cached MSR values to HW     */
 /* ` } */
 
 /* Parameters structure for HYPERVISOR_xenpmu_op call */
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index 6456f72..37e6aa3 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -705,7 +705,9 @@ static XSM_INLINE int xsm_pmu_op (XSM_DEFAULT_ARG struct 
domain *d, int op)
     case XENPMU_feature_get:
         return xsm_default_action(XSM_PRIV, d, current->domain);
     case XENPMU_init:
-    case XENPMU_finish: 
+    case XENPMU_finish:
+    case XENPMU_lvtpc_set:
+    case XENPMU_flush:
         return xsm_default_action(XSM_HOOK, d, current->domain);
     default:
         return -EPERM;
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index aefcbda..4aa3e79 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1594,6 +1594,8 @@ static int flask_pmu_op (struct domain *d, unsigned int 
op)
                             XEN2__PMU_CTRL, NULL);
     case XENPMU_init:
     case XENPMU_finish:
+    case XENPMU_lvtpc_set:
+    case XENPMU_flush:
         return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2,
                             XEN2__PMU_USE, NULL);
     default:
-- 
1.8.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.