[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v11 for-xen-4.5 16/20] x86/VPMU: Handle PMU interrupts for PV guests



>  int vpmu_do_interrupt(struct cpu_user_regs *regs)
>  {
> -    struct vcpu *v = current;
> -    struct vpmu_struct *vpmu = vcpu_vpmu(v);
> +    struct vcpu *sampled = current, *sampling;
> +    struct vpmu_struct *vpmu;
> +
> +    /* dom0 will handle interrupt for special domains (e.g. idle domain) */
> +    if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED )
> +    {
> +        sampling = choose_hwdom_vcpu();
> +        if ( !sampling )
> +            return 0;
> +    }
> +    else
> +        sampling = sampled;
> +
> +    vpmu = vcpu_vpmu(sampling);
> +    if ( !is_hvm_domain(sampling->domain) )
> +    {
> +        /* PV(H) guest */
> +        const struct cpu_user_regs *cur_regs;
> +
> +        if ( !vpmu->xenpmu_data )
> +            return 0;
> +
> +        if ( vpmu->xenpmu_data->pmu_flags & PMU_CACHED )
> +            return 1;
> +
> +        if ( is_pvh_domain(sampled->domain) &&
> +             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
> +            return 0;
> +
> +        /* PV guest will be reading PMU MSRs from xenpmu_data */

OK, with that nice comment I was thinking that:
> +        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling);

would be the one that writes the values to pmu.r.regs, but so far
in this patchset that is not the case. It does write to the pmu.c (contexts)
but that is not ..

> +        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +
> +        /* Store appropriate registers in xenpmu_data */
> +        if ( is_pv_32bit_domain(sampling->domain) )
> +        {
> +            /*
> +             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
> +             * and therefore we treat it the same way as a non-privileged
> +             * PV 32-bit domain.
> +             */
> +            struct compat_pmu_regs *cmp;
> +
> +            cur_regs = guest_cpu_user_regs();
> +
> +            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;

.. what we read from here. Did I miss a patch?

> +            cmp->eip = cur_regs->rip;
> +            cmp->esp = cur_regs->rsp;
> +            cmp->cs = cur_regs->cs;
> +            if ( (cmp->cs & 3) == 1 )
> +                cmp->cs &= ~3;
> +        }
> +        else
> +        {
> +            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;

Ditto here.

Perhaps a comment stating _who_ (or _what_) is responsible for populating
the 'pmu.r.regs' structure?

> +
> +            /* Non-privileged domains are always in XENPMU_MODE_SELF mode */
> +            if ( (vpmu_mode & XENPMU_MODE_SELF) ||
> +                 (!is_hardware_domain(sampled->domain) &&
> +                  !is_idle_vcpu(sampled)) )
> +                cur_regs = guest_cpu_user_regs();
> +            else
> +                cur_regs = regs;
> +
> +            r->rip = cur_regs->rip;
> +            r->rsp = cur_regs->rsp;
> +
> +            if ( !is_pvh_domain(sampled->domain) )
> +            {
> +                r->cs = cur_regs->cs;
> +                if ( sampled->arch.flags & TF_kernel_mode )
> +                    r->cs &= ~3;
> +            }
> +            else
> +            {
> +                struct segment_register seg_cs;
> +
> +                hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
> +                r->cs = seg_cs.sel;
> +            }
> +        }
> +
> +        vpmu->xenpmu_data->domain_id = DOMID_SELF;
> +        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
> +        vpmu->xenpmu_data->pcpu_id = smp_processor_id();
> +
> +        vpmu->xenpmu_data->pmu_flags |= PMU_CACHED;
> +        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;

Right, especially as core2_vpmu_do_interrupt (which is called earlier)
does:

  vpmu->hw_lapic_lvtpc = apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED;            
  apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);

So here we mask it

> +        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);

Hm, so for Intel we would do _two_ apic_writes ?

Anyhow,  we mask it here..
.. snip..
>      case XENPMU_lvtpc_set:
> -        if ( current->arch.vpmu.xenpmu_data == NULL )
> +        curr = current;
> +        if ( curr->arch.vpmu.xenpmu_data == NULL )
>              return -EINVAL;
> -        vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> +        vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> +        ret = 0;
> +        break;
> +
> +    case XENPMU_flush:
> +        curr = current;
> +        curr->arch.vpmu.xenpmu_data->pmu_flags &= ~PMU_CACHED;
> +        vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);

And this code does:

+void vpmu_lvtpc_update(uint32_t val)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
+    apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+}
+

And we over-write vpmu->hw_lapic_lvtpc with the new value. The 'val'
here is important as it may have APIC_LVT_MASKED or not. We get that
from ' curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtp ' but I am
not seeing who sets it. I was thinking the XENPMU_lvtpc_set but that
calls vpmu_lvtpc_update which will update the vpmu->hw_lapic_lvtpc 
with the APIC_LVT_MASKED if set.

So who sets it?

Anyhow, with it being zero, that means the APIC_LVT_MASKED is unset, so
interrupts are free to go, and here:

> +        vpmu_load(curr);

We do another 'apic_write_around' with the hw_lapic_lvtpc value. So two
apci_writes to unmask it. (Or mask it back if the l.lapic_lvtpc has
APIC_LVT_MASKED set).

Should the XENPMU_flush check if:

 - The PMU_CACHED bit is set (and if not then -EAGAIN?)
 - The APIC_LVT_MASKED was set (and if so, then unmaks it). If it is
   not set, then no need to do apic_write?

?
>          ret = 0;
>          break;
>      }
> diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
> index 68a5fb8..a1886a5 100644
> --- a/xen/include/public/pmu.h
> +++ b/xen/include/public/pmu.h
> @@ -28,6 +28,7 @@
>  #define XENPMU_init            4
>  #define XENPMU_finish          5
>  #define XENPMU_lvtpc_set       6
> +#define XENPMU_flush           7 /* Write cached MSR values to HW     */
>  /* ` } */
>  
>  /* Parameters structure for HYPERVISOR_xenpmu_op call */
> @@ -61,6 +62,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t);
>   */
>  #define XENPMU_FEATURE_INTEL_BTS  1
>  
> +/*
> + * PMU MSRs are cached in the context so the PV guest doesn't need to trap to
> + * the hypervisor
> + */
> +#define PMU_CACHED 1
> +
>  /* Shared between hypervisor and PV domain */
>  struct xen_pmu_data {
>      uint32_t domain_id;
> -- 
> 1.8.1.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.