[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/3] x86/idle: update to include further package/core residency MSRs






On Wed, Mar 5, 2014 at 10:37 AM, Jan Beulich <JBeulich@xxxxxxxx> wrote:
With the number of these growing it becomes increasingly desirable to
not repeatedly alter the sysctl interface to accommodate them. Replace
the explicit listing of numbered states by arrays, unused fields of
which will remain untouched by the hypercall.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

Acked-by: Keir Fraser <keir@xxxxxxx>
 

--- 2014-02-13.orig/tools/libxc/xc_pm.c 2014-03-04 17:43:06.000000000 +0100
+++ 2014-02-13/tools/libxc/xc_pm.c      2014-03-05 08:54:58.000000000 +0100
@@ -123,46 +123,90 @@ int xc_pm_get_max_cx(xc_interface *xch,

 int xc_pm_get_cxstat(xc_interface *xch, int cpuid, struct xc_cx_stat *cxpt)
 {
-    DECLARE_SYSCTL;
-    DECLARE_NAMED_HYPERCALL_BOUNCE(triggers, cxpt->triggers, 0, XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
-    DECLARE_NAMED_HYPERCALL_BOUNCE(residencies, cxpt->residencies, 0, XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
+    uint64_t pc[7], cc[7];
+    struct xc_cx_stat_v2 cxpt2 = {
+        .triggers = cxpt->triggers,
+        .residencies = cxpt->residencies,
+        .nr_pc = sizeof(pc) / sizeof(*pc),
+        .nr_cc = sizeof(cc) / sizeof(*cc),
+        .pc = pc,
+        .cc = cc
+    };
     int max_cx, ret;

     if( !cxpt->triggers || !cxpt->residencies )
         return -EINVAL;

     if ( (ret = xc_pm_get_max_cx(xch, cpuid, &max_cx)) )
-        goto unlock_0;
+        return ret;

-    HYPERCALL_BOUNCE_SET_SIZE(triggers, max_cx * sizeof(uint64_t));
-    HYPERCALL_BOUNCE_SET_SIZE(residencies, max_cx * sizeof(uint64_t));
+    cxpt2.nr = max_cx;
+    ret = xc_pm_get_cx_stat(xch, cpuid, &cxpt2);
+
+    cxpt->nr = cxpt2.nr;
+    cxpt->last = cxpt2.last;
+    cxpt->idle_time = cxpt2.idle_time;
+    cxpt->pc2 = pc[1];
+    cxpt->pc3 = pc[2];
+    cxpt->pc6 = pc[5];
+    cxpt->pc7 = pc[6];
+    cxpt->cc3 = cc[2];
+    cxpt->cc6 = cc[5];
+    cxpt->cc7 = cc[6];
+
+    return ret;
+}
+
+int xc_pm_get_cx_stat(xc_interface *xch, int cpuid, struct xc_cx_stat_v2 *cxpt)
+{
+    DECLARE_SYSCTL;
+    DECLARE_NAMED_HYPERCALL_BOUNCE(triggers, cxpt->triggers,
+                                   cxpt->nr * sizeof(*cxpt->triggers),
+                                   XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    DECLARE_NAMED_HYPERCALL_BOUNCE(residencies, cxpt->residencies,
+                                   cxpt->nr * sizeof(*cxpt->residencies),
+                                   XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    DECLARE_NAMED_HYPERCALL_BOUNCE(pc, cxpt->pc,
+                                   cxpt->nr_pc * sizeof(*cxpt->pc),
+                                   XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    DECLARE_NAMED_HYPERCALL_BOUNCE(cc, cxpt->cc,
+                                   cxpt->nr_cc * sizeof(*cxpt->cc),
+                                   XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    int ret = -1;

-    ret = -1;
     if ( xc_hypercall_bounce_pre(xch, triggers) )
         goto unlock_0;
     if ( xc_hypercall_bounce_pre(xch, residencies) )
         goto unlock_1;
+    if ( xc_hypercall_bounce_pre(xch, pc) )
+        goto unlock_2;
+    if ( xc_hypercall_bounce_pre(xch, cc) )
+        goto unlock_3;

     sysctl.cmd = XEN_SYSCTL_get_pmstat;
     sysctl.u.get_pmstat.type = PMSTAT_get_cxstat;
     sysctl.u.get_pmstat.cpuid = cpuid;
+    sysctl.u.get_pmstat.u.getcx.nr = cxpt->nr;
+    sysctl.u.get_pmstat.u.getcx.nr_pc = cxpt->nr_pc;
+    sysctl.u.get_pmstat.u.getcx.nr_cc = cxpt->nr_cc;
     set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.triggers, triggers);
     set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.residencies, residencies);
+    set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.pc, pc);
+    set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.cc, cc);

     if ( (ret = xc_sysctl(xch, &sysctl)) )
-        goto unlock_2;
+        goto unlock_4;

     cxpt->nr = sysctl.u.get_pmstat.u.getcx.nr;
     cxpt->last = sysctl.u.get_pmstat.u.getcx.last;
     cxpt->idle_time = sysctl.u.get_pmstat.u.getcx.idle_time;
-    cxpt->pc2 = sysctl.u.get_pmstat.u.getcx.pc2;
-    cxpt->pc3 = sysctl.u.get_pmstat.u.getcx.pc3;
-    cxpt->pc6 = sysctl.u.get_pmstat.u.getcx.pc6;
-    cxpt->pc7 = sysctl.u.get_pmstat.u.getcx.pc7;
-    cxpt->cc3 = sysctl.u.get_pmstat.u.getcx.cc3;
-    cxpt->cc6 = sysctl.u.get_pmstat.u.getcx.cc6;
-    cxpt->cc7 = sysctl.u.get_pmstat.u.getcx.cc7;
+    cxpt->nr_pc = sysctl.u.get_pmstat.u.getcx.nr_pc;
+    cxpt->nr_cc = sysctl.u.get_pmstat.u.getcx.nr_cc;

+unlock_4:
+    xc_hypercall_bounce_post(xch, cc);
+unlock_3:
+    xc_hypercall_bounce_post(xch, pc);
 unlock_2:
     xc_hypercall_bounce_post(xch, residencies);
 unlock_1:
--- 2014-02-13.orig/tools/libxc/xenctrl.h       2014-03-04 17:43:06.000000000 +0100
+++ 2014-02-13/tools/libxc/xenctrl.h    2014-03-04 17:50:49.000000000 +0100
@@ -1934,7 +1934,7 @@ int xc_pm_get_max_px(xc_interface *xch,
 int xc_pm_get_pxstat(xc_interface *xch, int cpuid, struct xc_px_stat *pxpt);
 int xc_pm_reset_pxstat(xc_interface *xch, int cpuid);

-struct xc_cx_stat {
+struct xc_cx_stat { /* DEPRECATED (use v2 below instead)! */
     uint32_t nr;    /* entry nr in triggers & residencies, including C0 */
     uint32_t last;         /* last Cx state */
     uint64_t idle_time;    /* idle time from boot */
@@ -1950,8 +1950,22 @@ struct xc_cx_stat {
 };
 typedef struct xc_cx_stat xc_cx_stat_t;

+struct xc_cx_stat_v2 {
+    uint32_t nr;           /* entry nr in triggers[]/residencies[], incl C0 */
+    uint32_t last;         /* last Cx state */
+    uint64_t idle_time;    /* idle time from boot */
+    uint64_t *triggers;    /* Cx trigger counts */
+    uint64_t *residencies; /* Cx residencies */
+    uint32_t nr_pc;        /* entry nr in pc[] */
+    uint32_t nr_cc;        /* entry nr in cc[] */
+    uint64_t *pc;          /* 1-biased indexing (i.e. excl C0) */
+    uint64_t *cc;          /* 1-biased indexing (i.e. excl C0) */
+};
+typedef struct xc_cx_stat_v2 xc_cx_stat_v2_t;
+
 int xc_pm_get_max_cx(xc_interface *xch, int cpuid, int *max_cx);
 int xc_pm_get_cxstat(xc_interface *xch, int cpuid, struct xc_cx_stat *cxpt);
+int xc_pm_get_cx_stat(xc_interface *xch, int cpuid, struct xc_cx_stat_v2 *);
 int xc_pm_reset_cxstat(xc_interface *xch, int cpuid);

 int xc_cpu_online(xc_interface *xch, int cpu);
--- 2014-02-13.orig/xen/arch/x86/acpi/cpu_idle.c        2014-03-04 17:43:06.000000000 +0100
+++ 2014-02-13/xen/arch/x86/acpi/cpu_idle.c     2014-03-04 17:38:39.000000000 +0100
@@ -62,13 +62,17 @@

 #define GET_HW_RES_IN_NS(msr, val) \
     do { rdmsrl(msr, val); val = tsc_ticks2ns(val); } while( 0 )
-#define GET_PC2_RES(val)  GET_HW_RES_IN_NS(0x60D, val) /* SNB only */
+#define GET_PC2_RES(val)  GET_HW_RES_IN_NS(0x60D, val) /* SNB onwards */
 #define GET_PC3_RES(val)  GET_HW_RES_IN_NS(0x3F8, val)
 #define GET_PC6_RES(val)  GET_HW_RES_IN_NS(0x3F9, val)
 #define GET_PC7_RES(val)  GET_HW_RES_IN_NS(0x3FA, val)
+#define GET_PC8_RES(val)  GET_HW_RES_IN_NS(0x630, val) /* some Haswells only */
+#define GET_PC9_RES(val)  GET_HW_RES_IN_NS(0x631, val) /* some Haswells only */
+#define GET_PC10_RES(val) GET_HW_RES_IN_NS(0x632, val) /* some Haswells only */
+#define GET_CC1_RES(val)  GET_HW_RES_IN_NS(0x660, val) /* Silvermont only */
 #define GET_CC3_RES(val)  GET_HW_RES_IN_NS(0x3FC, val)
 #define GET_CC6_RES(val)  GET_HW_RES_IN_NS(0x3FD, val)
-#define GET_CC7_RES(val)  GET_HW_RES_IN_NS(0x3FE, val) /* SNB only */
+#define GET_CC7_RES(val)  GET_HW_RES_IN_NS(0x3FE, val) /* SNB onwards */

 static void lapic_timer_nop(void) { }
 void (*__read_mostly lapic_timer_off)(void);
@@ -111,8 +115,13 @@ struct hw_residencies
 {
     uint64_t pc2;
     uint64_t pc3;
+    uint64_t pc4;
     uint64_t pc6;
     uint64_t pc7;
+    uint64_t pc8;
+    uint64_t pc9;
+    uint64_t pc10;
+    uint64_t cc1;
     uint64_t cc3;
     uint64_t cc6;
     uint64_t cc7;
@@ -128,6 +137,12 @@ static void do_get_hw_residencies(void *

     switch ( c->x86_model )
     {
+    /* 4th generation Intel Core (Haswell) */
+    case 0x45:
+        GET_PC8_RES(hw_res->pc8);
+        GET_PC9_RES(hw_res->pc9);
+        GET_PC10_RES(hw_res->pc10);
+        /* fall through */
     /* Sandy bridge */
     case 0x2A:
     case 0x2D:
@@ -137,7 +152,6 @@ static void do_get_hw_residencies(void *
     /* Haswell */
     case 0x3C:
     case 0x3F:
-    case 0x45:
     case 0x46:
     /* future */
     case 0x3D:
@@ -160,6 +174,22 @@ static void do_get_hw_residencies(void *
         GET_CC3_RES(hw_res->cc3);
         GET_CC6_RES(hw_res->cc6);
         break;
+    /* various Atoms */
+    case 0x27:
+        GET_PC3_RES(hw_res->pc2); /* abusing GET_PC3_RES */
+        GET_PC6_RES(hw_res->pc4); /* abusing GET_PC6_RES */
+        GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */
+        break;
+    /* Silvermont */
+    case 0x37:
+    case 0x4A:
+    case 0x4D:
+    case 0x5A:
+    case 0x5D:
+        GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */
+        GET_CC1_RES(hw_res->cc1);
+        GET_CC6_RES(hw_res->cc6);
+        break;
     }
 }

@@ -179,10 +209,16 @@ static void print_hw_residencies(uint32_

     get_hw_residencies(cpu, &hw_res);

-    printk("PC2[%"PRId64"] PC3[%"PRId64"] PC6[%"PRId64"] PC7[%"PRId64"]\n",
-           hw_res.pc2, hw_res.pc3, hw_res.pc6, hw_res.pc7);
-    printk("CC3[%"PRId64"] CC6[%"PRId64"] CC7[%"PRId64"]\n",
-           hw_res.cc3, hw_res.cc6,hw_res.cc7);
+    printk("PC2[%"PRIu64"] PC%d[%"PRIu64"] PC6[%"PRIu64"] PC7[%"PRIu64"]\n",
+           hw_res.pc2,
+           hw_res.pc4 ? 4 : 3, hw_res.pc4 ?: hw_res.pc3,
+           hw_res.pc6, hw_res.pc7);
+    if ( hw_res.pc8 | hw_res.pc9 | hw_res.pc10 )
+        printk("PC8[%"PRIu64"] PC9[%"PRIu64"] PC10[%"PRIu64"]\n",
+               hw_res.pc8, hw_res.pc9, hw_res.pc10);
+    printk("CC%d[%"PRIu64"] CC6[%"PRIu64"] CC7[%"PRIu64"]\n",
+           hw_res.cc1 ? 1 : 3, hw_res.cc1 ?: hw_res.cc3,
+           hw_res.cc6, hw_res.cc7);
 }

 static char* acpi_cstate_method_name[] =
@@ -1097,19 +1133,21 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
     struct acpi_processor_power *power = processor_powers[cpuid];
     uint64_t idle_usage = 0, idle_res = 0;
     uint64_t usage[ACPI_PROCESSOR_MAX_POWER], res[ACPI_PROCESSOR_MAX_POWER];
-    int i;
-    struct hw_residencies hw_res;
+    unsigned int i, nr, nr_pc = 0, nr_cc = 0;

     if ( power == NULL )
     {
         stat->last = 0;
         stat->nr = 0;
         stat->idle_time = 0;
+        stat->nr_pc = 0;
+        stat->nr_cc = 0;
         return 0;
     }

     stat->last = power->last_state ? power->last_state->idx : 0;
     stat->idle_time = get_cpu_idle_time(cpuid);
+    nr = min(stat->nr, power->count);

     /* mimic the stat when detail info hasn't been registered by dom0 */
     if ( pm_idle_save == NULL )
@@ -1118,14 +1156,14 @@ int pmstat_get_cx_stat(uint32_t cpuid, s

         usage[1] = idle_usage = 1;
         res[1] = idle_res = stat->idle_time;
-
-        memset(&hw_res, 0, sizeof(hw_res));
     }
     else
     {
+        struct hw_residencies hw_res;
+
         stat->nr = power->count;

-        for ( i = 1; i < power->count; i++ )
+        for ( i = 1; i < nr; i++ )
         {
             spin_lock_irq(&power->stat_lock);
             usage[i] = power->states[i].usage;
@@ -1137,22 +1175,42 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
         }

         get_hw_residencies(cpuid, &hw_res);
+
+#define PUT_xC(what, n) do { \
+        if ( stat->nr_##what >= n && \
+             copy_to_guest_offset(stat->what, n - 1, &hw_res.what##n, 1) ) \
+            return -EFAULT; \
+        if ( hw_res.what##n ) \
+            nr_##what = n; \
+    } while ( 0 )
+#define PUT_PC(n) PUT_xC(pc, n)
+        PUT_PC(2);
+        PUT_PC(3);
+        PUT_PC(4);
+        PUT_PC(6);
+        PUT_PC(7);
+        PUT_PC(8);
+        PUT_PC(9);
+        PUT_PC(10);
+#undef PUT_PC
+#define PUT_CC(n) PUT_xC(cc, n)
+        PUT_CC(1);
+        PUT_CC(3);
+        PUT_CC(6);
+        PUT_CC(7);
+#undef PUT_CC
+#undef PUT_xC
     }

     usage[0] = idle_usage;
     res[0] = NOW() - idle_res;

-    if ( copy_to_guest(stat->triggers, usage, stat->nr) ||
-         copy_to_guest(stat->residencies, res, stat->nr) )
+    if ( copy_to_guest(stat->triggers, usage, nr) ||
+         copy_to_guest(stat->residencies, res, nr) )
         return -EFAULT;

-    stat->pc2 = hw_res.pc2;
-    stat->pc3 = hw_res.pc3;
-    stat->pc6 = hw_res.pc6;
-    stat->pc7 = hw_res.pc7;
-    stat->cc3 = hw_res.cc3;
-    stat->cc6 = hw_res.cc6;
-    stat->cc7 = hw_res.cc7;
+    stat->nr_pc = nr_pc;
+    stat->nr_cc = nr_cc;

     return 0;
 }
--- 2014-02-13.orig/xen/include/public/sysctl.h 2014-03-04 17:43:06.000000000 +0100
+++ 2014-02-13/xen/include/public/sysctl.h      2014-03-04 17:34:15.000000000 +0100
@@ -34,7 +34,7 @@
 #include "xen.h"
 #include "domctl.h"

-#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000A
+#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000B

 /*
  * Read console content from Xen buffer ring.
@@ -226,13 +226,10 @@ struct pm_cx_stat {
     uint64_aligned_t idle_time;                 /* idle time from boot */
     XEN_GUEST_HANDLE_64(uint64) triggers;    /* Cx trigger counts */
     XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */
-    uint64_aligned_t pc2;
-    uint64_aligned_t pc3;
-    uint64_aligned_t pc6;
-    uint64_aligned_t pc7;
-    uint64_aligned_t cc3;
-    uint64_aligned_t cc6;
-    uint64_aligned_t cc7;
+    uint32_t nr_pc;                          /* entry nr in pc[] */
+    uint32_t nr_cc;                          /* entry nr in cc[] */
+    XEN_GUEST_HANDLE_64(uint64) pc;          /* 1-biased indexing */
+    XEN_GUEST_HANDLE_64(uint64) cc;          /* 1-biased indexing */
 };

 struct xen_sysctl_get_pmstat {



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.