[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v6] xenpm: Add get-intel-temp subcommand



get-intel-temp allows querying the per-core and per-package CPU
temperatures on Intel processors (the usual Dom0 drivers cannot be used
because Dom0 vCPUs are not aligned with pCPUs).

Signed-off-by: Teddy Astie <teddy.astie@xxxxxxxxxx>
---
CC: Jan Beulich <jbeulich@xxxxxxxx>

v4: 
https://lore.kernel.org/xen-devel/cover.1766158766.git.teddy.astie@xxxxxxxxxx/
v5: Removed trailing whitespace.
v6: Report errors through errno and use strerror() to display them
---

 CHANGELOG.md       |   2 +
 tools/misc/xenpm.c | 131 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 132 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 18f3d10f20..d7fac4a8d0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,8 @@ The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.0.0/)
    - Support for Bus Lock Threshold on AMD Zen5 and later CPUs, used by Xen to
      mitigate (by rate-limiting) the system wide impact of an HVM guest
      misusing atomic instructions.
+   - Introduce get-intel-temp to xenpm to query CPU temperatures on Intel
+     platforms.

 ### Removed
  - On x86:
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index 682d092479..de490b6507 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -32,11 +32,14 @@

 #include <xen-tools/common-macros.h>

+#include <xen/asm/msr-index.h>
+
 #define MAX_PKG_RESIDENCIES 12
 #define MAX_CORE_RESIDENCIES 8

 static xc_interface *xc_handle;
 static unsigned int max_cpu_nr;
+static xc_physinfo_t physinfo;

 /* help message */
 void show_help(void)
@@ -93,6 +96,7 @@ void show_help(void)
             "                                           units default to 
\"us\" if unspecified.\n"
             "                                           truncates 
un-representable values.\n"
             "                                           0 lets the hardware 
decide.\n"
+            " get-intel-temp        [cpuid]       get Intel CPU temperature of 
<cpuid> or all\n"
             " start [seconds]                     start collect Cx/Px 
statistics,\n"
             "                                     output after CTRL-C or 
SIGINT or several seconds.\n"
             " enable-turbo-mode     [cpuid]       enable Turbo Mode for 
processors that support it.\n"
@@ -1354,6 +1358,131 @@ void enable_turbo_mode(int argc, char *argv[])
                 errno, strerror(errno));
 }

+/*
+ * Read the digital thermal sensor of @cpu and convert the reading into a
+ * temperature.
+ *
+ * Two MSRs are requested in a single xc_resource_op() call: the thermal
+ * status register (package-wide if @package is true, per-core otherwise) and
+ * MSR_TEMPERATURE_TARGET. The status register holds an offset below tjmax,
+ * so the result is computed as tjmax - offset.
+ *
+ * Returns 0 and stores the temperature in *temp on success. On failure,
+ * returns -1 with errno set to a (positive) error code, suitable for
+ * strerror().
+ */
+static int fetch_dts_temp(xc_interface *xch, uint32_t cpu, bool package,
+                          int *temp)
+{
+    xc_resource_entry_t entries[] = {
+        { .idx = package ? MSR_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS },
+        { .idx = MSR_TEMPERATURE_TARGET },
+    };
+    struct xc_resource_op ops = {
+        .cpu = cpu,
+        .entries = entries,
+        .nr_entries = ARRAY_SIZE(entries),
+    };
+    int tjmax;
+
+    /* A positive return value is the number of entries that were read. */
+    int ret = xc_resource_op(xch, 1, &ops);
+
+    switch ( ret )
+    {
+    case -1:
+        /* xc_resource_op returns -1 in out of memory scenarios */
+        errno = ENOMEM;
+        return -1;
+
+    case 0:
+        /* This CPU isn't online or can't query this MSR */
+        errno = ENODATA;
+        return -1;
+
+    case 1:
+    {
+        /*
+         * The CPU doesn't support MSR_TEMPERATURE_TARGET, we assume it's 100
+         * which is correct aside a few selected Atom CPUs. Check Linux
+         * kernel's coretemp.c for more information.
+         */
+        static bool has_reported_once = false;
+
+        /* Warn only once, this function is called for every CPU. */
+        if ( !has_reported_once )
+        {
+            fprintf(stderr, "MSR_TEMPERATURE_TARGET is not supported, assume "
+                            "tjmax = 100, readings may be incorrect.\n");
+            has_reported_once = true;
+        }
+
+        tjmax = 100;
+        break;
+    }
+
+    case 2:
+        /* Bits 23:16 of MSR_TEMPERATURE_TARGET hold tjmax. */
+        tjmax = (entries[1].val >> 16) & 0xff;
+        break;
+
+    default:
+        if ( ret > 0 )
+        {
+            fprintf(stderr,
+                    "Got unexpected xc_resource_op return value: %d\n", ret);
+            errno = EINVAL;
+        }
+        else
+            /*
+             * Other negative values are presumably negated error codes;
+             * errno itself must always be positive.
+             */
+            errno = -ret;
+        return -1;
+    }
+
+    /* The status register reports how far below tjmax the sensor is. */
+    *temp = tjmax - ((entries[0].val >> 16) & 0xff);
+    return 0;
+}
+
+/*
+ * "get-intel-temp [cpuid]" subcommand.
+ *
+ * With a cpuid argument, print the core temperature of that CPU only.
+ * Without one, print one package temperature per socket followed by one
+ * core temperature per core. Failures are reported on stderr; "No data" is
+ * printed on stdout when no reading could be obtained.
+ */
+static void get_intel_temp(int argc, char *argv[])
+{
+    int temp = -1, cpu = -1;
+    unsigned int socket;
+    /*
+     * Loop strides. Fall back to 1 if the topology information is missing,
+     * as a zero stride would make the loops below spin forever.
+     */
+    unsigned int threads_per_core =
+        physinfo.threads_per_core ? physinfo.threads_per_core : 1;
+    unsigned int cores_per_socket =
+        physinfo.cores_per_socket ? physinfo.cores_per_socket : 1;
+    unsigned int cpus_per_socket = cores_per_socket * threads_per_core;
+    bool has_data = false;
+
+    if ( argc > 0 )
+        parse_cpuid(argv[0], &cpu);
+
+    if ( cpu != -1 )
+    {
+        if ( !fetch_dts_temp(xc_handle, cpu, false, &temp) )
+            printf("CPU%d: %d°C\n", cpu, temp);
+        else
+        {
+            fprintf(stderr, "Unable to fetch temperature (%d - %s)\n",
+                    errno, strerror(errno));
+            printf("No data\n");
+        }
+        return;
+    }
+
+    /* Per socket measurement, queried through the first CPU of each socket */
+    for ( socket = 0, cpu = 0; cpu < max_cpu_nr;
+          socket++, cpu += cpus_per_socket )
+    {
+        if ( fetch_dts_temp(xc_handle, cpu, true, &temp) )
+        {
+            fprintf(stderr,
+                    "[Package%u] Unable to fetch temperature (%d - %s)\n",
+                    socket, errno, strerror(errno));
+            continue;
+        }
+
+        has_data = true;
+        printf("Package%u: %d°C\n", socket, temp);
+    }
+
+    if ( has_data )
+        /* Avoid inserting a trailing line if we have nothing */
+        printf("\n");
+
+    /* Per core measurement, queried through the first thread of each core */
+    for ( cpu = 0; cpu < max_cpu_nr; cpu += threads_per_core )
+    {
+        if ( fetch_dts_temp(xc_handle, cpu, false, &temp) )
+        {
+            fprintf(stderr, "[CPU%d] Unable to fetch temperature (%d - %s)\n",
+                    cpu, errno, strerror(errno));
+            continue;
+        }
+
+        has_data = true;
+        printf("CPU%d: %d°C\n", cpu, temp);
+    }
+
+    if ( !has_data )
+        printf("No data\n");
+}
+
 void disable_turbo_mode(int argc, char *argv[])
 {
     int cpuid = -1;
@@ -1618,12 +1747,12 @@ struct {
     { "set-max-cstate", set_max_cstate_func},
     { "enable-turbo-mode", enable_turbo_mode },
     { "disable-turbo-mode", disable_turbo_mode },
+    { "get-intel-temp", get_intel_temp },
 };

 int main(int argc, char *argv[])
 {
     int i, ret = 0;
-    xc_physinfo_t physinfo;
     int nr_matches = 0;
     int matches_main_options[ARRAY_SIZE(main_options)];

--
2.53.0



--
 | Vates

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech





 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.