[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[XenPPC] Re: [Xen-devel] [PATCH] nr_cpus calculation problem due to incorrect sockets_per_node



Hi. I'm wondering whether this patch has been reviewed and could be considered for inclusion in 3.2. Sorry about the late request; you were asking for input last week.

beth kon wrote:

Testing on an 8-node 128-way NUMA machine has exposed a problem with Xen's nr_cpus calculation. In this case, since Xen cuts off recognized CPUs at 32, the machine appears to have 16 CPUs on the first and second nodes and none on the remaining nodes. Given this asymmetry, the calculation of sockets_per_node (which is later used to calculate nr_cpus) is incorrect:

pi->sockets_per_node = num_online_cpus() /(num_online_nodes() * pi->cores_per_socket * pi->threads_per_core);

The most straightforward solution is to remove sockets_per_node, and instead determine nr_cpus directly from num_online_cpus.

This patch has been tested on x86_64 NUMA machines.

------------------------------------------------------------------------

diff -r b4278beaf354 docs/man/xm.pod.1
--- a/docs/man/xm.pod.1 Wed Oct 17 13:12:03 2007 +0100
+++ b/docs/man/xm.pod.1 Wed Oct 17 20:09:46 2007 -0700
@@ -446,7 +446,6 @@ page more readable):
 machine                : i686
 nr_cpus                : 2
 nr_nodes               : 1
- sockets_per_node       : 2
 cores_per_socket       : 1
 threads_per_core       : 1
 cpu_mhz                : 696
diff -r b4278beaf354 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Oct 17 20:09:46 2007 -0700
@@ -721,7 +721,7 @@ static PyObject *pyxc_physinfo(XcObject
                            "max_cpu_id",       info.max_cpu_id,
                            "threads_per_core", info.threads_per_core,
                            "cores_per_socket", info.cores_per_socket,
-                            "sockets_per_node", info.sockets_per_node,
+                            "nr_cpus",          info.nr_cpus,
                            "total_memory",     pages_to_kib(info.total_pages),
                            "free_memory",      pages_to_kib(info.free_pages),
                            "scrub_memory",     pages_to_kib(info.scrub_pages),
diff -r b4278beaf354 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/python/xen/xend/XendNode.py Wed Oct 17 20:09:46 2007 -0700
@@ -475,7 +475,7 @@ class XendNode:

        cpu_info = {
            "nr_nodes":         phys_info["nr_nodes"],
-            "sockets_per_node": phys_info["sockets_per_node"],
+            "nr_cpus":          phys_info["nr_cpus"],
            "cores_per_socket": phys_info["cores_per_socket"],
            "threads_per_core": phys_info["threads_per_core"]
            }
@@ -580,17 +580,9 @@ class XendNode:
            str='none\n'
        return str[:-1];

-    def count_cpus(self, pinfo):
-        count=0
-        node_to_cpu=pinfo['node_to_cpu']
-        for i in range(0, pinfo['nr_nodes']):
-            count+=len(node_to_cpu[i])
-        return count;
-
    def physinfo(self):
        info = self.xc.physinfo()

-        info['nr_cpus'] = self.count_cpus(info)
        info['cpu_mhz'] = info['cpu_khz'] / 1000
# physinfo is in KiB, need it in MiB
@@ -600,7 +592,6 @@ class XendNode:

        ITEM_ORDER = ['nr_cpus',
                      'nr_nodes',
-                      'sockets_per_node',
                      'cores_per_socket',
                      'threads_per_core',
                      'cpu_mhz',
diff -r b4278beaf354 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/python/xen/xm/main.py       Wed Oct 17 20:09:46 2007 -0700
@@ -1667,9 +1667,8 @@ def xm_info(args):
            "release":           getVal(["software_version", "release"]),
            "version":           getVal(["software_version", "version"]),
            "machine":           getVal(["software_version", "machine"]),
-            "nr_cpus":           len(getVal(["host_CPUs"], [])),
+            "nr_cpus":           getVal(["cpu_configuration", "nr_cpus"]),
            "nr_nodes":          getVal(["cpu_configuration", "nr_nodes"]),
-            "sockets_per_node":  getVal(["cpu_configuration", "sockets_per_node"]),
            "cores_per_socket":  getVal(["cpu_configuration", "cores_per_socket"]),
            "threads_per_core":  getVal(["cpu_configuration", "threads_per_core"]),
            "cpu_mhz":           getCpuMhz(),
diff -r b4278beaf354 tools/xenmon/xenbaked.c
--- a/tools/xenmon/xenbaked.c   Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/xenmon/xenbaked.c   Wed Oct 17 20:09:46 2007 -0700
@@ -460,10 +460,7 @@ unsigned int get_num_cpus(void)
    xc_interface_close(xc_handle);
    opts.cpu_freq = (double)physinfo.cpu_khz/1000.0;

-    return (physinfo.threads_per_core *
-            physinfo.cores_per_socket *
-            physinfo.sockets_per_node *
-            physinfo.nr_nodes);
+    return physinfo.nr_cpus;
}


diff -r b4278beaf354 tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c    Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/xenstat/libxenstat/src/xenstat.c    Wed Oct 17 20:09:46 2007 -0700
@@ -155,9 +155,7 @@ xenstat_node *xenstat_get_node(xenstat_h
        }

        node->cpu_hz = ((unsigned long long)physinfo.cpu_khz) * 1000ULL;
-       node->num_cpus =
-           (physinfo.threads_per_core * physinfo.cores_per_socket *
-            physinfo.sockets_per_node * physinfo.nr_nodes);
+        node->num_cpus = physinfo.nr_cpus;
        node->tot_mem = ((unsigned long long)physinfo.total_pages)
            * handle->page_size;
        node->free_mem = ((unsigned long long)physinfo.free_pages)
diff -r b4278beaf354 tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/xentrace/xentrace.c Wed Oct 17 20:09:46 2007 -0700
@@ -309,10 +309,7 @@ unsigned int get_num_cpus(void)

    xc_interface_close(xc_handle);

-    return (physinfo.threads_per_core *
-            physinfo.cores_per_socket *
-            physinfo.sockets_per_node *
-            physinfo.nr_nodes);
+    return physinfo.nr_cpus;
}


diff -r b4278beaf354 tools/xm-test/lib/XmTestLib/Xm.py
--- a/tools/xm-test/lib/XmTestLib/Xm.py Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/xm-test/lib/XmTestLib/Xm.py Wed Oct 17 20:09:46 2007 -0700
@@ -218,11 +218,9 @@ def restartXend():
        return status

def smpConcurrencyLevel():
-    cores = int(getInfo("cores_per_socket"))
-    threads = int(getInfo("threads_per_core"))
-    sockets = int(getInfo("sockets_per_node"))
-
-    return cores * sockets * threads
+    nr_cpus = int(getInfo("nr_cpus"))
+
+    return nr_cpus

if __name__ == "__main__":
    if isDomainRunning("0"):
diff -r b4278beaf354 tools/xm-test/lib/XmTestReport/OSReport.py
--- a/tools/xm-test/lib/XmTestReport/OSReport.py        Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/xm-test/lib/XmTestReport/OSReport.py        Wed Oct 17 20:09:46 2007 -0700
@@ -92,7 +92,6 @@ class Machine:

        xenValues = {"nr_cpus"          : "Unknown",
                     "nr_nodes"         : "Unknown",
-                     "sockets_per_node" : "Unknown",
                     "cores_per_socket" : "Unknown",
                     "threads_per_core" : "Unknown",
                     "cpu_mhz"          : "Unknown",
diff -r b4278beaf354 xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c      Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/arch/ia64/xen/dom0_ops.c      Wed Oct 17 20:09:46 2007 -0700
@@ -234,7 +234,7 @@ long arch_do_sysctl(xen_sysctl_t *op, XE
    {
    case XEN_SYSCTL_physinfo:
    {
-        int i, node_cpus = 0;
+        int i;
        uint32_t max_array_ent;

        xen_sysctl_physinfo_t *pi = &op->u.physinfo;
@@ -242,18 +242,8 @@ long arch_do_sysctl(xen_sysctl_t *op, XE
        pi->threads_per_core = cpus_weight(cpu_sibling_map[0]);
        pi->cores_per_socket =
            cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
+        pi->nr_cpus          = (u32)num_online_cpus();
        pi->nr_nodes         = num_online_nodes();
-
-        /*
-         * Guess at a sockets_per_node value.  Use the maximum number of
-         * CPUs per node to avoid deconfigured CPUs breaking the average.
-         */
-        for_each_online_node(i)
-            node_cpus = max(node_cpus, cpus_weight(node_to_cpumask(i)));
-
-        pi->sockets_per_node = node_cpus /
-            (pi->cores_per_socket * pi->threads_per_core);
-
        pi->total_pages      = total_pages;
        pi->free_pages       = avail_domheap_pages();
        pi->scrub_pages      = avail_scrub_pages();
diff -r b4278beaf354 xen/arch/powerpc/sysctl.c
--- a/xen/arch/powerpc/sysctl.c Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/arch/powerpc/sysctl.c Wed Oct 17 20:09:46 2007 -0700
@@ -45,9 +45,7 @@ long arch_do_sysctl(struct xen_sysctl *s
            cpus_weight(cpu_sibling_map[0]);
        pi->cores_per_socket =
            cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
-        pi->sockets_per_node = num_online_cpus() /
-            (num_online_nodes() * pi->cores_per_socket * pi->threads_per_core);
-
+        pi->nr_cpus          = (u32)num_online_cpus();
        pi->nr_nodes         = num_online_nodes();
        pi->total_pages      = total_pages;
        pi->free_pages       = avail_domheap_pages();
diff -r b4278beaf354 xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c     Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/arch/x86/sysctl.c     Wed Oct 17 20:09:46 2007 -0700
@@ -51,10 +51,8 @@ long arch_do_sysctl(
            cpus_weight(cpu_sibling_map[0]);
        pi->cores_per_socket =
            cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
+        pi->nr_cpus = (u32)num_online_cpus();
        pi->nr_nodes = num_online_nodes();
-        pi->sockets_per_node = num_online_cpus() /
-            (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core);
-
        pi->total_pages      = total_pages;
        pi->free_pages       = avail_domheap_pages();
        pi->scrub_pages      = avail_scrub_pages();
diff -r b4278beaf354 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/include/public/sysctl.h       Wed Oct 17 20:09:46 2007 -0700
@@ -79,7 +79,7 @@ struct xen_sysctl_physinfo {
    /* IN variables. */
    uint32_t threads_per_core;
    uint32_t cores_per_socket;
-    uint32_t sockets_per_node;
+    uint32_t nr_cpus;
    uint32_t nr_nodes;
    uint32_t cpu_khz;
    uint64_aligned_t total_pages;
------------------------------------------------------------------------

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


--
Elizabeth Kon (Beth)
IBM Linux Technology Center
Open Hypervisor Team
email: eak@xxxxxxxxxx


_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.