[Xen-changelog] [xen-unstable] Host Numa information in dom0
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1270653725 -3600 # Node ID 28e5409e3fb377830a5f4346fd414d3d158f3483 # Parent f0ef396d8c334100293fcba75ee89f311811b9f2 Host Numa information in dom0 'xm info' command now also gives the cpu topology & host numa information. This will be later used to build guest numa support. The patch basically changes physinfo sysctl, and adds topology_info & numa_info sysctls, and also changes the python & libxc code accordingly. Signed-off-by: Nitin A Kamble <nitin.a.kamble@xxxxxxxxx> --- tools/libxc/xc_misc.c | 37 ++++++ tools/libxc/xenctrl.h | 14 ++ tools/python/xen/lowlevel/xc/xc.c | 215 ++++++++++++++++++++++++++------------ tools/python/xen/xend/XendNode.py | 63 ++++++----- tools/python/xen/xend/balloon.py | 14 -- xen/arch/x86/sysctl.c | 140 ++++++++++++++++++++++-- xen/common/page_alloc.c | 6 + xen/include/asm-x86/numa.h | 1 xen/include/public/sysctl.h | 90 ++++++++++++--- xen/include/xen/mm.h | 1 10 files changed, 447 insertions(+), 134 deletions(-) diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/libxc/xc_misc.c --- a/tools/libxc/xc_misc.c Wed Apr 07 15:44:29 2010 +0100 +++ b/tools/libxc/xc_misc.c Wed Apr 07 16:22:05 2010 +0100 @@ -79,6 +79,43 @@ int xc_physinfo(int xc_handle, return 0; } + +int xc_topologyinfo(int xc_handle, + xc_topologyinfo_t *put_info) +{ + int ret; + DECLARE_SYSCTL; + + sysctl.cmd = XEN_SYSCTL_topologyinfo; + + memcpy(&sysctl.u.topologyinfo, put_info, sizeof(*put_info)); + + if ( (ret = do_sysctl(xc_handle, &sysctl)) != 0 ) + return ret; + + memcpy(put_info, &sysctl.u.topologyinfo, sizeof(*put_info)); + + return 0; +} + +int xc_numainfo(int xc_handle, + xc_numainfo_t *put_info) +{ + int ret; + DECLARE_SYSCTL; + + sysctl.cmd = XEN_SYSCTL_numainfo; + + memcpy(&sysctl.u.numainfo, put_info, sizeof(*put_info)); + + if ((ret = do_sysctl(xc_handle, &sysctl)) != 0) + return ret; + + memcpy(put_info, &sysctl.u.numainfo, sizeof(*put_info)); + + return 0; +} + int xc_sched_id(int xc_handle, int *sched_id) diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Wed Apr 07 15:44:29 2010 +0100 +++ b/tools/libxc/xenctrl.h Wed Apr 07 16:22:05 2010 +0100 @@ -612,9 +612,19 @@ int xc_send_debug_keys(int xc_handle, ch int xc_send_debug_keys(int xc_handle, char *keys); typedef xen_sysctl_physinfo_t xc_physinfo_t; +typedef xen_sysctl_topologyinfo_t xc_topologyinfo_t; +typedef xen_sysctl_numainfo_t xc_numainfo_t; + typedef uint32_t xc_cpu_to_node_t; -int xc_physinfo(int xc_handle, - xc_physinfo_t *info); +typedef uint32_t xc_cpu_to_socket_t; +typedef uint32_t xc_cpu_to_core_t; +typedef uint64_t xc_node_to_memsize_t; +typedef uint64_t xc_node_to_memfree_t; +typedef uint32_t xc_node_to_node_dist_t; + +int xc_physinfo(int xc_handle, xc_physinfo_t *info); +int xc_topologyinfo(int xc_handle, xc_topologyinfo_t *info); +int xc_numainfo(int xc_handle, xc_numainfo_t *info); int xc_sched_id(int xc_handle, int *sched_id); diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Wed Apr 07 15:44:29 2010 +0100 +++ b/tools/python/xen/lowlevel/xc/xc.c Wed Apr 07 16:22:05 2010 +0100 @@ -1151,105 +1151,178 @@ static PyObject *pyxc_pages_to_kib(XcObj return PyLong_FromUnsignedLong(pages_to_kib(pages)); } - static PyObject *pyxc_physinfo(XcObject *self) { -#define MAX_CPU_ID 255 - xc_physinfo_t info; + xc_physinfo_t pinfo; char cpu_cap[128], virt_caps[128], *p; - int i, j, max_cpu_id, nr_nodes = 0; - uint64_t free_heap; - PyObject *ret_obj, *node_to_cpu_obj, 
*node_to_memory_obj; - PyObject *node_to_dma32_mem_obj; - xc_cpu_to_node_t map[MAX_CPU_ID + 1]; + int i; const char *virtcap_names[] = { "hvm", "hvm_directio" }; - set_xen_guest_handle(info.cpu_to_node, map); - info.max_cpu_id = MAX_CPU_ID; - - if ( xc_physinfo(self->xc_handle, &info) != 0 ) + if ( xc_physinfo(self->xc_handle, &pinfo) != 0 ) return pyxc_error_to_exception(); p = cpu_cap; *p = '\0'; - for ( i = 0; i < sizeof(info.hw_cap)/4; i++ ) - p += sprintf(p, "%08x:", info.hw_cap[i]); + for ( i = 0; i < sizeof(pinfo.hw_cap)/4; i++ ) + p += sprintf(p, "%08x:", pinfo.hw_cap[i]); *(p-1) = 0; p = virt_caps; *p = '\0'; for ( i = 0; i < 2; i++ ) - if ( (info.capabilities >> i) & 1 ) + if ( (pinfo.capabilities >> i) & 1 ) p += sprintf(p, "%s ", virtcap_names[i]); if ( p != virt_caps ) *(p-1) = '\0'; - max_cpu_id = info.max_cpu_id; - if ( max_cpu_id > MAX_CPU_ID ) - max_cpu_id = MAX_CPU_ID; + return Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s,s:s}", + "nr_nodes", pinfo.nr_nodes, + "threads_per_core", pinfo.threads_per_core, + "cores_per_socket", pinfo.cores_per_socket, + "sockets_per_node", pinfo.sockets_per_node, + "nr_cpus", pinfo.nr_cpus, + "total_memory", pages_to_kib(pinfo.total_pages), + "free_memory", pages_to_kib(pinfo.free_pages), + "scrub_memory", pages_to_kib(pinfo.scrub_pages), + "cpu_khz", pinfo.cpu_khz, + "hw_caps", cpu_cap, + "virt_caps", virt_caps); +} + +static PyObject *pyxc_topologyinfo(XcObject *self) +{ +#define MAX_CPU_INDEX 255 + xc_topologyinfo_t tinfo; + int i, max_cpu_index; + PyObject *ret_obj; + PyObject *cpu_to_core_obj, *cpu_to_socket_obj, *cpu_to_node_obj; + xc_cpu_to_core_t coremap[MAX_CPU_INDEX + 1]; + xc_cpu_to_socket_t socketmap[MAX_CPU_INDEX + 1]; + xc_cpu_to_node_t nodemap[MAX_CPU_INDEX + 1]; + + + set_xen_guest_handle(tinfo.cpu_to_core, coremap); + set_xen_guest_handle(tinfo.cpu_to_socket, socketmap); + set_xen_guest_handle(tinfo.cpu_to_node, nodemap); + tinfo.max_cpu_index = MAX_CPU_INDEX; + + if ( xc_topologyinfo(self->xc_handle, &tinfo) != 0 ) + return pyxc_error_to_exception(); + + max_cpu_index = tinfo.max_cpu_index; + if ( max_cpu_index > MAX_CPU_INDEX ) + max_cpu_index = MAX_CPU_INDEX; + + /* Construct cpu-to-* lists. 
*/ + cpu_to_core_obj = PyList_New(0); + cpu_to_socket_obj = PyList_New(0); + cpu_to_node_obj = PyList_New(0); + for ( i = 0; i < max_cpu_index; i++ ) + { + PyObject *pyint; + + pyint = PyInt_FromLong(coremap[i]); + PyList_Append(cpu_to_core_obj, pyint); + Py_DECREF(pyint); + + pyint = PyInt_FromLong(socketmap[i]); + PyList_Append(cpu_to_socket_obj, pyint); + Py_DECREF(pyint); + + pyint = PyInt_FromLong(nodemap[i]); + PyList_Append(cpu_to_node_obj, pyint); + Py_DECREF(pyint); + } + + ret_obj = Py_BuildValue("{s:i}", "max_cpu_index", max_cpu_index); + + PyDict_SetItemString(ret_obj, "cpu_to_core", cpu_to_core_obj); + Py_DECREF(cpu_to_core_obj); + + PyDict_SetItemString(ret_obj, "cpu_to_socket", cpu_to_socket_obj); + Py_DECREF(cpu_to_socket_obj); + + PyDict_SetItemString(ret_obj, "cpu_to_node", cpu_to_node_obj); + Py_DECREF(cpu_to_node_obj); + + return ret_obj; +#undef MAX_CPU_INDEX +} + +static PyObject *pyxc_numainfo(XcObject *self) +{ +#define MAX_NODE_INDEX 31 + xc_numainfo_t ninfo; + int i, j, max_node_index; + uint64_t free_heap; + PyObject *ret_obj; + PyObject *node_to_memsize_obj, *node_to_memfree_obj; + PyObject *node_to_dma32_mem_obj, *node_to_node_dist_obj; + xc_node_to_memsize_t node_memsize[MAX_NODE_INDEX + 1]; + xc_node_to_memfree_t node_memfree[MAX_NODE_INDEX + 1]; + xc_node_to_node_dist_t nodes_dist[(MAX_NODE_INDEX * MAX_NODE_INDEX) + 1]; + + set_xen_guest_handle(ninfo.node_to_memsize, node_memsize); + set_xen_guest_handle(ninfo.node_to_memfree, node_memfree); + set_xen_guest_handle(ninfo.node_to_node_distance, nodes_dist); + ninfo.max_node_index = MAX_NODE_INDEX; + if( xc_numainfo(self->xc_handle, &ninfo) != 0 ) + return pyxc_error_to_exception(); + + max_node_index = ninfo.max_node_index; + if ( max_node_index > MAX_NODE_INDEX ) + max_node_index = MAX_NODE_INDEX; /* Construct node-to-* lists. */ - node_to_cpu_obj = PyList_New(0); - node_to_memory_obj = PyList_New(0); + node_to_memsize_obj = PyList_New(0); + node_to_memfree_obj = PyList_New(0); node_to_dma32_mem_obj = PyList_New(0); - for ( i = 0; i <= info.max_node_id; i++ ) + node_to_node_dist_obj = PyList_New(0); + for ( i = 0; i < max_node_index; i++ ) { - int node_exists = 0; PyObject *pyint; - /* CPUs. */ - PyObject *cpus = PyList_New(0); - for ( j = 0; j <= max_cpu_id; j++ ) - { - if ( i != map[j] ) - continue; - pyint = PyInt_FromLong(j); - PyList_Append(cpus, pyint); - Py_DECREF(pyint); - node_exists = 1; - } - PyList_Append(node_to_cpu_obj, cpus); - Py_DECREF(cpus); - - /* Memory. */ - xc_availheap(self->xc_handle, 0, 0, i, &free_heap); - node_exists = node_exists || (free_heap != 0); - pyint = PyInt_FromLong(free_heap / 1024); - PyList_Append(node_to_memory_obj, pyint); + /* Total Memory */ + pyint = PyInt_FromLong(node_memsize[i] >> 20); /* MB */ + PyList_Append(node_to_memsize_obj, pyint); + Py_DECREF(pyint); + + /* Free Memory */ + pyint = PyInt_FromLong(node_memfree[i] >> 20); /* MB */ + PyList_Append(node_to_memfree_obj, pyint); Py_DECREF(pyint); /* DMA memory. 
*/ xc_availheap(self->xc_handle, 0, 32, i, &free_heap); - pyint = PyInt_FromLong(free_heap / 1024); + pyint = PyInt_FromLong(free_heap >> 20); /* MB */ PyList_Append(node_to_dma32_mem_obj, pyint); Py_DECREF(pyint); - if ( node_exists ) - nr_nodes++; - } - - ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s:s:s}", - "nr_nodes", nr_nodes, - "max_node_id", info.max_node_id, - "max_cpu_id", info.max_cpu_id, - "threads_per_core", info.threads_per_core, - "cores_per_socket", info.cores_per_socket, - "nr_cpus", info.nr_cpus, - "total_memory", pages_to_kib(info.total_pages), - "free_memory", pages_to_kib(info.free_pages), - "scrub_memory", pages_to_kib(info.scrub_pages), - "cpu_khz", info.cpu_khz, - "hw_caps", cpu_cap, - "virt_caps", virt_caps); - PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj); - Py_DECREF(node_to_cpu_obj); - PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj); - Py_DECREF(node_to_memory_obj); + /* Node to Node Distance */ + for ( j = 0; j < ninfo.max_node_index; j++ ) + { + pyint = PyInt_FromLong(nodes_dist[(i * ninfo.max_node_index) + j]); + PyList_Append(node_to_node_dist_obj, pyint); + Py_DECREF(pyint); + } + } + + ret_obj = Py_BuildValue("{s:i}", "max_node_index", max_node_index); + + PyDict_SetItemString(ret_obj, "node_memsize", node_to_memsize_obj); + Py_DECREF(node_to_memsize_obj); + + PyDict_SetItemString(ret_obj, "node_memfree", node_to_memfree_obj); + Py_DECREF(node_to_memfree_obj); + PyDict_SetItemString(ret_obj, "node_to_dma32_mem", node_to_dma32_mem_obj); Py_DECREF(node_to_dma32_mem_obj); + + PyDict_SetItemString(ret_obj, "node_to_node_dist", node_to_node_dist_obj); + Py_DECREF(node_to_node_dist_obj); return ret_obj; -#undef MAX_CPU_ID +#undef MAX_NODE_INDEX } static PyObject *pyxc_xeninfo(XcObject *self) @@ -2056,6 +2129,20 @@ static PyMethodDef pyxc_methods[] = { METH_NOARGS, "\n" "Get information about the physical host machine\n" "Returns [dict]: information about the hardware" + " [None]: on failure.\n" }, + + { "topologyinfo", + (PyCFunction)pyxc_topologyinfo, + METH_NOARGS, "\n" + "Get information about the cpu topology on the host machine\n" + "Returns [dict]: information about the cpu topology on host" + " [None]: on failure.\n" }, + + { "numainfo", + (PyCFunction)pyxc_numainfo, + METH_NOARGS, "\n" + "Get NUMA information on the host machine\n" + "Returns [dict]: NUMA information on host" " [None]: on failure.\n" }, { "xeninfo", diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Wed Apr 07 15:44:29 2010 +0100 +++ b/tools/python/xen/xend/XendNode.py Wed Apr 07 16:22:05 2010 +0100 @@ -878,65 +878,70 @@ class XendNode: def list_to_strrange(self,list): return self.format_pairs(self.list_to_rangepairs(list)) - def format_node_to_cpu(self, pinfo): - str='' - whitespace='' + def format_cpu_to_core_socket_node(self, tinfo): try: - node_to_cpu=pinfo['node_to_cpu'] - for i in range(0, pinfo['max_node_id']+1): - str+='%snode%d:%s\n' % (whitespace, - i, - self.list_to_strrange(node_to_cpu[i])) - whitespace='%25s' % '' + nr_cpus=tinfo['max_cpu_index'] + str='\ncpu: core socket node\n' + for i in range(0, nr_cpus): + str+='%3d:%8d %8d %8d\n' % (i, + tinfo['cpu_to_core'][i], + tinfo['cpu_to_socket'][i], + tinfo['cpu_to_node'][i]) except: str='none\n' return str[:-1]; - def format_node_to_memory(self, pinfo, key): - str='' - whitespace='' + + def format_numa_info(self, ninfo): try: - node_to_memory=pinfo[key] - for i in range(0, pinfo['max_node_id']+1): - 
str+='%snode%d:%d\n' % (whitespace, - i, - node_to_memory[i] / 1024) - whitespace='%25s' % '' + nr_nodes=ninfo['max_node_index'] + str='\nnode: TotalMemory FreeMemory dma32Memory NodeDist:' + for i in range(0, nr_nodes): + str+='%4d ' % i + str+='\n' + for i in range(0, nr_nodes): + str+='%4d: %8dMB %8dMB %8dMB :' % (i, + ninfo['node_memsize'][i], + ninfo['node_memfree'][i], + ninfo['node_to_dma32_mem'][i]) + for j in range(0, nr_nodes): + str+='%4d ' % ninfo['node_to_node_dist'][(i*nr_nodes)+j] + str+='\n' except: str='none\n' return str[:-1]; - def physinfo(self): info = self.xc.physinfo() + tinfo = self.xc.topologyinfo() + ninfo = self.xc.numainfo() info['cpu_mhz'] = info['cpu_khz'] / 1000 # physinfo is in KiB, need it in MiB info['total_memory'] = info['total_memory'] / 1024 info['free_memory'] = info['free_memory'] / 1024 - info['node_to_cpu'] = self.format_node_to_cpu(info) - info['node_to_memory'] = \ - self.format_node_to_memory(info, 'node_to_memory') - info['node_to_dma32_mem'] = \ - self.format_node_to_memory(info, 'node_to_dma32_mem') + + info['cpu_topology'] = \ + self.format_cpu_to_core_socket_node(tinfo) + + info['numa_info'] = \ + self.format_numa_info(ninfo) ITEM_ORDER = ['nr_cpus', 'nr_nodes', 'cores_per_socket', 'threads_per_core', + 'sockets_per_node', 'cpu_mhz', 'hw_caps', 'virt_caps', 'total_memory', 'free_memory', - 'node_to_cpu', - 'node_to_memory', - 'node_to_dma32_mem', - 'max_node_id' + 'cpu_topology', + 'numa_info', ] return [[k, info[k]] for k in ITEM_ORDER] - def pciinfo(self): from xen.xend.server.pciif import get_all_assigned_pci_devices diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/python/xen/xend/balloon.py --- a/tools/python/xen/xend/balloon.py Wed Apr 07 15:44:29 2010 +0100 +++ b/tools/python/xen/xend/balloon.py Wed Apr 07 16:22:05 2010 +0100 @@ -184,15 +184,11 @@ def free(need_mem, dominfo): waitscrub = 1 vcpus = dominfo.info['cpus'][0] for vcpu in vcpus: - nodenum = 0 - for node in physinfo['node_to_cpu']: - for cpu in node: - if vcpu == cpu: - if oldnode == -1: - oldnode = nodenum - elif oldnode != nodenum: - waitscrub = 0 - nodenum = nodenum + 1 + nodenum = xc.numainfo()['cpu_to_node'][cpu] + if oldnode == -1: + oldnode = nodenum + elif oldnode != nodenum: + waitscrub = 0 if waitscrub == 1 and scrub_mem > 0: log.debug("wait for scrub %s", scrub_mem) diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/arch/x86/sysctl.c --- a/xen/arch/x86/sysctl.c Wed Apr 07 15:44:29 2010 +0100 +++ b/xen/arch/x86/sysctl.c Wed Apr 07 16:22:05 2010 +0100 @@ -35,6 +35,8 @@ static long cpu_down_helper(void *data) return cpu_down(cpu); } +extern int __node_distance(int a, int b); + long arch_do_sysctl( struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) { @@ -45,25 +47,22 @@ long arch_do_sysctl( case XEN_SYSCTL_physinfo: { - uint32_t i, max_array_ent; - XEN_GUEST_HANDLE_64(uint32) cpu_to_node_arr; - xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo; ret = xsm_physinfo(); if ( ret ) break; - max_array_ent = pi->max_cpu_id; - cpu_to_node_arr = pi->cpu_to_node; memset(pi, 0, sizeof(*pi)); - pi->cpu_to_node = cpu_to_node_arr; pi->threads_per_core = cpus_weight(per_cpu(cpu_sibling_map, 0)); pi->cores_per_socket = cpus_weight(per_cpu(cpu_core_map, 0)) / pi->threads_per_core; pi->nr_cpus = (u32)num_online_cpus(); + pi->nr_nodes = (u32)num_online_nodes(); + pi->sockets_per_node = pi->nr_cpus / + (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core); pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->scrub_pages = 0; @@ -74,15 +73,56 @@ long 
arch_do_sysctl( if ( iommu_enabled ) pi->capabilities |= XEN_SYSCTL_PHYSCAP_hvm_directio; - pi->max_node_id = last_node(node_online_map); - pi->max_cpu_id = last_cpu(cpu_online_map); - max_array_ent = min_t(uint32_t, max_array_ent, pi->max_cpu_id); + if ( copy_to_guest(u_sysctl, sysctl, 1) ) + ret = -EFAULT; + } + break; + + case XEN_SYSCTL_topologyinfo: + { + uint32_t i, max_cpu_index; + XEN_GUEST_HANDLE_64(uint32) cpu_to_core_arr; + XEN_GUEST_HANDLE_64(uint32) cpu_to_socket_arr; + XEN_GUEST_HANDLE_64(uint32) cpu_to_node_arr; + + xen_sysctl_topologyinfo_t *ti = &sysctl->u.topologyinfo; + + max_cpu_index = ti->max_cpu_index; + cpu_to_core_arr = ti->cpu_to_core; + cpu_to_socket_arr = ti->cpu_to_socket; + cpu_to_node_arr = ti->cpu_to_node; + + memset(ti, 0, sizeof(*ti)); + ti->cpu_to_core = cpu_to_core_arr; + ti->cpu_to_socket = cpu_to_socket_arr; + ti->cpu_to_node = cpu_to_node_arr; + + max_cpu_index = min_t(uint32_t, max_cpu_index, num_online_cpus()); + ti->max_cpu_index = max_cpu_index; ret = 0; - if ( !guest_handle_is_null(cpu_to_node_arr) ) - { - for ( i = 0; i <= max_array_ent; i++ ) + for ( i = 0; i < max_cpu_index; i++ ) + { + if ( !guest_handle_is_null(cpu_to_core_arr) ) + { + uint32_t core = cpu_online(i) ? cpu_to_core(i) : ~0u; + if ( copy_to_guest_offset(cpu_to_core_arr, i, &core, 1) ) + { + ret = -EFAULT; + break; + } + } + if ( !guest_handle_is_null(cpu_to_socket_arr) ) + { + uint32_t socket = cpu_online(i) ? cpu_to_socket(i) : ~0u; + if ( copy_to_guest_offset(cpu_to_socket_arr, i, &socket, 1) ) + { + ret = -EFAULT; + break; + } + } + if ( !guest_handle_is_null(cpu_to_node_arr) ) { uint32_t node = cpu_online(i) ? cpu_to_node(i) : ~0u; if ( copy_to_guest_offset(cpu_to_node_arr, i, &node, 1) ) @@ -92,6 +132,82 @@ long arch_do_sysctl( } } } + + if (ret) + break; + + if ( copy_to_guest(u_sysctl, sysctl, 1) ) + ret = -EFAULT; + } + break; + + case XEN_SYSCTL_numainfo: + { + uint32_t i, max_node_index; + XEN_GUEST_HANDLE_64(uint64) node_to_memsize_arr; + XEN_GUEST_HANDLE_64(uint64) node_to_memfree_arr; + XEN_GUEST_HANDLE_64(uint32) node_to_node_distance_arr; + + xen_sysctl_numainfo_t *ni = &sysctl->u.numainfo; + + max_node_index = ni->max_node_index; + node_to_memsize_arr = ni->node_to_memsize; + node_to_memfree_arr = ni->node_to_memfree; + node_to_node_distance_arr = ni->node_to_node_distance; + + memset(ni, 0, sizeof(*ni)); + ni->node_to_memsize = node_to_memsize_arr; + ni->node_to_memfree = node_to_memfree_arr; + ni->node_to_node_distance = node_to_node_distance_arr; + + max_node_index = min_t(uint32_t, max_node_index, num_online_nodes()); + ni->max_node_index = max_node_index; + + ret = 0; + + for ( i = 0; i < max_node_index; i++ ) + { + if ( !guest_handle_is_null(node_to_memsize_arr) ) + { + uint64_t memsize = node_online(i) ? + node_spanned_pages(i) << PAGE_SHIFT : 0ul; + if ( copy_to_guest_offset(node_to_memsize_arr, i, &memsize, 1) ) + { + ret = -EFAULT; + break; + } + } + if ( !guest_handle_is_null(node_to_memfree_arr) ) + { + uint64_t memfree = node_online(i) ? 
+ avail_node_heap_pages(i) << PAGE_SHIFT : 0ul; + if ( copy_to_guest_offset(node_to_memfree_arr, i, &memfree, 1) ) + { + ret = -EFAULT; + break; + } + } + + if ( !guest_handle_is_null(node_to_node_distance_arr) ) + { + int j; + for ( j = 0; j < max_node_index; j++) + { + uint32_t distance = ~0u; + if (node_online(i) && node_online (j)) + distance = __node_distance(i, j); + + if ( copy_to_guest_offset(node_to_node_distance_arr, + (i * max_node_index + j), &distance, 1) ) + { + ret = -EFAULT; + break; + } + } + } + } + if (ret) + break; if ( copy_to_guest(u_sysctl, sysctl, 1) ) ret = -EFAULT; diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Wed Apr 07 15:44:29 2010 +0100 +++ b/xen/common/page_alloc.c Wed Apr 07 16:22:05 2010 +0100 @@ -1256,6 +1256,12 @@ unsigned long avail_domheap_pages(void) -1); } +unsigned long avail_node_heap_pages(unsigned int nodeid) +{ + return avail_heap_pages(MEMZONE_XEN, NR_ZONES -1, nodeid); +} + + static void pagealloc_info(unsigned char key) { unsigned int zone = MEMZONE_XEN; diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/include/asm-x86/numa.h --- a/xen/include/asm-x86/numa.h Wed Apr 07 15:44:29 2010 +0100 +++ b/xen/include/asm-x86/numa.h Wed Apr 07 16:22:05 2010 +0100 @@ -73,6 +73,7 @@ static inline __attribute__((pure)) int #define NODE_DATA(nid) (&(node_data[nid])) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ NODE_DATA(nid)->node_spanned_pages) diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/include/public/sysctl.h --- a/xen/include/public/sysctl.h Wed Apr 07 15:44:29 2010 +0100 +++ b/xen/include/public/sysctl.h Wed Apr 07 16:22:05 2010 +0100 @@ -34,7 +34,7 @@ #include "xen.h" #include "domctl.h" -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000007 +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000008 /* * Read console content from Xen buffer ring. @@ -93,29 +93,14 @@ struct xen_sysctl_physinfo { struct xen_sysctl_physinfo { uint32_t threads_per_core; uint32_t cores_per_socket; + uint32_t sockets_per_node; uint32_t nr_cpus; - uint32_t max_node_id; + uint32_t nr_nodes; uint32_t cpu_khz; uint64_aligned_t total_pages; uint64_aligned_t free_pages; uint64_aligned_t scrub_pages; uint32_t hw_cap[8]; - - /* - * IN: maximum addressable entry in the caller-provided cpu_to_node array. - * OUT: largest cpu identifier in the system. - * If OUT is greater than IN then the cpu_to_node array is truncated! - */ - uint32_t max_cpu_id; - /* - * If not NULL, this array is filled with node identifier for each cpu. - * If a cpu has no node information (e.g., cpu not present) then the - * sentinel value ~0u is written. - * The size of this array is specified by the caller in @max_cpu_id. - * If the actual @max_cpu_id is smaller than the array then the trailing - * elements of the array will not be written by the sysctl. - */ - XEN_GUEST_HANDLE_64(uint32) cpu_to_node; /* XEN_SYSCTL_PHYSCAP_??? */ uint32_t capabilities; @@ -491,6 +476,73 @@ typedef struct xen_sysctl_lockprof_op xe typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t); +#define XEN_SYSCTL_topologyinfo 16 +struct xen_sysctl_topologyinfo { + + /* + * IN: maximum addressable entry in the caller-provided cpu_to_core, + * cpu_to_socket & cpu_to_node arrays. + * OUT: largest cpu identifier in the system. + * If OUT is greater than IN then the cpu_to_node array is truncated! 
+ */ + uint32_t max_cpu_index; + + /* + * If not NULL, this array is filled with core/socket/node identifier for + * each cpu. + * If a cpu has no core/socket/node information (e.g., cpu not present) + * then the sentinel value ~0u is written. + * The size of this array is specified by the caller in @max_cpu_index. + * If the actual @max_cpu_index is smaller than the array then the trailing + * elements of the array will not be written by the sysctl. + */ + XEN_GUEST_HANDLE_64(uint32) cpu_to_core; + XEN_GUEST_HANDLE_64(uint32) cpu_to_socket; + XEN_GUEST_HANDLE_64(uint32) cpu_to_node; /* node_number */ + +}; +typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t); + +#define XEN_SYSCTL_numainfo 17 +struct xen_sysctl_numainfo { + /* + * IN: maximum addressable entry in the caller-provided node_numbers, + * node_to_memsize & node_to_memfree arrays. + * OUT: largest possible node index for the system. + * If OUT is greater than IN then these arrays are truncated! + */ + uint32_t max_node_index; + + /* For node_to_memsize & node_to_memfree arrays, the + * entry with same index corrosponds to the same node. + * If a entry has no node information (e.g., node not present) then the + * sentinel value ~0u is written for_node_number, and value 0u is written + * for node_to_memsize & node_to_memfree. + * The size of this array is specified by the caller in @max_node_index. + * If the actual @max_node_index is smaller than the array then the + * trailing elements of the array will not be written by the sysctl. + */ + XEN_GUEST_HANDLE_64(uint64) node_to_memsize; + XEN_GUEST_HANDLE_64(uint64) node_to_memfree; + + + /* node_to_node_distance is array of size (nr_nodes * nr_nodes) listing + * memory access distances between nodes. i'th entry in the array + * specifies distance between node (i / nr_nodes) & node (i % nr_nodes) + * If a entry has no node distance information (e.g., node not present) + * then the sentinel value ~0u is written. + * The size of this array is specified by the caller in + * @max_node_distance_index. If the max_node_index*max_node_index is + * smaller than the array then the trailing elements of the array will + * not be written by the sysctl. 
+ */ + XEN_GUEST_HANDLE_64(uint32) node_to_node_distance; +}; +typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t); + + struct xen_sysctl { uint32_t cmd; uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ @@ -498,6 +550,8 @@ struct xen_sysctl { struct xen_sysctl_readconsole readconsole; struct xen_sysctl_tbuf_op tbuf_op; struct xen_sysctl_physinfo physinfo; + struct xen_sysctl_topologyinfo topologyinfo; + struct xen_sysctl_numainfo numainfo; struct xen_sysctl_sched_id sched_id; struct xen_sysctl_perfc_op perfc_op; struct xen_sysctl_getdomaininfolist getdomaininfolist; diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/include/xen/mm.h --- a/xen/include/xen/mm.h Wed Apr 07 15:44:29 2010 +0100 +++ b/xen/include/xen/mm.h Wed Apr 07 16:22:05 2010 +0100 @@ -57,6 +57,7 @@ unsigned long avail_domheap_pages_region unsigned long avail_domheap_pages_region( unsigned int node, unsigned int min_width, unsigned int max_width); unsigned long avail_domheap_pages(void); +unsigned long avail_node_heap_pages(unsigned int); #define alloc_domheap_page(d,f) (alloc_domheap_pages(d,0,f)) #define free_domheap_page(p) (free_domheap_pages(p,0)) unsigned int online_page(unsigned long mfn, uint32_t *status); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
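
As a usage illustration (not part of the changeset): a minimal libxc caller for the new XEN_SYSCTL_topologyinfo interface, mirroring the buffer setup used by pyxc_topologyinfo() in the patch. The caller passes the capacity of its cpu_to_* arrays in max_cpu_index and Xen hands back the clamped index. This is only a sketch: MAX_CPUS and the error handling are illustrative, and the int-based xc_interface_open()/xc_interface_close() calls assume the libxc interface of this era.

/* Hypothetical example -- not part of the changeset. */
#include <stdio.h>
#include <stdint.h>
#include <xenctrl.h>

#define MAX_CPUS 256   /* illustrative capacity of the cpu_to_* arrays */

int main(void)
{
    xc_topologyinfo_t tinfo;
    xc_cpu_to_core_t coremap[MAX_CPUS];
    xc_cpu_to_socket_t socketmap[MAX_CPUS];
    xc_cpu_to_node_t nodemap[MAX_CPUS];
    uint32_t i, max_cpu_index;
    int xc_handle;

    if ( (xc_handle = xc_interface_open()) < 0 )
        return 1;

    /* Point Xen at the output arrays and say how many entries they hold. */
    set_xen_guest_handle(tinfo.cpu_to_core, coremap);
    set_xen_guest_handle(tinfo.cpu_to_socket, socketmap);
    set_xen_guest_handle(tinfo.cpu_to_node, nodemap);
    tinfo.max_cpu_index = MAX_CPUS - 1;

    if ( xc_topologyinfo(xc_handle, &tinfo) != 0 )
    {
        xc_interface_close(xc_handle);
        return 1;
    }

    /* Xen clamps max_cpu_index to the number of online cpus. */
    max_cpu_index = tinfo.max_cpu_index;
    if ( max_cpu_index > MAX_CPUS - 1 )
        max_cpu_index = MAX_CPUS - 1;

    /* Cpus without topology data carry the ~0u sentinel in each map. */
    for ( i = 0; i < max_cpu_index; i++ )
        printf("cpu%u: core %u, socket %u, node %u\n",
               i, coremap[i], socketmap[i], nodemap[i]);

    xc_interface_close(xc_handle);
    return 0;
}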
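
Similarly, a sketch of a caller for the new XEN_SYSCTL_numainfo interface, following pyxc_numainfo(): per-node memory sizes come back in bytes, and the node-to-node distance matrix is row-major with a stride equal to the returned max_node_index. Again, MAX_NODES is an arbitrary bound chosen for the example, not something defined by the patch.

/* Hypothetical example -- not part of the changeset. */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <xenctrl.h>

#define MAX_NODES 32   /* illustrative capacity of the per-node arrays */

int main(void)
{
    xc_numainfo_t ninfo;
    xc_node_to_memsize_t memsize[MAX_NODES];
    xc_node_to_memfree_t memfree[MAX_NODES];
    xc_node_to_node_dist_t dist[MAX_NODES * MAX_NODES];
    uint32_t i, j, max_node_index;
    int xc_handle;

    if ( (xc_handle = xc_interface_open()) < 0 )
        return 1;

    set_xen_guest_handle(ninfo.node_to_memsize, memsize);
    set_xen_guest_handle(ninfo.node_to_memfree, memfree);
    set_xen_guest_handle(ninfo.node_to_node_distance, dist);
    ninfo.max_node_index = MAX_NODES - 1;

    if ( xc_numainfo(xc_handle, &ninfo) != 0 )
    {
        xc_interface_close(xc_handle);
        return 1;
    }

    max_node_index = ninfo.max_node_index;
    if ( max_node_index > MAX_NODES - 1 )
        max_node_index = MAX_NODES - 1;

    for ( i = 0; i < max_node_index; i++ )
    {
        /* Memory figures are returned in bytes; shift to MB as xend does. */
        printf("node%u: %" PRIu64 " MB total, %" PRIu64 " MB free, dist:",
               i, memsize[i] >> 20, memfree[i] >> 20);

        /* Row-major distance matrix, stride = the returned max_node_index. */
        for ( j = 0; j < max_node_index; j++ )
            printf(" %u", dist[(i * ninfo.max_node_index) + j]);
        printf("\n");
    }

    xc_interface_close(xc_handle);
    return 0;
}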