|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v5 4/8] sysctl: Make XEN_SYSCTL_numainfo a little more efficient
A number of changes to XEN_SYSCTL_numainfo interface:
* Make sysctl NUMA topology query use fewer copies by combining some
fields into a single structure and copying distances for each node
in a single copy.
* NULL meminfo and distance handles are a request for the maximum number
of nodes (num_nodes). If those handles are valid and num_nodes is
smaller than the number of nodes in the system then -ENOBUFS is
returned (and the correct num_nodes is provided).
* Instead of using max_node_index for passing number of nodes keep this
value in num_nodes: almost all uses of max_node_index required adding
or subtracting one to eventually get to number of nodes anyway.
* Replace INVALID_NUMAINFO_ID with XEN_INVALID_MEM_SZ and add
XEN_INVALID_NODE_DIST.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
Changes in v5:
* Similar to 3/8 patch:
* Make XEN_SYSCTL_numainfo treat passed-in NULL handles as requests
for array size (and passing in a size that is too small results in -ENOBUFS)
* Make distance in xen_sysctl_numainfo a uint32
* On the toolstack side use NULL handles to determine array size in
libxl_get_numainfo() and use dynamic arrays in python's xc.c
tools/libxl/libxl.c | 65 +++++++++++++++-----------------
tools/python/xen/lowlevel/xc/xc.c | 58 +++++++++++++---------------
xen/common/sysctl.c | 75 ++++++++++++++++++++----------------
xen/include/public/sysctl.h | 53 +++++++++++++++-----------
4 files changed, 129 insertions(+), 122 deletions(-)
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index c989abf..0234e36 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -5108,65 +5108,60 @@ libxl_numainfo *libxl_get_numainfo(libxl_ctx *ctx, int
*nr)
{
GC_INIT(ctx);
xc_numainfo_t ninfo;
- DECLARE_HYPERCALL_BUFFER(xc_node_to_memsize_t, memsize);
- DECLARE_HYPERCALL_BUFFER(xc_node_to_memfree_t, memfree);
- DECLARE_HYPERCALL_BUFFER(uint32_t, node_dists);
+ DECLARE_HYPERCALL_BUFFER(xen_sysctl_meminfo_t, meminfo);
+ DECLARE_HYPERCALL_BUFFER(uint32_t, distance);
libxl_numainfo *ret = NULL;
- int i, j, max_nodes;
+ int i, j;
- max_nodes = libxl_get_max_nodes(ctx);
- if (max_nodes < 0)
+ set_xen_guest_handle(ninfo.meminfo, HYPERCALL_BUFFER_NULL);
+ set_xen_guest_handle(ninfo.distance, HYPERCALL_BUFFER_NULL);
+ if ( xc_numainfo(ctx->xch, &ninfo) != 0)
{
LIBXL__LOG(ctx, XTL_ERROR, "Unable to determine number of NODES");
ret = NULL;
goto out;
}
- memsize = xc_hypercall_buffer_alloc
- (ctx->xch, memsize, sizeof(*memsize) * max_nodes);
- memfree = xc_hypercall_buffer_alloc
- (ctx->xch, memfree, sizeof(*memfree) * max_nodes);
- node_dists = xc_hypercall_buffer_alloc
- (ctx->xch, node_dists, sizeof(*node_dists) * max_nodes * max_nodes);
- if ((memsize == NULL) || (memfree == NULL) || (node_dists == NULL)) {
+ meminfo = xc_hypercall_buffer_alloc(ctx->xch, meminfo,
+ sizeof(*meminfo) * ninfo.num_nodes);
+ distance = xc_hypercall_buffer_alloc(ctx->xch, distance,
+ sizeof(*distance) *
+ ninfo.num_nodes * ninfo.num_nodes);
+ if ((meminfo == NULL) || (distance == NULL)) {
LIBXL__LOG_ERRNOVAL(ctx, XTL_ERROR, ENOMEM,
"Unable to allocate hypercall arguments");
goto fail;
}
- set_xen_guest_handle(ninfo.node_to_memsize, memsize);
- set_xen_guest_handle(ninfo.node_to_memfree, memfree);
- set_xen_guest_handle(ninfo.node_to_node_distance, node_dists);
- ninfo.max_node_index = max_nodes - 1;
+ set_xen_guest_handle(ninfo.meminfo, meminfo);
+ set_xen_guest_handle(ninfo.distance, distance);
if (xc_numainfo(ctx->xch, &ninfo) != 0) {
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "getting numainfo");
goto fail;
}
- if (ninfo.max_node_index < max_nodes - 1)
- max_nodes = ninfo.max_node_index + 1;
+ *nr = ninfo.num_nodes;
- *nr = max_nodes;
+ ret = libxl__zalloc(NOGC, sizeof(libxl_numainfo) * ninfo.num_nodes);
+ for (i = 0; i < ninfo.num_nodes; i++)
+ ret[i].dists = libxl__calloc(NOGC, ninfo.num_nodes, sizeof(*distance));
- ret = libxl__zalloc(NOGC, sizeof(libxl_numainfo) * max_nodes);
- for (i = 0; i < max_nodes; i++)
- ret[i].dists = libxl__calloc(NOGC, max_nodes, sizeof(*node_dists));
-
- for (i = 0; i < max_nodes; i++) {
-#define V(mem, i) (mem[i] == INVALID_NUMAINFO_ID) ? \
- LIBXL_NUMAINFO_INVALID_ENTRY : mem[i]
- ret[i].size = V(memsize, i);
- ret[i].free = V(memfree, i);
- ret[i].num_dists = max_nodes;
- for (j = 0; j < ret[i].num_dists; j++)
- ret[i].dists[j] = V(node_dists, i * max_nodes + j);
+ for (i = 0; i < ninfo.num_nodes; i++) {
+#define V(val, invalid) (val == invalid) ? \
+ LIBXL_NUMAINFO_INVALID_ENTRY : val
+ ret[i].size = V(meminfo[i].memsize, XEN_INVALID_MEM_SZ);
+ ret[i].free = V(meminfo[i].memfree, XEN_INVALID_MEM_SZ);
+ ret[i].num_dists = ninfo.num_nodes;
+ for (j = 0; j < ret[i].num_dists; j++) {
+ unsigned idx = i * ninfo.num_nodes + j;
+ ret[i].dists[j] = V(distance[idx], XEN_INVALID_NODE_DIST);
+ }
#undef V
}
fail:
- xc_hypercall_buffer_free(ctx->xch, memsize);
- xc_hypercall_buffer_free(ctx->xch, memfree);
- xc_hypercall_buffer_free(ctx->xch, node_dists);
+ xc_hypercall_buffer_free(ctx->xch, meminfo);
+ xc_hypercall_buffer_free(ctx->xch, distance);
out:
GC_FREE;
diff --git a/tools/python/xen/lowlevel/xc/xc.c
b/tools/python/xen/lowlevel/xc/xc.c
index 5e81c4a..ba66d55 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -1297,55 +1297,52 @@ out:
static PyObject *pyxc_numainfo(XcObject *self)
{
-#define MAX_NODE_INDEX 31
xc_numainfo_t ninfo = { 0 };
- int i, j, max_node_index;
+ unsigned i, j;
uint64_t free_heap;
PyObject *ret_obj = NULL, *node_to_node_dist_list_obj;
PyObject *node_to_memsize_obj, *node_to_memfree_obj;
PyObject *node_to_dma32_mem_obj, *node_to_node_dist_obj;
- DECLARE_HYPERCALL_BUFFER(xc_node_to_memsize_t, node_memsize);
- DECLARE_HYPERCALL_BUFFER(xc_node_to_memfree_t, node_memfree);
- DECLARE_HYPERCALL_BUFFER(xc_node_to_node_dist_t, nodes_dist);
+ DECLARE_HYPERCALL_BUFFER(xen_sysctl_meminfo_t, meminfo);
+ DECLARE_HYPERCALL_BUFFER(uint32_t, distance);
- node_memsize = xc_hypercall_buffer_alloc(self->xc_handle, node_memsize,
sizeof(*node_memsize)*(MAX_NODE_INDEX+1));
- if ( node_memsize == NULL )
+ set_xen_guest_handle(ninfo.meminfo, HYPERCALL_BUFFER_NULL);
+ set_xen_guest_handle(ninfo.distance, HYPERCALL_BUFFER_NULL);
+ if ( xc_numainfo(self->xc_handle, &ninfo) != 0 )
goto out;
- node_memfree = xc_hypercall_buffer_alloc(self->xc_handle, node_memfree,
sizeof(*node_memfree)*(MAX_NODE_INDEX+1));
- if ( node_memfree == NULL )
+
+ meminfo = xc_hypercall_buffer_alloc(self->xc_handle, meminfo,
+ sizeof(*meminfo) * ninfo.num_nodes);
+ if ( meminfo == NULL )
goto out;
- nodes_dist = xc_hypercall_buffer_alloc(self->xc_handle, nodes_dist,
sizeof(*nodes_dist)*(MAX_NODE_INDEX+1)*(MAX_NODE_INDEX+1));
- if ( nodes_dist == NULL )
+ distance = xc_hypercall_buffer_alloc(self->xc_handle, distance,
+ sizeof(*distance) *
+ ninfo.num_nodes * ninfo.num_nodes);
+ if ( distance == NULL )
goto out;
- set_xen_guest_handle(ninfo.node_to_memsize, node_memsize);
- set_xen_guest_handle(ninfo.node_to_memfree, node_memfree);
- set_xen_guest_handle(ninfo.node_to_node_distance, nodes_dist);
- ninfo.max_node_index = MAX_NODE_INDEX;
-
+ set_xen_guest_handle(ninfo.meminfo, meminfo);
+ set_xen_guest_handle(ninfo.distance, distance);
if ( xc_numainfo(self->xc_handle, &ninfo) != 0 )
goto out;
- max_node_index = ninfo.max_node_index;
- if ( max_node_index > MAX_NODE_INDEX )
- max_node_index = MAX_NODE_INDEX;
-
/* Construct node-to-* lists. */
node_to_memsize_obj = PyList_New(0);
node_to_memfree_obj = PyList_New(0);
node_to_dma32_mem_obj = PyList_New(0);
node_to_node_dist_list_obj = PyList_New(0);
- for ( i = 0; i <= max_node_index; i++ )
+ for ( i = 0; i < ninfo.num_nodes; i++ )
{
PyObject *pyint;
+ unsigned invalid_node;
/* Total Memory */
- pyint = PyInt_FromLong(node_memsize[i] >> 20); /* MB */
+ pyint = PyInt_FromLong(meminfo[i].memsize >> 20); /* MB */
PyList_Append(node_to_memsize_obj, pyint);
Py_DECREF(pyint);
/* Free Memory */
- pyint = PyInt_FromLong(node_memfree[i] >> 20); /* MB */
+ pyint = PyInt_FromLong(meminfo[i].memfree >> 20); /* MB */
PyList_Append(node_to_memfree_obj, pyint);
Py_DECREF(pyint);
@@ -1357,10 +1354,11 @@ static PyObject *pyxc_numainfo(XcObject *self)
/* Node to Node Distance */
node_to_node_dist_obj = PyList_New(0);
- for ( j = 0; j <= max_node_index; j++ )
+ invalid_node = (meminfo[i].memsize == XEN_INVALID_MEM_SZ);
+ for ( j = 0; j < ninfo.num_nodes; j++ )
{
- uint32_t dist = nodes_dist[i*(max_node_index+1) + j];
- if ( dist == ~0u )
+ uint32_t dist = distance[i * ninfo.num_nodes + j];
+ if ( invalid_node || (dist == XEN_INVALID_NODE_DIST) )
{
PyList_Append(node_to_node_dist_obj, Py_None);
}
@@ -1375,7 +1373,7 @@ static PyObject *pyxc_numainfo(XcObject *self)
Py_DECREF(node_to_node_dist_obj);
}
- ret_obj = Py_BuildValue("{s:i}", "max_node_index", max_node_index);
+ ret_obj = Py_BuildValue("{s:i}", "max_node_index", ninfo.num_nodes + 1);
PyDict_SetItemString(ret_obj, "node_memsize", node_to_memsize_obj);
Py_DECREF(node_to_memsize_obj);
@@ -1391,11 +1389,9 @@ static PyObject *pyxc_numainfo(XcObject *self)
Py_DECREF(node_to_node_dist_list_obj);
out:
- xc_hypercall_buffer_free(self->xc_handle, node_memsize);
- xc_hypercall_buffer_free(self->xc_handle, node_memfree);
- xc_hypercall_buffer_free(self->xc_handle, nodes_dist);
+ xc_hypercall_buffer_free(self->xc_handle, meminfo);
+ xc_hypercall_buffer_free(self->xc_handle, distance);
return ret_obj ? ret_obj : pyxc_error_to_exception(self->xc_handle);
-#undef MAX_NODE_INDEX
}
static PyObject *pyxc_xeninfo(XcObject *self)
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index b2cfe11..acaeeb2 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -274,53 +274,62 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t)
u_sysctl)
case XEN_SYSCTL_numainfo:
{
- uint32_t i, j, max_node_index, last_online_node;
+ uint32_t i, j, num_nodes;
xen_sysctl_numainfo_t *ni = &op->u.numainfo;
- last_online_node = last_node(node_online_map);
- max_node_index = min_t(uint32_t, ni->max_node_index, last_online_node);
- ni->max_node_index = last_online_node;
+ num_nodes = last_node(node_online_map) + 1;
- for ( i = 0; i <= max_node_index; i++ )
+ if ( !guest_handle_is_null(ni->meminfo) &&
+ !guest_handle_is_null(ni->distance) )
{
- if ( !guest_handle_is_null(ni->node_to_memsize) )
+ if ( ni->num_nodes < num_nodes )
{
- uint64_t memsize = node_online(i) ?
- node_spanned_pages(i) << PAGE_SHIFT : 0ul;
- if ( copy_to_guest_offset(ni->node_to_memsize, i, &memsize, 1)
)
- break;
- }
- if ( !guest_handle_is_null(ni->node_to_memfree) )
- {
- uint64_t memfree = node_online(i) ?
- avail_node_heap_pages(i) << PAGE_SHIFT :
0ul;
- if ( copy_to_guest_offset(ni->node_to_memfree, i, &memfree, 1)
)
- break;
+ ret = -ENOBUFS;
+ i = num_nodes;
}
- if ( !guest_handle_is_null(ni->node_to_node_distance) )
+ for ( i = 0; i < num_nodes; i++ )
{
- for ( j = 0; j <= max_node_index; j++)
+ xen_sysctl_meminfo_t meminfo;
+ uint32_t distance[MAX_NUMNODES];
+
+ if ( node_online(i) )
{
- uint32_t distance = ~0u;
- if ( node_online(i) && node_online(j) )
- {
- u8 d = __node_distance(i, j);
- if ( d != NUMA_NO_DISTANCE )
- distance = d;
- }
- if ( copy_to_guest_offset(
- ni->node_to_node_distance,
- i*(max_node_index+1) + j, &distance, 1) )
- break;
+ meminfo.memsize = node_spanned_pages(i) << PAGE_SHIFT;
+ meminfo.memfree = avail_node_heap_pages(i) << PAGE_SHIFT;
}
- if ( j <= max_node_index )
+ else
+ meminfo.memsize = meminfo.memfree = XEN_INVALID_MEM_SZ;
+
+ for ( j = 0; j < num_nodes; j++ )
+ {
+ distance[j] = __node_distance(i, j);
+ if ( distance[j] == NUMA_NO_DISTANCE )
+ distance[j] = XEN_INVALID_NODE_DIST;
+ }
+
+ if ( copy_to_guest_offset(ni->distance, i * num_nodes,
+ distance, num_nodes) ||
+ copy_to_guest_offset(ni->meminfo, i, &meminfo, 1) )
+ {
+ ret = -EFAULT;
break;
+ }
}
}
+ else
+ i = num_nodes;
- ret = ((i <= max_node_index) || copy_to_guest(u_sysctl, op, 1))
- ? -EFAULT : 0;
+ if ( (!ret || (ret = -ENOBUFS)) && (ni->num_nodes != i) )
+ {
+ ni->num_nodes = i;
+ if ( __copy_field_to_guest(u_sysctl, op,
+ u.numainfo.num_nodes) )
+ {
+ ret = -EFAULT;
+ break;
+ }
+ }
}
break;
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 711441f..7e0d5fe 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -494,34 +494,41 @@ typedef struct xen_sysctl_cputopoinfo
xen_sysctl_cputopoinfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopoinfo_t);
/* XEN_SYSCTL_numainfo */
-#define INVALID_NUMAINFO_ID (~0U)
+#define XEN_INVALID_MEM_SZ (~0U)
+#define XEN_INVALID_NODE_DIST (~0U)
+
+struct xen_sysctl_meminfo {
+ uint64_t memsize;
+ uint64_t memfree;
+};
+typedef struct xen_sysctl_meminfo xen_sysctl_meminfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_meminfo_t);
+
+/*
+ * IN:
+ * - NULL 'meminfo' and 'distance' handles is a request for maximum
+ * 'num_nodes'.
+ * - otherwise it's the number of entries in 'meminfo' (and square root
+ * of number of entries in 'distance')
+ *
+ * OUT:
+ * - If 'num_nodes' is less than the number Xen needs to write, -ENOBUFS shall
+ * be returned and 'num_nodes' updated to reflect the intended number.
+ * - On success, 'num_nodes' shall indicate the number of entries written,
which
+ * may be less than the maximum.
+ */
+
struct xen_sysctl_numainfo {
- /*
- * IN: maximum addressable entry in the caller-provided arrays.
- * OUT: largest node identifier in the system.
- * If OUT is greater than IN then the arrays are truncated!
- */
- uint32_t max_node_index;
+ uint32_t num_nodes;
- /* NB. Entries are 0 if node is not present. */
- XEN_GUEST_HANDLE_64(uint64) node_to_memsize;
- XEN_GUEST_HANDLE_64(uint64) node_to_memfree;
+ XEN_GUEST_HANDLE_64(xen_sysctl_meminfo_t) meminfo;
/*
- * Array, of size (max_node_index+1)^2, listing memory access distances
- * between nodes. If an entry has no node distance information (e.g., node
- * not present) then the value ~0u is written.
- *
- * Note that the array rows must be indexed by multiplying by the minimum
- * of the caller-provided max_node_index and the returned value of
- * max_node_index. That is, if the largest node index in the system is
- * smaller than the caller can handle, a smaller 2-d array is constructed
- * within the space provided by the caller. When this occurs, trailing
- * space provided by the caller is not modified. If the largest node index
- * in the system is larger than the caller can handle, then a 2-d array of
- * the maximum size handleable by the caller is constructed.
+ * Distance between nodes 'i' and 'j' is stored in index 'i*N + j',
+ * where N is the number of nodes that will be returned in 'num_nodes'
+ * (i.e. not 'num_nodes' provided by the caller)
*/
- XEN_GUEST_HANDLE_64(uint32) node_to_node_distance;
+ XEN_GUEST_HANDLE_64(uint32) distance;
};
typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t);
--
1.7.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |