[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v7 1/5] sysctl: Make XEN_SYSCTL_numainfo a little more efficient
A number of changes to XEN_SYSCTL_numainfo interface: * Make sysctl NUMA topology query use fewer copies by combining some fields into a single structure and copying distances for each node in a single copy. * NULL meminfo and distance handles are a request for maximum number of nodes (num_nodes). If those handles are valid and num_nodes is smaller than the number of nodes in the system then -ENOBUFS is returned (and correct num_nodes is provided) * Instead of using max_node_index for passing number of nodes keep this value in num_nodes: almost all uses of max_node_index required adding or subtracting one to eventually get to number of nodes anyway. * Replace INVALID_NUMAINFO_ID with XEN_INVALID_MEM_SZ and add XEN_INVALID_NODE_DIST. Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx> Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx> --- Changes in v7: * Allow one of arguments to NUMA info sysctls to be NULL, in which case only the non-NULL buffer will be filled in by hypervisor (changes in sysctl.[ch]) tools/libxl/libxl.c | 66 +++++++++++++---------------- tools/python/xen/lowlevel/xc/xc.c | 58 ++++++++++++-------------- xen/common/sysctl.c | 84 +++++++++++++++++++++++-------------- xen/include/public/sysctl.h | 54 ++++++++++++++---------- 4 files changed, 141 insertions(+), 121 deletions(-) diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index 511eef1..2ff46b4 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -5156,65 +5156,59 @@ libxl_numainfo *libxl_get_numainfo(libxl_ctx *ctx, int *nr) { GC_INIT(ctx); xc_numainfo_t ninfo; - DECLARE_HYPERCALL_BUFFER(xc_node_to_memsize_t, memsize); - DECLARE_HYPERCALL_BUFFER(xc_node_to_memfree_t, memfree); - DECLARE_HYPERCALL_BUFFER(uint32_t, node_dists); + DECLARE_HYPERCALL_BUFFER(xen_sysctl_meminfo_t, meminfo); + DECLARE_HYPERCALL_BUFFER(uint32_t, distance); libxl_numainfo *ret = NULL; - int i, j, max_nodes; + int i, j; - max_nodes = libxl_get_max_nodes(ctx); - if (max_nodes < 0) - { + 
set_xen_guest_handle(ninfo.meminfo, HYPERCALL_BUFFER_NULL); + set_xen_guest_handle(ninfo.distance, HYPERCALL_BUFFER_NULL); + if (xc_numainfo(ctx->xch, &ninfo) != 0) { LIBXL__LOG(ctx, XTL_ERROR, "Unable to determine number of NODES"); ret = NULL; goto out; } - memsize = xc_hypercall_buffer_alloc - (ctx->xch, memsize, sizeof(*memsize) * max_nodes); - memfree = xc_hypercall_buffer_alloc - (ctx->xch, memfree, sizeof(*memfree) * max_nodes); - node_dists = xc_hypercall_buffer_alloc - (ctx->xch, node_dists, sizeof(*node_dists) * max_nodes * max_nodes); - if ((memsize == NULL) || (memfree == NULL) || (node_dists == NULL)) { + meminfo = xc_hypercall_buffer_alloc(ctx->xch, meminfo, + sizeof(*meminfo) * ninfo.num_nodes); + distance = xc_hypercall_buffer_alloc(ctx->xch, distance, + sizeof(*distance) * + ninfo.num_nodes * ninfo.num_nodes); + if ((meminfo == NULL) || (distance == NULL)) { LIBXL__LOG_ERRNOVAL(ctx, XTL_ERROR, ENOMEM, "Unable to allocate hypercall arguments"); goto fail; } - set_xen_guest_handle(ninfo.node_to_memsize, memsize); - set_xen_guest_handle(ninfo.node_to_memfree, memfree); - set_xen_guest_handle(ninfo.node_to_node_distance, node_dists); - ninfo.max_node_index = max_nodes - 1; + set_xen_guest_handle(ninfo.meminfo, meminfo); + set_xen_guest_handle(ninfo.distance, distance); if (xc_numainfo(ctx->xch, &ninfo) != 0) { LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "getting numainfo"); goto fail; } - if (ninfo.max_node_index < max_nodes - 1) - max_nodes = ninfo.max_node_index + 1; + *nr = ninfo.num_nodes; - *nr = max_nodes; + ret = libxl__zalloc(NOGC, sizeof(libxl_numainfo) * ninfo.num_nodes); + for (i = 0; i < ninfo.num_nodes; i++) + ret[i].dists = libxl__calloc(NOGC, ninfo.num_nodes, sizeof(*distance)); - ret = libxl__zalloc(NOGC, sizeof(libxl_numainfo) * max_nodes); - for (i = 0; i < max_nodes; i++) - ret[i].dists = libxl__calloc(NOGC, max_nodes, sizeof(*node_dists)); - - for (i = 0; i < max_nodes; i++) { -#define V(mem, i) (mem[i] == INVALID_NUMAINFO_ID) ? 
\ - LIBXL_NUMAINFO_INVALID_ENTRY : mem[i] - ret[i].size = V(memsize, i); - ret[i].free = V(memfree, i); - ret[i].num_dists = max_nodes; - for (j = 0; j < ret[i].num_dists; j++) - ret[i].dists[j] = V(node_dists, i * max_nodes + j); + for (i = 0; i < ninfo.num_nodes; i++) { +#define V(val, invalid) (val == invalid) ? \ + LIBXL_NUMAINFO_INVALID_ENTRY : val + ret[i].size = V(meminfo[i].memsize, XEN_INVALID_MEM_SZ); + ret[i].free = V(meminfo[i].memfree, XEN_INVALID_MEM_SZ); + ret[i].num_dists = ninfo.num_nodes; + for (j = 0; j < ret[i].num_dists; j++) { + unsigned idx = i * ninfo.num_nodes + j; + ret[i].dists[j] = V(distance[idx], XEN_INVALID_NODE_DIST); + } #undef V } fail: - xc_hypercall_buffer_free(ctx->xch, memsize); - xc_hypercall_buffer_free(ctx->xch, memfree); - xc_hypercall_buffer_free(ctx->xch, node_dists); + xc_hypercall_buffer_free(ctx->xch, meminfo); + xc_hypercall_buffer_free(ctx->xch, distance); out: GC_FREE; diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c index 5e81c4a..ba66d55 100644 --- a/tools/python/xen/lowlevel/xc/xc.c +++ b/tools/python/xen/lowlevel/xc/xc.c @@ -1297,55 +1297,52 @@ out: static PyObject *pyxc_numainfo(XcObject *self) { -#define MAX_NODE_INDEX 31 xc_numainfo_t ninfo = { 0 }; - int i, j, max_node_index; + unsigned i, j; uint64_t free_heap; PyObject *ret_obj = NULL, *node_to_node_dist_list_obj; PyObject *node_to_memsize_obj, *node_to_memfree_obj; PyObject *node_to_dma32_mem_obj, *node_to_node_dist_obj; - DECLARE_HYPERCALL_BUFFER(xc_node_to_memsize_t, node_memsize); - DECLARE_HYPERCALL_BUFFER(xc_node_to_memfree_t, node_memfree); - DECLARE_HYPERCALL_BUFFER(xc_node_to_node_dist_t, nodes_dist); + DECLARE_HYPERCALL_BUFFER(xen_sysctl_meminfo_t, meminfo); + DECLARE_HYPERCALL_BUFFER(uint32_t, distance); - node_memsize = xc_hypercall_buffer_alloc(self->xc_handle, node_memsize, sizeof(*node_memsize)*(MAX_NODE_INDEX+1)); - if ( node_memsize == NULL ) + set_xen_guest_handle(ninfo.meminfo, HYPERCALL_BUFFER_NULL); + 
set_xen_guest_handle(ninfo.distance, HYPERCALL_BUFFER_NULL); + if ( xc_numainfo(self->xc_handle, &ninfo) != 0 ) goto out; - node_memfree = xc_hypercall_buffer_alloc(self->xc_handle, node_memfree, sizeof(*node_memfree)*(MAX_NODE_INDEX+1)); - if ( node_memfree == NULL ) + + meminfo = xc_hypercall_buffer_alloc(self->xc_handle, meminfo, + sizeof(*meminfo) * ninfo.num_nodes); + if ( meminfo == NULL ) goto out; - nodes_dist = xc_hypercall_buffer_alloc(self->xc_handle, nodes_dist, sizeof(*nodes_dist)*(MAX_NODE_INDEX+1)*(MAX_NODE_INDEX+1)); - if ( nodes_dist == NULL ) + distance = xc_hypercall_buffer_alloc(self->xc_handle, distance, + sizeof(*distance) * + ninfo.num_nodes * ninfo.num_nodes); + if ( distance == NULL ) goto out; - set_xen_guest_handle(ninfo.node_to_memsize, node_memsize); - set_xen_guest_handle(ninfo.node_to_memfree, node_memfree); - set_xen_guest_handle(ninfo.node_to_node_distance, nodes_dist); - ninfo.max_node_index = MAX_NODE_INDEX; - + set_xen_guest_handle(ninfo.meminfo, meminfo); + set_xen_guest_handle(ninfo.distance, distance); if ( xc_numainfo(self->xc_handle, &ninfo) != 0 ) goto out; - max_node_index = ninfo.max_node_index; - if ( max_node_index > MAX_NODE_INDEX ) - max_node_index = MAX_NODE_INDEX; - /* Construct node-to-* lists. 
*/ node_to_memsize_obj = PyList_New(0); node_to_memfree_obj = PyList_New(0); node_to_dma32_mem_obj = PyList_New(0); node_to_node_dist_list_obj = PyList_New(0); - for ( i = 0; i <= max_node_index; i++ ) + for ( i = 0; i < ninfo.num_nodes; i++ ) { PyObject *pyint; + unsigned invalid_node; /* Total Memory */ - pyint = PyInt_FromLong(node_memsize[i] >> 20); /* MB */ + pyint = PyInt_FromLong(meminfo[i].memsize >> 20); /* MB */ PyList_Append(node_to_memsize_obj, pyint); Py_DECREF(pyint); /* Free Memory */ - pyint = PyInt_FromLong(node_memfree[i] >> 20); /* MB */ + pyint = PyInt_FromLong(meminfo[i].memfree >> 20); /* MB */ PyList_Append(node_to_memfree_obj, pyint); Py_DECREF(pyint); @@ -1357,10 +1354,11 @@ static PyObject *pyxc_numainfo(XcObject *self) /* Node to Node Distance */ node_to_node_dist_obj = PyList_New(0); - for ( j = 0; j <= max_node_index; j++ ) + invalid_node = (meminfo[i].memsize == XEN_INVALID_MEM_SZ); + for ( j = 0; j < ninfo.num_nodes; j++ ) { - uint32_t dist = nodes_dist[i*(max_node_index+1) + j]; - if ( dist == ~0u ) + uint32_t dist = distance[i * ninfo.num_nodes + j]; + if ( invalid_node || (dist == XEN_INVALID_NODE_DIST) ) { PyList_Append(node_to_node_dist_obj, Py_None); } @@ -1375,7 +1373,7 @@ static PyObject *pyxc_numainfo(XcObject *self) Py_DECREF(node_to_node_dist_obj); } - ret_obj = Py_BuildValue("{s:i}", "max_node_index", max_node_index); + ret_obj = Py_BuildValue("{s:i}", "max_node_index", ninfo.num_nodes + 1); PyDict_SetItemString(ret_obj, "node_memsize", node_to_memsize_obj); Py_DECREF(node_to_memsize_obj); @@ -1391,11 +1389,9 @@ static PyObject *pyxc_numainfo(XcObject *self) Py_DECREF(node_to_node_dist_list_obj); out: - xc_hypercall_buffer_free(self->xc_handle, node_memsize); - xc_hypercall_buffer_free(self->xc_handle, node_memfree); - xc_hypercall_buffer_free(self->xc_handle, nodes_dist); + xc_hypercall_buffer_free(self->xc_handle, meminfo); + xc_hypercall_buffer_free(self->xc_handle, distance); return ret_obj ? 
ret_obj : pyxc_error_to_exception(self->xc_handle); -#undef MAX_NODE_INDEX } static PyObject *pyxc_xeninfo(XcObject *self) diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c index 70413cc..b025a90 100644 --- a/xen/common/sysctl.c +++ b/xen/common/sysctl.c @@ -274,54 +274,76 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl) case XEN_SYSCTL_numainfo: { - uint32_t i, j, max_node_index, last_online_node; + unsigned int i, j, num_nodes; xen_sysctl_numainfo_t *ni = &op->u.numainfo; + bool_t do_meminfo = !guest_handle_is_null(ni->meminfo); + bool_t do_distance = !guest_handle_is_null(ni->distance); - last_online_node = last_node(node_online_map); - max_node_index = min_t(uint32_t, ni->max_node_index, last_online_node); - ni->max_node_index = last_online_node; + num_nodes = last_node(node_online_map) + 1; - for ( i = 0; i <= max_node_index; i++ ) + if ( do_meminfo || do_distance ) { - if ( !guest_handle_is_null(ni->node_to_memsize) ) + if ( ni->num_nodes < num_nodes ) { - uint64_t memsize = node_online(i) ? - node_spanned_pages(i) << PAGE_SHIFT : 0ul; - if ( copy_to_guest_offset(ni->node_to_memsize, i, &memsize, 1) ) - break; - } - if ( !guest_handle_is_null(ni->node_to_memfree) ) - { - uint64_t memfree = node_online(i) ? 
- avail_node_heap_pages(i) << PAGE_SHIFT : 0ul; - if ( copy_to_guest_offset(ni->node_to_memfree, i, &memfree, 1) ) - break; + ret = -ENOBUFS; + i = num_nodes; } + else + i = 0; - if ( !guest_handle_is_null(ni->node_to_node_distance) ) + for ( ; i < num_nodes; i++ ) { - for ( j = 0; j <= max_node_index; j++) + xen_sysctl_meminfo_t meminfo; + static uint32_t distance[MAX_NUMNODES]; + + if ( do_meminfo ) { - uint32_t distance = ~0u; - if ( node_online(i) && node_online(j) ) + if ( node_online(i) ) + { + meminfo.memsize = node_spanned_pages(i) << PAGE_SHIFT; + meminfo.memfree = avail_node_heap_pages(i) << PAGE_SHIFT; + } + else + meminfo.memsize = meminfo.memfree = XEN_INVALID_MEM_SZ; + + if ( copy_to_guest_offset(ni->meminfo, i, &meminfo, 1) ) { - u8 d = __node_distance(i, j); + ret = -EFAULT; + break; + } + } - if ( d != NUMA_NO_DISTANCE ) - distance = d; + if ( do_distance ) + { + for ( j = 0; j < num_nodes; j++ ) + { + distance[j] = __node_distance(i, j); + if ( distance[j] == NUMA_NO_DISTANCE ) + distance[j] = XEN_INVALID_NODE_DIST; } - if ( copy_to_guest_offset( - ni->node_to_node_distance, - i*(max_node_index+1) + j, &distance, 1) ) + + if ( copy_to_guest_offset(ni->distance, i * num_nodes, + distance, num_nodes) ) + { + ret = -EFAULT; break; + } } - if ( j <= max_node_index ) - break; } } + else + i = num_nodes; - ret = ((i <= max_node_index) || copy_to_guest(u_sysctl, op, 1)) - ? 
-EFAULT : 0; + if ( (!ret || (ret == -ENOBUFS)) && (ni->num_nodes != i) ) + { + ni->num_nodes = i; + if ( __copy_field_to_guest(u_sysctl, op, + u.numainfo.num_nodes) ) + { + ret = -EFAULT; + break; + } + } } break; diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h index 711441f..021d505 100644 --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -494,34 +494,42 @@ typedef struct xen_sysctl_cputopoinfo xen_sysctl_cputopoinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopoinfo_t); /* XEN_SYSCTL_numainfo */ -#define INVALID_NUMAINFO_ID (~0U) +#define XEN_INVALID_MEM_SZ (~0U) +#define XEN_INVALID_NODE_DIST (~0U) + +struct xen_sysctl_meminfo { + uint64_t memsize; + uint64_t memfree; +}; +typedef struct xen_sysctl_meminfo xen_sysctl_meminfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_meminfo_t); + +/* + * IN: + * - Both 'meminfo' and 'distance' handles being null is a request + * for maximum value of 'num_nodes'. + * - Otherwise it's the number of entries in 'meminfo' and square root + * of number of entries in 'distance' (when corresponding handle is + * non-null) + * + * OUT: + * - If 'num_nodes' is less than the number Xen needs to write, -ENOBUFS shall + * be returned and 'num_nodes' updated to reflect the intended number. + * - On success, 'num_nodes' shall indicate the number of entries written, which + * may be less than the maximum. + */ + struct xen_sysctl_numainfo { - /* - * IN: maximum addressable entry in the caller-provided arrays. - * OUT: largest node identifier in the system. - * If OUT is greater than IN then the arrays are truncated! - */ - uint32_t max_node_index; + uint32_t num_nodes; - /* NB. Entries are 0 if node is not present. */ - XEN_GUEST_HANDLE_64(uint64) node_to_memsize; - XEN_GUEST_HANDLE_64(uint64) node_to_memfree; + XEN_GUEST_HANDLE_64(xen_sysctl_meminfo_t) meminfo; /* - * Array, of size (max_node_index+1)^2, listing memory access distances - * between nodes. 
If an entry has no node distance information (e.g., node - * not present) then the value ~0u is written. - * - * Note that the array rows must be indexed by multiplying by the minimum - * of the caller-provided max_node_index and the returned value of - * max_node_index. That is, if the largest node index in the system is - * smaller than the caller can handle, a smaller 2-d array is constructed - * within the space provided by the caller. When this occurs, trailing - * space provided by the caller is not modified. If the largest node index - * in the system is larger than the caller can handle, then a 2-d array of - * the maximum size handleable by the caller is constructed. + * Distance between nodes 'i' and 'j' is stored in index 'i*N + j', + * where N is the number of nodes that will be returned in 'num_nodes' + * (i.e. not 'num_nodes' provided by the caller) */ - XEN_GUEST_HANDLE_64(uint32) node_to_node_distance; + XEN_GUEST_HANDLE_64(uint32) distance; }; typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t); -- 1.7.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |