[Xen-devel] [PATCH v5 1/8] xen: vnuma topology and subop hypercalls

Defines the interface, structures and hypercalls for the toolstack to build
a vNUMA topology and for guests that wish to retrieve it. Two subop
hypercalls are introduced by this patch: XEN_DOMCTL_setvnumainfo to define
the vNUMA topology per domain, and XENMEM_get_vnuma_info for the guest to
retrieve that topology.

Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
---
Changes since v4:
 - added check to make sure guest has enough memory for vnuma topology;
 - code style fixes;

Changes since v3:
 - added subop hypercall to retrieve the number of vnodes and vcpus for a
   domain, to make correct allocations before requesting vnuma topology.
---
 xen/common/domain.c         |   26 ++++++++++++++
 xen/common/domctl.c         |   84 +++++++++++++++++++++++++++++++++++++++++++
 xen/common/memory.c         |   67 ++++++++++++++++++++++++++++++++++
 xen/include/public/domctl.h |   28 +++++++++++++++
 xen/include/public/memory.h |   14 ++++++++
 xen/include/public/vnuma.h  |   54 ++++++++++++++++++++++++++++
 xen/include/xen/domain.h    |   11 ++++++
 xen/include/xen/sched.h     |    1 +
 8 files changed, 285 insertions(+)
 create mode 100644 xen/include/public/vnuma.h

diff --git a/xen/common/domain.c b/xen/common/domain.c
index bc57174..5b7ce17 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -567,6 +567,15 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+static void vnuma_destroy(struct vnuma_info *vnuma)
+{
+    vnuma->nr_vnodes = 0;
+    xfree(vnuma->vmemrange);
+    xfree(vnuma->vcpu_to_vnode);
+    xfree(vnuma->vdistance);
+    xfree(vnuma->vnode_to_pnode);
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -585,6 +594,7 @@ int domain_kill(struct domain *d)
         evtchn_destroy(d);
         gnttab_release_mappings(d);
         tmem_destroy(d->tmem_client);
+        vnuma_destroy(&d->vnuma);
         domain_set_outstanding_pages(d, 0);
         d->tmem_client = NULL;
         /* fallthrough */
@@ -1350,6 +1360,22 @@ int continue_hypercall_on_cpu(
 }
 
 /*
+ * Changes a previously set domain vNUMA topology to the default one
+ * that has one node and all other default values. Since the domain
+ * memory may at this point be allocated on multiple HW NUMA nodes,
+ * NUMA_NO_NODE is set for the vnode-to-pnode mask.
+ */
+int vnuma_init_zero_topology(struct domain *d)
+{
+    d->vnuma.vmemrange[0].end = d->vnuma.vmemrange[d->vnuma.nr_vnodes - 1].end;
+    d->vnuma.vdistance[0] = 10;
+    memset(d->vnuma.vnode_to_pnode, NUMA_NO_NODE, d->vnuma.nr_vnodes);
+    memset(d->vnuma.vcpu_to_vnode, 0, d->max_vcpus);
+    d->vnuma.nr_vnodes = 1;
+    return 0;
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 4774277..66fdcee 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -29,6 +29,7 @@
 #include <asm/page.h>
 #include <public/domctl.h>
 #include <xsm/xsm.h>
+#include <public/vnuma.h>
 
 static DEFINE_SPINLOCK(domctl_lock);
 DEFINE_SPINLOCK(vcpu_alloc_lock);
@@ -888,6 +889,89 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     }
     break;
 
+    case XEN_DOMCTL_setvnumainfo:
+    {
+        unsigned int dist_size, nr_vnodes;
+
+        ret = -EINVAL;
+
+        /* If number of vnodes was set before, skip */
+        if ( d->vnuma.nr_vnodes > 0 )
+            break;
+
+        nr_vnodes = op->u.vnuma.nr_vnodes;
+        if ( nr_vnodes == 0 )
+            goto setvnumainfo_out;
+
+        if ( nr_vnodes > (UINT_MAX / nr_vnodes) )
+            goto setvnumainfo_out;
+
+        ret = -EFAULT;
+        if ( guest_handle_is_null(op->u.vnuma.vdistance) ||
+             guest_handle_is_null(op->u.vnuma.vmemrange) ||
+             guest_handle_is_null(op->u.vnuma.vcpu_to_vnode) ||
+             guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
+            goto setvnumainfo_out;
+
+        dist_size = nr_vnodes * nr_vnodes;
+
+        d->vnuma.vdistance = xmalloc_array(unsigned int, dist_size);
+        d->vnuma.vmemrange = xmalloc_array(vmemrange_t, nr_vnodes);
+        d->vnuma.vcpu_to_vnode = xmalloc_array(unsigned int, d->max_vcpus);
+        d->vnuma.vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
+
+        if ( d->vnuma.vdistance == NULL ||
+             d->vnuma.vmemrange == NULL ||
+             d->vnuma.vcpu_to_vnode == NULL ||
+             d->vnuma.vnode_to_pnode == NULL )
+        {
+            ret = -ENOMEM;
+            goto setvnumainfo_nomem;
+        }
+
+        if ( unlikely(__copy_from_guest(d->vnuma.vdistance,
+                                        op->u.vnuma.vdistance,
+                                        dist_size)) )
+            goto setvnumainfo_out;
+        if ( unlikely(__copy_from_guest(d->vnuma.vmemrange,
+                                        op->u.vnuma.vmemrange,
+                                        nr_vnodes)) )
+            goto setvnumainfo_out;
+        if ( unlikely(__copy_from_guest(d->vnuma.vcpu_to_vnode,
+                                        op->u.vnuma.vcpu_to_vnode,
+                                        d->max_vcpus)) )
+            goto setvnumainfo_out;
+        if ( unlikely(__copy_from_guest(d->vnuma.vnode_to_pnode,
+                                        op->u.vnuma.vnode_to_pnode,
+                                        nr_vnodes)) )
+            goto setvnumainfo_out;
+
+        /* Everything is good, let's set the number of vnodes */
+        d->vnuma.nr_vnodes = nr_vnodes;
+
+        ret = 0;
+        break;
+
+ setvnumainfo_out:
+        /* On failure, set one vNUMA node */
+        d->vnuma.vmemrange[0].end = d->vnuma.vmemrange[d->vnuma.nr_vnodes - 1].end;
+        d->vnuma.vdistance[0] = 10;
+        memset(d->vnuma.vnode_to_pnode, NUMA_NO_NODE, d->vnuma.nr_vnodes);
+        memset(d->vnuma.vcpu_to_vnode, 0, d->max_vcpus);
+        d->vnuma.nr_vnodes = 1;
+        ret = 0;
+        break;
+
+ setvnumainfo_nomem:
+        /* The only case where we set number of vnodes to 0 */
+        d->vnuma.nr_vnodes = 0;
+        xfree(d->vnuma.vmemrange);
+        xfree(d->vnuma.vdistance);
+        xfree(d->vnuma.vnode_to_pnode);
+        xfree(d->vnuma.vcpu_to_vnode);
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, d, u_domctl);
         break;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 257f4b0..2067f42 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -963,6 +963,73 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
         break;
 
+    case XENMEM_get_vnuma_info:
+    {
+        struct vnuma_topology_info guest_topo;
+        struct domain *d;
+
+        if ( copy_from_guest(&guest_topo, arg, 1) )
+            return -EFAULT;
+        if ( (d = rcu_lock_domain_by_any_id(guest_topo.domid)) == NULL )
+            return -ESRCH;
+
+        if ( d->vnuma.nr_vnodes == 0 ) {
+            rc = -EOPNOTSUPP;
+            goto vnumainfo_out;
+        }
+
+        rc = -EOPNOTSUPP;
+        /*
+         * The guest may have a different kernel configuration for the
+         * number of cpus/nodes; it informs Xen about them via the hypercall.
+         */
+        if ( guest_topo.nr_vnodes < d->vnuma.nr_vnodes ||
+             guest_topo.nr_vcpus < d->max_vcpus )
+            goto vnumainfo_out;
+
+        rc = -EFAULT;
+
+        if ( guest_handle_is_null(guest_topo.vmemrange.h) ||
+             guest_handle_is_null(guest_topo.vdistance.h) ||
+             guest_handle_is_null(guest_topo.vcpu_to_vnode.h) )
+            goto vnumainfo_out;
+
+        /*
+         * Take the failure path if the guest-allocated memory for the
+         * topology is insufficient. No partial copying.
+         */
+        guest_topo.nr_vnodes = d->vnuma.nr_vnodes;
+
+        if ( __copy_to_guest(guest_topo.vmemrange.h,
+                             d->vnuma.vmemrange,
+                             d->vnuma.nr_vnodes) != 0 )
+            goto vnumainfo_out;
+
+        if ( __copy_to_guest(guest_topo.vdistance.h,
+                             d->vnuma.vdistance,
+                             d->vnuma.nr_vnodes * d->vnuma.nr_vnodes) != 0 )
+            goto vnumainfo_out;
+
+        if ( __copy_to_guest(guest_topo.vcpu_to_vnode.h,
+                             d->vnuma.vcpu_to_vnode,
+                             d->max_vcpus) != 0 )
+            goto vnumainfo_out;
+
+        rc = 0;
+
+ vnumainfo_out:
+        if ( rc != 0 )
+            /*
+             * On failure to provide the vNUMA topology to the guest, leave
+             * everything as it is and only print an error. Tools will still
+             * show the domain's vNUMA topology, but it won't be seen in the guest.
+             */
+            gdprintk(XENLOG_INFO, "vNUMA: failed to copy topology info to guest.\n");
+
+        rcu_unlock_domain(d);
+        break;
+    }
+
     default:
         rc = arch_memory_op(cmd, arg);
         break;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 565fa4c..8b65a75 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -35,6 +35,7 @@
 #include "xen.h"
 #include "grant_table.h"
 #include "hvm/save.h"
+#include "vnuma.h"
 
 #define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
 
@@ -895,6 +896,31 @@ struct xen_domctl_cacheflush {
 typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t);
 
+/*
+ * XEN_DOMCTL_setvnumainfo: sets the vNUMA topology
+ * parameters for a domain from the toolstack.
+ */
+struct xen_domctl_vnuma {
+    uint32_t nr_vnodes;
+    uint32_t __pad;
+    XEN_GUEST_HANDLE_64(uint) vdistance;
+    XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+
+    /*
+     * vnode-to-physical-NUMA-node mask.
+     * This is kept on a per-domain basis for
+     * interested consumers, such as NUMA-aware ballooning.
+     */
+    XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
+
+    /*
+     * memory ranges for each vNUMA node
+     */
+    XEN_GUEST_HANDLE_64(vmemrange_t) vmemrange;
+};
+typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -965,6 +991,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_getnodeaffinity               69
 #define XEN_DOMCTL_set_max_evtchn                70
 #define XEN_DOMCTL_cacheflush                    71
+#define XEN_DOMCTL_setvnumainfo                  72
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1024,6 +1051,7 @@ struct xen_domctl {
         struct xen_domctl_cacheflush        cacheflush;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
+        struct xen_domctl_vnuma             vnuma;
         uint8_t                             pad[128];
     } u;
 };
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 2c57aa0..a7dc035 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -354,6 +354,20 @@ struct xen_pod_target {
 };
 typedef struct xen_pod_target xen_pod_target_t;
 
+/*
+ * XENMEM_get_vnuma_info is used by the caller to get the
+ * vNUMA topology constructed for a particular domain.
+ *
+ * The data exchanged is described by vnuma_topology_info.
+ */
+#define XENMEM_get_vnuma_info               26
+
+/*
+ * XENMEM_get_vnuma_pnode is used by the guest to determine
+ * the physical node of the specified vnode.
+ */
+/*#define XENMEM_get_vnuma_pnode              27*/
+
 #if defined(__XEN__) || defined(__XEN_TOOLS__)
 
 #ifndef uint64_aligned_t
diff --git a/xen/include/public/vnuma.h b/xen/include/public/vnuma.h
new file mode 100644
index 0000000..ab9eda0
--- /dev/null
+++ b/xen/include/public/vnuma.h
@@ -0,0 +1,54 @@
+#ifndef _XEN_PUBLIC_VNUMA_H
+#define _XEN_PUBLIC_VNUMA_H
+
+#include "xen.h"
+
+/*
+ * The following structures are used to represent the vNUMA
+ * topology to the guest if requested.
+ */
+
+/*
+ * Memory ranges can be used to define
+ * vNUMA memory node boundaries as a
+ * linked list. As of now, only one range
+ * per domain is supported.
+ */
+struct vmemrange {
+    uint64_t start, end;
+};
+
+typedef struct vmemrange vmemrange_t;
+DEFINE_XEN_GUEST_HANDLE(vmemrange_t);
+
+/*
+ * The vNUMA topology specifies the number of vNUMA nodes, distance table,
+ * memory ranges and vcpu-to-vnode mapping provided for guests.
+ * When issuing the hypercall, the guest is expected to inform Xen about the
+ * memory allocated for the vnuma structures through nr_vnodes and nr_vcpus.
+ */
+
+struct vnuma_topology_info {
+    /* IN */
+    domid_t domid;
+    /* IN/OUT */
+    unsigned int nr_vnodes;
+    unsigned int nr_vcpus;
+    /* OUT */
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t    _pad;
+    } vdistance;
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t    _pad;
+    } vcpu_to_vnode;
+    union {
+        XEN_GUEST_HANDLE(vmemrange_t) h;
+        uint64_t    _pad;
+    } vmemrange;
+};
+typedef struct vnuma_topology_info vnuma_topology_info_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
+
+#endif
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index bb1c398..e8b36e3 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -89,4 +89,15 @@ extern unsigned int xen_processor_pmbits;
 
 extern bool_t opt_dom0_vcpus_pin;
 
+/* vnuma_info struct to manage by Xen */
+struct vnuma_info {
+    unsigned int nr_vnodes;
+    unsigned int *vdistance;
+    unsigned int *vcpu_to_vnode;
+    unsigned int *vnode_to_pnode;
+    struct vmemrange *vmemrange;
+};
+
+int vnuma_init_zero_topology(struct domain *d);
+
 #endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 44851ae..a1163fd 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -444,6 +444,7 @@ struct domain
     nodemask_t node_affinity;
     unsigned int last_alloc_node;
     spinlock_t node_affinity_lock;
+    struct vnuma_info vnuma;
 };
 
 struct domain_setup_info
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
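
[Editor's illustration, not part of the patch or the original mail.] As a rough
sketch of the calling convention the patch introduces, a guest could fetch its
vNUMA topology through XENMEM_get_vnuma_info roughly as below. This assumes a
Linux-style guest environment that provides HYPERVISOR_memory_op(),
set_xen_guest_handle(), kcalloc() and the new public vnuma.h definitions; the
function name xen_fetch_vnuma() and the way results are consumed are purely
hypothetical.

    /* Hypothetical guest-side caller, for illustration only. */
    static int xen_fetch_vnuma(unsigned int nr_vnodes, unsigned int nr_vcpus)
    {
        struct vnuma_topology_info topo = { .domid = DOMID_SELF };
        unsigned int *vdistance = NULL, *vcpu_to_vnode = NULL;
        struct vmemrange *vmemrange = NULL;
        int rc = -ENOMEM;

        /* Tell Xen how much room was allocated; Xen refuses partial copies. */
        topo.nr_vnodes = nr_vnodes;
        topo.nr_vcpus = nr_vcpus;

        vdistance = kcalloc(nr_vnodes * nr_vnodes, sizeof(*vdistance), GFP_KERNEL);
        vcpu_to_vnode = kcalloc(nr_vcpus, sizeof(*vcpu_to_vnode), GFP_KERNEL);
        vmemrange = kcalloc(nr_vnodes, sizeof(*vmemrange), GFP_KERNEL);
        if (!vdistance || !vcpu_to_vnode || !vmemrange)
            goto out;

        set_xen_guest_handle(topo.vdistance.h, vdistance);
        set_xen_guest_handle(topo.vcpu_to_vnode.h, vcpu_to_vnode);
        set_xen_guest_handle(topo.vmemrange.h, vmemrange);

        /* Returns -EOPNOTSUPP if no vNUMA topology was set for this domain. */
        rc = HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &topo);
        if (rc == 0)
            pr_info("vNUMA: %u virtual node(s)\n", topo.nr_vnodes);

    out:
        /* A real consumer would hand the arrays to its NUMA setup code here. */
        kfree(vdistance);
        kfree(vcpu_to_vnode);
        kfree(vmemrange);
        return rc;
    }

The toolstack side is the mirror image: it fills a struct xen_domctl_vnuma with
the per-domain distance table, vcpu and pnode mappings and memory ranges, and
issues XEN_DOMCTL_setvnumainfo once, before the guest queries the topology.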