[Xen-devel] [PATCH v7 1/9] xen: vnuma topology and subop hypercalls
Define the interface, structures and hypercalls for the toolstack to
build a vNUMA topology and for guests that wish to retrieve it.
The patch introduces two subop hypercalls:
XEN_DOMCTL_setvnumainfo to set the vNUMA topology of a domain
and XENMEM_get_vnumainfo for a guest to retrieve that topology.
Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
---
xen/common/domain.c | 15 +++++
xen/common/domctl.c | 122 +++++++++++++++++++++++++++++++++++++
xen/common/memory.c | 75 +++++++++++++++++++++++
xen/include/public/arch-x86/xen.h | 8 +++
xen/include/public/domctl.h | 29 +++++++++
xen/include/public/memory.h | 47 +++++++++++++-
xen/include/xen/domain.h | 11 ++++
xen/include/xen/sched.h | 4 ++
8 files changed, 310 insertions(+), 1 deletion(-)
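
For illustration only, a minimal sketch of how a guest-side consumer might
call the new XENMEM_get_vnumainfo subop, assuming Linux-style guest wrappers
(HYPERVISOR_memory_op() and set_xen_guest_handle()) and that the
vnuma_topology_info definitions from this patch are visible; the helper name
and buffer sizing are hypothetical, and a real caller would retry with larger
arrays when the hypervisor returns -ENOBUFS:

    static int example_get_vnuma(unsigned int nr_vnodes, unsigned int nr_vcpus,
                                 struct vmemrange *vmemrange,
                                 unsigned int *vdistance,
                                 unsigned int *vcpu_to_vnode)
    {
        struct vnuma_topology_info topology = {
            .domid = DOMID_SELF,
            .nr_vnodes = nr_vnodes,
            .nr_vcpus = nr_vcpus,
        };

        /* All three handles must be non-NULL or the hypercall fails with -EFAULT. */
        set_xen_guest_handle(topology.vmemrange.h, vmemrange);
        set_xen_guest_handle(topology.vdistance.h, vdistance);
        set_xen_guest_handle(topology.vcpu_to_vnode.h, vcpu_to_vnode);

        /* On success, nr_vnodes and nr_vcpus are updated to the actual counts. */
        return HYPERVISOR_memory_op(XENMEM_get_vnumainfo, &topology);
    }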
diff --git a/xen/common/domain.c b/xen/common/domain.c
index d7a84cf..fe96ba0 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -280,6 +280,8 @@ struct domain *domain_create(
spin_lock_init(&d->pbuf_lock);
+ rwlock_init(&d->vnuma_rwlock);
+
err = -ENOMEM;
if ( !zalloc_cpumask_var(&d->domain_dirty_cpumask) )
goto fail;
@@ -584,6 +586,18 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
return 0;
}
+void vnuma_destroy(struct vnuma_info *vnuma)
+{
+ if ( vnuma )
+ {
+ xfree(vnuma->vmemrange);
+ xfree(vnuma->vcpu_to_vnode);
+ xfree(vnuma->vdistance);
+ xfree(vnuma->vnode_to_pnode);
+ xfree(vnuma);
+ }
+}
+
int domain_kill(struct domain *d)
{
int rc = 0;
@@ -602,6 +616,7 @@ int domain_kill(struct domain *d)
evtchn_destroy(d);
gnttab_release_mappings(d);
tmem_destroy(d->tmem_client);
+ vnuma_destroy(d->vnuma);
domain_set_outstanding_pages(d, 0);
d->tmem_client = NULL;
/* fallthrough */
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index c326aba..356a3cf 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -297,6 +297,99 @@ int vcpuaffinity_params_invalid(const xen_domctl_vcpuaffinity_t *vcpuaff)
guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
}
+/*
+ * Allocates memory for vNUMA, **vnuma should be NULL.
+ * Caller has to make sure that domain has max_pages
+ * and number of vcpus set for domain.
+ * Verifies that single allocation does not exceed
+ * PAGE_SIZE.
+ */
+static int vnuma_alloc(struct vnuma_info **vnuma,
+ unsigned int nr_vnodes,
+ unsigned int nr_vcpus)
+{
+ if ( vnuma && *vnuma )
+ return -EINVAL;
+
+ if ( nr_vnodes > XEN_MAX_VNODES )
+ return -EINVAL;
+
+ /*
+ * If XEN_MAX_VNODES increases, these allocations
+ * should be split into PAGE_SIZE allocations
+ * due to XSA-77.
+ */
+ *vnuma = xzalloc(struct vnuma_info);
+ if ( !*vnuma )
+ return -ENOMEM;
+
+ (*vnuma)->vdistance = xmalloc_array(unsigned int, nr_vnodes * nr_vnodes);
+ (*vnuma)->vmemrange = xmalloc_array(vmemrange_t, nr_vnodes);
+ (*vnuma)->vcpu_to_vnode = xmalloc_array(unsigned int, nr_vcpus);
+ (*vnuma)->vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
+
+ if ( (*vnuma)->vdistance == NULL || (*vnuma)->vmemrange == NULL ||
+ (*vnuma)->vcpu_to_vnode == NULL || (*vnuma)->vnode_to_pnode == NULL )
+ {
+ vnuma_destroy(*vnuma);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Construct vNUMA topology from u_vnuma struct and return
+ * it in dst.
+ */
+long vnuma_init(const struct xen_domctl_vnuma *u_vnuma,
+ const struct domain *d,
+ struct vnuma_info **dst)
+{
+ unsigned int nr_vnodes;
+ long ret = -EINVAL;
+ struct vnuma_info *v = NULL;
+
+ /* If vNUMA topology already set, just exit. */
+ if ( *dst )
+ return ret;
+
+ nr_vnodes = u_vnuma->nr_vnodes;
+
+ if ( nr_vnodes == 0 )
+ return ret;
+
+ ret = vnuma_alloc(&v, nr_vnodes, d->max_vcpus);
+ if ( ret )
+ return ret;
+
+ ret = -EFAULT;
+
+ if ( copy_from_guest(v->vdistance, u_vnuma->vdistance,
+ nr_vnodes * nr_vnodes) )
+ goto vnuma_fail;
+
+ if ( copy_from_guest(v->vmemrange, u_vnuma->vmemrange, nr_vnodes) )
+ goto vnuma_fail;
+
+ if ( copy_from_guest(v->vcpu_to_vnode, u_vnuma->vcpu_to_vnode,
+ d->max_vcpus) )
+ goto vnuma_fail;
+
+ if ( copy_from_guest(v->vnode_to_pnode, u_vnuma->vnode_to_pnode,
+ nr_vnodes) )
+ goto vnuma_fail;
+
+ v->nr_vnodes = nr_vnodes;
+ *dst = v;
+
+ return 0;
+
+ vnuma_fail:
+ vnuma_destroy(v);
+ return ret;
+}
+
long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
long ret = 0;
@@ -967,6 +1060,35 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
}
break;
+ case XEN_DOMCTL_setvnumainfo:
+ {
+ struct vnuma_info *v = NULL;
+
+ ret = -EINVAL;
+
+ if ( guest_handle_is_null(op->u.vnuma.vdistance) ||
+ guest_handle_is_null(op->u.vnuma.vmemrange) ||
+ guest_handle_is_null(op->u.vnuma.vcpu_to_vnode) ||
+ guest_handle_is_null(op->u.vnuma.vnode_to_pnode) ) {
+ break;
+ }
+
+ ret = vnuma_init(&op->u.vnuma, d, &v);
+ if ( ret < 0 )
+ break;
+
+ ASSERT(v != NULL);
+
+ /* overwrite vnuma for domain */
+ write_lock(&d->vnuma_rwlock);
+ vnuma_destroy(d->vnuma);
+ d->vnuma = v;
+ write_unlock(&d->vnuma_rwlock);
+
+ ret = 0;
+ }
+ break;
+
default:
ret = arch_do_domctl(op, d, u_domctl);
break;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index c2dd31b..ad61ec0 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -969,6 +969,81 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
break;
+ case XENMEM_get_vnumainfo:
+ {
+ struct vnuma_topology_info topology;
+ struct domain *d;
+ unsigned int dom_vnodes, dom_vcpus;
+
+ /*
+ * The guest passes nr_vnodes and nr_vcpus, so we
+ * know how much memory the guest has allocated.
+ */
+ if ( copy_from_guest(&topology, arg, 1) ||
+ guest_handle_is_null(topology.vmemrange.h) ||
+ guest_handle_is_null(topology.vdistance.h) ||
+ guest_handle_is_null(topology.vcpu_to_vnode.h) ) {
+ return -EFAULT;
+ }
+
+ if ( (d = rcu_lock_domain_by_any_id(topology.domid)) == NULL )
+ return -ESRCH;
+
+ rc = -EOPNOTSUPP;
+
+ read_lock(&d->vnuma_rwlock);
+
+ if ( d->vnuma == NULL )
+ goto vnumainfo_out;
+
+ dom_vnodes = d->vnuma->nr_vnodes;
+ dom_vcpus = d->max_vcpus;
+
+ if ( d->vnuma->vdistance == NULL || d->vnuma->vmemrange == NULL ||
+ d->vnuma->vcpu_to_vnode == NULL )
+ {
+ rc = -ENOMEM;
+ goto vnumainfo_out;
+ }
+
+ /*
+ * The guest's nr_vcpus and nr_vnodes may differ from the domain's vNUMA
+ * config. Check them here to make sure we don't overflow the guest buffers.
+ */
+ rc = -ENOBUFS;
+ if ( topology.nr_vnodes < dom_vnodes ||
+ topology.nr_vcpus < dom_vcpus )
+ goto vnumainfo_out;
+
+ rc = -EFAULT;
+
+ if ( copy_to_guest(topology.vmemrange.h, d->vnuma->vmemrange,
+ dom_vnodes) != 0 )
+ goto vnumainfo_out;
+
+ if ( copy_to_guest(topology.vdistance.h, d->vnuma->vdistance,
+ dom_vnodes * dom_vnodes) != 0 )
+ goto vnumainfo_out;
+
+ if ( copy_to_guest(topology.vcpu_to_vnode.h, d->vnuma->vcpu_to_vnode,
+ dom_vcpus) != 0 )
+ goto vnumainfo_out;
+
+ topology.nr_vnodes = dom_vnodes;
+ topology.nr_vcpus = dom_vcpus;
+
+ if ( __copy_to_guest(arg, &topology, 1) != 0 )
+ goto vnumainfo_out;
+
+ rc = 0;
+
+ vnumainfo_out:
+ read_unlock(&d->vnuma_rwlock);
+ rcu_unlock_domain(d);
+
+ break;
+ }
+
default:
rc = arch_memory_op(cmd, arg);
break;
diff --git a/xen/include/public/arch-x86/xen.h b/xen/include/public/arch-x86/xen.h
index f35804b..6358cbb 100644
--- a/xen/include/public/arch-x86/xen.h
+++ b/xen/include/public/arch-x86/xen.h
@@ -108,6 +108,14 @@ typedef unsigned long xen_pfn_t;
/* Maximum number of virtual CPUs in legacy multi-processor guests. */
#define XEN_LEGACY_MAX_VCPUS 32
+/*
+ * Maximum number of virtual NUMA nodes per domain.
+ * This restriction is related to security advisory
+ * XSA-77 and the max xmalloc size of PAGE_SIZE. The
+ * limit avoids multi-page allocations for vNUMA.
+ */
+#define XEN_MAX_VNODES 32
+
#ifndef __ASSEMBLY__
typedef unsigned long xen_ulong_t;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 5b11bbf..5ee74f4 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -35,6 +35,7 @@
#include "xen.h"
#include "grant_table.h"
#include "hvm/save.h"
+#include "memory.h"
#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
@@ -934,6 +935,32 @@ struct xen_domctl_vcpu_msrs {
};
typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
+
+/*
+ * Used with XEN_DOMCTL_setvnumainfo to set
+ * the vNUMA topology of a domain.
+ */
+struct xen_domctl_vnuma {
+ uint32_t nr_vnodes;
+ uint32_t _pad;
+ XEN_GUEST_HANDLE_64(uint) vdistance;
+ XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+
+ /*
+ * vnode-to-pnode mapping.
+ * This is kept on a per-domain basis for
+ * interested consumers, such as NUMA-aware ballooning.
+ */
+ XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
+
+ /*
+ * memory ranges for each vNUMA node
+ */
+ XEN_GUEST_HANDLE_64(vmemrange_t) vmemrange;
+};
+typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
+
#endif
struct xen_domctl {
@@ -1008,6 +1035,7 @@ struct xen_domctl {
#define XEN_DOMCTL_cacheflush 71
#define XEN_DOMCTL_get_vcpu_msrs 72
#define XEN_DOMCTL_set_vcpu_msrs 73
+#define XEN_DOMCTL_setvnumainfo 74
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -1068,6 +1096,7 @@ struct xen_domctl {
struct xen_domctl_cacheflush cacheflush;
struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
+ struct xen_domctl_vnuma vnuma;
uint8_t pad[128];
} u;
};
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 2c57aa0..2c212e1 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -521,9 +521,54 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
* The zero value is appropiate.
*/
+/* vNUMA node memory range */
+struct vmemrange {
+ uint64_t start, end;
+};
+
+typedef struct vmemrange vmemrange_t;
+DEFINE_XEN_GUEST_HANDLE(vmemrange_t);
+
+/*
+ * vNUMA topology specifies the number of vNUMA nodes, the distance
+ * table, the memory ranges and the vcpu-to-vnode mapping provided
+ * for guests. The XENMEM_get_vnumainfo hypercall expects the guest
+ * to supply nr_vnodes and nr_vcpus to indicate how much memory it
+ * has allocated. After filling the guest's structures, nr_vnodes
+ * and nr_vcpus are copied back to the guest.
+ */
+struct vnuma_topology_info {
+ /* IN */
+ domid_t domid;
+ /* IN/OUT */
+ unsigned int nr_vnodes;
+ unsigned int nr_vcpus;
+ /* OUT */
+ union {
+ XEN_GUEST_HANDLE(uint) h;
+ uint64_t pad;
+ } vdistance;
+ union {
+ XEN_GUEST_HANDLE(uint) h;
+ uint64_t pad;
+ } vcpu_to_vnode;
+ union {
+ XEN_GUEST_HANDLE(vmemrange_t) h;
+ uint64_t pad;
+ } vmemrange;
+};
+typedef struct vnuma_topology_info vnuma_topology_info_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
+
+/*
+ * XENMEM_get_vnumainfo is used by a guest to get
+ * its vNUMA topology from the hypervisor.
+ */
+#define XENMEM_get_vnumainfo 26
+
#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
-/* Next available subop number is 26 */
+/* Next available subop number is 27 */
#endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index bb1c398..d29a84d 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -89,4 +89,15 @@ extern unsigned int xen_processor_pmbits;
extern bool_t opt_dom0_vcpus_pin;
+/* vnuma topology per domain. */
+struct vnuma_info {
+ unsigned int nr_vnodes;
+ unsigned int *vdistance;
+ unsigned int *vcpu_to_vnode;
+ unsigned int *vnode_to_pnode;
+ struct vmemrange *vmemrange;
+};
+
+void vnuma_destroy(struct vnuma_info *vnuma);
+
#endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 4575dda..5bb7153 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -452,6 +452,10 @@ struct domain
nodemask_t node_affinity;
unsigned int last_alloc_node;
spinlock_t node_affinity_lock;
+
+ /* vNUMA topology protected by rwlock. */
+ rwlock_t vnuma_rwlock;
+ struct vnuma_info *vnuma;
};
struct domain_setup_info
--
1.7.10.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel