Re: [Xen-devel] [PATCH v6 01/10] xen: vnuma topology and subop hypercalls
On Fri, Jul 18, 2014 at 01:50:00AM -0400, Elena Ufimtseva wrote:
> Define interface, structures and hypercalls for toolstack to
> build vnuma topology and for guests that wish to retrieve it.
> Two subop hypercalls introduced by patch:
> XEN_DOMCTL_setvnumainfo to define vNUMA domain topology per domain
> and XENMEM_get_vnumainfo to retrieve that topology by guest.
>
> Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
> ---
>  xen/common/domain.c         |  13 ++++
>  xen/common/domctl.c         | 167 +++++++++++++++++++++++++++++++++++++++++++
>  xen/common/memory.c         |  62 ++++++++++++++++
>  xen/include/public/domctl.h |  29 ++++++++
>  xen/include/public/memory.h |  47 +++++++++++-
>  xen/include/xen/domain.h    |  11 +++
>  xen/include/xen/sched.h     |   1 +
>  7 files changed, 329 insertions(+), 1 deletion(-)
>
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index cd64aea..895584a 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -584,6 +584,18 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
>      return 0;
>  }
>
> +void vnuma_destroy(struct vnuma_info *vnuma)
> +{
> +    if ( vnuma )
> +    {
> +        xfree(vnuma->vmemrange);
> +        xfree(vnuma->vcpu_to_vnode);
> +        xfree(vnuma->vdistance);
> +        xfree(vnuma->vnode_to_pnode);
> +        xfree(vnuma);
> +    }
> +}
> +
>  int domain_kill(struct domain *d)
>  {
>      int rc = 0;
> @@ -602,6 +614,7 @@ int domain_kill(struct domain *d)
>          evtchn_destroy(d);
>          gnttab_release_mappings(d);
>          tmem_destroy(d->tmem_client);
> +        vnuma_destroy(d->vnuma);
>          domain_set_outstanding_pages(d, 0);
>          d->tmem_client = NULL;
>          /* fallthrough */
> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
> index c326aba..7464284 100644
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -297,6 +297,144 @@ int vcpuaffinity_params_invalid(const xen_domctl_vcpuaffinity_t *vcpuaff)
>              guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
>  }
>
> +/*
> + * Allocates memory for vNUMA, **vnuma should be NULL.
> + * Caller has to make sure that domain has max_pages
> + * and number of vcpus set for domain.
> + * Verifies that single allocation does not exceed
> + * PAGE_SIZE.
> + */
> +static int vnuma_alloc(struct vnuma_info **vnuma,
> +                       unsigned int nr_vnodes,
> +                       unsigned int nr_vcpus,
> +                       unsigned int dist_size)
> +{
> +    struct vnuma_info *v;
> +
> +    if ( vnuma && *vnuma )
> +        return -EINVAL;
> +
> +    v = *vnuma;
> +    /*
> +     * check if any of xmallocs exeeds PAGE_SIZE.
> +     * If yes, consider it as an error for now.
> +     */
> +    if ( nr_vnodes > PAGE_SIZE / sizeof(nr_vnodes) ||
> +         nr_vcpus > PAGE_SIZE / sizeof(nr_vcpus) ||
> +         nr_vnodes > PAGE_SIZE / sizeof(struct vmemrange) ||
> +         dist_size > PAGE_SIZE / sizeof(dist_size) )
> +        return -EINVAL;
> +
> +    v = xzalloc(struct vnuma_info);
> +    if ( !v )
> +        return -ENOMEM;
> +
> +    v->vdistance = xmalloc_array(unsigned int, dist_size);
> +    v->vmemrange = xmalloc_array(vmemrange_t, nr_vnodes);
> +    v->vcpu_to_vnode = xmalloc_array(unsigned int, nr_vcpus);
> +    v->vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
> +
> +    if ( v->vdistance == NULL || v->vmemrange == NULL ||
> +         v->vcpu_to_vnode == NULL || v->vnode_to_pnode == NULL )
> +    {
> +        vnuma_destroy(v);
> +        return -ENOMEM;
> +    }
> +
> +    *vnuma = v;
> +
> +    return 0;
> +}
> +
> +/*
> + * Allocate memory and construct one vNUMA node,
> + * set default parameters, assign all memory and
> + * vcpus to this node, set distance to 10.
> + */
> +static long vnuma_fallback(const struct domain *d,
> +                           struct vnuma_info **vnuma)
> +{
> +    struct vnuma_info *v;
> +    long ret;
> +
> +
> +    /* Will not destroy vNUMA here, destroy before calling this. */
> +    if ( vnuma && *vnuma )
> +        return -EINVAL;
> +
> +    v = *vnuma;
> +    ret = vnuma_alloc(&v, 1, d->max_vcpus, 1);
> +    if ( ret )
> +        return ret;
> +
> +    v->vmemrange[0].start = 0;
> +    v->vmemrange[0].end = d->max_pages << PAGE_SHIFT;
> +    v->vdistance[0] = 10;
> +    v->vnode_to_pnode[0] = NUMA_NO_NODE;
> +    memset(v->vcpu_to_vnode, 0, d->max_vcpus);
> +    v->nr_vnodes = 1;
> +
> +    *vnuma = v;
> +
> +    return 0;
> +}
> +
> +/*
> + * construct vNUMA topology form u_vnuma struct and return
> + * it in dst.
> + */
> +long vnuma_init(const struct xen_domctl_vnuma *u_vnuma,
> +                const struct domain *d,
> +                struct vnuma_info **dst)
> +{
> +    unsigned int dist_size, nr_vnodes = 0;
> +    long ret;
> +    struct vnuma_info *v = NULL;
> +
> +    ret = -EINVAL;
> +
> +    /* If vNUMA topology already set, just exit. */
> +    if ( !u_vnuma || *dst )
> +        return ret;
> +
> +    nr_vnodes = u_vnuma->nr_vnodes;
> +
> +    if ( nr_vnodes == 0 )
> +        return ret;
> +
> +    if ( nr_vnodes > (UINT_MAX / nr_vnodes) )
> +        return ret;
> +
> +    dist_size = nr_vnodes * nr_vnodes;
> +
> +    ret = vnuma_alloc(&v, nr_vnodes, d->max_vcpus, dist_size);
> +    if ( ret )
> +        return ret;
> +
> +    /* On failure, set only one vNUMA node and its success. */
> +    ret = 0;
> +
> +    if ( copy_from_guest(v->vdistance, u_vnuma->vdistance, dist_size) )
> +        goto vnuma_onenode;
> +    if ( copy_from_guest(v->vmemrange, u_vnuma->vmemrange, nr_vnodes) )
> +        goto vnuma_onenode;
> +    if ( copy_from_guest(v->vcpu_to_vnode, u_vnuma->vcpu_to_vnode,
> +                         d->max_vcpus) )
> +        goto vnuma_onenode;
> +    if ( copy_from_guest(v->vnode_to_pnode, u_vnuma->vnode_to_pnode,
> +                         nr_vnodes) )
> +        goto vnuma_onenode;
> +
> +    v->nr_vnodes = nr_vnodes;
> +    *dst = v;
> +
> +    return ret;
> +
> +vnuma_onenode:
> +    vnuma_destroy(v);
> +    return vnuma_fallback(d, dst);
> +}
> +
>  long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>  {
>      long ret = 0;
> @@ -967,6 +1105,35 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>          }
>          break;
>
> +    case XEN_DOMCTL_setvnumainfo:
> +    {
> +        struct vnuma_info *v = NULL;
> +
> +        ret = -EFAULT;
> +        if ( guest_handle_is_null(op->u.vnuma.vdistance) ||
> +             guest_handle_is_null(op->u.vnuma.vmemrange) ||
> +             guest_handle_is_null(op->u.vnuma.vcpu_to_vnode) ||
> +             guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
> +            return ret;
> +
> +        ret = -EINVAL;
> +
> +        ret = vnuma_init(&op->u.vnuma, d, &v);
> +        if ( ret < 0 || v == NULL )
> +            break;
> +
> +        /* overwrite vnuma for domain */
> +        if ( !d->vnuma )

You want that within the domain_lock. Otherwise a caller (on another CPU)
could try to read d->vnuma and blow up, say by using the serial console to
read the guest vNUMA topology.

> +            vnuma_destroy(d->vnuma);
> +
> +        domain_lock(d);

I would just do vnuma_destroy(d->vnuma) here and remove the 'if' above.
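Roughly, as an untested sketch using only what is already in this hunk
(take the lock, free whatever topology was installed before, then publish
the new one):

    domain_lock(d);
    /* Free any topology installed by a previous XEN_DOMCTL_setvnumainfo. */
    vnuma_destroy(d->vnuma);
    d->vnuma = v;
    domain_unlock(d);

    ret = 0;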
> +        d->vnuma = v;
> +        domain_unlock(d);
> +
> +        ret = 0;
> +    }
> +    break;
> +
>      default:
>          ret = arch_do_domctl(op, d, u_domctl);
>          break;
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index c2dd31b..925b9fc 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -969,6 +969,68 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>
>          break;
>
> +    case XENMEM_get_vnumainfo:
> +    {
> +        struct vnuma_topology_info topology;
> +        struct domain *d;
> +        unsigned int dom_vnodes = 0;
> +
> +        /*
> +         * guest passes nr_vnodes and nr_vcpus thus
> +         * we know how much memory guest has allocated.
> +         */
> +        if ( copy_from_guest(&topology, arg, 1) ||
> +             guest_handle_is_null(topology.vmemrange.h) ||
> +             guest_handle_is_null(topology.vdistance.h) ||
> +             guest_handle_is_null(topology.vcpu_to_vnode.h) )
> +            return -EFAULT;
> +
> +        if ( (d = rcu_lock_domain_by_any_id(topology.domid)) == NULL )
> +            return -ESRCH;
> +
> +        rc = -EOPNOTSUPP;
> +        if ( d->vnuma == NULL )
> +            goto vnumainfo_out;
> +
> +        if ( d->vnuma->nr_vnodes == 0 )
> +            goto vnumainfo_out;
> +
> +        dom_vnodes = d->vnuma->nr_vnodes;
> +
> +        /*
> +         * guest nr_cpus and nr_nodes may differ from domain vnuma config.
> +         * Check here guest nr_nodes and nr_cpus to make sure we dont overflow.
> +         */
> +        rc = -ENOBUFS;
> +        if ( topology.nr_vnodes < dom_vnodes ||
> +             topology.nr_vcpus < d->max_vcpus )
> +            goto vnumainfo_out;
> +
> +        rc = -EFAULT;
> +
> +        if ( copy_to_guest(topology.vmemrange.h, d->vnuma->vmemrange,
> +                           dom_vnodes) != 0 )
> +            goto vnumainfo_out;
> +
> +        if ( copy_to_guest(topology.vdistance.h, d->vnuma->vdistance,
> +                           dom_vnodes * dom_vnodes) != 0 )
> +            goto vnumainfo_out;
> +
> +        if ( copy_to_guest(topology.vcpu_to_vnode.h, d->vnuma->vcpu_to_vnode,
> +                           d->max_vcpus) != 0 )
> +            goto vnumainfo_out;
> +
> +        topology.nr_vnodes = dom_vnodes;
> +
> +        if ( copy_to_guest(arg, &topology, 1) != 0 )
> +            goto vnumainfo_out;
> +        rc = 0;
> +
> + vnumainfo_out:
> +        rcu_unlock_domain(d);
> +        break;
> +    }
> +
>      default:
>          rc = arch_memory_op(cmd, arg);
>          break;
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 5b11bbf..5ee74f4 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -35,6 +35,7 @@
>  #include "xen.h"
>  #include "grant_table.h"
>  #include "hvm/save.h"
> +#include "memory.h"
>
>  #define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
>
> @@ -934,6 +935,32 @@ struct xen_domctl_vcpu_msrs {
>  };
>  typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
> +
> +/*
> + * Use in XEN_DOMCTL_setvnumainfo to set
> + * vNUMA domain topology.
> + */
> +struct xen_domctl_vnuma {
> +    uint32_t nr_vnodes;
> +    uint32_t _pad;
> +    XEN_GUEST_HANDLE_64(uint) vdistance;
> +    XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
> +
> +    /*
> +     * vnodes to physical NUMA nodes mask.
> +     * This kept on per-domain basis for
> +     * interested consumers, such as numa aware ballooning.
> +     */
> +    XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
> +
> +    /*
> +     * memory rages for each vNUMA node
> +     */
> +    XEN_GUEST_HANDLE_64(vmemrange_t) vmemrange;
> +};
> +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
> +
>  #endif
>
>  struct xen_domctl {
> @@ -1008,6 +1035,7 @@ struct xen_domctl {
>  #define XEN_DOMCTL_cacheflush                    71
>  #define XEN_DOMCTL_get_vcpu_msrs                 72
>  #define XEN_DOMCTL_set_vcpu_msrs                 73
> +#define XEN_DOMCTL_setvnumainfo                  74
>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -1068,6 +1096,7 @@ struct xen_domctl {
>          struct xen_domctl_cacheflush        cacheflush;
>          struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
> +        struct xen_domctl_vnuma             vnuma;
>          uint8_t                             pad[128];
>      } u;
>  };
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 2c57aa0..2c212e1 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -521,9 +521,54 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
>   * The zero value is appropiate.
>   */
>
> +/* vNUMA node memory range */
> +struct vmemrange {
> +    uint64_t start, end;
> +};
> +
> +typedef struct vmemrange vmemrange_t;
> +DEFINE_XEN_GUEST_HANDLE(vmemrange_t);
> +
> +/*
> + * vNUMA topology specifies vNUMA node number, distance table,
> + * memory ranges and vcpu mapping provided for guests.
> + * XENMEM_get_vnumainfo hypercall expects to see from guest
> + * nr_vnodes and nr_vcpus to indicate available memory. After
> + * filling guests structures, nr_vnodes and nr_vcpus copied
> + * back to guest.
> + */
> +struct vnuma_topology_info {
> +    /* IN */
> +    domid_t domid;
> +    /* IN/OUT */
> +    unsigned int nr_vnodes;
> +    unsigned int nr_vcpus;
> +    /* OUT */
> +    union {
> +        XEN_GUEST_HANDLE(uint) h;
> +        uint64_t pad;
> +    } vdistance;
> +    union {
> +        XEN_GUEST_HANDLE(uint) h;
> +        uint64_t pad;
> +    } vcpu_to_vnode;
> +    union {
> +        XEN_GUEST_HANDLE(vmemrange_t) h;
> +        uint64_t pad;
> +    } vmemrange;
> +};
> +typedef struct vnuma_topology_info vnuma_topology_info_t;
> +DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
> +
> +/*
> + * XENMEM_get_vnumainfo used by guest to get
> + * vNUMA topology from hypervisor.
> + */
> +#define XENMEM_get_vnumainfo 26
> +
>  #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>
> -/* Next available subop number is 26 */
> +/* Next available subop number is 27 */
>
>  #endif /* __XEN_PUBLIC_MEMORY_H__ */
>
> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
> index bb1c398..d29a84d 100644
> --- a/xen/include/xen/domain.h
> +++ b/xen/include/xen/domain.h
> @@ -89,4 +89,15 @@ extern unsigned int xen_processor_pmbits;
>
>  extern bool_t opt_dom0_vcpus_pin;
>
> +/* vnuma topology per domain. */
> +struct vnuma_info {
> +    unsigned int nr_vnodes;
> +    unsigned int *vdistance;
> +    unsigned int *vcpu_to_vnode;
> +    unsigned int *vnode_to_pnode;
> +    struct vmemrange *vmemrange;
> +};
> +
> +void vnuma_destroy(struct vnuma_info *vnuma);
> +
>  #endif /* __XEN_DOMAIN_H__ */
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index d5bc461..71e4218 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -447,6 +447,7 @@ struct domain
>      nodemask_t node_affinity;
>      unsigned int last_alloc_node;
>      spinlock_t node_affinity_lock;
> +    struct vnuma_info *vnuma;
>  };
>
>  struct domain_setup_info
> --
> 1.7.10.4
>
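As a data point for how the guest side might end up consuming this: a rough,
untested sketch of a XENMEM_get_vnumainfo call, assuming the usual Linux guest
wrappers (HYPERVISOR_memory_op(), set_xen_guest_handle()) and with allocation
error handling omitted; this is only an illustration, not part of the patch:

    struct vnuma_topology_info topo = { .domid = DOMID_SELF };
    unsigned int nr_vnodes = num_possible_nodes();
    unsigned int nr_vcpus = num_possible_cpus();
    unsigned int *vdistance, *vcpu_to_vnode;
    struct vmemrange *vmemrange;
    int rc;

    /* Size the buffers from the guest's own view of nodes and vcpus. */
    vdistance = kcalloc(nr_vnodes * nr_vnodes, sizeof(*vdistance), GFP_KERNEL);
    vcpu_to_vnode = kcalloc(nr_vcpus, sizeof(*vcpu_to_vnode), GFP_KERNEL);
    vmemrange = kcalloc(nr_vnodes, sizeof(*vmemrange), GFP_KERNEL);

    /* Tell the hypervisor how much room each buffer has... */
    topo.nr_vnodes = nr_vnodes;
    topo.nr_vcpus = nr_vcpus;
    set_xen_guest_handle(topo.vdistance.h, vdistance);
    set_xen_guest_handle(topo.vcpu_to_vnode.h, vcpu_to_vnode);
    set_xen_guest_handle(topo.vmemrange.h, vmemrange);

    /* ... and fetch the topology; -ENOBUFS means retry with bigger buffers. */
    rc = HYPERVISOR_memory_op(XENMEM_get_vnumainfo, &topo);
    if (rc == 0) {
        /* topo.nr_vnodes now holds the number of vnodes actually filled in. */
    }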
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel