Re: [Xen-devel] [PATCH 2/5] [POST-4.0]: HVM NUMA guest: pass NUMA information to libxc
On Thu, Feb 04, 2010 at 10:54:22PM +0100, Andre Przywara wrote:
> This patch adds a "struct numa_guest_info" to libxc, which allows specifying
> a guest's NUMA topology along with the appropriate host binding.
> Here the information is filled in by the Python wrapper (I left out the
> libxl part for now); it populates the struct with sensible default values
> (equal distribution), so only the number of guest nodes needs to be
> specified by the caller. The information is passed on to the
> hvm_info_table. The respective memory allocation is in another patch.
>
> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
>
> Regards,
> Andre.
>
> --
> Andre Przywara
> AMD-Operating System Research Center (OSRC), Dresden, Germany
> Tel: +49 351 488-3567-12
> ----to satisfy European Law for business letters:
> Advanced Micro Devices GmbH
> Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen
> Geschaeftsfuehrer: Andrew Bowd; Thomas M. McCoy; Giuliano Meroni
> Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
> Registergericht Muenchen, HRB Nr. 43632
>
> commit b4ee75af6f6220bdafccf75c9bc6e4915a413b18
> Author: Andre Przywara <andre.przywara@xxxxxxx>
> Date:   Mon Feb 1 12:36:28 2010 +0100
>
>     add struct numainfo to interface and
>     parse xend passed NUMA info and pass on to libxc
>
> diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c
> index 8fc7ac5..02103b1 100644
> --- a/tools/libxc/xc_hvm_build.c
> +++ b/tools/libxc/xc_hvm_build.c
> @@ -106,6 +106,7 @@ static int loadelfimage(
>
>  static int setup_guest(int xc_handle,
>                         uint32_t dom, int memsize, int target,
> +                       struct numa_guest_info *numainfo,
>                         char *image, unsigned long image_size)
>  {
>      xen_pfn_t *page_array = NULL;
> @@ -334,6 +335,7 @@ static int xc_hvm_build_internal(int xc_handle,
>                                   uint32_t domid,
>                                   int memsize,
>                                   int target,
> +                                 struct numa_guest_info *numainfo,
>                                   char *image,
>                                   unsigned long image_size)
>  {
> @@ -343,7 +345,8 @@ static int xc_hvm_build_internal(int xc_handle,
>          return -1;
>      }
>
> -    return setup_guest(xc_handle, domid, memsize, target, image, image_size);
> +    return setup_guest(xc_handle, domid, memsize, target,
> +                       numainfo, image, image_size);
>  }
>
>  /* xc_hvm_build:
> @@ -352,6 +355,7 @@ static int xc_hvm_build_internal(int xc_handle,
>  int xc_hvm_build(int xc_handle,
>                   uint32_t domid,
>                   int memsize,
> +                 struct numa_guest_info *numainfo,
>                   const char *image_name)
>  {
>      char *image;
> @@ -362,7 +366,8 @@ int xc_hvm_build(int xc_handle,
>           ((image = xc_read_image(image_name, &image_size)) == NULL) )
>          return -1;
>
> -    sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize, image, image_size);
> +    sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize,
> +                                numainfo, image, image_size);
>
>      free(image);
>
> @@ -379,6 +384,7 @@ int xc_hvm_build_target_mem(int xc_handle,
>                              uint32_t domid,
>                              int memsize,
>                              int target,
> +                            struct numa_guest_info *numainfo,
>                              const char *image_name)
>  {
>      char *image;
> @@ -389,7 +395,8 @@ int xc_hvm_build_target_mem(int xc_handle,
>           ((image = xc_read_image(image_name, &image_size)) == NULL) )
>          return -1;
>
> -    sts = xc_hvm_build_internal(xc_handle, domid, memsize, target, image, image_size);
> +    sts = xc_hvm_build_internal(xc_handle, domid, memsize, target,
> +                                numainfo, image, image_size);
>
>      free(image);
>
> @@ -402,6 +409,7 @@ int xc_hvm_build_target_mem(int xc_handle,
>  int xc_hvm_build_mem(int xc_handle,
>                       uint32_t domid,
>                       int memsize,
> +                     struct numa_guest_info *numainfo,
>                       const char *image_buffer,
>                       unsigned long image_size)
>  {
> @@ -425,7 +433,7 @@ int xc_hvm_build_mem(int xc_handle,
>      }
>
>      sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize,
> -                                img, img_len);
> +                                numainfo, img, img_len);
>
>      /* xc_inflate_buffer may return the original buffer pointer (for
>         for already inflated buffers), so exercise some care in freeing */
>
> diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
> index 851f769..110b0c9 100644
> --- a/tools/libxc/xenguest.h
> +++ b/tools/libxc/xenguest.h
> @@ -62,6 +62,13 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
>                        unsigned int console_evtchn, unsigned long *console_mfn,
>                        unsigned int hvm, unsigned int pae, int superpages);
>
> +struct numa_guest_info {
> +    int num_nodes;
> +    int *guest_to_host_node;
> +    int *vcpu_to_node;
> +    uint64_t *node_mem;
> +};
> +
>  /**
>   * This function will create a domain for a paravirtualized Linux
>   * using file names pointing to kernel and ramdisk
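
For reference, here is a minimal sketch of how a caller might fill in this
struct for a 2-node guest with 4 vCPUs. This is not part of the patch; the
field semantics and the assumption that node_mem[] is expressed in kB are
inferred from the Python wrapper further down, and all concrete values are
invented for illustration:

    /* Illustration only -- assumes the struct declared above; uint64_t
     * comes from <stdint.h> (pulled in via xenguest.h in practice). */
    static void example_numa_info(struct numa_guest_info *info)
    {
        static int g2h[2]      = { 0, 1 };        /* guest node -> host node */
        static int v2n[4]      = { 0, 0, 1, 1 };  /* vcpu -> guest node */
        static uint64_t mem[2] = { 512 * 1024, 512 * 1024 };  /* kB per node */

        info->num_nodes          = 2;
        info->guest_to_host_node = g2h;
        info->vcpu_to_node       = v2n;
        info->node_mem           = mem;
    }

    /* ... the filled struct would then be passed to xc_hvm_build_target_mem(). */
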
> @@ -143,17 +150,20 @@ int xc_linux_build_mem(int xc_handle,
>  int xc_hvm_build(int xc_handle,
>                   uint32_t domid,
>                   int memsize,
> +                 struct numa_guest_info *numa_info,
>                   const char *image_name);
>
>  int xc_hvm_build_target_mem(int xc_handle,
>                              uint32_t domid,
>                              int memsize,
>                              int target,
> +                            struct numa_guest_info *numa_info,
>                              const char *image_name);
>
>  int xc_hvm_build_mem(int xc_handle,
>                       uint32_t domid,
>                       int memsize,
> +                     struct numa_guest_info *numa_info,
>                       const char *image_buffer,
>                       unsigned long image_size);
>
> diff --git a/tools/libxc/xg_private.c b/tools/libxc/xg_private.c
> index 457001c..15d1b71 100644
> --- a/tools/libxc/xg_private.c
> +++ b/tools/libxc/xg_private.c
> @@ -177,6 +177,7 @@ __attribute__((weak))
>  int xc_hvm_build(int xc_handle,
>                   uint32_t domid,
>                   int memsize,
> +                 struct numa_guest_info *numainfo,
>                   const char *image_name)
>  {
>      errno = ENOSYS;
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> index 7196aa8..154253a 100644
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -168,7 +168,7 @@ int build_hvm(struct libxl_ctx *ctx, uint32_t domid,
>  {
>      int ret;
>
> -    ret = xc_hvm_build_target_mem(ctx->xch, domid, (info->max_memkb - info->video_memkb) / 1024, (info->target_memkb - info->video_memkb) / 1024, info->kernel);
> +    ret = xc_hvm_build_target_mem(ctx->xch, domid, (info->max_memkb - info->video_memkb) / 1024, (info->target_memkb - info->video_memkb) / 1024, NULL, info->kernel);
>      if (ret) {
>          XL_LOG_ERRNOVAL(ctx, XL_LOG_ERROR, ret, "hvm building failed");
>          return ERROR_FAIL;
> diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
> index 1932758..ecd7800 100644
> --- a/tools/python/xen/lowlevel/xc/xc.c
> +++ b/tools/python/xen/lowlevel/xc/xc.c
> @@ -915,15 +915,19 @@ static PyObject *pyxc_hvm_build(XcObject *self,
>      int i;
>      char *image;
>      int memsize, target=-1, vcpus = 1, acpi = 0, apic = 1;
> +    int numanodes;
> +    struct numa_guest_info numainfo;
>      PyObject *vcpu_avail_handle = NULL;
> +    PyObject *nodemem_handle = NULL;
>      uint8_t vcpu_avail[HVM_MAX_VCPUS/8];
>
> -    static char *kwd_list[] = { "domid",
> -                                "memsize", "image", "target", "vcpus",
> -                                "vcpu_avail", "acpi", "apic", NULL };
> -    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOii", kwd_list,
> +    static char *kwd_list[] = { "domid", "memsize", "image", "target",
> +                                "vcpus", "vcpu_avail", "acpi", "apic",
> +                                "nodes", "nodemem", NULL };
> +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOiiiO", kwd_list,
>                                        &dom, &memsize, &image, &target, &vcpus,
> -                                      &vcpu_avail_handle, &acpi, &apic) )
> +                                      &vcpu_avail_handle, &acpi, &apic,
> +                                      &numanodes, &nodemem_handle) )
>          return NULL;
>
>      memset(vcpu_avail, 0, sizeof(vcpu_avail));
> @@ -954,8 +958,50 @@ static PyObject *pyxc_hvm_build(XcObject *self,
>      if ( target == -1 )
>          target = memsize;
>
> -    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
> -                                 target, image) != 0 )
> +    numainfo.num_nodes = numanodes;
> +    numainfo.guest_to_host_node = malloc(numanodes * sizeof(int));
> +    numainfo.vcpu_to_node = malloc(vcpus * sizeof(int));
> +    numainfo.node_mem = malloc(numanodes * sizeof(uint64_t));
> +    if (numanodes > 1)
> +    {
> +        int vcpu_per_node, remainder;
> +        int n;
> +        vcpu_per_node = vcpus / numanodes;
> +        remainder = vcpus % numanodes;
> +        n = remainder * (vcpu_per_node + 1);
> +        for (i = 0; i < vcpus; i++)

I don't know the coding style, but you seem to have two different versions
of it. Here you do 'for (i = ...'

> +        {
> +            if (i < n)
> +                numainfo.vcpu_to_node[i] = i / (vcpu_per_node + 1);
> +            else
> +                numainfo.vcpu_to_node[i] = remainder + (i - n) / vcpu_per_node;
> +        }
> +        for (i = 0; i < numanodes; i++)
> +            numainfo.guest_to_host_node[i] = i % 2;
> +        if ( nodemem_handle != NULL && PyList_Check(nodemem_handle) )
> +        {
> +            PyObject *item;
> +            for ( i = 0; i < numanodes; i++)

and here it is 'for ( i = 0 ...'. I've failed in the past to find the coding
style document, so I am not exactly sure which one is correct.

> +            {
> +                item = PyList_GetItem(nodemem_handle, i);
> +                numainfo.node_mem[i] = PyInt_AsLong(item);
> +                fprintf(stderr, "NUMA: node %i has %lu kB\n", i,
> +                        numainfo.node_mem[i]);

Isn't there any other way to print this out? Can't you use the xc_dom_printf
routines?

> +            }
> +        } else {
> +            unsigned int mempernode;
> +            if ( nodemem_handle != NULL && PyInt_Check(nodemem_handle) )
> +                mempernode = PyInt_AsLong(nodemem_handle);
> +            else
> +                mempernode = (memsize << 10) / numanodes;
> +            fprintf(stderr, "NUMA: setting all nodes to %i KB\n", mempernode);

Ditto.

> +            for (i = 0; i < numanodes; i++)

Ditto for the coding guideline.

> +                numainfo.node_mem[i] = mempernode;
> +        }
> +    }
> +
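
As an aside for readers, the vcpu-to-node arithmetic above hands the first
'remainder' guest nodes one extra vCPU each. A small standalone
re-implementation (illustration only, not part of the patch) shows the
resulting mapping for, say, 6 vCPUs on 4 nodes, which comes out as
0 0 1 1 2 3:

    #include <stdio.h>

    /* Same arithmetic as the hunk above: the first 'remainder' nodes get
     * vcpu_per_node + 1 vCPUs, the remaining nodes get vcpu_per_node. */
    int main(void)
    {
        int vcpus = 6, numanodes = 4, i;
        int vcpu_per_node = vcpus / numanodes;      /* 1 */
        int remainder     = vcpus % numanodes;      /* 2 */
        int n = remainder * (vcpu_per_node + 1);    /* 4 vCPUs on the bigger nodes */

        for (i = 0; i < vcpus; i++)
        {
            int node = (i < n) ? i / (vcpu_per_node + 1)
                               : remainder + (i - n) / vcpu_per_node;
            printf("vcpu %d -> guest node %d\n", i, node);
        }
        return 0;
    }
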
> +    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, target,
> +                                 &numainfo, image) != 0 )

and I guess here too

>          return pyxc_error_to_exception();
>
>  #if !defined(__ia64__)
> @@ -970,6 +1016,15 @@ static PyObject *pyxc_hvm_build(XcObject *self,
>      va_hvm->apic_mode = apic;
>      va_hvm->nr_vcpus = vcpus;
>      memcpy(va_hvm->vcpu_online, vcpu_avail, sizeof(vcpu_avail));
> +
> +    va_hvm->num_nodes = numanodes;
> +    if (numanodes > 0) {
> +        for (i = 0; i < vcpus; i++)
> +            va_hvm->vcpu_to_node[i] = numainfo.vcpu_to_node[i];
> +        for (i = 0; i < numanodes; i++)
> +            va_hvm->node_mem[i] = numainfo.node_mem[i] >> 10;

Would it make sense to have 10 be #defined somewhere?
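
Something along these lines is probably what is meant; the constant name is
invented here, and the kB-to-MB reading is only inferred from the "kB"
fprintf earlier in this function:

    #include <stdint.h>

    /* Hypothetical constant, purely to illustrate the question above;
     * numainfo.node_mem[] appears to hold kB, so >> 10 yields MB. */
    #define NODE_MEM_KB_TO_MB_SHIFT 10

    static inline uint64_t node_mem_kb_to_mb(uint64_t kb)
    {
        return kb >> NODE_MEM_KB_TO_MB_SHIFT;
    }
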
> "nr_nodes", nr_nodes, > "max_node_id", info.max_node_id, > "max_cpu_id", info.max_cpu_id, > diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py > index f3b2cc5..f06d6e2 100644 > --- a/tools/python/xen/xend/image.py > +++ b/tools/python/xen/xend/image.py > @@ -961,7 +961,8 @@ class HVMImageHandler(ImageHandler): > vcpus = self.vm.getVCpuCount(), > vcpu_avail = self.vm.getVCpuAvail(), > acpi = self.acpi, > - apic = self.apic) > + apic = self.apic, > + nodes = 0) > rc['notes'] = { 'SUSPEND_CANCEL': 1 } > > rc['store_mfn'] = xc.hvm_get_param(self.vm.getDomid(), > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxxxxxxxx > http://lists.xensource.com/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel