# HG changeset patch # Parent cfe34cfce34f6be9be7024a0b8534c261a4df9b7 xend/xc: Implement a domain_set_e820_hole function to be used by python code Signed-off-by: Konrad Rzeszutek Wilk diff -r cfe34cfce34f tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Wed Nov 16 16:06:50 2011 -0500 +++ b/tools/python/xen/lowlevel/xc/xc.c Wed Nov 16 17:10:09 2011 -0500 @@ -16,6 +16,7 @@ #include #include #include +#include #include "xenctrl.h" #include @@ -1670,6 +1671,243 @@ static PyObject *pyxc_domain_set_memmap_ return zero; } +#ifdef PRIu64 +static const char *e820_names(int type) +{ + switch (type) { + case E820_RAM: return "RAM"; + case E820_RESERVED: return "Reserved"; + case E820_ACPI: return "ACPI"; + case E820_NVS: return "ACPI NVS"; + case E820_UNUSABLE: return "Unusable"; + default: break; + } + return "Unknown"; +} +#endif +static int e820_sanitize(struct e820entry src[], + uint32_t *nr_entries, + unsigned long map_limitkb, + unsigned long balloon_kb) +{ + uint64_t delta_kb = 0, start = 0, start_kb = 0, last = 0, ram_end; + uint32_t i, idx = 0, nr; + struct e820entry e820[E820MAX]; + + if (!src || !map_limitkb || !balloon_kb || !nr_entries) + return -EINVAL; + + nr = *nr_entries; + if (!nr) + return -EINVAL; + + if (nr > E820MAX) + return -EINVAL; + + /* Weed out anything under 1MB */ + for (i = 0; i < nr; i++) { + if (src[i].addr > 0x100000) + continue; + + src[i].type = 0; + src[i].size = 0; + src[i].addr = -1ULL; + } + + /* Find the lowest and highest entry in E820, skipping over + * undesired entries. */ + start = -1ULL; + last = 0; + for (i = 0; i < nr; i++) { + if ((src[i].type == E820_RAM) || + (src[i].type == E820_UNUSABLE) || + (src[i].type == 0)) + continue; + + start = src[i].addr < start ? src[i].addr : start; + last = src[i].addr + src[i].size > last ? + src[i].addr + src[i].size > last : last; + } + if (start > 1024) + start_kb = start >> 10; + + /* Add the memory RAM region for the guest */ + e820[idx].addr = 0; + e820[idx].size = (uint64_t)map_limitkb << 10; + e820[idx].type = E820_RAM; + + /* .. and trim if neccessary */ + if (start_kb && map_limitkb > start_kb) { + delta_kb = map_limitkb - start_kb; + if (delta_kb) + e820[idx].size -= (uint64_t)(delta_kb << 10); + } + /* Note: We don't touch balloon_kb here. Will add it at the end. */ + ram_end = e820[idx].addr + e820[idx].size; + idx ++; +#ifdef PRIu64 + printf("Memory: %"PRIu64"kB End of RAM: " \ + "0x%"PRIx64" (PFN) Delta: %"PRIu64"kB, PCI start: %"PRIu64"kB " \ + "(0x%"PRIx64" PFN), Balloon %"PRIu64"kB\n", (uint64_t)map_limitkb, + ram_end >> 12, delta_kb, start_kb ,start >> 12, + (uint64_t)balloon_kb); +#endif + + /* This whole code below is to guard against if the Intel IGD is passed into + * the guest. If we don't pass in IGD, this whole code can be ignored. + * + * The reason for this code is that Intel boxes fill their E820 with + * E820_RAM amongst E820_RESERVED and we can't just ditch those E820_RAM. + * That is b/c any "gaps" in the E820 is considered PCI I/O space by + * Linux and it would be utilized by the Intel IGD as I/O space while + * in reality it was an RAM region. + * + * What this means is that we have to walk the E820 and for any region + * that is RAM and below 4GB and above ram_end, needs to change its type + * to E820_UNUSED. We also need to move some of the E820_RAM regions if + * the overlap with ram_end. */ + for (i = 0; i < nr; i++) { + uint64_t end = src[i].addr + src[i].size; + + /* We don't care about E820_UNUSABLE, but we need to + * change the type to zero b/c the loop after this + * sticks E820_UNUSABLE on the guest's E820 but ignores + * the ones with type zero. */ + if ((src[i].type == E820_UNUSABLE) || + /* Any region that is within the "RAM region" can + * be safely ditched. */ + (end < ram_end)) { + src[i].type = 0; + continue; + } + + /* Look only at RAM regions. */ + if (src[i].type != E820_RAM) + continue; + + /* We only care about RAM regions below 4GB. */ + if (src[i].addr >= (1ULL<<32)) + continue; + + /* E820_RAM overlaps with our RAM region. Move it */ + if (src[i].addr < ram_end) { + uint64_t delta; + + src[i].type = E820_UNUSABLE; + delta = ram_end - src[i].addr; + /* The end < ram_end should weed this out */ + if (src[i].size - delta < 0) + src[i].type = 0; + else { + src[i].size -= delta; + src[i].addr = ram_end; + } + if (src[i].addr + src[i].size != end) { + /* We messed up somewhere */ + src[i].type = 0; +#ifdef PRIu64 + printf( "Computed E820 wrongly. Continuing on."); +#endif + } + } + /* Lastly, convert the RAM to UNSUABLE. Look in the Linux kernel + at git commit 2f14ddc3a7146ea4cd5a3d1ecd993f85f2e4f948 + "xen/setup: Inhibit resource API from using System RAM E820 + gaps as PCI mem gaps" for full explanation. */ + if (end > ram_end) + src[i].type = E820_UNUSABLE; + } + + /* Check if there is a region between ram_end and start. */ + if (start > ram_end) { + int add_unusable = 1; + for (i = 0; i < nr && add_unusable; i++) { + if (src[i].type != E820_UNUSABLE) + continue; + if (ram_end != src[i].addr) + continue; + if (start != src[i].addr + src[i].size) { + /* there is one, adjust it */ + src[i].size = start - src[i].addr; + } + add_unusable = 0; + } + /* .. and if not present, add it in. This is to guard against + the Linux guest assuming that the gap between the end of + RAM region and the start of the E820_[ACPI,NVS,RESERVED] + is PCI I/O space. Which it certainly is _not_. */ + if (add_unusable) { + e820[idx].type = E820_UNUSABLE; + e820[idx].addr = ram_end; + e820[idx].size = start - ram_end; + idx++; + } + } + /* Almost done: copy them over, ignoring the undesireable ones */ + for (i = 0; i < nr; i++) { + if ((src[i].type == E820_RAM) || + (src[i].type == 0)) + continue; + + e820[idx].type = src[i].type; + e820[idx].addr = src[i].addr; + e820[idx].size = src[i].size; + idx++; + } + /* At this point we have the mapped RAM + E820 entries from src. */ + if (balloon_kb) { + /* and if we truncated the RAM region, then add it to the end. */ + e820[idx].type = E820_RAM; + e820[idx].addr = (uint64_t)(1ULL << 32) > last ? + (uint64_t)(1ULL << 32) : last; + /* also add the balloon memory to the end. */ + e820[idx].size = (uint64_t)(delta_kb << 10) + + (uint64_t)(balloon_kb << 10); + idx++; + + } + nr = idx; +#ifdef PRIu64 + for (i = 0; i < nr; i++) { + printf(":\t[%"PRIx64" -> %"PRIx64"] %s", + e820[i].addr >> 12, (e820[i].addr + e820[i].size) >> 12, + e820_names(e820[i].type)); + } +#endif + /* Done: copy the sanitized version. */ + *nr_entries = nr; + memcpy(src, e820, nr * sizeof(struct e820entry)); + return 0; +} + +static PyObject *pyxc_domain_set_e820_hole(XcObject *self, PyObject *args) +{ + uint32_t dom, nr; + unsigned int target_kb; + unsigned int balloon_kb; + int rc; + struct e820entry map[E820MAX]; + + if ( !PyArg_ParseTuple(args, "iii", &dom, &target_kb, &balloon_kb) ) + return NULL; + + rc = xc_get_machine_memory_map(self->xc_handle, map, E820MAX); + if (rc < 0) + return pyxc_error_to_exception(self->xc_handle); + + nr = rc; + rc = e820_sanitize(map, &nr, target_kb, balloon_kb); + if (rc) + return pyxc_error_to_exception(self->xc_handle); + + rc = xc_domain_set_memory_map(self->xc_handle, dom, map, nr); + if (rc < 0) + return pyxc_error_to_exception(self->xc_handle); + + Py_INCREF(zero); + return zero; +} + static PyObject *pyxc_domain_ioport_permission(XcObject *self, PyObject *args, PyObject *kwds) @@ -2663,6 +2901,15 @@ static PyMethodDef pyxc_methods[] = { " map_limitkb [int]: .\n" "Returns: [int] 0 on success; -1 on error.\n" }, + { "domain_set_e820_hole", + (PyCFunction)pyxc_domain_set_e820_hole, + METH_VARARGS, "\n" + "Set a domain's E820 memory map\n" + " dom [int]: Identifier of domain.\n" + " target_memkb [int]: .\n" + " balloon_kb [int]: .\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + #ifdef __ia64__ { "nvram_init", (PyCFunction)pyxc_nvram_init,