[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [v4][PATCH 14/19] tools/libxl: detect and avoid conflicts with RDM
While building a VM, the HVM domain builder provides struct hvm_info_table{} to help hvmloader. Currently it includes two fields that hvmloader uses to construct the guest e820 table, low_mem_pgend and high_mem_pgend. So we should check them to fix any conflict between guest RAM and RDM. RMRR can theoretically reside in address space beyond 4G, but we never see this in the real world. So in order to avoid breaking the highmem layout we don't resolve highmem conflicts. Note this means a highmem RMRR can still be supported if there is no conflict. But in the case of lowmem, RMRRs may be scattered across the whole RAM space. Multiple RMRR entries in particular would make this worse and lead to a complicated memory layout, and it would then be hard to extend hvm_info_table{} so that hvmloader can work it out. So here we're trying to find a simple solution that avoids breaking the existing layout. When a conflict occurs: #1. Above a predefined boundary (2G) - move lowmem_end below the reserved region to resolve the conflict; #2. Below a predefined boundary (2G) - check the strict/relaxed policy. The "strict" policy causes libxl to fail. Note that when both policies are specified on a given region, 'strict' is always preferred. The "relaxed" policy issues a warning message and also marks this entry INVALID to indicate we shouldn't expose this entry to hvmloader. Note that later we need to provide a parameter to set that predefined boundary dynamically. CC: Ian Jackson <ian.jackson@xxxxxxxxxxxxx> CC: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx> CC: Ian Campbell <ian.campbell@xxxxxxxxxx> CC: Wei Liu <wei.liu2@xxxxxxxxxx> Signed-off-by: Tiejun Chen <tiejun.chen@xxxxxxxxx> Reviewed-by: Kevin Tian <kevint.tian@xxxxxxxxx> --- v4: * Consistently use the term "RDM". 
* Unconditionally set *nr_entries to 0 * Grab to all sutffs to provide a parameter to set our predefined boundary dynamically to as a separated patch later tools/libxl/libxl_create.c | 2 +- tools/libxl/libxl_dm.c | 259 +++++++++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_dom.c | 17 ++- tools/libxl/libxl_internal.h | 11 +- tools/libxl/libxl_types.idl | 7 ++ 5 files changed, 293 insertions(+), 3 deletions(-) diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index 6c8ec63..30e6593 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -460,7 +460,7 @@ int libxl__domain_build(libxl__gc *gc, switch (info->type) { case LIBXL_DOMAIN_TYPE_HVM: - ret = libxl__build_hvm(gc, domid, info, state); + ret = libxl__build_hvm(gc, domid, d_config, state); if (ret) goto out; diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c index 33f9ce6..5436bcf 100644 --- a/tools/libxl/libxl_dm.c +++ b/tools/libxl/libxl_dm.c @@ -90,6 +90,265 @@ const char *libxl__domain_device_model(libxl__gc *gc, return dm; } +static struct xen_reserved_device_memory +*xc_device_get_rdm(libxl__gc *gc, + uint32_t flag, + uint16_t seg, + uint8_t bus, + uint8_t devfn, + unsigned int *nr_entries) +{ + struct xen_reserved_device_memory *xrdm; + int rc; + + /* + * We really can't presume how many entries we can get in advance. + */ + *nr_entries = 0; + rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn, + NULL, nr_entries); + assert(rc <= 0); + /* "0" means we have no any rdm entry. 
*/ + if (!rc) + goto out; + + if (errno == ENOBUFS) { + xrdm = malloc(*nr_entries * sizeof(xen_reserved_device_memory_t)); + if (!xrdm) { + LOG(ERROR, "Could not allocate RDM buffer!\n"); + goto out; + } + rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn, + xrdm, nr_entries); + if (rc) { + LOG(ERROR, "Could not get reserved device memory maps.\n"); + *nr_entries = 0; + free(xrdm); + xrdm = NULL; + } + } else + LOG(ERROR, "Could not get reserved device memory maps.\n"); + + out: + return xrdm; +} + +/* + * Check whether there exists rdm hole in the specified memory range. + * Returns true if exists, else returns false. + */ +static bool overlaps_rdm(uint64_t start, uint64_t memsize, + uint64_t rdm_start, uint64_t rdm_size) +{ + return (start + memsize > rdm_start) && (start < rdm_start + rdm_size); +} + +/* + * Check reported RDM regions and handle potential gfn conflicts according + * to user preferred policy. + * + * RDM can reside in address space beyond 4G theoretically, but we never + * see this in real world. So in order to avoid breaking highmem layout + * we don't solve highmem conflict. Note this means highmem rmrr could still + * be supported if no conflict. + * + * But in the case of lowmem, RDM probably scatter the whole RAM space. + * Especially multiple RDM entries would worsen this to lead a complicated + * memory layout. And then its hard to extend hvm_info_table{} to work + * hvmloader out. So here we're trying to figure out a simple solution to + * avoid breaking existing layout. So when a conflict occurs, + * + * #1. Above a predefined boundary (default 2G) + * - Move lowmem_end below reserved region to solve conflict; + * + * #2. Below a predefined boundary (default 2G) + * - Check strict/relaxed policy. + * "strict" policy leads to fail libxl. Note when both policies + * are specified on a given region, 'strict' is always preferred. 
+ * "relaxed" policy issue a warning message and also mask this entry + * INVALID to indicate we shouldn't expose this entry to hvmloader. + */ +int libxl__domain_device_construct_rdm(libxl__gc *gc, + libxl_domain_config *d_config, + uint64_t rdm_mem_boundary, + struct xc_hvm_build_args *args) +{ + int i, j, conflict; + struct xen_reserved_device_memory *xrdm = NULL; + uint32_t type = d_config->b_info.rdm.type; + uint16_t seg; + uint8_t bus, devfn; + uint64_t rdm_start, rdm_size; + uint64_t highmem_end = args->highmem_end ? args->highmem_end : (1ull<<32); + + /* Might not expose rdm. */ + if (type == LIBXL_RDM_RESERVE_TYPE_NONE && !d_config->num_pcidevs) + return 0; + + /* Query all RDM entries in this platform */ + if (type == LIBXL_RDM_RESERVE_TYPE_HOST) { + unsigned int nr_entries; + + /* Collect all rdm info if exist. */ + xrdm = xc_device_get_rdm(gc, PCI_DEV_RDM_ALL, + 0, 0, 0, &nr_entries); + if (!nr_entries) + return 0; + + assert(xrdm); + + d_config->num_rdms = nr_entries; + d_config->rdms = libxl__realloc(NOGC, d_config->rdms, + d_config->num_rdms * sizeof(libxl_device_rdm)); + + for (i = 0; i < d_config->num_rdms; i++) { + d_config->rdms[i].start = + (uint64_t)xrdm[i].start_pfn << XC_PAGE_SHIFT; + d_config->rdms[i].size = + (uint64_t)xrdm[i].nr_pages << XC_PAGE_SHIFT; + d_config->rdms[i].flag = d_config->b_info.rdm.reserve; + } + + free(xrdm); + } else + d_config->num_rdms = 0; + + /* Query RDM entries per-device */ + for (i = 0; i < d_config->num_pcidevs; i++) { + unsigned int nr_entries; + bool new = true; + + seg = d_config->pcidevs[i].domain; + bus = d_config->pcidevs[i].bus; + devfn = PCI_DEVFN(d_config->pcidevs[i].dev, d_config->pcidevs[i].func); + nr_entries = 0; + xrdm = xc_device_get_rdm(gc, ~PCI_DEV_RDM_ALL, + seg, bus, devfn, &nr_entries); + /* No RDM to associated with this device. */ + if (!nr_entries) + continue; + + assert(xrdm); + + /* + * Need to check whether this entry is already saved in the array. 
+ * This could come from two cases: + * + * - user may configure to get all RDMs in this platform, which + * is already queried before this point + * - or two assigned devices may share one RDM entry + * + * different policies may be configured on the same RDM due to above + * two cases. We choose a simple policy to always favor stricter policy + */ + for (j = 0; j < d_config->num_rdms; j++) { + if (d_config->rdms[j].start == + (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT) + { + if (d_config->rdms[j].flag != LIBXL_RDM_RESERVE_FLAG_STRICT) + d_config->rdms[j].flag = d_config->pcidevs[i].rdm_reserve; + new = false; + break; + } + } + + if (new) { + d_config->num_rdms++; + d_config->rdms = libxl__realloc(NOGC, d_config->rdms, + d_config->num_rdms * sizeof(libxl_device_rdm)); + + d_config->rdms[d_config->num_rdms - 1].start = + (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT; + d_config->rdms[d_config->num_rdms - 1].size = + (uint64_t)xrdm[0].nr_pages << XC_PAGE_SHIFT; + d_config->rdms[d_config->num_rdms - 1].flag = + d_config->pcidevs[i].rdm_reserve; + } + free(xrdm); + } + + /* + * Next step is to check and avoid potential conflict between RDM entries + * and guest RAM. To avoid intrusive impact to existing memory layout + * {lowmem, mmio, highmem} which is passed around various function blocks, + * below conflicts are not handled which are rare and handling them would + * lead to a more scattered layout: + * - RDM in highmem area (>4G) + * - RDM lower than a defined memory boundary (e.g. 2G) + * Otherwise for conflicts between boundary and 4G, we'll simply move lowmem + * end below reserved region to solve conflict. + * + * If a conflict is detected on a given RDM entry, an error will be + * returned if 'strict' policy is specified. Instead, if 'relaxed' policy + * specified, this conflict is treated just as a warning, but we mark this + * RDM entry as INVALID to indicate that this entry shouldn't be exposed + * to hvmloader. 
+ * + * Firstly we should check the case of rdm < 4G because we may need to + * expand highmem_end. + */ + for (i = 0; i < d_config->num_rdms; i++) { + rdm_start = d_config->rdms[i].start; + rdm_size = d_config->rdms[i].size; + conflict = overlaps_rdm(0, args->lowmem_end, rdm_start, rdm_size); + + if (!conflict) + continue; + + /* Just check if RDM > our memory boundary. */ + if (rdm_start > rdm_mem_boundary) { + /* + * We will move downwards lowmem_end so we have to expand + * highmem_end. + */ + highmem_end += (args->lowmem_end - rdm_start); + /* Now move downwards lowmem_end. */ + args->lowmem_end = rdm_start; + } + } + + /* Sync highmem_end. */ + args->highmem_end = highmem_end; + + /* + * Finally we can take same policy to check lowmem(< 2G) and + * highmem adjusted above. + */ + for (i = 0; i < d_config->num_rdms; i++) { + rdm_start = d_config->rdms[i].start; + rdm_size = d_config->rdms[i].size; + /* Does this entry conflict with lowmem? */ + conflict = overlaps_rdm(0, args->lowmem_end, + rdm_start, rdm_size); + /* Does this entry conflict with highmem? */ + conflict |= overlaps_rdm((1ULL<<32), + args->highmem_end - (1ULL<<32), + rdm_start, rdm_size); + + if (!conflict) + continue; + + if(d_config->rdms[i].flag == LIBXL_RDM_RESERVE_FLAG_STRICT) { + LOG(ERROR, "RDM conflict at 0x%lx.\n", d_config->rdms[i].start); + goto out; + } else { + LOG(WARN, "Ignoring RDM conflict at 0x%lx.\n", + d_config->rdms[i].start); + + /* + * Then mask this INVALID to indicate we shouldn't expose this + * to hvmloader. 
+ */ + d_config->rdms[i].flag = LIBXL_RDM_RESERVE_FLAG_INVALID; + } + } + + return 0; + + out: + return ERROR_FAIL; +} + const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config) { const libxl_vnc_info *vnc = NULL; diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c index 600393d..34bd466 100644 --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -914,13 +914,20 @@ out: } int libxl__build_hvm(libxl__gc *gc, uint32_t domid, - libxl_domain_build_info *info, + libxl_domain_config *d_config, libxl__domain_build_state *state) { libxl_ctx *ctx = libxl__gc_owner(gc); struct xc_hvm_build_args args = {}; int ret, rc = ERROR_FAIL; uint64_t mmio_start, lowmem_end, highmem_end; + libxl_domain_build_info *const info = &d_config->b_info; + /* + * Currently we fix this as 2G to guarantte how to handle + * our rdm policy. But we'll provide a parameter to set + * this dynamically. + */ + uint64_t rdm_mem_boundary = 0x80000000; memset(&args, 0, sizeof(struct xc_hvm_build_args)); /* The params from the configuration file are in Mb, which are then @@ -958,6 +965,14 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid, args.highmem_end = highmem_end; args.mmio_start = mmio_start; + ret = libxl__domain_device_construct_rdm(gc, d_config, + rdm_mem_boundary, + &args); + if (ret) { + LOG(ERROR, "checking reserved device memory failed"); + goto out; + } + if (info->num_vnuma_nodes != 0) { int i; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index eae1dc6..c0acf11 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -1011,7 +1011,7 @@ _hidden int libxl__build_post(libxl__gc *gc, uint32_t domid, _hidden int libxl__build_pv(libxl__gc *gc, uint32_t domid, libxl_domain_build_info *info, libxl__domain_build_state *state); _hidden int libxl__build_hvm(libxl__gc *gc, uint32_t domid, - libxl_domain_build_info *info, + libxl_domain_config *d_config, libxl__domain_build_state *state); _hidden int 
libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid, @@ -1519,6 +1519,15 @@ _hidden int libxl__need_xenpv_qemu(libxl__gc *gc, int nr_channels, libxl_device_channel *channels); /* + * This function will fix reserved device memory conflict + * according to user's configuration. + */ +_hidden int libxl__domain_device_construct_rdm(libxl__gc *gc, + libxl_domain_config *d_config, + uint64_t rdm_mem_guard, + struct xc_hvm_build_args *args); + +/* * This function will cause the whole libxl process to hang * if the device model does not respond. It is deprecated. * diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index dd91b38..5ba075d 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -559,6 +559,12 @@ libxl_device_pci = Struct("device_pci", [ ("rdm_reserve", libxl_rdm_reserve_flag), ]) +libxl_device_rdm = Struct("device_rdm", [ + ("start", uint64), + ("size", uint64), + ("flag", libxl_rdm_reserve_flag), + ]) + libxl_device_dtdev = Struct("device_dtdev", [ ("path", string), ]) @@ -589,6 +595,7 @@ libxl_domain_config = Struct("domain_config", [ ("disks", Array(libxl_device_disk, "num_disks")), ("nics", Array(libxl_device_nic, "num_nics")), ("pcidevs", Array(libxl_device_pci, "num_pcidevs")), + ("rdms", Array(libxl_device_rdm, "num_rdms")), ("dtdevs", Array(libxl_device_dtdev, "num_dtdevs")), ("vfbs", Array(libxl_device_vfb, "num_vfbs")), ("vkbs", Array(libxl_device_vkb, "num_vkbs")), -- 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |