[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [v3][PATCH 13/16] tools/libxl: detect and avoid conflicts with RDM
> From: Chen, Tiejun > Sent: Thursday, June 11, 2015 9:15 AM > > While building a VM, HVM domain builder provides struct hvm_info_table{} > to help hvmloader. Currently it includes two fields to construct guest > e820 table by hvmloader, low_mem_pgend and high_mem_pgend. So we should > check them to fix any conflict with RAM. > > RMRR can reside in address space beyond 4G theoretically, but we never > see this in real world. So in order to avoid breaking highmem layout > we don't solve highmem conflict. Note this means highmem rmrr could still > be supported if no conflict. > > But in the case of lowmem, RMRR probably scatter the whole RAM space. > Especially multiple RMRR entries would worsen this to lead a complicated > memory layout. And then its hard to extend hvm_info_table{} to work > hvmloader out. So here we're trying to figure out a simple solution to > avoid breaking existing layout. So when a conflict occurs, > > #1. Above a predefined boundary (default 2G) > - move lowmem_end below reserved region to solve conflict; > > #2. Below a predefined boundary (default 2G) > - Check strict/relaxed policy. > "strict" policy leads to fail libxl. Note when both policies > are specified on a given region, 'strict' is always preferred. > "relaxed" policy issue a warning message and also mask this entry > INVALID > to indicate we shouldn't expose this entry to hvmloader. > > Note this predefined boundary can be changes with the parameter > "rdm_mem_boundary" in .cfg file. > > Signed-off-by: Tiejun Chen <tiejun.chen@xxxxxxxxx> Reviewed-by: Kevin Tian <kevint.tian@xxxxxxxxx> One comment though. could you be consistent to use RDM in the code? RMRR is just an example of RDM... > --- > docs/man/xl.cfg.pod.5 | 21 ++++ > tools/libxc/xc_hvm_build_x86.c | 5 +- > tools/libxl/libxl.h | 6 + > tools/libxl/libxl_create.c | 6 +- > tools/libxl/libxl_dm.c | 255 > +++++++++++++++++++++++++++++++++++++++++ > tools/libxl/libxl_dom.c | 11 +- > tools/libxl/libxl_internal.h | 11 +- > tools/libxl/libxl_types.idl | 8 ++ > tools/libxl/xl_cmdimpl.c | 3 + > 9 files changed, 322 insertions(+), 4 deletions(-) > > diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5 > index 638b350..6fd2370 100644 > --- a/docs/man/xl.cfg.pod.5 > +++ b/docs/man/xl.cfg.pod.5 > @@ -767,6 +767,27 @@ to a given device, and "strict" is default here. > > Note this would override global B<rdm> option. > > +=item B<rdm_mem_boundary=MBYTES> > + > +Number of megabytes to set a boundary for checking rdm conflict. > + > +When RDM conflicts with RAM, RDM probably scatter the whole RAM space. > +Especially multiple RMRR entries would worsen this to lead a complicated > +memory layout. So here we're trying to figure out a simple solution to > +avoid breaking existing layout. So when a conflict occurs, > + > + #1. Above a predefined boundary > + - move lowmem_end below reserved region to solve conflict; > + > + #2. Below a predefined boundary > + - Check strict/relaxed policy. > + "strict" policy leads to fail libxl. Note when both policies > + are specified on a given region, 'strict' is always preferred. > + "relaxed" policy issue a warning message and also mask this entry > INVALID > + to indicate we shouldn't expose this entry to hvmloader. > + > +Here the default is 2G. > + > =back > > =back > diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c > index 0e98c84..5142578 100644 > --- a/tools/libxc/xc_hvm_build_x86.c > +++ b/tools/libxc/xc_hvm_build_x86.c > @@ -21,6 +21,7 @@ > #include <stdlib.h> > #include <unistd.h> > #include <zlib.h> > +#include <assert.h> > > #include "xg_private.h" > #include "xc_private.h" > @@ -270,7 +271,7 @@ static int setup_guest(xc_interface *xch, > > elf_parse_binary(&elf); > v_start = 0; > - v_end = args->mem_size; > + v_end = args->lowmem_end; > > if ( nr_pages > target_pages ) > memflags |= XENMEMF_populate_on_demand; > @@ -754,6 +755,8 @@ int xc_hvm_build_target_mem(xc_interface *xch, > args.mem_size = (uint64_t)memsize << 20; > args.mem_target = (uint64_t)target << 20; > args.image_file_name = image_name; > + if ( args.mmio_size == 0 ) > + args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH; > > return xc_hvm_build(xch, domid, &args); > } > diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h > index 0a7913b..a6212fb 100644 > --- a/tools/libxl/libxl.h > +++ b/tools/libxl/libxl.h > @@ -858,6 +858,12 @@ const char *libxl_defbool_to_string(libxl_defbool b); > #define LIBXL_TIMER_MODE_DEFAULT -1 > #define LIBXL_MEMKB_DEFAULT ~0ULL > > +/* > + * We'd like to set a memory boundary to determine if we need to check > + * any overlap with reserved device memory. > + */ > +#define LIBXL_RDM_MEM_BOUNDARY_MEMKB_DEFAULT (2048 * 1024) > + > #define LIBXL_MS_VM_GENID_LEN 16 > typedef struct { > uint8_t bytes[LIBXL_MS_VM_GENID_LEN]; > diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c > index 6c8ec63..0438731 100644 > --- a/tools/libxl/libxl_create.c > +++ b/tools/libxl/libxl_create.c > @@ -109,6 +109,10 @@ void libxl__rdm_setdefault(libxl__gc *gc, > libxl_domain_build_info > *b_info) > { > if (b_info->rdm.reserve == LIBXL_RDM_RESERVE_FLAG_INVALID) > b_info->rdm.reserve = LIBXL_RDM_RESERVE_FLAG_RELAXED; > + > + if (b_info->rdm_mem_boundary_memkb == LIBXL_MEMKB_DEFAULT) > + b_info->rdm_mem_boundary_memkb = > + LIBXL_RDM_MEM_BOUNDARY_MEMKB_DEFAULT; > } > > int libxl__domain_build_info_setdefault(libxl__gc *gc, > @@ -460,7 +464,7 @@ int libxl__domain_build(libxl__gc *gc, > > switch (info->type) { > case LIBXL_DOMAIN_TYPE_HVM: > - ret = libxl__build_hvm(gc, domid, info, state); > + ret = libxl__build_hvm(gc, domid, d_config, state); > if (ret) > goto out; > > diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c > index 33f9ce6..d908350 100644 > --- a/tools/libxl/libxl_dm.c > +++ b/tools/libxl/libxl_dm.c > @@ -90,6 +90,261 @@ const char *libxl__domain_device_model(libxl__gc *gc, > return dm; > } > > +static struct xen_reserved_device_memory > +*xc_device_get_rdm(libxl__gc *gc, > + uint32_t flag, > + uint16_t seg, > + uint8_t bus, > + uint8_t devfn, > + unsigned int *nr_entries) > +{ > + struct xen_reserved_device_memory *xrdm; > + int rc; > + > + rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn, > + NULL, nr_entries); > + assert(rc <= 0); > + /* "0" means we have no any rdm entry. */ > + if (!rc) > + goto out; > + > + if (errno == ENOBUFS) { > + xrdm = malloc(*nr_entries * sizeof(xen_reserved_device_memory_t)); > + if (!xrdm) { > + LOG(ERROR, "Could not allocate RDM buffer!\n"); > + goto out; > + } > + rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn, > + xrdm, nr_entries); > + if (rc) { > + LOG(ERROR, "Could not get reserved device memory maps.\n"); > + *nr_entries = 0; > + free(xrdm); > + xrdm = NULL; > + } > + } else > + LOG(ERROR, "Could not get reserved device memory maps.\n"); > + > + out: > + return xrdm; > +} > + > +/* > + * Check whether there exists rdm hole in the specified memory range. > + * Returns true if exists, else returns false. > + */ > +static bool overlaps_rdm(uint64_t start, uint64_t memsize, > + uint64_t rdm_start, uint64_t rdm_size) > +{ > + return (start + memsize > rdm_start) && (start < rdm_start + rdm_size); > +} > + > +/* > + * Check reported RDM regions and handle potential gfn conflicts according > + * to user preferred policy. > + * > + * RMRR can reside in address space beyond 4G theoretically, but we never > + * see this in real world. So in order to avoid breaking highmem layout > + * we don't solve highmem conflict. Note this means highmem rmrr could still > + * be supported if no conflict. > + * > + * But in the case of lowmem, RMRR probably scatter the whole RAM space. > + * Especially multiple RMRR entries would worsen this to lead a complicated > + * memory layout. And then its hard to extend hvm_info_table{} to work > + * hvmloader out. So here we're trying to figure out a simple solution to > + * avoid breaking existing layout. So when a conflict occurs, > + * > + * #1. Above a predefined boundary (default 2G) > + * - Move lowmem_end below reserved region to solve conflict; > + * > + * #2. Below a predefined boundary (default 2G) > + * - Check strict/relaxed policy. > + * "strict" policy leads to fail libxl. Note when both policies > + * are specified on a given region, 'strict' is always preferred. > + * "relaxed" policy issue a warning message and also mask this entry > + * INVALID to indicate we shouldn't expose this entry to hvmloader. > + */ > +int libxl__domain_device_construct_rdm(libxl__gc *gc, > + libxl_domain_config *d_config, > + uint64_t rdm_mem_boundary, > + struct xc_hvm_build_args *args) > +{ > + int i, j, conflict; > + struct xen_reserved_device_memory *xrdm = NULL; > + uint32_t type = d_config->b_info.rdm.type; > + uint16_t seg; > + uint8_t bus, devfn; > + uint64_t rdm_start, rdm_size; > + uint64_t highmem_end = args->highmem_end ? args->highmem_end : > (1ull<<32); > + > + /* Might not expose rdm. */ > + if (type == LIBXL_RDM_RESERVE_TYPE_NONE && !d_config->num_pcidevs) > + return 0; > + > + /* Query all RDM entries in this platform */ > + if (type == LIBXL_RDM_RESERVE_TYPE_HOST) { > + unsigned int nr_entries; > + > + /* Collect all rdm info if exist. */ > + xrdm = xc_device_get_rdm(gc, PCI_DEV_RDM_ALL, > + 0, 0, 0, &nr_entries); > + if (!nr_entries) > + return 0; > + > + assert(xrdm); > + > + d_config->num_rdms = nr_entries; > + d_config->rdms = libxl__realloc(NOGC, d_config->rdms, > + d_config->num_rdms * > sizeof(libxl_device_rdm)); > + > + for (i = 0; i < d_config->num_rdms; i++) { > + d_config->rdms[i].start = > + (uint64_t)xrdm[i].start_pfn << XC_PAGE_SHIFT; > + d_config->rdms[i].size = > + (uint64_t)xrdm[i].nr_pages << XC_PAGE_SHIFT; > + d_config->rdms[i].flag = d_config->b_info.rdm.reserve; > + } > + > + free(xrdm); > + } else > + d_config->num_rdms = 0; > + > + /* Query RDM entries per-device */ > + for (i = 0; i < d_config->num_pcidevs; i++) { > + unsigned int nr_entries; > + bool new = true; > + > + seg = d_config->pcidevs[i].domain; > + bus = d_config->pcidevs[i].bus; > + devfn = PCI_DEVFN(d_config->pcidevs[i].dev, > d_config->pcidevs[i].func); > + nr_entries = 0; > + xrdm = xc_device_get_rdm(gc, ~PCI_DEV_RDM_ALL, > + seg, bus, devfn, &nr_entries); > + /* No RDM to associated with this device. */ > + if (!nr_entries) > + continue; > + > + assert(xrdm); > + > + /* > + * Need to check whether this entry is already saved in the array. > + * This could come from two cases: > + * > + * - user may configure to get all RMRRs in this platform, which > + * is already queried before this point > + * - or two assigned devices may share one RMRR entry > + * > + * different policies may be configured on the same RMRR due to above > + * two cases. We choose a simple policy to always favor stricter > policy > + */ > + for (j = 0; j < d_config->num_rdms; j++) { > + if (d_config->rdms[j].start == > + (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT) > + { > + if (d_config->rdms[j].flag != LIBXL_RDM_RESERVE_FLAG_STRICT) > + d_config->rdms[j].flag = > d_config->pcidevs[i].rdm_reserve; > + new = false; > + break; > + } > + } > + > + if (new) { > + d_config->num_rdms++; > + d_config->rdms = libxl__realloc(NOGC, d_config->rdms, > + d_config->num_rdms * > sizeof(libxl_device_rdm)); > + > + d_config->rdms[d_config->num_rdms - 1].start = > + (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT; > + d_config->rdms[d_config->num_rdms - 1].size = > + (uint64_t)xrdm[0].nr_pages << XC_PAGE_SHIFT; > + d_config->rdms[d_config->num_rdms - 1].flag = > + d_config->pcidevs[i].rdm_reserve; > + } > + free(xrdm); > + } > + > + /* > + * Next step is to check and avoid potential conflict between RDM entries > + * and guest RAM. To avoid intrusive impact to existing memory layout > + * {lowmem, mmio, highmem} which is passed around various function > blocks, > + * below conflicts are not handled which are rare and handling them would > + * lead to a more scattered layout: > + * - RMRR in highmem area (>4G) > + * - RMRR lower than a defined memory boundary (e.g. 2G) > + * Otherwise for conflicts between boundary and 4G, we'll simply move > lowmem > + * end below reserved region to solve conflict. > + * > + * If a conflict is detected on a given RMRR entry, an error will be > + * returned if 'strict' policy is specified. Instead, if 'relaxed' policy > + * specified, this conflict is treated just as a warning, but we mark > this > + * RMRR entry as INVALID to indicate that this entry shouldn't be exposed > + * to hvmloader. > + * > + * Firstly we should check the case of rdm < 4G because we may need to > + * expand highmem_end. > + */ > + for (i = 0; i < d_config->num_rdms; i++) { > + rdm_start = d_config->rdms[i].start; > + rdm_size = d_config->rdms[i].size; > + conflict = overlaps_rdm(0, args->lowmem_end, rdm_start, rdm_size); > + > + if (!conflict) > + continue; > + > + /* Just check if RDM > our memory boundary. */ > + if (rdm_start > rdm_mem_boundary) { > + /* > + * We will move downwards lowmem_end so we have to expand > + * highmem_end. > + */ > + highmem_end += (args->lowmem_end - rdm_start); > + /* Now move downwards lowmem_end. */ > + args->lowmem_end = rdm_start; > + } > + } > + > + /* Sync highmem_end. */ > + args->highmem_end = highmem_end; > + > + /* > + * Finally we can take same policy to check lowmem(< 2G) and > + * highmem adjusted above. > + */ > + for (i = 0; i < d_config->num_rdms; i++) { > + rdm_start = d_config->rdms[i].start; > + rdm_size = d_config->rdms[i].size; > + /* Does this entry conflict with lowmem? */ > + conflict = overlaps_rdm(0, args->lowmem_end, > + rdm_start, rdm_size); > + /* Does this entry conflict with highmem? */ > + conflict |= overlaps_rdm((1ULL<<32), > + args->highmem_end - (1ULL<<32), > + rdm_start, rdm_size); > + > + if (!conflict) > + continue; > + > + if(d_config->rdms[i].flag == LIBXL_RDM_RESERVE_FLAG_STRICT) { > + LOG(ERROR, "RDM conflict at 0x%lx.\n", d_config->rdms[i].start); > + goto out; > + } else { > + LOG(WARN, "Ignoring RDM conflict at 0x%lx.\n", > + d_config->rdms[i].start); > + > + /* > + * Then mask this INVALID to indicate we shouldn't expose this > + * to hvmloader. > + */ > + d_config->rdms[i].flag = LIBXL_RDM_RESERVE_FLAG_INVALID; > + } > + } > + > + return 0; > + > + out: > + return ERROR_FAIL; > +} > + > const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config) > { > const libxl_vnc_info *vnc = NULL; > diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c > index 867172a..1777b32 100644 > --- a/tools/libxl/libxl_dom.c > +++ b/tools/libxl/libxl_dom.c > @@ -914,13 +914,14 @@ out: > } > > int libxl__build_hvm(libxl__gc *gc, uint32_t domid, > - libxl_domain_build_info *info, > + libxl_domain_config *d_config, > libxl__domain_build_state *state) > { > libxl_ctx *ctx = libxl__gc_owner(gc); > struct xc_hvm_build_args args = {}; > int ret, rc = ERROR_FAIL; > uint64_t mmio_start, lowmem_end, highmem_end; > + libxl_domain_build_info *const info = &d_config->b_info; > > memset(&args, 0, sizeof(struct xc_hvm_build_args)); > /* The params from the configuration file are in Mb, which are then > @@ -958,6 +959,14 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid, > args.highmem_end = highmem_end; > args.mmio_start = mmio_start; > > + ret = libxl__domain_device_construct_rdm(gc, d_config, > + > info->rdm_mem_boundary_memkb*1024, > + &args); > + if (ret) { > + LOG(ERROR, "checking reserved device memory failed"); > + goto out; > + } > + > if (info->num_vnuma_nodes != 0) { > int i; > > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > index e9ac886..52f3831 100644 > --- a/tools/libxl/libxl_internal.h > +++ b/tools/libxl/libxl_internal.h > @@ -1011,7 +1011,7 @@ _hidden int libxl__build_post(libxl__gc *gc, uint32_t > domid, > _hidden int libxl__build_pv(libxl__gc *gc, uint32_t domid, > libxl_domain_build_info *info, libxl__domain_build_state > *state); > _hidden int libxl__build_hvm(libxl__gc *gc, uint32_t domid, > - libxl_domain_build_info *info, > + libxl_domain_config *d_config, > libxl__domain_build_state *state); > > _hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid, > @@ -1519,6 +1519,15 @@ _hidden int libxl__need_xenpv_qemu(libxl__gc *gc, > int nr_channels, libxl_device_channel *channels); > > /* > + * This function will fix reserved device memory conflict > + * according to user's configuration. > + */ > +_hidden int libxl__domain_device_construct_rdm(libxl__gc *gc, > + libxl_domain_config *d_config, > + uint64_t rdm_mem_guard, > + struct xc_hvm_build_args *args); > + > +/* > * This function will cause the whole libxl process to hang > * if the device model does not respond. It is deprecated. > * > diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl > index 4dfcaf7..b4282a0 100644 > --- a/tools/libxl/libxl_types.idl > +++ b/tools/libxl/libxl_types.idl > @@ -395,6 +395,7 @@ libxl_domain_build_info = Struct("domain_build_info",[ > ("target_memkb", MemKB), > ("video_memkb", MemKB), > ("shadow_memkb", MemKB), > + ("rdm_mem_boundary_memkb", MemKB), > ("rtc_timeoffset", uint32), > ("exec_ssidref", uint32), > ("exec_ssid_label", string), > @@ -559,6 +560,12 @@ libxl_device_pci = Struct("device_pci", [ > ("rdm_reserve", libxl_rdm_reserve_flag), > ]) > > +libxl_device_rdm = Struct("device_rdm", [ > + ("start", uint64), > + ("size", uint64), > + ("flag", libxl_rdm_reserve_flag), > + ]) > + > libxl_device_dtdev = Struct("device_dtdev", [ > ("path", string), > ]) > @@ -589,6 +596,7 @@ libxl_domain_config = Struct("domain_config", [ > ("disks", Array(libxl_device_disk, "num_disks")), > ("nics", Array(libxl_device_nic, "num_nics")), > ("pcidevs", Array(libxl_device_pci, "num_pcidevs")), > + ("rdms", Array(libxl_device_rdm, "num_rdms")), > ("dtdevs", Array(libxl_device_dtdev, "num_dtdevs")), > ("vfbs", Array(libxl_device_vfb, "num_vfbs")), > ("vkbs", Array(libxl_device_vkb, "num_vkbs")), > diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c > index 4364ba4..85d74fd 100644 > --- a/tools/libxl/xl_cmdimpl.c > +++ b/tools/libxl/xl_cmdimpl.c > @@ -1374,6 +1374,9 @@ static void parse_config_data(const char *config_source, > if (!xlu_cfg_get_long (config, "videoram", &l, 0)) > b_info->video_memkb = l * 1024; > > + if (!xlu_cfg_get_long (config, "rdm_mem_boundary", &l, 0)) > + b_info->rdm_mem_boundary_memkb = l * 1024; > + > if (!xlu_cfg_get_long(config, "max_event_channels", &l, 0)) > b_info->event_channels = l; > > -- > 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |