[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [RFC][PATCH 04/13] tools/libxl: detect and avoid conflicts with RDM
Sorry for the late review. This series fell through the crack. On Fri, Apr 10, 2015 at 05:21:55PM +0800, Tiejun Chen wrote: > While building a VM, HVM domain builder provides struct hvm_info_table{} > to help hvmloader. Currently it includes two fields to construct guest > e820 table by hvmloader, low_mem_pgend and high_mem_pgend. So we should > check them to fix any conflict with RAM. > > RMRR can reside in address space beyond 4G theoretically, but we never > see this in real world. So in order to avoid breaking highmem layout How does this break highmem layout? > we don't solve highmem conflict. Note this means highmem rmrr could still > be supported if no conflict. > Aren't you actively trying to avoid conflict in libxl? > But in the case of lowmem, RMRR probably scatter the whole RAM space. > Especially multiple RMRR entries would worsen this to lead a complicated > memory layout. And then its hard to extend hvm_info_table{} to work > hvmloader out. So here we're trying to figure out a simple solution to > avoid breaking existing layout. So when a conflict occurs, > > #1. Above a predefined boundary (default 2G) > - move lowmem_end below reserved region to solve conflict; > I hope this "predefined boundary" is user tunable. I will check later in this patch if it is the case. > #2 Below a predefined boundary (default 2G) > - Check force/try policy. > "force" policy leads to fail libxl. Note when both policies > are specified on a given region, 'force' is always preferred. > "try" policy issue a warning message and also mask this entry INVALID > to indicate we shouldn't expose this entry to hvmloader. > > Signed-off-by: Tiejun Chen <tiejun.chen@xxxxxxxxx> > --- > tools/libxc/include/xenctrl.h | 8 ++ > tools/libxc/include/xenguest.h | 3 +- > tools/libxc/xc_domain.c | 40 +++++++++ > tools/libxc/xc_hvm_build_x86.c | 28 +++--- > tools/libxl/libxl_create.c | 2 +- > tools/libxl/libxl_dm.c | 195 > +++++++++++++++++++++++++++++++++++++++++ > tools/libxl/libxl_dom.c | 27 +++++- > tools/libxl/libxl_internal.h | 11 ++- > tools/libxl/libxl_types.idl | 7 ++ > 9 files changed, 303 insertions(+), 18 deletions(-) > > diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h > index 59bbe06..299b95f 100644 > --- a/tools/libxc/include/xenctrl.h > +++ b/tools/libxc/include/xenctrl.h > @@ -2053,6 +2053,14 @@ int xc_get_device_group(xc_interface *xch, > uint32_t *num_sdevs, > uint32_t *sdev_array); > > +struct xen_reserved_device_memory > +*xc_device_get_rdm(xc_interface *xch, > + uint32_t flag, > + uint16_t seg, > + uint8_t bus, > + uint8_t devfn, > + unsigned int *nr_entries); > + > int xc_test_assign_device(xc_interface *xch, > uint32_t domid, > uint32_t machine_sbdf); > diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h > index 40bbac8..0f1c23b 100644 > --- a/tools/libxc/include/xenguest.h > +++ b/tools/libxc/include/xenguest.h > @@ -218,7 +218,8 @@ struct xc_hvm_firmware_module { > }; > > struct xc_hvm_build_args { > - uint64_t mem_size; /* Memory size in bytes. */ > + uint64_t lowmem_size; /* All low memory size in bytes. */ > + uint64_t mem_size; /* All memory size in bytes. */ Do you find the comment confusing? I think there is no need to change the comment of mem_size. > uint64_t mem_target; /* Memory target in bytes. */ > uint64_t mmio_size; /* Size of the MMIO hole in bytes. */ > const char *image_file_name; /* File name of the image to load. */ > diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c > index 4f8383e..85b18ea 100644 > --- a/tools/libxc/xc_domain.c > +++ b/tools/libxc/xc_domain.c > @@ -1665,6 +1665,46 @@ int xc_assign_device( > return do_domctl(xch, &domctl); > } > > +struct xen_reserved_device_memory > +*xc_device_get_rdm(xc_interface *xch, > + uint32_t flag, > + uint16_t seg, > + uint8_t bus, > + uint8_t devfn, > + unsigned int *nr_entries) > +{ > + struct xen_reserved_device_memory *xrdm = NULL; > + int rc = xc_reserved_device_memory_map(xch, flag, seg, bus, devfn, xrdm, > + nr_entries); > + > + if ( rc < 0 ) > + { > + if ( errno == ENOBUFS ) > + { > + if ( (xrdm = malloc(*nr_entries * > + sizeof(xen_reserved_device_memory_t))) == > NULL ) > + { > + PERROR("Could not allocate memory."); > + goto out; > + } Don't you leak origin xrdm in this case? And, this style is not very good. Shouldn't the caller allocate enough memory before hand? > + rc = xc_reserved_device_memory_map(xch, flag, seg, bus, devfn, > xrdm, > + nr_entries); > + if ( rc ) > + { > + PERROR("Could not get reserved device memory maps."); > + free(xrdm); > + xrdm = NULL; > + } > + } > + else { > + PERROR("Could not get reserved device memory maps."); > + } > + } > + > + out: > + return xrdm; > +} > + > int xc_get_device_group( > xc_interface *xch, > uint32_t domid, > diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c > index c81a25b..3f87bb3 100644 > --- a/tools/libxc/xc_hvm_build_x86.c > +++ b/tools/libxc/xc_hvm_build_x86.c > @@ -89,19 +89,16 @@ static int modules_init(struct xc_hvm_build_args *args, > } > > static void build_hvm_info(void *hvm_info_page, uint64_t mem_size, > - uint64_t mmio_start, uint64_t mmio_size) > + uint64_t lowmem_end) > { > struct hvm_info_table *hvm_info = (struct hvm_info_table *) > (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET); > - uint64_t lowmem_end = mem_size, highmem_end = 0; > + uint64_t highmem_end = 0; > uint8_t sum; > int i; > > - if ( lowmem_end > mmio_start ) > - { > - highmem_end = (1ull<<32) + (lowmem_end - mmio_start); > - lowmem_end = mmio_start; > - } > + if ( mem_size > lowmem_end ) > + highmem_end = (1ull<<32) + (mem_size - lowmem_end); > > memset(hvm_info_page, 0, PAGE_SIZE); > > @@ -252,7 +249,7 @@ static int setup_guest(xc_interface *xch, > void *hvm_info_page; > uint32_t *ident_pt; > struct elf_binary elf; > - uint64_t v_start, v_end; > + uint64_t v_start, v_end, v_lowend, lowmem_end; > uint64_t m_start = 0, m_end = 0; > int rc; > xen_capabilities_info_t caps; > @@ -275,6 +272,7 @@ static int setup_guest(xc_interface *xch, > elf_parse_binary(&elf); > v_start = 0; > v_end = args->mem_size; > + v_lowend = lowmem_end = args->lowmem_size; > > if ( xc_version(xch, XENVER_capabilities, &caps) != 0 ) > { > @@ -302,8 +300,11 @@ static int setup_guest(xc_interface *xch, > > for ( i = 0; i < nr_pages; i++ ) > page_array[i] = i; > - for ( i = mmio_start >> PAGE_SHIFT; i < nr_pages; i++ ) > - page_array[i] += mmio_size >> PAGE_SHIFT; > + /* > + * This condition 'lowmem_end <= mmio_start' is always true. > + */ > + for ( i = lowmem_end >> PAGE_SHIFT; i < nr_pages; i++ ) > + page_array[i] += ((1ull << 32) - lowmem_end) >> PAGE_SHIFT; > > /* > * Try to claim pages for early warning of insufficient memory available. > @@ -469,7 +470,7 @@ static int setup_guest(xc_interface *xch, > xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, > HVM_INFO_PFN)) == NULL ) > goto error_out; > - build_hvm_info(hvm_info_page, v_end, mmio_start, mmio_size); > + build_hvm_info(hvm_info_page, v_end, v_lowend); > munmap(hvm_info_page, PAGE_SIZE); > > /* Allocate and clear special pages. */ > @@ -588,9 +589,6 @@ int xc_hvm_build(xc_interface *xch, uint32_t domid, > if ( args.mem_target == 0 ) > args.mem_target = args.mem_size; > > - if ( args.mmio_size == 0 ) > - args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH; > - > /* An HVM guest must be initialised with at least 2MB memory. */ > if ( args.mem_size < (2ull << 20) || args.mem_target < (2ull << 20) ) > return -1; > @@ -634,6 +632,8 @@ int xc_hvm_build_target_mem(xc_interface *xch, > args.mem_size = (uint64_t)memsize << 20; > args.mem_target = (uint64_t)target << 20; > args.image_file_name = image_name; > + if ( args.mmio_size == 0 ) > + args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH; > > return xc_hvm_build(xch, domid, &args); > } > diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c > index 9ed40d4..eab86fd 100644 > --- a/tools/libxl/libxl_create.c > +++ b/tools/libxl/libxl_create.c > @@ -430,7 +430,7 @@ int libxl__domain_build(libxl__gc *gc, > > switch (info->type) { > case LIBXL_DOMAIN_TYPE_HVM: > - ret = libxl__build_hvm(gc, domid, info, state); > + ret = libxl__build_hvm(gc, domid, d_config, state); > if (ret) > goto out; > > diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c > index a8b08f2..9ad04ae 100644 > --- a/tools/libxl/libxl_dm.c > +++ b/tools/libxl/libxl_dm.c > @@ -90,6 +90,201 @@ const char *libxl__domain_device_model(libxl__gc *gc, > return dm; > } > > +/* > + * Check whether there exists rdm hole in the specified memory range. > + * Returns 1 if exists, else returns 0. > + */ > +static int check_rdm_hole(uint64_t start, uint64_t memsize, > + uint64_t rdm_start, uint64_t rdm_size) > +{ > + if ( start + memsize <= rdm_start || start >= rdm_start + rdm_size ) > + return 0; > + else > + return 1; > +} > + > +/* > + * Check reported RDM regions and handle potential gfn conflicts according > + * to user preferred policy. > + */ > +int libxl__domain_device_check_rdm(libxl__gc *gc, > + libxl_domain_config *d_config, > + uint64_t rdm_mem_guard, rdm_mem_boundary > + struct xc_hvm_build_args *args) This function does more than "checking", so a better name is needed. May be you should split this function to one "build" function and one "check" function? What do you think? > +{ > + int i, j, conflict; > + libxl_ctx *ctx = libxl__gc_owner(gc); You can just use CTX macro. > + struct xen_reserved_device_memory *xrdm = NULL; > + unsigned int nr_all_rdms = 0; > + uint64_t rdm_start, rdm_size, highmem_end = (1ULL << 32); > + uint32_t type = d_config->b_info.rdm.type; > + uint16_t seg; > + uint8_t bus, devfn; > + > + /* Might not to expose rdm. */ Delete "to". > + if ((type == LIBXL_RDM_RESERVE_TYPE_NONE) && !d_config->num_pcidevs) > + return 0; > + > + /* Collect all rdm info if exist. */ > + xrdm = xc_device_get_rdm(ctx->xch, LIBXL_RDM_RESERVE_TYPE_HOST, > + 0, 0, 0, &nr_all_rdms); > + if (!nr_all_rdms) > + return 0; > + d_config->rdms = libxl__calloc(gc, nr_all_rdms, > + sizeof(libxl_device_rdm)); Note that if you use "gc" here the allocated memory will be, well, garbage collected at some point. If you don't want them to be gc'ed you should use NOGC. > + memset(d_config->rdms, 0, sizeof(libxl_device_rdm)); > + > + /* Query all RDM entries in this platform */ > + if (type == LIBXL_RDM_RESERVE_TYPE_HOST) { > + d_config->num_rdms = nr_all_rdms; > + for (i = 0; i < d_config->num_rdms; i++) { > + d_config->rdms[i].start = > + (uint64_t)xrdm[i].start_pfn << XC_PAGE_SHIFT; > + d_config->rdms[i].size = > + (uint64_t)xrdm[i].nr_pages << XC_PAGE_SHIFT; > + d_config->rdms[i].flag = d_config->b_info.rdm.reserve; > + } > + } else { > + d_config->num_rdms = 0; > + } And you should move d_config->rdms = libxl__calloc inside that `if'. That is, don't allocate memory if you don't need it. > + free(xrdm); > + > + /* Query RDM entries per-device */ > + for (i = 0; i < d_config->num_pcidevs; i++) { > + unsigned int nr_entries = 0; > + bool new = true; > + seg = d_config->pcidevs[i].domain; > + bus = d_config->pcidevs[i].bus; > + devfn = PCI_DEVFN(d_config->pcidevs[i].dev, > d_config->pcidevs[i].func); > + nr_entries = 0; You've already initialised this variable. > + xrdm = xc_device_get_rdm(ctx->xch, LIBXL_RDM_RESERVE_TYPE_NONE, > + seg, bus, devfn, &nr_entries); > + /* No RDM to associated with this device. */ > + if (!nr_entries) > + continue; > + > + /* Need to check whether this entry is already saved in the array. > + * This could come from two cases: > + * > + * - user may configure to get all RMRRs in this platform, which > + * is already queried before this point Formatting. > + * - or two assigned devices may share one RMRR entry > + * > + * different policies may be configured on the same RMRR due to above > + * two cases. We choose a simple policy to always favor stricter > policy > + */ > + for (j = 0; j < d_config->num_rdms; j++) { > + if (d_config->rdms[j].start == > + (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT) > + { > + if (d_config->rdms[j].flag != LIBXL_RDM_RESERVE_FLAG_FORCE) > + d_config->rdms[j].flag = > d_config->pcidevs[i].rdm_reserve; > + new = false; > + break; > + } > + } > + > + if (new) { > + if (d_config->num_rdms > nr_all_rdms - 1) { > + LIBXL__LOG(CTX, LIBXL__LOG_ERROR, "Exceed rdm array > boundary!\n"); LOG(ERROR, ...) > + free(xrdm); > + return -1; Please use goto out idiom. > + } > + > + /* > + * This is a new entry. > + */ /* This is a new entry. */ > + d_config->rdms[d_config->num_rdms].start = > + (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT; > + d_config->rdms[d_config->num_rdms].size = > + (uint64_t)xrdm[0].nr_pages << XC_PAGE_SHIFT; > + d_config->rdms[d_config->num_rdms].flag = > d_config->pcidevs[i].rdm_reserve; > + d_config->num_rdms++; Does this work? I don't see you reallocate memory. > + } > + free(xrdm); Bug: you free xrdm several times. Please fix obvious bugs and do simple tests before posting. That would save you another round of back and forth. > + } > + > + /* Fix highmem. */ > + if (args->mem_size > args->lowmem_size) > + highmem_end += (args->mem_size - args->lowmem_size); It would be clearer if you just do highmem_end = (1ULL << 32) + (args->mem_size - args->lowmem_size); And please have a blank line in between this assignment and following comment. > + /* Next step is to check and avoid potential conflict between RDM entries > + * and guest RAM. To avoid intrusive impact to existing memory layout > + * {lowmem, mmio, highmem} which is passed around various function > blocks, > + * below conflicts are not handled which are rare and handling them would > + * lead to a more scattered layout: > + * - RMRR in highmem area (>4G) > + * - RMRR lower than a defined memory boundary (e.g. 2G) > + * Otherwise for conflicts between boundary and 4G, we'll simply move > lowmem > + * end below reserved region to solve conflict. > + * > + * If a conflict is detected on a given RMRR entry, an error will be > + * returned. > + * If 'force' policy is specified. Or conflict is treated as a warning if > + * 'try' policy is specified, and we also mark this as INVALID not to > expose > + * this entry to hvmloader. > + * > + * Firstly we should check the case of rdm < 4G because we may need to > + * expand highmem_end. Is this strategy agreed in previous discussion? How future-proof is this strategy? > + */ > + for (i = 0; i < d_config->num_rdms; i++) { > + rdm_start = d_config->rdms[i].start; > + rdm_size = d_config->rdms[i].size; > + conflict = check_rdm_hole(0, args->lowmem_size, rdm_start, rdm_size); > + > + if (!conflict) > + continue; > + > + /* > + * Just check if RDM > our memory boundary > + */ A one line comment is good enough. > + if (d_config->rdms[i].start > rdm_mem_guard) { > + /* > + * We will move downwards lowmem_end so we have to expand > + * highmem_end. > + */ > + highmem_end += (args->lowmem_size - rdm_start); > + /* Now move downwards lowmem_end. */ > + args->lowmem_size = rdm_start; > + } > + } > + > + /* > + * Finally we can take same policy to check lowmem(< 2G) and > + * highmem adjusted above. > + */ > + for (i = 0; i < d_config->num_rdms; i++) { > + rdm_start = d_config->rdms[i].start; > + rdm_size = d_config->rdms[i].size; > + /* Does this entry conflict with lowmem? */ > + conflict = check_rdm_hole(0, args->lowmem_size, > + rdm_start, rdm_size); > + /* Does this entry conflict with highmem? */ > + conflict |= check_rdm_hole((1ULL<<32), highmem_end, > + rdm_start, rdm_size); > + > + if (!conflict) > + continue; > + > + if(d_config->rdms[i].flag == LIBXL_RDM_RESERVE_FLAG_FORCE) { > + LIBXL__LOG(CTX, LIBXL__LOG_ERROR, "RDM conflict at 0x%lx.\n", > + d_config->rdms[i].start); LOG(ERROR, ...) > + return -1; Use goto out style. > + } else { > + LIBXL__LOG(CTX, LIBXL__LOG_WARNING, > + "Ignoring RDM conflict at 0x%lx.\n", > + d_config->rdms[i].start); > + > + /* > + * Then mask this INVALID to indicate we shouldn't expose this > + * to hvmloader. > + */ > + d_config->rdms[i].flag = LIBXL_RDM_RESERVE_FLAG_INVALID; > + } > + } > + > + return 0; > +} > + > const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config) > { > const libxl_vnc_info *vnc = NULL; > diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c > index a16d4a1..ee67282 100644 > --- a/tools/libxl/libxl_dom.c > +++ b/tools/libxl/libxl_dom.c > @@ -788,12 +788,14 @@ out: > } > > int libxl__build_hvm(libxl__gc *gc, uint32_t domid, > - libxl_domain_build_info *info, > + libxl_domain_config *d_config, > libxl__domain_build_state *state) > { > libxl_ctx *ctx = libxl__gc_owner(gc); > struct xc_hvm_build_args args = {}; > int ret, rc = ERROR_FAIL; > + libxl_domain_build_info *const info = &d_config->b_info; > + uint64_t rdm_mem_boundary, mmio_start; > > memset(&args, 0, sizeof(struct xc_hvm_build_args)); > /* The params from the configuration file are in Mb, which are then > @@ -802,6 +804,8 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid, > * Do all this in one step here... > */ > args.mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10; > + args.lowmem_size = (args.mem_size > (1ULL << 32)) ? > + (1ULL << 32) : args.mem_size; > args.mem_target = (uint64_t)(info->target_memkb - info->video_memkb) << > 10; > args.claim_enabled = libxl_defbool_val(info->claim_mode); > if (info->u.hvm.mmio_hole_memkb) { > @@ -811,6 +815,27 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid, > if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START) > args.mmio_size = info->u.hvm.mmio_hole_memkb << 10; > } > + > + if (args.mmio_size == 0) > + args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH; > + mmio_start = (1ull << 32) - args.mmio_size; > + > + if (args.lowmem_size > mmio_start) > + args.lowmem_size = mmio_start; > + > + /* > + * We'd like to set a memory boundary to determine if we need to check > + * any overlap with reserved device memory. > + * > + * TODO: we will add a config parameter for this boundary value next. > + */ Yes, please do so and properly document it. > + rdm_mem_boundary = 0x80000000; > + ret = libxl__domain_device_check_rdm(gc, d_config, rdm_mem_boundary, > &args); > + if (ret) { > + LOG(ERROR, "checking reserved device memory failed"); > + goto out; > + } > + > if (libxl__domain_firmware(gc, info, &args)) { > LOG(ERROR, "initializing domain firmware failed"); > goto out; > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > index 934465a..64d05b3 100644 > --- a/tools/libxl/libxl_internal.h > +++ b/tools/libxl/libxl_internal.h > @@ -985,7 +985,7 @@ _hidden int libxl__build_post(libxl__gc *gc, uint32_t > domid, > _hidden int libxl__build_pv(libxl__gc *gc, uint32_t domid, > libxl_domain_build_info *info, libxl__domain_build_state > *state); > _hidden int libxl__build_hvm(libxl__gc *gc, uint32_t domid, > - libxl_domain_build_info *info, > + libxl_domain_config *d_config, > libxl__domain_build_state *state); > > _hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid, > @@ -1480,6 +1480,15 @@ _hidden int libxl__need_xenpv_qemu(libxl__gc *gc, > int nr_channels, libxl_device_channel *channels); > > /* > + * This function will fix reserved device memory conflict > + * according to user's configuration. > + */ > +_hidden int libxl__domain_device_check_rdm(libxl__gc *gc, > + libxl_domain_config *d_config, > + uint64_t rdm_mem_guard, > + struct xc_hvm_build_args *args); > + > +/* > * This function will cause the whole libxl process to hang > * if the device model does not respond. It is deprecated. > * > diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl > index 5786455..218931b 100644 > --- a/tools/libxl/libxl_types.idl > +++ b/tools/libxl/libxl_types.idl > @@ -541,6 +541,12 @@ libxl_device_pci = Struct("device_pci", [ > ("rdm_reserve", libxl_rdm_reserve_flag), > ]) > > +libxl_device_rdm = Struct("device_rdm", [ > + ("start", uint64), > + ("size", uint64), > + ("flag", bool), > + ]) > + > libxl_device_vtpm = Struct("device_vtpm", [ > ("backend_domid", libxl_domid), > ("backend_domname", string), > @@ -567,6 +573,7 @@ libxl_domain_config = Struct("domain_config", [ > ("disks", Array(libxl_device_disk, "num_disks")), > ("nics", Array(libxl_device_nic, "num_nics")), > ("pcidevs", Array(libxl_device_pci, "num_pcidevs")), > + ("rdms", Array(libxl_device_rdm, "num_rdms")), > ("vfbs", Array(libxl_device_vfb, "num_vfbs")), > ("vkbs", Array(libxl_device_vkb, "num_vkbs")), > ("vtpms", Array(libxl_device_vtpm, "num_vtpms")), > -- > 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |