Re: [Xen-devel] [RFC][v2][PATCH 04/14] tools/libxl: detect and avoid conflicts with RDM
On Fri, May 22, 2015 at 05:35:04PM +0800, Tiejun Chen wrote:
> While building a VM, HVM domain builder provides struct hvm_info_table{}
> to help hvmloader. Currently it includes two fields to construct guest
> e820 table by hvmloader, low_mem_pgend and high_mem_pgend. So we should
> check them to fix any conflict with RAM.
>
> RMRR can reside in address space beyond 4G theoretically, but we never
> see this in real world. So in order to avoid breaking highmem layout
> we don't solve highmem conflict. Note this means highmem rmrr could still
> be supported if no conflict.
>
> But in the case of lowmem, RMRR probably scatter the whole RAM space.
> Especially multiple RMRR entries would worsen this to lead a complicated
> memory layout. And then its hard to extend hvm_info_table{} to work
> hvmloader out. So here we're trying to figure out a simple solution to
> avoid breaking existing layout. So when a conflict occurs,
>
>     #1. Above a predefined boundary (default 2G)
>         - move lowmem_end below reserved region to solve conflict;
>
>     #2. Below a predefined boundary (default 2G)
>         - Check strict/relaxed policy.
>           "strict" policy leads to fail libxl. Note when both policies
>           are specified on a given region, 'strict' is always preferred.
>           "relaxed" policy issue a warning message and also mask this entry
>           INVALID to indicate we shouldn't expose this entry to hvmloader.
>
> Note this predefined boundary can be changes with the parameter
> "rdm_mem_boundary" in .cfg file.
>
> Signed-off-by: Tiejun Chen <tiejun.chen@xxxxxxxxx>
> ---

It would be better if you write down what you changed in this version
after the "---" marker. What we normally do is:

  libxl: implement FOO

  FOO is needed because ...

  Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
  ---
  changes in vN:
  * bar -> baz
  * more comments
  ---

The stuff between the two "---" will be automatically discarded when
committing.

>  docs/man/xl.cfg.pod.5          |  21 ++++
>  tools/libxc/include/xenguest.h |   1 +
>  tools/libxc/xc_hvm_build_x86.c |  25 ++--
>  tools/libxl/libxl_create.c     |   2 +-
>  tools/libxl/libxl_dm.c         | 253 +++++++++++++++++++++++++++++++++++++++++
>  tools/libxl/libxl_dom.c        |  27 ++++-
>  tools/libxl/libxl_internal.h   |  11 +-
>  tools/libxl/libxl_types.idl    |   8 ++
>  tools/libxl/xl_cmdimpl.c       |   3 +
>  9 files changed, 337 insertions(+), 14 deletions(-)
>
> diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
> index 12c34c4..80e3930 100644
> --- a/docs/man/xl.cfg.pod.5
> +++ b/docs/man/xl.cfg.pod.5
> @@ -764,6 +764,27 @@ is default.
>
>  Note this would override global B<rdm> option.
>
> +=item B<rdm_mem_boundary=MBYTES>
> +
> +Number of megabytes to set a boundary for checking rdm conflict.
> +
> +When RDM conflicts with RAM, RDM probably scatter the whole RAM space.
> +Especially multiple RMRR entries would worsen this to lead a complicated
> +memory layout. So here we're trying to figure out a simple solution to
> +avoid breaking existing layout. So when a conflict occurs,
> +
> + #1. Above a predefined boundary
> +     - move lowmem_end below reserved region to solve conflict;
> +
> + #2. Below a predefined boundary
> +     - Check strict/relaxed policy.
> +       "strict" policy leads to fail libxl. Note when both policies
> +       are specified on a given region, 'strict' is always preferred.
> +       "relaxed" policy issue a warning message and also mask this entry
> +       INVALID to indicate we shouldn't expose this entry to hvmloader.
> +
> +Her the default is 2G.

Typo "her". I get the idea. I will leave grammar / syntax check to
native speakers.
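Just to check I'm reading the semantics right: per reserved region, the
policy described above boils down to something like this (my own sketch,
reusing names that appear later in this patch, so purely illustrative):

    /* per RDM region, once a conflict with guest RAM is detected */
    if (rdm_start > rdm_mem_boundary) {
        /* case #1: shrink lowmem so the reserved region becomes a hole */
        lowmem_end = rdm_start;
    } else if (flag == LIBXL_RDM_RESERVE_FLAG_STRICT) {
        /* case #2, strict: refuse to build the domain */
        return ERROR_FAIL;
    } else {
        /* case #2, relaxed: warn and hide this entry from hvmloader */
        flag = LIBXL_RDM_RESERVE_FLAG_INVALID;
    }

Is that right? If so, it may be worth spelling it out this explicitly in
the pod text too, because "boundary" alone doesn't tell the user which
side gets which treatment.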
> +
>  =back
>
>  =back
> diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
> index 7581263..4cb7e9f 100644
> --- a/tools/libxc/include/xenguest.h
> +++ b/tools/libxc/include/xenguest.h
> @@ -234,6 +234,7 @@ struct xc_hvm_firmware_module {
>  };
>
>  struct xc_hvm_build_args {
> +    uint64_t lowmem_size;        /* All low memory size in bytes. */

You might find this value unnecessary with my patch to consolidate memory
layout generation in libxl?

>      uint64_t mem_size;           /* Memory size in bytes. */
>      uint64_t mem_target;         /* Memory target in bytes. */
>      uint64_t mmio_size;          /* Size of the MMIO hole in bytes. */
> diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c
> index e45ae4a..9a1567a 100644
> --- a/tools/libxc/xc_hvm_build_x86.c
> +++ b/tools/libxc/xc_hvm_build_x86.c
> @@ -21,6 +21,7 @@
>  #include <stdlib.h>
>  #include <unistd.h>
>  #include <zlib.h>
> +#include <assert.h>
>
>  #include "xg_private.h"
>  #include "xc_private.h"
> @@ -98,11 +99,8 @@ static void build_hvm_info(void *hvm_info_page, uint64_t mem_size,
>      uint8_t sum;
>      int i;
>
> -    if ( lowmem_end > mmio_start )
> -    {
> -        highmem_end = (1ull<<32) + (lowmem_end - mmio_start);
> -        lowmem_end = mmio_start;
> -    }
> +    if ( args->mem_size > lowmem_end )
> +        highmem_end = (1ull<<32) + (args->mem_size - lowmem_end);
>
>      memset(hvm_info_page, 0, PAGE_SIZE);
>
> @@ -279,7 +277,7 @@ static int setup_guest(xc_interface *xch,
>
>      elf_parse_binary(&elf);
>      v_start = 0;
> -    v_end = args->mem_size;
> +    v_end = args->lowmem_size;
>
>      if ( nr_pages > target_pages )
>          memflags |= XENMEMF_populate_on_demand;
>
> @@ -344,8 +342,14 @@ static int setup_guest(xc_interface *xch,
>
>      for ( i = 0; i < nr_pages; i++ )
>          page_array[i] = i;
> -    for ( i = mmio_start >> PAGE_SHIFT; i < nr_pages; i++ )
> -        page_array[i] += mmio_size >> PAGE_SHIFT;
> +    /*
> +     * Actually v_end is args->lowmem_size, and we already adjusted
> +     * this below mmio_start when we check rdm previously, so here
> +     * this condition 'v_end <= mmio_start' is always true.
> +     */
> +    assert(v_end <= mmio_start);
> +    for ( i = v_end >> PAGE_SHIFT; i < nr_pages; i++ )
> +        page_array[i] += ((1ull << 32) - v_end) >> PAGE_SHIFT;
>
>      /*
>       * Try to claim pages for early warning of insufficient memory available.
> @@ -664,9 +668,6 @@ int xc_hvm_build(xc_interface *xch, uint32_t domid,
>      if ( args.mem_target == 0 )
>          args.mem_target = args.mem_size;
>
> -    if ( args.mmio_size == 0 )
> -        args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
> -
>      /* An HVM guest must be initialised with at least 2MB memory. */
>      if ( args.mem_size < (2ull << 20) || args.mem_target < (2ull << 20) )
>          return -1;
> @@ -713,6 +714,8 @@ int xc_hvm_build_target_mem(xc_interface *xch,
>      args.mem_size = (uint64_t)memsize << 20;
>      args.mem_target = (uint64_t)target << 20;
>      args.image_file_name = image_name;
> +    if ( args.mmio_size == 0 )
> +        args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
>

The above hunk can be simplified with my patch mentioned above.
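To expand on the consolidation point: the idea of my series is that libxl
works out the lowmem/mmio/highmem split once and libxc merely consumes it,
so neither lowmem_size nor the mmio_size defaulting would need to live in
xc_hvm_build_target_mem(). Roughly this (untested sketch on top of that
series; exact field names may differ):

    /* in libxl__build_hvm(): derive the whole layout up front */
    uint64_t mmio_size = info->u.hvm.mmio_hole_memkb ?
        (uint64_t)info->u.hvm.mmio_hole_memkb << 10 : HVM_BELOW_4G_MMIO_LENGTH;
    uint64_t mmio_start = (1ULL << 32) - mmio_size;
    uint64_t lowmem_end = args.mem_size < mmio_start ? args.mem_size
                                                     : mmio_start;
    uint64_t highmem_end = args.mem_size > lowmem_end ?
        (1ULL << 32) + (args.mem_size - lowmem_end) : 0;

    /* then pass lowmem_end / highmem_end (or equivalents) down to libxc
     * instead of having build_hvm_info() re-derive them */

With that in place the RDM adjustment later in this patch would also
operate on values libxl already owns.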
>      return xc_hvm_build(xch, domid, &args);
>  }
> diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
> index d649ead..a782860 100644
> --- a/tools/libxl/libxl_create.c
> +++ b/tools/libxl/libxl_create.c
> @@ -451,7 +451,7 @@ int libxl__domain_build(libxl__gc *gc,
>
>      switch (info->type) {
>      case LIBXL_DOMAIN_TYPE_HVM:
> -        ret = libxl__build_hvm(gc, domid, info, state);
> +        ret = libxl__build_hvm(gc, domid, d_config, state);
>          if (ret)
>              goto out;
>
> diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
> index 0c6408d..85e5317 100644
> --- a/tools/libxl/libxl_dm.c
> +++ b/tools/libxl/libxl_dm.c
> @@ -90,6 +90,259 @@ const char *libxl__domain_device_model(libxl__gc *gc,
>      return dm;
>  }
>
> +static struct xen_reserved_device_memory
> +*xc_device_get_rdm(libxl__gc *gc,
> +                   uint32_t flag,
> +                   uint16_t seg,
> +                   uint8_t bus,
> +                   uint8_t devfn,
> +                   unsigned int *nr_entries)
> +{
> +    struct xen_reserved_device_memory *xrdm = NULL;
> +    int rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
> +                                           xrdm, nr_entries);
> +

Please separate declaration and function call. Also change xrdm to NULL
in that function call.

> +    assert( rc <= 0 );
> +    /* "0" means we have no any rdm entry. */
> +    if ( !rc )
> +        goto out;

Also set *nr_entries = 0; otherwise you can't distinguish error vs 0
entries.

> +
> +    if ( errno == ENOBUFS )
> +    {
> +        if ( (xrdm = malloc(*nr_entries *
> +                            sizeof(xen_reserved_device_memory_t))) == NULL )

Move xrdm = malloc out of "if".

> +        {
> +            LOG(ERROR, "Could not allocate RDM buffer!\n");
> +            goto out;
> +        }
> +        rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
> +                                           xrdm, nr_entries);
> +        if ( rc )
> +        {
> +            LOG(ERROR, "Could not get reserved device memory maps.\n");
> +            *nr_entries = 0;
> +            free(xrdm);
> +            xrdm = NULL;
> +        }
> +    }
> +    else
> +        LOG(ERROR, "Could not get reserved device memory maps.\n");
> +
> + out:
> +    return xrdm;
> +}
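To put the comments above together, this is roughly the shape I would
expect this function to take (untested sketch, just to illustrate the
structure):

    static struct xen_reserved_device_memory *
    xc_device_get_rdm(libxl__gc *gc, uint32_t flag, uint16_t seg,
                      uint8_t bus, uint8_t devfn, unsigned int *nr_entries)
    {
        struct xen_reserved_device_memory *xrdm = NULL;
        int rc;

        /* First call with a NULL buffer only asks for the entry count. */
        rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
                                           NULL, nr_entries);
        assert(rc <= 0);
        if (!rc) {
            /* Genuinely no entries: make that visible to the caller. */
            *nr_entries = 0;
            return NULL;
        }

        if (errno != ENOBUFS) {
            LOG(ERROR, "Could not get reserved device memory maps");
            *nr_entries = 0;
            return NULL;
        }

        xrdm = malloc(*nr_entries * sizeof(*xrdm));
        if (!xrdm) {
            LOG(ERROR, "Could not allocate RDM buffer");
            *nr_entries = 0;
            return NULL;
        }

        rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
                                           xrdm, nr_entries);
        if (rc) {
            LOG(ERROR, "Could not get reserved device memory maps");
            *nr_entries = 0;
            free(xrdm);
            xrdm = NULL;
        }

        return xrdm;
    }

Forcing *nr_entries to 0 on every failure path means the callers' existing
"if (!nr_entries) continue;" checks stay safe, although checking the
returned pointer as well would still be cleaner.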
> +
> +/*
> + * Check whether there exists rdm hole in the specified memory range.
> + * Returns true if exists, else returns false.
> + */
> +static bool overlaps_rdm(uint64_t start, uint64_t memsize,
> +                         uint64_t rdm_start, uint64_t rdm_size)
> +{
> +    return (start + memsize > rdm_start) && (start < rdm_start + rdm_size);
> +}
> +
> +/*
> + * Check reported RDM regions and handle potential gfn conflicts according
> + * to user preferred policy.
> + *
> + * RMRR can reside in address space beyond 4G theoretically, but we never
> + * see this in real world. So in order to avoid breaking highmem layout
> + * we don't solve highmem conflict. Note this means highmem rmrr could still
> + * be supported if no conflict.
> + *
> + * But in the case of lowmem, RMRR probably scatter the whole RAM space.
> + * Especially multiple RMRR entries would worsen this to lead a complicated
> + * memory layout. And then its hard to extend hvm_info_table{} to work
> + * hvmloader out. So here we're trying to figure out a simple solution to
> + * avoid breaking existing layout. So when a conflict occurs,
> + *
> + * #1. Above a predefined boundary (default 2G)
> + *     - Move lowmem_end below reserved region to solve conflict;
> + *
> + * #2. Below a predefined boundary (default 2G)
> + *     - Check strict/relaxed policy.
> + *       "strict" policy leads to fail libxl. Note when both policies
> + *       are specified on a given region, 'strict' is always preferred.
> + *       "relaxed" policy issue a warning message and also mask this entry
> + *       INVALID to indicate we shouldn't expose this entry to hvmloader.
> + */
> +int libxl__domain_device_construct_rdm(libxl__gc *gc,
> +                                       libxl_domain_config *d_config,
> +                                       uint64_t rdm_mem_boundary,
> +                                       struct xc_hvm_build_args *args)
> +{
> +    int i, j, conflict;
> +    struct xen_reserved_device_memory *xrdm = NULL;
> +    uint64_t rdm_start, rdm_size, highmem_end = (1ULL << 32);
> +    uint32_t type = d_config->b_info.rdm.type;
> +    uint16_t seg;
> +    uint8_t bus, devfn;
> +
> +    /* Fix highmem. */
> +    highmem_end += (args->mem_size - args->lowmem_size);
> +
> +    /* Might not expose rdm. */
> +    if (type == LIBXL_RDM_RESERVE_TYPE_NONE)
> +        return 0;
> +
> +    /* Query all RDM entries in this platform */
> +    if (type == LIBXL_RDM_RESERVE_TYPE_HOST) {
> +        unsigned int nr_entries;
> +
> +        /* Collect all rdm info if exist. */
> +        xrdm = xc_device_get_rdm(gc, PCI_DEV_RDM_ALL,
> +                                 0, 0, 0, &nr_entries);
> +        if (!nr_entries)
> +            return 0;
> +
> +        d_config->num_rdms = nr_entries;
> +        d_config->rdms = libxl__realloc(NOGC, d_config->rdms,
> +                                d_config->num_rdms * sizeof(libxl_device_rdm));
> +
> +        for (i = 0; i < d_config->num_rdms; i++) {
> +            d_config->rdms[i].start =
> +                                (uint64_t)xrdm[i].start_pfn << XC_PAGE_SHIFT;
> +            d_config->rdms[i].size =
> +                                (uint64_t)xrdm[i].nr_pages << XC_PAGE_SHIFT;
> +            d_config->rdms[i].flag = d_config->b_info.rdm.reserve;
> +        }
> +
> +        free(xrdm);
> +    } else
> +        d_config->num_rdms = 0;
> +
> +    /* Query RDM entries per-device */
> +    for (i = 0; i < d_config->num_pcidevs; i++) {
> +        unsigned int nr_entries;
> +

Stray blank line.

> +        bool new = true;

Need blank line here.

> +        seg = d_config->pcidevs[i].domain;
> +        bus = d_config->pcidevs[i].bus;
> +        devfn = PCI_DEVFN(d_config->pcidevs[i].dev, d_config->pcidevs[i].func);
> +        nr_entries = 0;
> +        xrdm = xc_device_get_rdm(gc, ~PCI_DEV_RDM_ALL,
> +                                 seg, bus, devfn, &nr_entries);

You didn't check xrdm != NULL;

> +        /* No RDM to associated with this device. */
> +        if (!nr_entries)
> +            continue;
> +
> +        /*
> +         * Need to check whether this entry is already saved in the array.
> +         * This could come from two cases:
> +         *
> +         *   - user may configure to get all RMRRs in this platform, which
> +         *     is already queried before this point
> +         *   - or two assigned devices may share one RMRR entry
> +         *
> +         * different policies may be configured on the same RMRR due to above
> +         * two cases. We choose a simple policy to always favor stricter policy
> +         */
> +        for (j = 0; j < d_config->num_rdms; j++) {
> +            if (d_config->rdms[j].start ==
> +                                (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT)
> +            {
> +                if (d_config->rdms[j].flag != LIBXL_RDM_RESERVE_FLAG_STRICT)
> +                    d_config->rdms[j].flag = d_config->pcidevs[i].rdm_reserve;
> +                new = false;
> +                break;
> +            }
> +        }
> +
> +        if (new) {
> +            d_config->num_rdms++;
> +            d_config->rdms = libxl__realloc(NOGC, d_config->rdms,
> +                                d_config->num_rdms * sizeof(libxl_device_rdm));
> +
> +            /* This is a new entry. */

Delete this comment.

> +            d_config->rdms[d_config->num_rdms].start =
> +                                (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT;
> +            d_config->rdms[d_config->num_rdms].size =
> +                                (uint64_t)xrdm[0].nr_pages << XC_PAGE_SHIFT;
> +            d_config->rdms[d_config->num_rdms].flag = d_config->pcidevs[i].rdm_reserve;

Line too long.

> +        }
> +        free(xrdm);
> +    }
> +
> +    /*
> +     * Next step is to check and avoid potential conflict between RDM entries
> +     * and guest RAM. To avoid intrusive impact to existing memory layout
> +     * {lowmem, mmio, highmem} which is passed around various function blocks,
> +     * below conflicts are not handled which are rare and handling them would
> +     * lead to a more scattered layout:
> +     *  - RMRR in highmem area (>4G)
> +     *  - RMRR lower than a defined memory boundary (e.g. 2G)
> +     * Otherwise for conflicts between boundary and 4G, we'll simply move lowmem
> +     * end below reserved region to solve conflict.
> +     *
> +     * If a conflict is detected on a given RMRR entry, an error will be
> +     * returned if 'strict' policy is specified. Instead, if 'relaxed' policy
> +     * specified, this conflict is treated just as a warning, but we mark this
> +     * RMRR entry as INVALID to indicate that this entry shouldn't be exposed
> +     * to hvmloader.
> +     *
> +     * Firstly we should check the case of rdm < 4G because we may need to
> +     * expand highmem_end.
> +     */
> +    for (i = 0; i < d_config->num_rdms; i++) {
> +        rdm_start = d_config->rdms[i].start;
> +        rdm_size = d_config->rdms[i].size;
> +        conflict = overlaps_rdm(0, args->lowmem_size, rdm_start, rdm_size);
> +
> +        if (!conflict)
> +            continue;
> +
> +        /* Just check if RDM > our memory boundary. */
> +        if (rdm_start > rdm_mem_boundary) {
> +            /*
> +             * We will move downwards lowmem_end so we have to expand
> +             * highmem_end.
> +             */
> +            highmem_end += (args->lowmem_size - rdm_start);
> +            /* Now move downwards lowmem_end. */
> +            args->lowmem_size = rdm_start;
> +        }
> +    }
> +
> +    /*
> +     * Finally we can take same policy to check lowmem(< 2G) and
> +     * highmem adjusted above.
> +     */
> +    for (i = 0; i < d_config->num_rdms; i++) {
> +        rdm_start = d_config->rdms[i].start;
> +        rdm_size = d_config->rdms[i].size;
> +        /* Does this entry conflict with lowmem? */
> +        conflict = overlaps_rdm(0, args->lowmem_size,
> +                                rdm_start, rdm_size);
> +        /* Does this entry conflict with highmem? */
> +        conflict |= overlaps_rdm((1ULL<<32),
> +                                 highmem_end - (1ULL<<32),
> +                                 rdm_start, rdm_size);
> +
> +        if (!conflict)
> +            continue;
> +
> +        if(d_config->rdms[i].flag == LIBXL_RDM_RESERVE_FLAG_STRICT) {
> +            LOG(ERROR, "RDM conflict at 0x%lx.\n", d_config->rdms[i].start);
> +            goto out;
> +        } else {
> +            LOG(WARN, "Ignoring RDM conflict at 0x%lx.\n",
> +                d_config->rdms[i].start);
> +
> +            /*
> +             * Then mask this INVALID to indicate we shouldn't expose this
> +             * to hvmloader.
> +             */
> +            d_config->rdms[i].flag = LIBXL_RDM_RESERVE_FLAG_INVALID;
> +        }
> +    }
> +
> +    return 0;
> +
> + out:
> +    return -1;

Please return libxl error code.
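That is, rather than returning a raw -1, keep an rc and hand back a proper
libxl error code, e.g. (sketch):

        if (d_config->rdms[i].flag == LIBXL_RDM_RESERVE_FLAG_STRICT) {
            LOG(ERROR, "RDM conflict at 0x%lx.\n", d_config->rdms[i].start);
            rc = ERROR_FAIL;
            goto out;
        }

        ...

        rc = 0;
    out:
        return rc;

ERROR_FAIL is fine; ERROR_INVAL would also be defensible given the failure
comes from the configured (strict) policy.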
> +}
> +
>  const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config)
>  {
>      const libxl_vnc_info *vnc = NULL;
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> index a0c9850..84d5465 100644
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -914,12 +914,14 @@ out:
>  }
>
>  int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
> -              libxl_domain_build_info *info,
> +              libxl_domain_config *d_config,
>                libxl__domain_build_state *state)
>  {
>      libxl_ctx *ctx = libxl__gc_owner(gc);
>      struct xc_hvm_build_args args = {};
>      int ret, rc = ERROR_FAIL;
> +    libxl_domain_build_info *const info = &d_config->b_info;
> +    uint64_t rdm_mem_boundary, mmio_start;
>
>      memset(&args, 0, sizeof(struct xc_hvm_build_args));
>      /* The params from the configuration file are in Mb, which are then
> @@ -928,6 +930,7 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
>       * Do all this in one step here...
>       */
>      args.mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
> +    args.lowmem_size = min((uint64_t)(1ULL << 32), args.mem_size);
>      args.mem_target = (uint64_t)(info->target_memkb - info->video_memkb) << 10;
>      args.claim_enabled = libxl_defbool_val(info->claim_mode);
>      if (info->u.hvm.mmio_hole_memkb) {
> @@ -937,6 +940,28 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
>          if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START)
>              args.mmio_size = info->u.hvm.mmio_hole_memkb << 10;
>      }
> +
> +    if (args.mmio_size == 0)
> +        args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
> +    mmio_start = (1ull << 32) - args.mmio_size;
> +
> +    if (args.lowmem_size > mmio_start)
> +        args.lowmem_size = mmio_start;
> +
> +    /*
> +     * We'd like to set a memory boundary to determine if we need to check
> +     * any overlap with reserved device memory.
> +     */
> +    rdm_mem_boundary = 0x80000000;
> +    if (info->rdm_mem_boundary_memkb)

I think you mean info->rdm_mem_boundary_memkb != LIBXL_MEMKB_DEFAULT?

Wei.
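P.S. To spell out that last point, what I am after is roughly this (sketch,
assuming rdm_mem_boundary_memkb gets the usual LIBXL_MEMKB_DEFAULT
treatment in the setdefault path):

    rdm_mem_boundary = 0x80000000;          /* documented 2G default */
    if (info->rdm_mem_boundary_memkb != LIBXL_MEMKB_DEFAULT)
        rdm_mem_boundary = info->rdm_mem_boundary_memkb << 10;

A plain "if (info->rdm_mem_boundary_memkb)" would treat the DEFAULT
sentinel as a user-supplied value. Alternatively the 2G default could be
applied in libxl__domain_build_info_setdefault() so this code only ever
sees a concrete number.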