Re: [Xen-devel] [RFC][v2][PATCH 04/14] tools/libxl: detect and avoid conflicts with RDM
On Fri, May 22, 2015 at 05:35:04PM +0800, Tiejun Chen wrote:
> While building a VM, HVM domain builder provides struct hvm_info_table{}
> to help hvmloader. Currently it includes two fields to construct guest
> e820 table by hvmloader, low_mem_pgend and high_mem_pgend. So we should
> check them to fix any conflict with RAM.
>
> RMRR can reside in address space beyond 4G theoretically, but we never
> see this in real world. So in order to avoid breaking highmem layout
> we don't solve highmem conflict. Note this means highmem rmrr could still
> be supported if no conflict.
>
> But in the case of lowmem, RMRR probably scatter the whole RAM space.
> Especially multiple RMRR entries would worsen this to lead a complicated
> memory layout. And then its hard to extend hvm_info_table{} to work
> hvmloader out. So here we're trying to figure out a simple solution to
> avoid breaking existing layout. So when a conflict occurs,
>
> #1. Above a predefined boundary (default 2G)
> - move lowmem_end below reserved region to solve conflict;
>
> #2. Below a predefined boundary (default 2G)
> - Check strict/relaxed policy.
> "strict" policy leads to fail libxl. Note when both policies
> are specified on a given region, 'strict' is always preferred.
> "relaxed" policy issue a warning message and also mask this entry
> INVALID
> to indicate we shouldn't expose this entry to hvmloader.
>
> Note this predefined boundary can be changes with the parameter
> "rdm_mem_boundary" in .cfg file.
>
> Signed-off-by: Tiejun Chen <tiejun.chen@xxxxxxxxx>
> ---
It would be better if you wrote down what you changed in this version after
the "---" marker.
What we normally do is
libxl: implement FOO
FOO is needed because ...
Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
changes in vN:
* bar -> baz
* more comments
---
The stuff between the two "---" markers will be automatically discarded when
committing.
> docs/man/xl.cfg.pod.5 | 21 ++++
> tools/libxc/include/xenguest.h | 1 +
> tools/libxc/xc_hvm_build_x86.c | 25 ++--
> tools/libxl/libxl_create.c | 2 +-
> tools/libxl/libxl_dm.c | 253 +++++++++++++++++++++++++++++++++++++++++
> tools/libxl/libxl_dom.c | 27 ++++-
> tools/libxl/libxl_internal.h | 11 +-
> tools/libxl/libxl_types.idl | 8 ++
> tools/libxl/xl_cmdimpl.c | 3 +
> 9 files changed, 337 insertions(+), 14 deletions(-)
>
> diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
> index 12c34c4..80e3930 100644
> --- a/docs/man/xl.cfg.pod.5
> +++ b/docs/man/xl.cfg.pod.5
> @@ -764,6 +764,27 @@ is default.
>
> Note this would override global B<rdm> option.
>
> +=item B<rdm_mem_boundary=MBYTES>
> +
> +Number of megabytes to set a boundary for checking rdm conflict.
> +
> +When RDM conflicts with RAM, RDM probably scatter the whole RAM space.
> +Especially multiple RMRR entries would worsen this to lead a complicated
> +memory layout. So here we're trying to figure out a simple solution to
> +avoid breaking existing layout. So when a conflict occurs,
> +
> + #1. Above a predefined boundary
> + - move lowmem_end below reserved region to solve conflict;
> +
> + #2. Below a predefined boundary
> + - Check strict/relaxed policy.
> + "strict" policy leads to fail libxl. Note when both policies
> + are specified on a given region, 'strict' is always preferred.
> + "relaxed" policy issue a warning message and also mask this entry
> INVALID
> + to indicate we shouldn't expose this entry to hvmloader.
> +
> +Her the default is 2G.
Typo "her".
I get the idea. I will leave grammar / syntax check to native speakers.
> +
> =back
>
> =back
> diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
> index 7581263..4cb7e9f 100644
> --- a/tools/libxc/include/xenguest.h
> +++ b/tools/libxc/include/xenguest.h
> @@ -234,6 +234,7 @@ struct xc_hvm_firmware_module {
> };
>
> struct xc_hvm_build_args {
> + uint64_t lowmem_size; /* All low memory size in bytes. */
You might find this value unnecessary with my patch to consolidate
memory layout generation in libxl?
> uint64_t mem_size; /* Memory size in bytes. */
> uint64_t mem_target; /* Memory target in bytes. */
> uint64_t mmio_size; /* Size of the MMIO hole in bytes. */
> diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c
> index e45ae4a..9a1567a 100644
> --- a/tools/libxc/xc_hvm_build_x86.c
> +++ b/tools/libxc/xc_hvm_build_x86.c
> @@ -21,6 +21,7 @@
> #include <stdlib.h>
> #include <unistd.h>
> #include <zlib.h>
> +#include <assert.h>
>
> #include "xg_private.h"
> #include "xc_private.h"
> @@ -98,11 +99,8 @@ static void build_hvm_info(void *hvm_info_page, uint64_t mem_size,
> uint8_t sum;
> int i;
>
> - if ( lowmem_end > mmio_start )
> - {
> - highmem_end = (1ull<<32) + (lowmem_end - mmio_start);
> - lowmem_end = mmio_start;
> - }
> + if ( args->mem_size > lowmem_end )
> + highmem_end = (1ull<<32) + (args->mem_size - lowmem_end);
>
> memset(hvm_info_page, 0, PAGE_SIZE);
>
> @@ -279,7 +277,7 @@ static int setup_guest(xc_interface *xch,
>
> elf_parse_binary(&elf);
> v_start = 0;
> - v_end = args->mem_size;
> + v_end = args->lowmem_size;
>
> if ( nr_pages > target_pages )
> memflags |= XENMEMF_populate_on_demand;
> @@ -344,8 +342,14 @@ static int setup_guest(xc_interface *xch,
>
> for ( i = 0; i < nr_pages; i++ )
> page_array[i] = i;
> - for ( i = mmio_start >> PAGE_SHIFT; i < nr_pages; i++ )
> - page_array[i] += mmio_size >> PAGE_SHIFT;
> + /*
> + * Actually v_end is args->lowmem_size, and we already adjusted
> + * this below mmio_start when we check rdm previously, so here
> + * this condition 'v_end <= mmio_start' is always true.
> + */
> + assert(v_end <= mmio_start);
> + for ( i = v_end >> PAGE_SHIFT; i < nr_pages; i++ )
> + page_array[i] += ((1ull << 32) - v_end) >> PAGE_SHIFT;
>
> /*
> * Try to claim pages for early warning of insufficient memory available.
> @@ -664,9 +668,6 @@ int xc_hvm_build(xc_interface *xch, uint32_t domid,
> if ( args.mem_target == 0 )
> args.mem_target = args.mem_size;
>
> - if ( args.mmio_size == 0 )
> - args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
> -
> /* An HVM guest must be initialised with at least 2MB memory. */
> if ( args.mem_size < (2ull << 20) || args.mem_target < (2ull << 20) )
> return -1;
> @@ -713,6 +714,8 @@ int xc_hvm_build_target_mem(xc_interface *xch,
> args.mem_size = (uint64_t)memsize << 20;
> args.mem_target = (uint64_t)target << 20;
> args.image_file_name = image_name;
> + if ( args.mmio_size == 0 )
> + args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
>
The above hunk can be simplified with my patch mentioned above.
> return xc_hvm_build(xch, domid, &args);
> }
> diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
> index d649ead..a782860 100644
> --- a/tools/libxl/libxl_create.c
> +++ b/tools/libxl/libxl_create.c
> @@ -451,7 +451,7 @@ int libxl__domain_build(libxl__gc *gc,
>
> switch (info->type) {
> case LIBXL_DOMAIN_TYPE_HVM:
> - ret = libxl__build_hvm(gc, domid, info, state);
> + ret = libxl__build_hvm(gc, domid, d_config, state);
> if (ret)
> goto out;
>
> diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
> index 0c6408d..85e5317 100644
> --- a/tools/libxl/libxl_dm.c
> +++ b/tools/libxl/libxl_dm.c
> @@ -90,6 +90,259 @@ const char *libxl__domain_device_model(libxl__gc *gc,
> return dm;
> }
>
> +static struct xen_reserved_device_memory
> +*xc_device_get_rdm(libxl__gc *gc,
> + uint32_t flag,
> + uint16_t seg,
> + uint8_t bus,
> + uint8_t devfn,
> + unsigned int *nr_entries)
> +{
> + struct xen_reserved_device_memory *xrdm = NULL;
> + int rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
> + xrdm, nr_entries);
> +
Please separate declaration and function call. Also change xrdm to NULL
in that function call.
> + assert( rc <= 0 );
> + /* "0" means we have no any rdm entry. */
> + if ( !rc )
> + goto out;
Also set *nr_entries = 0; otherwise you can't distinguish error vs 0
entries.
> +
> + if ( errno == ENOBUFS )
> + {
> + if ( (xrdm = malloc(*nr_entries *
> + sizeof(xen_reserved_device_memory_t))) == NULL )
Move xrdm = malloc out of "if".
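Putting these three points together, the start of this helper could look
roughly like the following. This is only an untested sketch to illustrate the
restructuring (keep whatever coding style fits the file), not a drop-in
replacement:

    struct xen_reserved_device_memory *xrdm = NULL;
    int rc;

    rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
                                       NULL, nr_entries);
    assert(rc <= 0);

    /* "0" means there is no RDM entry at all. */
    if (!rc) {
        *nr_entries = 0;
        goto out;
    }

    /* Any failure other than "buffer too small" is a real error. */
    if (errno != ENOBUFS) {
        LOG(ERROR, "Could not get reserved device memory maps.\n");
        goto out;
    }

    xrdm = malloc(*nr_entries * sizeof(xen_reserved_device_memory_t));
    if (!xrdm) {
        LOG(ERROR, "Could not allocate RDM buffer!\n");
        goto out;
    }

    /* Second call with a buffer large enough for *nr_entries entries. */
    rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
                                       xrdm, nr_entries);
    if (rc) {
        LOG(ERROR, "Could not get reserved device memory maps.\n");
        *nr_entries = 0;
        free(xrdm);
        xrdm = NULL;
    }

 out:
    return xrdm;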
> + {
> + LOG(ERROR, "Could not allocate RDM buffer!\n");
> + goto out;
> + }
> + rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
> + xrdm, nr_entries);
> + if ( rc )
> + {
> + LOG(ERROR, "Could not get reserved device memory maps.\n");
> + *nr_entries = 0;
> + free(xrdm);
> + xrdm = NULL;
> + }
> + }
> + else
> + LOG(ERROR, "Could not get reserved device memory maps.\n");
> +
> + out:
> + return xrdm;
> +}
> +
> +/*
> + * Check whether there exists rdm hole in the specified memory range.
> + * Returns true if exists, else returns false.
> + */
> +static bool overlaps_rdm(uint64_t start, uint64_t memsize,
> + uint64_t rdm_start, uint64_t rdm_size)
> +{
> + return (start + memsize > rdm_start) && (start < rdm_start + rdm_size);
> +}
> +
> +/*
> + * Check reported RDM regions and handle potential gfn conflicts according
> + * to user preferred policy.
> + *
> + * RMRR can reside in address space beyond 4G theoretically, but we never
> + * see this in real world. So in order to avoid breaking highmem layout
> + * we don't solve highmem conflict. Note this means highmem rmrr could still
> + * be supported if no conflict.
> + *
> + * But in the case of lowmem, RMRR probably scatter the whole RAM space.
> + * Especially multiple RMRR entries would worsen this to lead a complicated
> + * memory layout. And then its hard to extend hvm_info_table{} to work
> + * hvmloader out. So here we're trying to figure out a simple solution to
> + * avoid breaking existing layout. So when a conflict occurs,
> + *
> + * #1. Above a predefined boundary (default 2G)
> + * - Move lowmem_end below reserved region to solve conflict;
> + *
> + * #2. Below a predefined boundary (default 2G)
> + * - Check strict/relaxed policy.
> + * "strict" policy leads to fail libxl. Note when both policies
> + * are specified on a given region, 'strict' is always preferred.
> + * "relaxed" policy issue a warning message and also mask this entry
> + * INVALID to indicate we shouldn't expose this entry to hvmloader.
> + */
> +int libxl__domain_device_construct_rdm(libxl__gc *gc,
> + libxl_domain_config *d_config,
> + uint64_t rdm_mem_boundary,
> + struct xc_hvm_build_args *args)
> +{
> + int i, j, conflict;
> + struct xen_reserved_device_memory *xrdm = NULL;
> + uint64_t rdm_start, rdm_size, highmem_end = (1ULL << 32);
> + uint32_t type = d_config->b_info.rdm.type;
> + uint16_t seg;
> + uint8_t bus, devfn;
> +
> + /* Fix highmem. */
> + highmem_end += (args->mem_size - args->lowmem_size);
> +
> + /* Might not expose rdm. */
> + if (type == LIBXL_RDM_RESERVE_TYPE_NONE)
> + return 0;
> +
> + /* Query all RDM entries in this platform */
> + if (type == LIBXL_RDM_RESERVE_TYPE_HOST) {
> + unsigned int nr_entries;
> +
> + /* Collect all rdm info if exist. */
> + xrdm = xc_device_get_rdm(gc, PCI_DEV_RDM_ALL,
> + 0, 0, 0, &nr_entries);
> + if (!nr_entries)
> + return 0;
> +
> + d_config->num_rdms = nr_entries;
> + d_config->rdms = libxl__realloc(NOGC, d_config->rdms,
> + d_config->num_rdms * sizeof(libxl_device_rdm));
> +
> + for (i = 0; i < d_config->num_rdms; i++) {
> + d_config->rdms[i].start =
> + (uint64_t)xrdm[i].start_pfn << XC_PAGE_SHIFT;
> + d_config->rdms[i].size =
> + (uint64_t)xrdm[i].nr_pages << XC_PAGE_SHIFT;
> + d_config->rdms[i].flag = d_config->b_info.rdm.reserve;
> + }
> +
> + free(xrdm);
> + } else
> + d_config->num_rdms = 0;
> +
> + /* Query RDM entries per-device */
> + for (i = 0; i < d_config->num_pcidevs; i++) {
> + unsigned int nr_entries;
> +
Stray blank line.
> + bool new = true;
Need blank line here.
> + seg = d_config->pcidevs[i].domain;
> + bus = d_config->pcidevs[i].bus;
> + devfn = PCI_DEVFN(d_config->pcidevs[i].dev, d_config->pcidevs[i].func);
> + nr_entries = 0;
> + xrdm = xc_device_get_rdm(gc, ~PCI_DEV_RDM_ALL,
> + seg, bus, devfn, &nr_entries);
You didn't check that xrdm != NULL.
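For example (untested sketch; whether "NULL plus a non-zero count" is the
exact failure signal depends on how xc_device_get_rdm() ends up handling its
error paths, and the message is only illustrative):

        /* No RDM associated with this device. */
        if (!nr_entries)
            continue;
        /* A non-zero count with a NULL buffer means the lookup failed. */
        if (!xrdm) {
            LOG(ERROR, "Failed to get RDM info for %04x:%02x:%02x.%x.\n",
                seg, bus, d_config->pcidevs[i].dev,
                d_config->pcidevs[i].func);
            goto out;
        }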
> + /* No RDM to associated with this device. */
> + if (!nr_entries)
> + continue;
> +
> + /*
> + * Need to check whether this entry is already saved in the array.
> + * This could come from two cases:
> + *
> + * - user may configure to get all RMRRs in this platform, which
> + * is already queried before this point
> + * - or two assigned devices may share one RMRR entry
> + *
> + * different policies may be configured on the same RMRR due to above
> + * two cases. We choose a simple policy to always favor stricter policy
> + */
> + for (j = 0; j < d_config->num_rdms; j++) {
> + if (d_config->rdms[j].start ==
> + (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT)
> + {
> + if (d_config->rdms[j].flag != LIBXL_RDM_RESERVE_FLAG_STRICT)
> + d_config->rdms[j].flag = d_config->pcidevs[i].rdm_reserve;
> + new = false;
> + break;
> + }
> + }
> +
> + if (new) {
> + d_config->num_rdms++;
> + d_config->rdms = libxl__realloc(NOGC, d_config->rdms,
> + d_config->num_rdms * sizeof(libxl_device_rdm));
> +
> + /* This is a new entry. */
Delete this comment.
> + d_config->rdms[d_config->num_rdms].start =
> + (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT;
> + d_config->rdms[d_config->num_rdms].size =
> + (uint64_t)xrdm[0].nr_pages << XC_PAGE_SHIFT;
> + d_config->rdms[d_config->num_rdms].flag = d_config->pcidevs[i].rdm_reserve;
Line too long.
> + }
> + free(xrdm);
> + }
> +
> + /*
> + * Next step is to check and avoid potential conflict between RDM entries
> + * and guest RAM. To avoid intrusive impact to existing memory layout
> + * {lowmem, mmio, highmem} which is passed around various function blocks,
> + * below conflicts are not handled which are rare and handling them would
> + * lead to a more scattered layout:
> + * - RMRR in highmem area (>4G)
> + * - RMRR lower than a defined memory boundary (e.g. 2G)
> + * Otherwise for conflicts between boundary and 4G, we'll simply move lowmem
> + * end below reserved region to solve conflict.
> + *
> + * If a conflict is detected on a given RMRR entry, an error will be
> + * returned if 'strict' policy is specified. Instead, if 'relaxed' policy
> + * specified, this conflict is treated just as a warning, but we mark this
> + * RMRR entry as INVALID to indicate that this entry shouldn't be exposed
> + * to hvmloader.
> + *
> + * Firstly we should check the case of rdm < 4G because we may need to
> + * expand highmem_end.
> + */
> + for (i = 0; i < d_config->num_rdms; i++) {
> + rdm_start = d_config->rdms[i].start;
> + rdm_size = d_config->rdms[i].size;
> + conflict = overlaps_rdm(0, args->lowmem_size, rdm_start, rdm_size);
> +
> + if (!conflict)
> + continue;
> +
> + /* Just check if RDM > our memory boundary. */
> + if (rdm_start > rdm_mem_boundary) {
> + /*
> + * We will move downwards lowmem_end so we have to expand
> + * highmem_end.
> + */
> + highmem_end += (args->lowmem_size - rdm_start);
> + /* Now move downwards lowmem_end. */
> + args->lowmem_size = rdm_start;
> + }
> + }
> +
> + /*
> + * Finally we can take same policy to check lowmem(< 2G) and
> + * highmem adjusted above.
> + */
> + for (i = 0; i < d_config->num_rdms; i++) {
> + rdm_start = d_config->rdms[i].start;
> + rdm_size = d_config->rdms[i].size;
> + /* Does this entry conflict with lowmem? */
> + conflict = overlaps_rdm(0, args->lowmem_size,
> + rdm_start, rdm_size);
> + /* Does this entry conflict with highmem? */
> + conflict |= overlaps_rdm((1ULL<<32),
> + highmem_end - (1ULL<<32),
> + rdm_start, rdm_size);
> +
> + if (!conflict)
> + continue;
> +
> + if(d_config->rdms[i].flag == LIBXL_RDM_RESERVE_FLAG_STRICT) {
> + LOG(ERROR, "RDM conflict at 0x%lx.\n", d_config->rdms[i].start);
> + goto out;
> + } else {
> + LOG(WARN, "Ignoring RDM conflict at 0x%lx.\n",
> + d_config->rdms[i].start);
> +
> + /*
> + * Then mask this INVALID to indicate we shouldn't expose this
> + * to hvmloader.
> + */
> + d_config->rdms[i].flag = LIBXL_RDM_RESERVE_FLAG_INVALID;
> + }
> + }
> +
> + return 0;
> +
> + out:
> + return -1;
Please return a libxl error code.
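For instance (sketch only; ERROR_FAIL is just the obvious candidate, pick
whatever fits best):

    return 0;

 out:
    /* Report the failure with a libxl error code rather than a raw -1. */
    return ERROR_FAIL;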
> +}
> +
> const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config)
> {
> const libxl_vnc_info *vnc = NULL;
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> index a0c9850..84d5465 100644
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -914,12 +914,14 @@ out:
> }
>
> int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
> - libxl_domain_build_info *info,
> + libxl_domain_config *d_config,
> libxl__domain_build_state *state)
> {
> libxl_ctx *ctx = libxl__gc_owner(gc);
> struct xc_hvm_build_args args = {};
> int ret, rc = ERROR_FAIL;
> + libxl_domain_build_info *const info = &d_config->b_info;
> + uint64_t rdm_mem_boundary, mmio_start;
>
> memset(&args, 0, sizeof(struct xc_hvm_build_args));
> /* The params from the configuration file are in Mb, which are then
> @@ -928,6 +930,7 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
> * Do all this in one step here...
> */
> args.mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
> + args.lowmem_size = min((uint64_t)(1ULL << 32), args.mem_size);
> args.mem_target = (uint64_t)(info->target_memkb - info->video_memkb) << 10;
> args.claim_enabled = libxl_defbool_val(info->claim_mode);
> if (info->u.hvm.mmio_hole_memkb) {
> @@ -937,6 +940,28 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
> if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START)
> args.mmio_size = info->u.hvm.mmio_hole_memkb << 10;
> }
> +
> + if (args.mmio_size == 0)
> + args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
> + mmio_start = (1ull << 32) - args.mmio_size;
> +
> + if (args.lowmem_size > mmio_start)
> + args.lowmem_size = mmio_start;
> +
> + /*
> + * We'd like to set a memory boundary to determine if we need to check
> + * any overlap with reserved device memory.
> + */
> + rdm_mem_boundary = 0x80000000;
> + if (info->rdm_mem_boundary_memkb)
I think you mean info->rdm_mem_boundary_memkb != LIBXL_MEMKB_DEFAULT?
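I.e. something like the following (sketch; the kB-to-bytes conversion on the
last line is my guess at what the snipped remainder of this hunk does):

    rdm_mem_boundary = 0x80000000; /* default: 2 GiB */
    if (info->rdm_mem_boundary_memkb != LIBXL_MEMKB_DEFAULT)
        rdm_mem_boundary = (uint64_t)info->rdm_mem_boundary_memkb << 10;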
Wei.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel