x86/PoD: prevent guest from being destroyed upon early access to its memory When an external agent (e.g. a monitoring daemon) happens to access the memory of a PoD guest prior to setting the PoD target, that access must fail for there not being any page in the PoD cache, and only the space above the low 2Mb gets scanned for victim pages (while only the low 2Mb got real pages populated so far). To accommodate this - set the PoD target first - do all physmap population in PoD mode (i.e. not just large [2Mb or 1Gb] pages) - slightly lift the restrictions enforced by p2m_pod_set_mem_target() to accommodate the changed tools behavior Tested-by: Jürgen Groß (in a 4.0.x based incarnation) Signed-off-by: Jan Beulich --- a/tools/libxc/xc_hvm_build_x86.c +++ b/tools/libxc/xc_hvm_build_x86.c @@ -160,7 +160,7 @@ static int setup_guest(xc_interface *xch int pod_mode = 0; if ( nr_pages > target_pages ) - pod_mode = 1; + pod_mode = XENMEMF_populate_on_demand; memset(&elf, 0, sizeof(elf)); if ( elf_init(&elf, image, image_size) != 0 ) @@ -197,6 +197,22 @@ static int setup_guest(xc_interface *xch for ( i = mmio_start >> PAGE_SHIFT; i < nr_pages; i++ ) page_array[i] += mmio_size >> PAGE_SHIFT; + if ( pod_mode ) + { + /* + * Subtract 0x20 from target_pages for the VGA "hole". Xen will + * adjust the PoD cache size so that domain tot_pages will be + * target_pages - 0x20 after this call. + */ + rc = xc_domain_set_pod_target(xch, dom, target_pages - 0x20, + NULL, NULL, NULL); + if ( rc != 0 ) + { + PERROR("Could not set PoD target for HVM guest.\n"); + goto error_out; + } + } + /* * Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. * @@ -208,7 +224,7 @@ static int setup_guest(xc_interface *xch * ensure that we can be preempted and hence dom0 remains responsive. 
*/ rc = xc_domain_populate_physmap_exact( - xch, dom, 0xa0, 0, 0, &page_array[0x00]); + xch, dom, 0xa0, 0, pod_mode, &page_array[0x00]); cur_pages = 0xc0; stat_normal_pages = 0xc0; while ( (rc == 0) && (nr_pages > cur_pages) ) @@ -247,8 +263,7 @@ static int setup_guest(xc_interface *xch sp_extents[i] = page_array[cur_pages+(i< 0 ) { @@ -285,8 +300,7 @@ static int setup_guest(xc_interface *xch sp_extents[i] = page_array[cur_pages+(i< 0 ) { @@ -302,19 +316,12 @@ static int setup_guest(xc_interface *xch if ( count != 0 ) { rc = xc_domain_populate_physmap_exact( - xch, dom, count, 0, 0, &page_array[cur_pages]); + xch, dom, count, 0, pod_mode, &page_array[cur_pages]); cur_pages += count; stat_normal_pages += count; } } - /* Subtract 0x20 from target_pages for the VGA "hole". Xen will - * adjust the PoD cache size so that domain tot_pages will be - * target_pages - 0x20 after this call. */ - if ( pod_mode ) - rc = xc_domain_set_pod_target(xch, dom, target_pages - 0x20, - NULL, NULL, NULL); - if ( rc != 0 ) { PERROR("Could not allocate memory for HVM guest."); --- a/xen/arch/x86/mm/p2m-pod.c +++ b/xen/arch/x86/mm/p2m-pod.c @@ -344,8 +344,9 @@ p2m_pod_set_mem_target(struct domain *d, pod_lock(p2m); - /* P == B: Nothing to do. */ - if ( p2m->pod.entry_count == 0 ) + /* P == B: Nothing to do (unless the guest is being created). */ + populated = d->tot_pages - p2m->pod.count; + if ( populated > 0 && p2m->pod.entry_count == 0 ) goto out; /* Don't do anything if the domain is being torn down */ @@ -357,13 +358,11 @@ p2m_pod_set_mem_target(struct domain *d, if ( target < d->tot_pages ) goto out; - populated = d->tot_pages - p2m->pod.count; - pod_target = target - populated; /* B < T': Set the cache size equal to # of outstanding entries, * let the balloon driver fill in the rest. */ - if ( pod_target > p2m->pod.entry_count ) + if ( populated > 0 && pod_target > p2m->pod.entry_count ) pod_target = p2m->pod.entry_count; ASSERT( pod_target >= p2m->pod.count );