Re: [Xen-devel] [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out
This is a bit messy still. On 64-bit Xen we could just use
virt_to_page() on the map pointers rather than storing their
page_info pointers separately.
On 32-bit, maybe we could use the existing linear mappings to look up
the MFN instead. Or would it be easier to add a function to the
map_domain_page() family that does va->mfn for mapped frames?
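
Something along these lines is what I have in mind -- an untested
sketch only, and domain_page_map_to_mfn() is a name I've just made up;
the 32-bit arm assumes the mapcache l1 entries are reachable through
the linear page tables:

    /* Hypothetical helper: va -> mfn for map_domain_page() mappings. */
    static inline unsigned long domain_page_map_to_mfn(const void *va)
    {
    #ifdef __x86_64__
        /* 64-bit: maps live in the 1:1 region, so this is arithmetic. */
        return virt_to_mfn(va);
    #else
        /* 32-bit: read the PTE back out via the linear page tables. */
        l1_pgentry_t l1e =
            __linear_l1_table[l1_linear_offset((unsigned long)va)];
        return l1e_get_pfn(l1e);
    #endif
    }
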
Keir, any opinion?
Tim.
At 08:41 -0500 on 24 Nov (1322124091), Andres Lagar-Cavilla wrote:
>  xen/arch/x86/hvm/hvm.c             |  69 ++++++++++++++++++++++++-------------
>  xen/arch/x86/hvm/nestedhvm.c       |   4 +-
>  xen/arch/x86/hvm/svm/nestedsvm.c   |  23 ++++++------
>  xen/arch/x86/hvm/vmx/vvmx.c        |  36 +++++++++++--------
>  xen/include/asm-x86/hvm/hvm.h      |   9 +++-
>  xen/include/asm-x86/hvm/vcpu.h     |   1 +
>  xen/include/asm-x86/hvm/vmx/vvmx.h |   1 +
>  7 files changed, 88 insertions(+), 55 deletions(-)
>
>
> The nested hvm code maps pages of the guest hvm. These maps live beyond
> a hypervisor entry/exit pair, and thus their liveness cannot be ensured
> with get_gfn/put_gfn critical sections. Ensure their liveness by
> increasing the page ref count, instead.
>
> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
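
[ For reference, a minimal sketch of the calling convention this patch
  introduces (not part of the patch; error handling elided): the map
  routines take a page ref on success, which hvm_unmap_guest_frame()
  drops again. ]

    struct page_info *page = NULL;
    void *va = hvm_map_guest_frame_ro(gfn, &page); /* takes a page ref */

    if ( va == NULL )
        return 0;                       /* no mapping => no ref taken */

    /* ... the map may now safely outlive this entry/exit pair ... */

    hvm_unmap_guest_frame(va, page);    /* unmap and drop the ref */
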
>
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/hvm.c
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -1801,11 +1801,15 @@ int hvm_virtual_to_linear_addr(
> return 0;
> }
>
> -/* We leave this function holding a lock on the p2m entry */
> -static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable)
> +/* On non-NULL return, we leave this function holding an additional
> + * ref on the underlying mfn, if any */
> +static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable,
> + struct page_info **page)
> {
> + void *map;
> unsigned long mfn;
> p2m_type_t p2mt;
> + struct page_info *pg;
> struct domain *d = current->domain;
>
> mfn = mfn_x(writable
> @@ -1828,27 +1832,43 @@ static void *__hvm_map_guest_frame(unsig
> if ( writable )
> paging_mark_dirty(d, mfn);
>
> - return map_domain_page(mfn);
> + pg = mfn_to_page(mfn);
> + if (page)
> + {
> + page_get_owner_and_reference(pg);
> + *page = pg;
> + }
> +
> + map = map_domain_page(mfn);
> + put_gfn(d, gfn);
> + return map;
> }
>
> -void *hvm_map_guest_frame_rw(unsigned long gfn)
> +void *hvm_map_guest_frame_rw(unsigned long gfn,
> + struct page_info **page)
> {
> - return __hvm_map_guest_frame(gfn, 1);
> + return __hvm_map_guest_frame(gfn, 1, page);
> }
>
> -void *hvm_map_guest_frame_ro(unsigned long gfn)
> +void *hvm_map_guest_frame_ro(unsigned long gfn,
> + struct page_info **page)
> {
> - return __hvm_map_guest_frame(gfn, 0);
> + return __hvm_map_guest_frame(gfn, 0, page);
> }
>
> -void hvm_unmap_guest_frame(void *p)
> +void hvm_unmap_guest_frame(void *p, struct page_info *page)
> {
> if ( p )
> + {
> unmap_domain_page(p);
> + if ( page )
> + put_page(page);
> + }
> }
>
> -static void *hvm_map_entry(unsigned long va, unsigned long *gfn)
> +static void *hvm_map_entry(unsigned long va, struct page_info **page)
> {
> + unsigned long gfn;
> uint32_t pfec;
> char *v;
>
> @@ -1865,11 +1885,11 @@ static void *hvm_map_entry(unsigned long
> * treat it as a kernel-mode read (i.e. no access checks).
> */
> pfec = PFEC_page_present;
> - *gfn = paging_gva_to_gfn(current, va, &pfec);
> + gfn = paging_gva_to_gfn(current, va, &pfec);
> if ( (pfec == PFEC_page_paged) || (pfec == PFEC_page_shared) )
> goto fail;
>
> - v = hvm_map_guest_frame_rw(*gfn);
> + v = hvm_map_guest_frame_rw(gfn, page);
> if ( v == NULL )
> goto fail;
>
> @@ -1880,11 +1900,9 @@ static void *hvm_map_entry(unsigned long
> return NULL;
> }
>
> -static void hvm_unmap_entry(void *p, unsigned long gfn)
> +static void hvm_unmap_entry(void *p, struct page_info *page)
> {
> - hvm_unmap_guest_frame(p);
> - if ( p && (gfn != INVALID_GFN) )
> - put_gfn(current->domain, gfn);
> + hvm_unmap_guest_frame(p, page);
> }
>
> static int hvm_load_segment_selector(
> @@ -1896,7 +1914,7 @@ static int hvm_load_segment_selector(
> int fault_type = TRAP_invalid_tss;
> struct cpu_user_regs *regs = guest_cpu_user_regs();
> struct vcpu *v = current;
> - unsigned long pdesc_gfn = INVALID_GFN;
> + struct page_info *pdesc_pg = NULL;
>
> if ( regs->eflags & X86_EFLAGS_VM )
> {
> @@ -1930,7 +1948,7 @@ static int hvm_load_segment_selector(
> if ( ((sel & 0xfff8) + 7) > desctab.limit )
> goto fail;
>
> - pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_gfn);
> + pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_pg);
> if ( pdesc == NULL )
> goto hvm_map_fail;
>
> @@ -1990,7 +2008,7 @@ static int hvm_load_segment_selector(
> desc.b |= 0x100;
>
> skip_accessed_flag:
> - hvm_unmap_entry(pdesc, pdesc_gfn);
> + hvm_unmap_entry(pdesc, pdesc_pg);
>
> segr.base = (((desc.b << 0) & 0xff000000u) |
> ((desc.b << 16) & 0x00ff0000u) |
> @@ -2006,7 +2024,7 @@ static int hvm_load_segment_selector(
> return 0;
>
> unmap_and_fail:
> - hvm_unmap_entry(pdesc, pdesc_gfn);
> + hvm_unmap_entry(pdesc, pdesc_pg);
> fail:
> hvm_inject_exception(fault_type, sel & 0xfffc, 0);
> hvm_map_fail:
> @@ -2021,7 +2039,8 @@ void hvm_task_switch(
> struct cpu_user_regs *regs = guest_cpu_user_regs();
> struct segment_register gdt, tr, prev_tr, segr;
> struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
> - unsigned long eflags, optss_gfn = INVALID_GFN, nptss_gfn = INVALID_GFN;
> + unsigned long eflags;
> + struct page_info *optss_pg = NULL, *nptss_pg = NULL;
> int exn_raised, rc;
> struct {
> u16 back_link,__blh;
> @@ -2047,11 +2066,13 @@ void hvm_task_switch(
> goto out;
> }
>
> - optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), &optss_gfn);
> + optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8),
> + &optss_pg);
> if ( optss_desc == NULL )
> goto out;
>
> - nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), &nptss_gfn);
> + nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8),
> + &nptss_pg);
> if ( nptss_desc == NULL )
> goto out;
>
> @@ -2216,8 +2237,8 @@ void hvm_task_switch(
> }
>
> out:
> - hvm_unmap_entry(optss_desc, optss_gfn);
> - hvm_unmap_entry(nptss_desc, nptss_gfn);
> + hvm_unmap_entry(optss_desc, optss_pg);
> + hvm_unmap_entry(nptss_desc, nptss_pg);
> }
>
> #define HVMCOPY_from_guest (0u<<0)
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/nestedhvm.c
> --- a/xen/arch/x86/hvm/nestedhvm.c
> +++ b/xen/arch/x86/hvm/nestedhvm.c
> @@ -56,9 +56,11 @@ nestedhvm_vcpu_reset(struct vcpu *v)
> nv->nv_ioportED = 0;
>
> if (nv->nv_vvmcx)
> - hvm_unmap_guest_frame(nv->nv_vvmcx);
> + hvm_unmap_guest_frame(nv->nv_vvmcx,
> + nv->nv_vvmcx_pg);
> nv->nv_vvmcx = NULL;
> nv->nv_vvmcxaddr = VMCX_EADDR;
> + nv->nv_vvmcx_pg = NULL;
> nv->nv_flushp2m = 0;
> nv->nv_p2m = NULL;
>
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/svm/nestedsvm.c
> --- a/xen/arch/x86/hvm/svm/nestedsvm.c
> +++ b/xen/arch/x86/hvm/svm/nestedsvm.c
> @@ -71,20 +71,18 @@ int nestedsvm_vmcb_map(struct vcpu *v, u
> if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) {
> ASSERT(nv->nv_vvmcx != NULL);
> ASSERT(nv->nv_vvmcxaddr != VMCX_EADDR);
> - hvm_unmap_guest_frame(nv->nv_vvmcx);
> + hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcx_pg);
> nv->nv_vvmcx = NULL;
> nv->nv_vvmcxaddr = VMCX_EADDR;
> + nv->nv_vvmcx_pg = NULL;
> }
>
> if (nv->nv_vvmcx == NULL) {
> - nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT);
> + nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT,
> + &nv->nv_vvmcx_pg);
> if (nv->nv_vvmcx == NULL)
> return 0;
> nv->nv_vvmcxaddr = vmcbaddr;
> - /* put_gfn here even though the map survives beyond this caller.
> - * The map can likely survive beyond a hypervisor exit, thus we
> - * need to put the gfn */
> - put_gfn(current->domain, vmcbaddr >> PAGE_SHIFT);
> }
>
> return 1;
> @@ -336,6 +334,7 @@ static int nsvm_vmrun_permissionmap(stru
> unsigned int i;
> enum hvm_copy_result ret;
> unsigned long *ns_viomap;
> + struct page_info *ns_viomap_pg = NULL;
> bool_t ioport_80, ioport_ed;
>
> ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
> @@ -353,12 +352,12 @@ static int nsvm_vmrun_permissionmap(stru
> svm->ns_oiomap_pa = svm->ns_iomap_pa;
> svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;
>
> - ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT);
> + ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT,
> + &ns_viomap_pg);
> ASSERT(ns_viomap != NULL);
> ioport_80 = test_bit(0x80, ns_viomap);
> ioport_ed = test_bit(0xed, ns_viomap);
> - hvm_unmap_guest_frame(ns_viomap);
> - put_gfn(current->domain, svm->ns_iomap_pa >> PAGE_SHIFT);
> + hvm_unmap_guest_frame(ns_viomap, ns_viomap_pg);
>
> svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
>
> @@ -863,6 +862,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t
> uint16_t port;
> bool_t enabled;
> unsigned long gfn = 0; /* gcc ... */
> + struct page_info *io_bitmap_pg = NULL;
>
> ioinfo.bytes = exitinfo1;
> port = ioinfo.fields.port;
> @@ -880,7 +880,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t
> break;
> }
>
> - io_bitmap = hvm_map_guest_frame_ro(gfn);
> + io_bitmap = hvm_map_guest_frame_ro(gfn, &io_bitmap_pg);
> if (io_bitmap == NULL) {
> gdprintk(XENLOG_ERR,
> "IOIO intercept: mapping of permission map failed\n");
> @@ -888,8 +888,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t
> }
>
> enabled = test_bit(port, io_bitmap);
> - hvm_unmap_guest_frame(io_bitmap);
> - put_gfn(current->domain, gfn);
> + hvm_unmap_guest_frame(io_bitmap, io_bitmap_pg);
>
> if (!enabled)
> return NESTEDHVM_VMEXIT_HOST;
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/vmx/vvmx.c
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -44,10 +44,13 @@ int nvmx_vcpu_initialise(struct vcpu *v)
> nvmx->vmxon_region_pa = 0;
> nvcpu->nv_vvmcx = NULL;
> nvcpu->nv_vvmcxaddr = VMCX_EADDR;
> + nvcpu->nv_vvmcx_pg = NULL;
> nvmx->intr.intr_info = 0;
> nvmx->intr.error_code = 0;
> nvmx->iobitmap[0] = NULL;
> nvmx->iobitmap[1] = NULL;
> + nvmx->iobitmap_pg[0] = NULL;
> + nvmx->iobitmap_pg[1] = NULL;
> return 0;
> out:
> return -ENOMEM;
> @@ -558,12 +561,14 @@ static void __map_io_bitmap(struct vcpu
>
> index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
> if (nvmx->iobitmap[index])
> - hvm_unmap_guest_frame (nvmx->iobitmap[index]);
> + {
> + hvm_unmap_guest_frame (nvmx->iobitmap[index],
> + nvmx->iobitmap_pg[index]);
> + nvmx->iobitmap_pg[index] = NULL;
> + }
> gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
> - nvmx->iobitmap[index] = hvm_map_guest_frame_ro (gpa >> PAGE_SHIFT);
> - /* See comment in nestedsvm_vmcb_map re putting this gfn and
> - * liveness of the map it backs */
> - put_gfn(current->domain, gpa >> PAGE_SHIFT);
> + nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT,
> + &nvmx->iobitmap_pg[index]);
> }
>
> static inline void map_io_bitmap_all(struct vcpu *v)
> @@ -580,13 +585,16 @@ static void nvmx_purge_vvmcs(struct vcpu
>
> __clear_current_vvmcs(v);
> if ( nvcpu->nv_vvmcxaddr != VMCX_EADDR )
> - hvm_unmap_guest_frame(nvcpu->nv_vvmcx);
> - nvcpu->nv_vvmcx == NULL;
> + hvm_unmap_guest_frame(nvcpu->nv_vvmcx, nvcpu->nv_vvmcx_pg);
> + nvcpu->nv_vvmcx = NULL;
> nvcpu->nv_vvmcxaddr = VMCX_EADDR;
> + nvcpu->nv_vvmcx_pg = NULL;
> for (i=0; i<2; i++) {
> if ( nvmx->iobitmap[i] ) {
> - hvm_unmap_guest_frame(nvmx->iobitmap[i]);
> + hvm_unmap_guest_frame(nvmx->iobitmap[i],
> + nvmx->iobitmap_pg[i]);
> nvmx->iobitmap[i] = NULL;
> + nvmx->iobitmap_pg[i] = NULL;
> }
> }
> }
> @@ -1138,12 +1146,10 @@ int nvmx_handle_vmptrld(struct cpu_user_
>
> if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR )
> {
> - nvcpu->nv_vvmcx = hvm_map_guest_frame_rw (gpa >> PAGE_SHIFT);
> + nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT,
> + &nvcpu->nv_vvmcx_pg);
> nvcpu->nv_vvmcxaddr = gpa;
> map_io_bitmap_all (v);
> - /* See comment in nestedsvm_vmcb_map regarding putting this
> - * gfn and liveness of the map that uses it */
> - put_gfn(current->domain, gpa >> PAGE_SHIFT);
> }
>
> vmreturn(regs, VMSUCCEED);
> @@ -1201,11 +1207,11 @@ int nvmx_handle_vmclear(struct cpu_user_
> else
> {
> /* Even if this VMCS isn't the current one, we must clear it. */
> - vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT);
> + struct page_info *vvmcs_pg = NULL;
> + vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, &vvmcs_pg);
> if ( vvmcs )
> __set_vvmcs(vvmcs, NVMX_LAUNCH_STATE, 0);
> - hvm_unmap_guest_frame(vvmcs);
> - put_gfn(current->domain, gpa >> PAGE_SHIFT);
> + hvm_unmap_guest_frame(vvmcs, vvmcs_pg);
> }
>
> vmreturn(regs, VMSUCCEED);
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/hvm.h
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -26,6 +26,7 @@
> #include <asm/hvm/asid.h>
> #include <public/domctl.h>
> #include <public/hvm/save.h>
> +#include <asm/mm.h>
>
> /* Interrupt acknowledgement sources. */
> enum hvm_intsrc {
> @@ -392,9 +393,11 @@ int hvm_virtual_to_linear_addr(
> unsigned int addr_size,
> unsigned long *linear_addr);
>
> -void *hvm_map_guest_frame_rw(unsigned long gfn);
> -void *hvm_map_guest_frame_ro(unsigned long gfn);
> -void hvm_unmap_guest_frame(void *p);
> +void *hvm_map_guest_frame_rw(unsigned long gfn,
> + struct page_info **page);
> +void *hvm_map_guest_frame_ro(unsigned long gfn,
> + struct page_info **page);
> +void hvm_unmap_guest_frame(void *p, struct page_info *page);
>
> static inline void hvm_set_info_guest(struct vcpu *v)
> {
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vcpu.h
> --- a/xen/include/asm-x86/hvm/vcpu.h
> +++ b/xen/include/asm-x86/hvm/vcpu.h
> @@ -77,6 +77,7 @@ struct nestedvcpu {
> void *nv_n2vmcx; /* shadow VMCB/VMCS used to run l2 guest */
>
> uint64_t nv_vvmcxaddr; /* l1 guest physical address of nv_vvmcx */
> + struct page_info *nv_vvmcx_pg; /* page where nv_vvmcx resides */
> uint64_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
> uint64_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
>
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vmx/vvmx.h
> --- a/xen/include/asm-x86/hvm/vmx/vvmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
> @@ -26,6 +26,7 @@
> struct nestedvmx {
> paddr_t vmxon_region_pa;
> void *iobitmap[2]; /* map (va) of L1 guest I/O bitmap */
> + struct page_info *iobitmap_pg[2]; /* pages backing L1 guest I/O bitmap */
> /* deferred nested interrupt */
> struct {
> unsigned long intr_info;
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel