Re: [Xen-devel] [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out
This is a bit messy still. On 64-bit Xen we could just use virt_to_page() on the map pointers rather than storing their page_info pointers separately. On 32-bit, maybe we could use the existing linear mappings to look up the MFN instead. Or would it be easier to add a function to the map_domain_page() family that does va->mfn for mapped frames? (Sketches of the calling convention this patch introduces, and of the virt_to_page() alternative, follow the quoted patch below.)

Keir, any opinion?

Tim.

At 08:41 -0500 on 24 Nov (1322124091), Andres Lagar-Cavilla wrote:
>  xen/arch/x86/hvm/hvm.c             | 69 ++++++++++++++++++++++++-------------
>  xen/arch/x86/hvm/nestedhvm.c       |  4 +-
>  xen/arch/x86/hvm/svm/nestedsvm.c   | 23 ++++++------
>  xen/arch/x86/hvm/vmx/vvmx.c        | 36 +++++++++++--------
>  xen/include/asm-x86/hvm/hvm.h      |  9 +++-
>  xen/include/asm-x86/hvm/vcpu.h     |  1 +
>  xen/include/asm-x86/hvm/vmx/vvmx.h |  1 +
>  7 files changed, 88 insertions(+), 55 deletions(-)
>
>
> The nested hvm code maps pages of the guest hvm. These maps live beyond
> a hypervisor entry/exit pair, and thus their liveness cannot be ensured
> with get_gfn/put_gfn critical sections. Ensure their liveness by
> increasing the page ref count, instead.
>
> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
>
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/hvm.c
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -1801,11 +1801,15 @@ int hvm_virtual_to_linear_addr(
>          return 0;
>  }
>
> -/* We leave this function holding a lock on the p2m entry */
> -static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable)
> +/* On non-NULL return, we leave this function holding an additional
> + * ref on the underlying mfn, if any */
> +static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable,
> +                                   struct page_info **page)
>  {
> +    void *map;
>      unsigned long mfn;
>      p2m_type_t p2mt;
> +    struct page_info *pg;
>      struct domain *d = current->domain;
>
>      mfn = mfn_x(writable
> @@ -1828,27 +1832,43 @@ static void *__hvm_map_guest_frame(unsig
>      if ( writable )
>          paging_mark_dirty(d, mfn);
>
> -    return map_domain_page(mfn);
> +    pg = mfn_to_page(mfn);
> +    if (page)
> +    {
> +        page_get_owner_and_reference(pg);
> +        *page = pg;
> +    }
> +
> +    map = map_domain_page(mfn);
> +    put_gfn(d, gfn);
> +    return map;
>  }
>
> -void *hvm_map_guest_frame_rw(unsigned long gfn)
> +void *hvm_map_guest_frame_rw(unsigned long gfn,
> +                             struct page_info **page)
>  {
> -    return __hvm_map_guest_frame(gfn, 1);
> +    return __hvm_map_guest_frame(gfn, 1, page);
>  }
>
> -void *hvm_map_guest_frame_ro(unsigned long gfn)
> +void *hvm_map_guest_frame_ro(unsigned long gfn,
> +                             struct page_info **page)
>  {
> -    return __hvm_map_guest_frame(gfn, 0);
> +    return __hvm_map_guest_frame(gfn, 0, page);
>  }
>
> -void hvm_unmap_guest_frame(void *p)
> +void hvm_unmap_guest_frame(void *p, struct page_info *page)
>  {
>      if ( p )
> +    {
>          unmap_domain_page(p);
> +        if ( page )
> +            put_page(page);
> +    }
>  }
>
> -static void *hvm_map_entry(unsigned long va, unsigned long *gfn)
> +static void *hvm_map_entry(unsigned long va, struct page_info **page)
>  {
> +    unsigned long gfn;
>      uint32_t pfec;
>      char *v;
>
> @@ -1865,11 +1885,11 @@ static void *hvm_map_entry(unsigned long
>       * treat it as a kernel-mode read (i.e. no access checks).
>       */
>      pfec = PFEC_page_present;
> -    *gfn = paging_gva_to_gfn(current, va, &pfec);
> +    gfn = paging_gva_to_gfn(current, va, &pfec);
>      if ( (pfec == PFEC_page_paged) || (pfec == PFEC_page_shared) )
>          goto fail;
>
> -    v = hvm_map_guest_frame_rw(*gfn);
> +    v = hvm_map_guest_frame_rw(gfn, page);
>      if ( v == NULL )
>          goto fail;
>
> @@ -1880,11 +1900,9 @@ static void *hvm_map_entry(unsigned long
>      return NULL;
>  }
>
> -static void hvm_unmap_entry(void *p, unsigned long gfn)
> +static void hvm_unmap_entry(void *p, struct page_info *page)
>  {
> -    hvm_unmap_guest_frame(p);
> -    if ( p && (gfn != INVALID_GFN) )
> -        put_gfn(current->domain, gfn);
> +    hvm_unmap_guest_frame(p, page);
>  }
>
>  static int hvm_load_segment_selector(
> @@ -1896,7 +1914,7 @@ static int hvm_load_segment_selector(
>      int fault_type = TRAP_invalid_tss;
>      struct cpu_user_regs *regs = guest_cpu_user_regs();
>      struct vcpu *v = current;
> -    unsigned long pdesc_gfn = INVALID_GFN;
> +    struct page_info *pdesc_pg = NULL;
>
>      if ( regs->eflags & X86_EFLAGS_VM )
>      {
> @@ -1930,7 +1948,7 @@
>      if ( ((sel & 0xfff8) + 7) > desctab.limit )
>          goto fail;
>
> -    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_gfn);
> +    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_pg);
>      if ( pdesc == NULL )
>          goto hvm_map_fail;
>
> @@ -1990,7 +2008,7 @@
>          desc.b |= 0x100;
>
>  skip_accessed_flag:
> -    hvm_unmap_entry(pdesc, pdesc_gfn);
> +    hvm_unmap_entry(pdesc, pdesc_pg);
>
>      segr.base = (((desc.b <<  0) & 0xff000000u) |
>                   ((desc.b << 16) & 0x00ff0000u) |
> @@ -2006,7 +2024,7 @@
>      return 0;
>
>  unmap_and_fail:
> -    hvm_unmap_entry(pdesc, pdesc_gfn);
> +    hvm_unmap_entry(pdesc, pdesc_pg);
>  fail:
>      hvm_inject_exception(fault_type, sel & 0xfffc, 0);
>  hvm_map_fail:
> @@ -2021,7 +2039,8 @@ void hvm_task_switch(
>      struct cpu_user_regs *regs = guest_cpu_user_regs();
>      struct segment_register gdt, tr, prev_tr, segr;
>      struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
> -    unsigned long eflags, optss_gfn = INVALID_GFN, nptss_gfn = INVALID_GFN;
> +    unsigned long eflags;
> +    struct page_info *optss_pg = NULL, *nptss_pg = NULL;
>      int exn_raised, rc;
>      struct {
>          u16 back_link,__blh;
> @@ -2047,11 +2066,13 @@
>          goto out;
>      }
>
> -    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), &optss_gfn);
> +    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8),
> +                               &optss_pg);
>      if ( optss_desc == NULL )
>          goto out;
>
> -    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), &nptss_gfn);
> +    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8),
> +                               &nptss_pg);
>      if ( nptss_desc == NULL )
>          goto out;
>
> @@ -2216,8 +2237,8 @@
>      }
>
>   out:
> -    hvm_unmap_entry(optss_desc, optss_gfn);
> -    hvm_unmap_entry(nptss_desc, nptss_gfn);
> +    hvm_unmap_entry(optss_desc, optss_pg);
> +    hvm_unmap_entry(nptss_desc, nptss_pg);
>  }
>
>  #define HVMCOPY_from_guest (0u<<0)
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/nestedhvm.c
> --- a/xen/arch/x86/hvm/nestedhvm.c
> +++ b/xen/arch/x86/hvm/nestedhvm.c
> @@ -56,9 +56,11 @@ nestedhvm_vcpu_reset(struct vcpu *v)
>      nv->nv_ioportED = 0;
>
>      if (nv->nv_vvmcx)
> -        hvm_unmap_guest_frame(nv->nv_vvmcx);
> +        hvm_unmap_guest_frame(nv->nv_vvmcx,
> +                              nv->nv_vvmcx_pg);
>      nv->nv_vvmcx = NULL;
>      nv->nv_vvmcxaddr = VMCX_EADDR;
> +    nv->nv_vvmcx_pg = NULL;
>      nv->nv_flushp2m = 0;
>      nv->nv_p2m = NULL;
>
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/svm/nestedsvm.c
> --- a/xen/arch/x86/hvm/svm/nestedsvm.c
> +++ b/xen/arch/x86/hvm/svm/nestedsvm.c
> @@ -71,20 +71,18 @@ int nestedsvm_vmcb_map(struct vcpu *v, u
>      if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) {
>          ASSERT(nv->nv_vvmcx != NULL);
>          ASSERT(nv->nv_vvmcxaddr != VMCX_EADDR);
> -        hvm_unmap_guest_frame(nv->nv_vvmcx);
> +        hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcx_pg);
>          nv->nv_vvmcx = NULL;
>          nv->nv_vvmcxaddr = VMCX_EADDR;
> +        nv->nv_vvmcx_pg = NULL;
>      }
>
>      if (nv->nv_vvmcx == NULL) {
> -        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT);
> +        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT,
> +                                              &nv->nv_vvmcx_pg);
>          if (nv->nv_vvmcx == NULL)
>              return 0;
>          nv->nv_vvmcxaddr = vmcbaddr;
> -        /* put_gfn here even though the map survives beyond this caller.
> -         * The map can likely survive beyond a hypervisor exit, thus we
> -         * need to put the gfn */
> -        put_gfn(current->domain, vmcbaddr >> PAGE_SHIFT);
>      }
>
>      return 1;
> @@ -336,6 +334,7 @@ static int nsvm_vmrun_permissionmap(stru
>      unsigned int i;
>      enum hvm_copy_result ret;
>      unsigned long *ns_viomap;
> +    struct page_info *ns_viomap_pg = NULL;
>      bool_t ioport_80, ioport_ed;
>
>      ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
> @@ -353,12 +352,12 @@ static int nsvm_vmrun_permissionmap(stru
>      svm->ns_oiomap_pa = svm->ns_iomap_pa;
>      svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;
>
> -    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT);
> +    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT,
> +                                       &ns_viomap_pg);
>      ASSERT(ns_viomap != NULL);
>      ioport_80 = test_bit(0x80, ns_viomap);
>      ioport_ed = test_bit(0xed, ns_viomap);
> -    hvm_unmap_guest_frame(ns_viomap);
> -    put_gfn(current->domain, svm->ns_iomap_pa >> PAGE_SHIFT);
> +    hvm_unmap_guest_frame(ns_viomap, ns_viomap_pg);
>
>      svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
>
> @@ -863,6 +862,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t
>      uint16_t port;
>      bool_t enabled;
>      unsigned long gfn = 0; /* gcc ... */
> +    struct page_info *io_bitmap_pg = NULL;
>
>      ioinfo.bytes = exitinfo1;
>      port = ioinfo.fields.port;
> @@ -880,7 +880,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t
>          break;
>      }
>
> -    io_bitmap = hvm_map_guest_frame_ro(gfn);
> +    io_bitmap = hvm_map_guest_frame_ro(gfn, &io_bitmap_pg);
>      if (io_bitmap == NULL) {
>          gdprintk(XENLOG_ERR,
>              "IOIO intercept: mapping of permission map failed\n");
>      }
>
> @@ -888,8 +888,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t
>      enabled = test_bit(port, io_bitmap);
> -    hvm_unmap_guest_frame(io_bitmap);
> -    put_gfn(current->domain, gfn);
> +    hvm_unmap_guest_frame(io_bitmap, io_bitmap_pg);
>
>      if (!enabled)
>          return NESTEDHVM_VMEXIT_HOST;
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/vmx/vvmx.c
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -44,10 +44,13 @@ int nvmx_vcpu_initialise(struct vcpu *v)
>      nvmx->vmxon_region_pa = 0;
>      nvcpu->nv_vvmcx = NULL;
>      nvcpu->nv_vvmcxaddr = VMCX_EADDR;
> +    nvcpu->nv_vvmcx_pg = NULL;
>      nvmx->intr.intr_info = 0;
>      nvmx->intr.error_code = 0;
>      nvmx->iobitmap[0] = NULL;
>      nvmx->iobitmap[1] = NULL;
> +    nvmx->iobitmap_pg[0] = NULL;
> +    nvmx->iobitmap_pg[1] = NULL;
>      return 0;
>  out:
>      return -ENOMEM;
> @@ -558,12 +561,14 @@ static void __map_io_bitmap(struct vcpu
>
>      index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
>      if (nvmx->iobitmap[index])
> -        hvm_unmap_guest_frame (nvmx->iobitmap[index]);
> +    {
> +        hvm_unmap_guest_frame (nvmx->iobitmap[index],
> +                               nvmx->iobitmap_pg[index]);
> +        nvmx->iobitmap_pg[index] = NULL;
> +    }
>      gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
> -    nvmx->iobitmap[index] = hvm_map_guest_frame_ro (gpa >> PAGE_SHIFT);
> -    /* See comment in nestedsvm_vmcb_map re putting this gfn and
> -     * liveness of the map it backs */
> -    put_gfn(current->domain, gpa >> PAGE_SHIFT);
> +    nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT,
> +                                                   &nvmx->iobitmap_pg[index]);
>  }
>
>  static inline void map_io_bitmap_all(struct vcpu *v)
> @@ -580,13 +585,16 @@ static void nvmx_purge_vvmcs(struct vcpu
>
>      __clear_current_vvmcs(v);
>      if ( nvcpu->nv_vvmcxaddr != VMCX_EADDR )
> -        hvm_unmap_guest_frame(nvcpu->nv_vvmcx);
> -    nvcpu->nv_vvmcx == NULL;
> +        hvm_unmap_guest_frame(nvcpu->nv_vvmcx, nvcpu->nv_vvmcx_pg);
> +    nvcpu->nv_vvmcx = NULL;
>      nvcpu->nv_vvmcxaddr = VMCX_EADDR;
> +    nvcpu->nv_vvmcx_pg = NULL;
>      for (i=0; i<2; i++) {
>          if ( nvmx->iobitmap[i] ) {
> -            hvm_unmap_guest_frame(nvmx->iobitmap[i]);
> +            hvm_unmap_guest_frame(nvmx->iobitmap[i],
> +                                  nvmx->iobitmap_pg[i]);
>              nvmx->iobitmap[i] = NULL;
> +            nvmx->iobitmap_pg[i] = NULL;
>          }
>      }
>  }
> @@ -1138,12 +1146,10 @@ int nvmx_handle_vmptrld(struct cpu_user_
>
>      if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR )
>      {
> -        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw (gpa >> PAGE_SHIFT);
> +        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT,
> +                                                 &nvcpu->nv_vvmcx_pg);
>          nvcpu->nv_vvmcxaddr = gpa;
>          map_io_bitmap_all (v);
> -        /* See comment in nestedsvm_vmcb_map regarding putting this
> -         * gfn and liveness of the map that uses it */
> -        put_gfn(current->domain, gpa >> PAGE_SHIFT);
>      }
>
>      vmreturn(regs, VMSUCCEED);
> @@ -1201,11 +1207,11 @@ int nvmx_handle_vmclear(struct cpu_user_
>      else
>      {
>          /* Even if this VMCS isn't the current one, we must clear it. */
> -        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT);
> +        struct page_info *vvmcs_pg = NULL;
> +        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, &vvmcs_pg);
>          if ( vvmcs )
>              __set_vvmcs(vvmcs, NVMX_LAUNCH_STATE, 0);
> -        hvm_unmap_guest_frame(vvmcs);
> -        put_gfn(current->domain, gpa >> PAGE_SHIFT);
> +        hvm_unmap_guest_frame(vvmcs, vvmcs_pg);
>      }
>
>      vmreturn(regs, VMSUCCEED);
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/hvm.h
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -26,6 +26,7 @@
>  #include <asm/hvm/asid.h>
>  #include <public/domctl.h>
>  #include <public/hvm/save.h>
> +#include <asm/mm.h>
>
>  /* Interrupt acknowledgement sources. */
>  enum hvm_intsrc {
> @@ -392,9 +393,11 @@ int hvm_virtual_to_linear_addr(
>      unsigned int addr_size,
>      unsigned long *linear_addr);
>
> -void *hvm_map_guest_frame_rw(unsigned long gfn);
> -void *hvm_map_guest_frame_ro(unsigned long gfn);
> -void hvm_unmap_guest_frame(void *p);
> +void *hvm_map_guest_frame_rw(unsigned long gfn,
> +                             struct page_info **page);
> +void *hvm_map_guest_frame_ro(unsigned long gfn,
> +                             struct page_info **page);
> +void hvm_unmap_guest_frame(void *p, struct page_info *page);
>
>  static inline void hvm_set_info_guest(struct vcpu *v)
>  {
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vcpu.h
> --- a/xen/include/asm-x86/hvm/vcpu.h
> +++ b/xen/include/asm-x86/hvm/vcpu.h
> @@ -77,6 +77,7 @@ struct nestedvcpu {
>      void *nv_n2vmcx; /* shadow VMCB/VMCS used to run l2 guest */
>
>      uint64_t nv_vvmcxaddr; /* l1 guest physical address of nv_vvmcx */
> +    struct page_info *nv_vvmcx_pg; /* page where nv_vvmcx resides */
>      uint64_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
>      uint64_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
>
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vmx/vvmx.h
> --- a/xen/include/asm-x86/hvm/vmx/vvmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
> @@ -26,6 +26,7 @@
>  struct nestedvmx {
>      paddr_t vmxon_region_pa;
>      void *iobitmap[2]; /* map (va) of L1 guest I/O bitmap */
> +    struct page_info *iobitmap_pg[2]; /* pages backing L1 guest I/O bitmap */
>      /* deferred nested interrupt */
>      struct {
>          unsigned long intr_info;
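The calling convention the patch establishes, reduced to a minimal usage sketch (illustrative only, modelled on the nvmx_handle_vmclear() hunk above, with error handling elided): the map routine now hands back both the mapping and a referenced page_info, and that extra page reference is what keeps the frame from being paged out while the map outlives a hypervisor entry/exit pair.

    struct page_info *pg = NULL;
    void *va = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, &pg);

    if ( va )
    {
        /* The ref held on pg keeps this frame live and unpageable, even
         * if the mapping survives across several VM entry/exit pairs. */

        /* ... use va ... */

        /* Unmap and drop the page ref in one call. */
        hvm_unmap_guest_frame(va, pg);
    }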
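And a minimal sketch of the virt_to_page() alternative raised at the top of this message, assuming 64-bit Xen, where map_domain_page() returns an address in the direct map. The one-argument hvm_unmap_guest_frame() below is hypothetical, not what the patch implements:

    void hvm_unmap_guest_frame(void *p)
    {
        if ( p )
        {
            /* On 64-bit the mapping lives in the direct map, so the
             * page_info can be recovered from the pointer itself
             * instead of being carried by every caller. */
            struct page_info *pg = virt_to_page(p);

            unmap_domain_page(p);
            put_page(pg); /* drop the ref taken at map time */
        }
    }

On 32-bit Xen the mapcache virtual addresses are not covered by virt_to_page(), hence the other options floated above: looking up the MFN through the existing linear mappings, or adding a va->mfn helper to the map_domain_page() family.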