
Re: [Xen-devel] [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out



This is a bit messy still.  On 64-bit Xen we could just use
virt_to_page() on the map pointers rather than storing their 
page_info pointers separately. 

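Something along these lines, say (sketch only, untested; it relies on
map_domain_page() handing back a direct-map address on 64-bit):

    void hvm_unmap_guest_frame(void *p)
    {
        if ( p )
        {
            struct page_info *pg = virt_to_page(p);

            unmap_domain_page(p);
            put_page(pg);    /* drop the ref taken at map time */
        }
    }
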
On 32-bit, maybe we could use the existing linear mappings to look up
the MFN instead.  Or would it be easier to add a function to the
map_domain_page() family that does va->mfn for mapped frames?
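
For concreteness, such a helper might look something like this (sketch
only; the name, and doing the lookup via the linear page tables, are
just suggestions):

    /* Return the MFN backing a frame mapped with map_domain_page(). */
    static inline unsigned long domain_page_map_to_mfn(const void *va)
    {
        return l1e_get_pfn(__linear_l1_table[
                               l1_linear_offset((unsigned long)va)]);
    }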
Keir, any opinion?

Tim.

At 08:41 -0500 on 24 Nov (1322124091), Andres Lagar-Cavilla wrote:
>  xen/arch/x86/hvm/hvm.c             |  69 ++++++++++++++++++++++++-------------
>  xen/arch/x86/hvm/nestedhvm.c       |   4 +-
>  xen/arch/x86/hvm/svm/nestedsvm.c   |  23 ++++++------
>  xen/arch/x86/hvm/vmx/vvmx.c        |  36 +++++++++++--------
>  xen/include/asm-x86/hvm/hvm.h      |   9 +++-
>  xen/include/asm-x86/hvm/vcpu.h     |   1 +
>  xen/include/asm-x86/hvm/vmx/vvmx.h |   1 +
>  7 files changed, 88 insertions(+), 55 deletions(-)
> 
> 
> The nested hvm code maps pages of the guest hvm. These maps live beyond
> a hypervisor entry/exit pair, and thus their liveness cannot be ensured
> with get_gfn/put_gfn critical sections. Ensure their liveness by
> increasing the page ref count, instead.
> 
> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
> 
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/hvm.c
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -1801,11 +1801,15 @@ int hvm_virtual_to_linear_addr(
>      return 0;
>  }
>  
> -/* We leave this function holding a lock on the p2m entry */
> -static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable)
> +/* On non-NULL return, we leave this function holding an additional 
> + * ref on the underlying mfn, if any */
> +static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable,
> +                                    struct page_info **page)
>  {
> +    void *map;
>      unsigned long mfn;
>      p2m_type_t p2mt;
> +    struct page_info *pg;
>      struct domain *d = current->domain;
>  
>      mfn = mfn_x(writable
> @@ -1828,27 +1832,43 @@ static void *__hvm_map_guest_frame(unsig
>      if ( writable )
>          paging_mark_dirty(d, mfn);
>  
> -    return map_domain_page(mfn);
> +    pg = mfn_to_page(mfn);
> +    if (page)
> +    {
> +        page_get_owner_and_reference(pg);
> +        *page = pg;
> +    }
> +
> +    map = map_domain_page(mfn);
> +    put_gfn(d, gfn);
> +    return map;
>  }
>  
> -void *hvm_map_guest_frame_rw(unsigned long gfn)
> +void *hvm_map_guest_frame_rw(unsigned long gfn,
> +                             struct page_info **page)
>  {
> -    return __hvm_map_guest_frame(gfn, 1);
> +    return __hvm_map_guest_frame(gfn, 1, page);
>  }
>  
> -void *hvm_map_guest_frame_ro(unsigned long gfn)
> +void *hvm_map_guest_frame_ro(unsigned long gfn,
> +                             struct page_info **page)
>  {
> -    return __hvm_map_guest_frame(gfn, 0);
> +    return __hvm_map_guest_frame(gfn, 0, page);
>  }
>  
> -void hvm_unmap_guest_frame(void *p)
> +void hvm_unmap_guest_frame(void *p, struct page_info *page)
>  {
>      if ( p )
> +    {
>          unmap_domain_page(p);
> +        if ( page )
> +            put_page(page);
> +    }
>  }
>  
> -static void *hvm_map_entry(unsigned long va, unsigned long *gfn)
> +static void *hvm_map_entry(unsigned long va, struct page_info **page)
>  {
> +    unsigned long gfn;
>      uint32_t pfec;
>      char *v;
>  
> @@ -1865,11 +1885,11 @@ static void *hvm_map_entry(unsigned long
>       * treat it as a kernel-mode read (i.e. no access checks).
>       */
>      pfec = PFEC_page_present;
> -    *gfn = paging_gva_to_gfn(current, va, &pfec);
> +    gfn = paging_gva_to_gfn(current, va, &pfec);
>      if ( (pfec == PFEC_page_paged) || (pfec == PFEC_page_shared) )
>          goto fail;
>  
> -    v = hvm_map_guest_frame_rw(*gfn);
> +    v = hvm_map_guest_frame_rw(gfn, page);
>      if ( v == NULL )
>          goto fail;
>  
> @@ -1880,11 +1900,9 @@ static void *hvm_map_entry(unsigned long
>      return NULL;
>  }
>  
> -static void hvm_unmap_entry(void *p, unsigned long gfn)
> +static void hvm_unmap_entry(void *p, struct page_info *page)
>  {
> -    hvm_unmap_guest_frame(p);
> -    if ( p && (gfn != INVALID_GFN) )
> -        put_gfn(current->domain, gfn);
> +    hvm_unmap_guest_frame(p, page);
>  }
>  
>  static int hvm_load_segment_selector(
> @@ -1896,7 +1914,7 @@ static int hvm_load_segment_selector(
>      int fault_type = TRAP_invalid_tss;
>      struct cpu_user_regs *regs = guest_cpu_user_regs();
>      struct vcpu *v = current;
> -    unsigned long pdesc_gfn = INVALID_GFN;
> +    struct page_info *pdesc_pg = NULL;
>  
>      if ( regs->eflags & X86_EFLAGS_VM )
>      {
> @@ -1930,7 +1948,7 @@ static int hvm_load_segment_selector(
>      if ( ((sel & 0xfff8) + 7) > desctab.limit )
>          goto fail;
>  
> -    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_gfn);
> +    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_pg);
>      if ( pdesc == NULL )
>          goto hvm_map_fail;
>  
> @@ -1990,7 +2008,7 @@ static int hvm_load_segment_selector(
>      desc.b |= 0x100;
>  
>   skip_accessed_flag:
> -    hvm_unmap_entry(pdesc, pdesc_gfn);
> +    hvm_unmap_entry(pdesc, pdesc_pg);
>  
>      segr.base = (((desc.b <<  0) & 0xff000000u) |
>                   ((desc.b << 16) & 0x00ff0000u) |
> @@ -2006,7 +2024,7 @@ static int hvm_load_segment_selector(
>      return 0;
>  
>   unmap_and_fail:
> -    hvm_unmap_entry(pdesc, pdesc_gfn);
> +    hvm_unmap_entry(pdesc, pdesc_pg);
>   fail:
>      hvm_inject_exception(fault_type, sel & 0xfffc, 0);
>   hvm_map_fail:
> @@ -2021,7 +2039,8 @@ void hvm_task_switch(
>      struct cpu_user_regs *regs = guest_cpu_user_regs();
>      struct segment_register gdt, tr, prev_tr, segr;
>      struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
> -    unsigned long eflags, optss_gfn = INVALID_GFN, nptss_gfn = INVALID_GFN;
> +    unsigned long eflags;
> +    struct page_info *optss_pg = NULL, *nptss_pg = NULL;
>      int exn_raised, rc;
>      struct {
>          u16 back_link,__blh;
> @@ -2047,11 +2066,13 @@ void hvm_task_switch(
>          goto out;
>      }
>  
> -    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), &optss_gfn);
> +    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), 
> +                                &optss_pg);
>      if ( optss_desc == NULL )
>          goto out;
>  
> -    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), &nptss_gfn);
> +    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), 
> +                                &nptss_pg);
>      if ( nptss_desc == NULL )
>          goto out;
>  
> @@ -2216,8 +2237,8 @@ void hvm_task_switch(
>      }
>  
>   out:
> -    hvm_unmap_entry(optss_desc, optss_gfn);
> -    hvm_unmap_entry(nptss_desc, nptss_gfn);
> +    hvm_unmap_entry(optss_desc, optss_pg);
> +    hvm_unmap_entry(nptss_desc, nptss_pg);
>  }
>  
>  #define HVMCOPY_from_guest (0u<<0)
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/nestedhvm.c
> --- a/xen/arch/x86/hvm/nestedhvm.c
> +++ b/xen/arch/x86/hvm/nestedhvm.c
> @@ -56,9 +56,11 @@ nestedhvm_vcpu_reset(struct vcpu *v)
>      nv->nv_ioportED = 0;
>  
>      if (nv->nv_vvmcx)
> -        hvm_unmap_guest_frame(nv->nv_vvmcx);
> +        hvm_unmap_guest_frame(nv->nv_vvmcx, 
> +                              nv->nv_vvmcx_pg);
>      nv->nv_vvmcx = NULL;
>      nv->nv_vvmcxaddr = VMCX_EADDR;
> +    nv->nv_vvmcx_pg  = NULL;
>      nv->nv_flushp2m = 0;
>      nv->nv_p2m = NULL;
>  
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/svm/nestedsvm.c
> --- a/xen/arch/x86/hvm/svm/nestedsvm.c
> +++ b/xen/arch/x86/hvm/svm/nestedsvm.c
> @@ -71,20 +71,18 @@ int nestedsvm_vmcb_map(struct vcpu *v, u
>      if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) {
>          ASSERT(nv->nv_vvmcx != NULL);
>          ASSERT(nv->nv_vvmcxaddr != VMCX_EADDR);
> -        hvm_unmap_guest_frame(nv->nv_vvmcx);
> +        hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcx_pg);
>          nv->nv_vvmcx = NULL;
>          nv->nv_vvmcxaddr = VMCX_EADDR;
> +        nv->nv_vvmcx_pg = NULL;
>      }
>  
>      if (nv->nv_vvmcx == NULL) {
> -        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT);
> +        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT,
> +                                                &nv->nv_vvmcx_pg);
>          if (nv->nv_vvmcx == NULL)
>              return 0;
>          nv->nv_vvmcxaddr = vmcbaddr;
> -        /* put_gfn here even though the map survives beyond this caller.
> -         * The map can likely survive beyond a hypervisor exit, thus we
> -         * need to put the gfn */
> -        put_gfn(current->domain, vmcbaddr >> PAGE_SHIFT);
>      }
>  
>      return 1;
> @@ -336,6 +334,7 @@ static int nsvm_vmrun_permissionmap(stru
>      unsigned int i;
>      enum hvm_copy_result ret;
>      unsigned long *ns_viomap;
> +    struct page_info *ns_viomap_pg = NULL;
>      bool_t ioport_80, ioport_ed;
>  
>      ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
> @@ -353,12 +352,12 @@ static int nsvm_vmrun_permissionmap(stru
>      svm->ns_oiomap_pa = svm->ns_iomap_pa;
>      svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;
>  
> -    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT);
> +    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT,
> +                                        &ns_viomap_pg);
>      ASSERT(ns_viomap != NULL);
>      ioport_80 = test_bit(0x80, ns_viomap);
>      ioport_ed = test_bit(0xed, ns_viomap);
> -    hvm_unmap_guest_frame(ns_viomap);
> -    put_gfn(current->domain, svm->ns_iomap_pa >> PAGE_SHIFT);
> +    hvm_unmap_guest_frame(ns_viomap, ns_viomap_pg);
>  
>      svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
>  
> @@ -863,6 +862,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
>      uint16_t port;
>      bool_t enabled;
>      unsigned long gfn = 0; /* gcc ... */
> +    struct page_info *io_bitmap_pg = NULL;
>  
>      ioinfo.bytes = exitinfo1;
>      port = ioinfo.fields.port;
> @@ -880,7 +880,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
>          break;
>      }
>  
> -    io_bitmap = hvm_map_guest_frame_ro(gfn);
> +    io_bitmap = hvm_map_guest_frame_ro(gfn, &io_bitmap_pg);
>      if (io_bitmap == NULL) {
>          gdprintk(XENLOG_ERR,
>              "IOIO intercept: mapping of permission map failed\n");
> @@ -888,8 +888,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
>      }
>  
>      enabled = test_bit(port, io_bitmap);
> -    hvm_unmap_guest_frame(io_bitmap);
> -    put_gfn(current->domain, gfn);
> +    hvm_unmap_guest_frame(io_bitmap, io_bitmap_pg);
>  
>      if (!enabled)
>          return NESTEDHVM_VMEXIT_HOST;
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/vmx/vvmx.c
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -44,10 +44,13 @@ int nvmx_vcpu_initialise(struct vcpu *v)
>      nvmx->vmxon_region_pa = 0;
>      nvcpu->nv_vvmcx = NULL;
>      nvcpu->nv_vvmcxaddr = VMCX_EADDR;
> +    nvcpu->nv_vvmcx_pg = NULL;
>      nvmx->intr.intr_info = 0;
>      nvmx->intr.error_code = 0;
>      nvmx->iobitmap[0] = NULL;
>      nvmx->iobitmap[1] = NULL;
> +    nvmx->iobitmap_pg[0] = NULL;
> +    nvmx->iobitmap_pg[1] = NULL;
>      return 0;
>  out:
>      return -ENOMEM;
> @@ -558,12 +561,14 @@ static void __map_io_bitmap(struct vcpu 
>  
>      index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
>      if (nvmx->iobitmap[index])
> -        hvm_unmap_guest_frame (nvmx->iobitmap[index]); 
> +    {
> +        hvm_unmap_guest_frame (nvmx->iobitmap[index],
> +                               nvmx->iobitmap_pg[index]); 
> +        nvmx->iobitmap_pg[index] = NULL;
> +    }
>      gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
> -    nvmx->iobitmap[index] = hvm_map_guest_frame_ro (gpa >> PAGE_SHIFT);
> -    /* See comment in nestedsvm_vmcb_map re putting this gfn and 
> -     * liveness of the map it backs */
> -    put_gfn(current->domain, gpa >> PAGE_SHIFT);
> +    nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT,
> +                                             &nvmx->iobitmap_pg[index]);
>  }
>  
>  static inline void map_io_bitmap_all(struct vcpu *v)
> @@ -580,13 +585,16 @@ static void nvmx_purge_vvmcs(struct vcpu
>  
>      __clear_current_vvmcs(v);
>      if ( nvcpu->nv_vvmcxaddr != VMCX_EADDR )
> -        hvm_unmap_guest_frame(nvcpu->nv_vvmcx);
> -    nvcpu->nv_vvmcx == NULL;
> +        hvm_unmap_guest_frame(nvcpu->nv_vvmcx, nvcpu->nv_vvmcx_pg);
> +    nvcpu->nv_vvmcx = NULL;
>      nvcpu->nv_vvmcxaddr = VMCX_EADDR;
> +    nvcpu->nv_vvmcx_pg = NULL;
>      for (i=0; i<2; i++) {
>          if ( nvmx->iobitmap[i] ) {
> -            hvm_unmap_guest_frame(nvmx->iobitmap[i]); 
> +            hvm_unmap_guest_frame(nvmx->iobitmap[i], 
> +                                  nvmx->iobitmap_pg[i]); 
>              nvmx->iobitmap[i] = NULL;
> +            nvmx->iobitmap_pg[i] = NULL;
>          }
>      }
>  }
> @@ -1138,12 +1146,10 @@ int nvmx_handle_vmptrld(struct cpu_user_
>  
>      if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR )
>      {
> -        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw (gpa >> PAGE_SHIFT);
> +        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT,
> +                                                 &nvcpu->nv_vvmcx_pg);
>          nvcpu->nv_vvmcxaddr = gpa;
>          map_io_bitmap_all (v);
> -        /* See comment in nestedsvm_vmcb_map regarding putting this 
> -         * gfn and liveness of the map that uses it */
> -        put_gfn(current->domain, gpa >> PAGE_SHIFT);
>      }
>  
>      vmreturn(regs, VMSUCCEED);
> @@ -1201,11 +1207,11 @@ int nvmx_handle_vmclear(struct cpu_user_
>      else 
>      {
>          /* Even if this VMCS isn't the current one, we must clear it. */
> -        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT);
> +        struct page_info *vvmcs_pg = NULL;
> +        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, &vvmcs_pg);
>          if ( vvmcs ) 
>              __set_vvmcs(vvmcs, NVMX_LAUNCH_STATE, 0);
> -        hvm_unmap_guest_frame(vvmcs);
> -        put_gfn(current->domain, gpa >> PAGE_SHIFT);
> +        hvm_unmap_guest_frame(vvmcs, vvmcs_pg);
>      }
>  
>      vmreturn(regs, VMSUCCEED);
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/hvm.h
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -26,6 +26,7 @@
>  #include <asm/hvm/asid.h>
>  #include <public/domctl.h>
>  #include <public/hvm/save.h>
> +#include <asm/mm.h>
>  
>  /* Interrupt acknowledgement sources. */
>  enum hvm_intsrc {
> @@ -392,9 +393,11 @@ int hvm_virtual_to_linear_addr(
>      unsigned int addr_size,
>      unsigned long *linear_addr);
>  
> -void *hvm_map_guest_frame_rw(unsigned long gfn);
> -void *hvm_map_guest_frame_ro(unsigned long gfn);
> -void hvm_unmap_guest_frame(void *p);
> +void *hvm_map_guest_frame_rw(unsigned long gfn,
> +                             struct page_info **page);
> +void *hvm_map_guest_frame_ro(unsigned long gfn,
> +                             struct page_info **page);
> +void hvm_unmap_guest_frame(void *p, struct page_info *page);
>  
>  static inline void hvm_set_info_guest(struct vcpu *v)
>  {
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vcpu.h
> --- a/xen/include/asm-x86/hvm/vcpu.h
> +++ b/xen/include/asm-x86/hvm/vcpu.h
> @@ -77,6 +77,7 @@ struct nestedvcpu {
>      void *nv_n2vmcx; /* shadow VMCB/VMCS used to run l2 guest */
>  
>      uint64_t nv_vvmcxaddr; /* l1 guest physical address of nv_vvmcx */
> +    struct page_info *nv_vvmcx_pg; /* page where nv_vvmcx resides */
>      uint64_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
>      uint64_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
>  
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vmx/vvmx.h
> --- a/xen/include/asm-x86/hvm/vmx/vvmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
> @@ -26,6 +26,7 @@
>  struct nestedvmx {
>      paddr_t    vmxon_region_pa;
>      void       *iobitmap[2];         /* map (va) of L1 guest I/O bitmap */
> +    struct page_info *iobitmap_pg[2]; /* pages backing L1 guest I/O bitmap */
>      /* deferred nested interrupt */
>      struct {
>          unsigned long intr_info;
> 
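
For reference, the calling convention the patch introduces looks like
this at a call site (minimal sketch; error handling elided):

    struct page_info *pg = NULL;
    void *map = hvm_map_guest_frame_rw(gfn, &pg); /* takes a page ref */

    if ( map == NULL )
        return 0;
    /* ... the map can now safely outlive a hypervisor entry/exit ... */
    hvm_unmap_guest_frame(map, pg);               /* drops the ref */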

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel