[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] Deadlocks by p2m_lock and event_lock



It works as tested.

Thanks,
-Xudong


> -----Original Message-----
> From: Andres Lagar-Cavilla [mailto:andres@xxxxxxxxxxxxxxxx]
> Sent: Wednesday, March 14, 2012 11:11 PM
> To: Hao, Xudong
> Cc: Tim Deegan; Keir Fraser; xen-devel@xxxxxxxxxxxxxxxxxxx; Zhang, Xiantao;
> JBeulich@xxxxxxxx
> Subject: RE: [Xen-devel] Deadlocks by p2m_lock and event_lock
> 
> Can you give this a try? (Tim, Keir, Jan, if Xudong reports success, can you
> please apply?)
> 
> Thanks,
> Andres
> 
> # HG changeset patch
> # User Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx> # Date 1331737660
> 14400 # Node ID fe10f0433f6279091c193127d95d4d39b44a72ed
> # Parent  5d20d2f6ffed0a49f030f04a8870f1926babbcbf
> x86/mm: Fix deadlock between p2m and event channel locks.
> 
> The hvm io emulation code holds the p2m lock for the duration of the 
> emulation,
> which may include sending an event to qemu. On a separate path,
> map_domain_pirq grabs the event channel and p2m locks in opposite order.
> 
> Fix this by ensuring liveness of the ram_gfn used by io emulation, with a page
> ref.
> 
> Reported-by: "Hao, Xudong" <xudong.hao@xxxxxxxxx>
> Signed-off-by: "Hao, Xudong" <xudong.hao@xxxxxxxxx>
> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
> 
> diff -r 5d20d2f6ffed -r fe10f0433f62 xen/arch/x86/hvm/emulate.c
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -77,6 +77,17 @@ static int hvmemul_do_io(
>          return X86EMUL_RETRY;
>      }
> 
> +    /* Maintain a ref on the mfn to ensure liveness. Put the gfn
> +     * to avoid potential deadlock wrt event channel lock, later. */
> +    if ( mfn_valid(mfn_x(ram_mfn)) )
> +        if ( !get_page(mfn_to_page(mfn_x(ram_mfn)),
> +             curr->domain) )
> +        {
> +            put_gfn(curr->domain, ram_gfn);
> +            return X86EMUL_RETRY;
> +        }
> +    put_gfn(curr->domain, ram_gfn);
> +
>      /*
>       * Weird-sized accesses have undefined behaviour: we discard writes
>       * and read all-ones.
> @@ -87,7 +98,8 @@ static int hvmemul_do_io(
>          ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
>          if ( dir == IOREQ_READ )
>              memset(p_data, ~0, size);
> -        put_gfn(curr->domain, ram_gfn);
> +        if ( mfn_valid(mfn_x(ram_mfn)) )
> +            put_page(mfn_to_page(mfn_x(ram_mfn)));
>          return X86EMUL_UNHANDLEABLE;
>      }
> 
> @@ -108,7 +120,8 @@ static int hvmemul_do_io(
>              unsigned int bytes = vio->mmio_large_write_bytes;
>              if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
>              {
> -                put_gfn(curr->domain, ram_gfn);
> +                if ( mfn_valid(mfn_x(ram_mfn)) )
> +                    put_page(mfn_to_page(mfn_x(ram_mfn)));
>                  return X86EMUL_OKAY;
>              }
>          }
> @@ -120,7 +133,8 @@ static int hvmemul_do_io(
>              {
>                  memcpy(p_data, &vio->mmio_large_read[addr - pa],
>                         size);
> -                put_gfn(curr->domain, ram_gfn);
> +                if ( mfn_valid(mfn_x(ram_mfn)) )
> +                    put_page(mfn_to_page(mfn_x(ram_mfn)));
>                  return X86EMUL_OKAY;
>              }
>          }
> @@ -134,7 +148,8 @@ static int hvmemul_do_io(
>          vio->io_state = HVMIO_none;
>          if ( p_data == NULL )
>          {
> -            put_gfn(curr->domain, ram_gfn);
> +            if ( mfn_valid(mfn_x(ram_mfn)) )
> +                put_page(mfn_to_page(mfn_x(ram_mfn)));
>              return X86EMUL_UNHANDLEABLE;
>          }
>          goto finish_access;
> @@ -144,11 +159,13 @@ static int hvmemul_do_io(
>               (addr == (vio->mmio_large_write_pa +
>                         vio->mmio_large_write_bytes)) )
>          {
> -            put_gfn(curr->domain, ram_gfn);
> +            if ( mfn_valid(mfn_x(ram_mfn)) )
> +                put_page(mfn_to_page(mfn_x(ram_mfn)));
>              return X86EMUL_RETRY;
>          }
>      default:
> -        put_gfn(curr->domain, ram_gfn);
> +        if ( mfn_valid(mfn_x(ram_mfn)) )
> +            put_page(mfn_to_page(mfn_x(ram_mfn)));
>          return X86EMUL_UNHANDLEABLE;
>      }
> 
> @@ -156,7 +173,8 @@ static int hvmemul_do_io(
>      {
>          gdprintk(XENLOG_WARNING, "WARNING: io already pending
> (%d)?\n",
>                   p->state);
> -        put_gfn(curr->domain, ram_gfn);
> +        if ( mfn_valid(mfn_x(ram_mfn)) )
> +            put_page(mfn_to_page(mfn_x(ram_mfn)));
>          return X86EMUL_UNHANDLEABLE;
>      }
> 
> @@ -208,7 +226,8 @@ static int hvmemul_do_io(
> 
>      if ( rc != X86EMUL_OKAY )
>      {
> -        put_gfn(curr->domain, ram_gfn);
> +        if ( mfn_valid(mfn_x(ram_mfn)) )
> +            put_page(mfn_to_page(mfn_x(ram_mfn)));
>          return rc;
>      }
> 
> @@ -244,7 +263,8 @@ static int hvmemul_do_io(
>          }
>      }
> 
> -    put_gfn(curr->domain, ram_gfn);
> +    if ( mfn_valid(mfn_x(ram_mfn)) )
> +        put_page(mfn_to_page(mfn_x(ram_mfn)));
>      return X86EMUL_OKAY;
>  }
> 
> 
> 
> 
> > I prefer to the 2nd, I made a patch and testing show it works.
> >
> > diff -r 5d20d2f6ffed xen/arch/x86/hvm/emulate.c
> > --- a/xen/arch/x86/hvm/emulate.c    Fri Mar 09 16:54:24 2012 +0000
> > +++ b/xen/arch/x86/hvm/emulate.c    Wed Mar 14 15:11:52 2012 -0400
> > @@ -60,20 +60,23 @@
> >      ioreq_t *p = get_ioreq(curr);
> >      unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
> >      p2m_type_t p2mt;
> > -    mfn_t ram_mfn;
> > +    unsigned long ram_mfn;
> >      int rc;
> >
> >      /* Check for paged out page */
> > -    ram_mfn = get_gfn_unshare(curr->domain, ram_gfn, &p2mt);
> > +    ram_mfn = mfn_x(get_gfn_unshare(curr->domain, ram_gfn, &p2mt));
> > +    if ( mfn_valid(ram_mfn) )
> > +        get_page(mfn_to_page(ram_mfn), curr->domain);
> > +    put_gfn(curr->domain, ram_gfn);
> >      if ( p2m_is_paging(p2mt) )
> >      {
> > -        put_gfn(curr->domain, ram_gfn);
> > +        put_page(mfn_to_page(ram_mfn));
> >          p2m_mem_paging_populate(curr->domain, ram_gfn);
> >          return X86EMUL_RETRY;
> >      }
> >      if ( p2m_is_shared(p2mt) )
> >      {
> > -        put_gfn(curr->domain, ram_gfn);
> > +        put_page(mfn_to_page(ram_mfn));
> >          return X86EMUL_RETRY;
> >      }
> >
> > @@ -87,7 +90,7 @@
> >          ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
> >          if ( dir == IOREQ_READ )
> >              memset(p_data, ~0, size);
> > -        put_gfn(curr->domain, ram_gfn);
> > +        put_page(mfn_to_page(ram_mfn));
> >          return X86EMUL_UNHANDLEABLE;
> >      }
> >
> > @@ -108,7 +111,7 @@
> >              unsigned int bytes = vio->mmio_large_write_bytes;
> >              if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
> >              {
> > -                put_gfn(curr->domain, ram_gfn);
> > +                put_page(mfn_to_page(ram_mfn));
> >                  return X86EMUL_OKAY;
> >              }
> >          }
> > @@ -120,7 +123,7 @@
> >              {
> >                  memcpy(p_data, &vio->mmio_large_read[addr - pa],
> >                         size);
> > -                put_gfn(curr->domain, ram_gfn);
> > +                put_page(mfn_to_page(ram_mfn));
> >                  return X86EMUL_OKAY;
> >              }
> >          }
> > @@ -134,7 +137,7 @@
> >          vio->io_state = HVMIO_none;
> >          if ( p_data == NULL )
> >          {
> > -            put_gfn(curr->domain, ram_gfn);
> > +            put_page(mfn_to_page(ram_mfn));
> >              return X86EMUL_UNHANDLEABLE;
> >          }
> >          goto finish_access;
> > @@ -144,11 +147,11 @@
> >               (addr == (vio->mmio_large_write_pa +
> >                         vio->mmio_large_write_bytes)) )
> >          {
> > -            put_gfn(curr->domain, ram_gfn);
> > +            put_page(mfn_to_page(ram_mfn));
> >              return X86EMUL_RETRY;
> >          }
> >      default:
> > -        put_gfn(curr->domain, ram_gfn);
> > +        put_page(mfn_to_page(ram_mfn));
> >          return X86EMUL_UNHANDLEABLE;
> >      }
> >
> > @@ -156,7 +159,7 @@
> >      {
> >          gdprintk(XENLOG_WARNING, "WARNING: io already pending
> (%d)?\n",
> >                   p->state);
> > -        put_gfn(curr->domain, ram_gfn);
> > +        put_page(mfn_to_page(ram_mfn));
> >          return X86EMUL_UNHANDLEABLE;
> >      }
> >
> > @@ -208,7 +211,7 @@
> >
> >      if ( rc != X86EMUL_OKAY )
> >      {
> > -        put_gfn(curr->domain, ram_gfn);
> > +        put_page(mfn_to_page(ram_mfn));
> >          return rc;
> >      }
> >
> > @@ -244,7 +247,7 @@
> >          }
> >      }
> >
> > -    put_gfn(curr->domain, ram_gfn);
> > +    put_page(mfn_to_page(ram_mfn));
> >      return X86EMUL_OKAY;
> >  }
> >
> >
> > Thanks,
> > -Xudong
> >
> >
> >> -----Original Message-----
> >> From: Andres Lagar-Cavilla [mailto:andres@xxxxxxxxxxxxxxxx]
> >> Sent: Wednesday, March 14, 2012 2:46 AM
> >> To: Hao, Xudong
> >> Cc: Tim Deegan; Keir Fraser; xen-devel@xxxxxxxxxxxxxxxxxxx; Zhang,
> >> Xiantao; JBeulich@xxxxxxxx
> >> Subject: RE: [Xen-devel] Deadlocks by p2m_lock and event_lock
> >>
> >> > Hi, Tim and Andres
> >> > The patch fix part of this issue. In handle_mmio, function
> >> > hvmemul_do_io() is called and p2m lock was held again by calling
> >> > get_gfn_unshare(), still trigger a deadlocks.
> >>
> >> Typically hvmemul_do_io gets the zero gfn, because in many cases
> >> that's the 'ram_gpa' it is passed. However, in the case of mmio, and
> >> particularly stdvga, ram_gpa is the data to be copied to the
> >> framebuffer. So it is in principle ok to get_gfn in hvmemul_do_io.
> >>
> >> There are two solutions
> >> 1. msix_capability_init does not call p2m_change_entry_type_global.
> >> See my previous email. If we want to resync the
> >> EPT/NPT/traditional/VTD/IOMMU/superduper TLBs, we can just do that
> >> explicitly. I hope.
> >>
> >> 2. hvmemul_do_io gets a ref on the mfn underlying ram_gpa, and
> >> holds that for the critical section, instead of the p2m lock. One way
> >> to achieve this is
> >>
> >>     /* Check for paged out page */
> >>     ram_mfn = get_gfn_unshare(curr->domain, ram_gfn, &p2mt);
> >>     if ( this or that )
> >>     { ... handle ... }
> >>     if ( mfn_valid(ram_mfn) )
> >>         get_page(mfn_to_page(ram_mfn), curr->domain);
> >>     put_gfn(curr->domain, ram_gfn);
> >>
> >>     /* replace all put_gfn in all exit paths by put_page */
> >>
> >> This will ensure the target page is live and sane while not holding
> >> the p2m lock.
> >> Xudong, did that make sense? Do you think you could try that and
> >> report back?
> >>
> >> Thanks!
> >> Andres
> >>
> >> >
> >> > (XEN) Xen call trace:
> >> > (XEN)    [<ffff82c4801261a3>] _spin_lock+0x1b/0xa8
> >> > (XEN)    [<ffff82c4801070d3>]
> notify_via_xen_event_channel+0x21/0x106
> >> > (XEN)    [<ffff82c4801b6883>] hvm_buffered_io_send+0x1f1/0x21b
> >> > (XEN)    [<ffff82c4801bbd3a>] stdvga_intercept_mmio+0x491/0x4c7
> >> > (XEN)    [<ffff82c4801b5d58>] hvm_io_intercept+0x218/0x244
> >> > (XEN)    [<ffff82c4801aa931>] hvmemul_do_io+0x55a/0x716
> >> > (XEN)    [<ffff82c4801aab1a>] hvmemul_do_mmio+0x2d/0x2f
> >> > (XEN)    [<ffff82c4801ab239>] hvmemul_write+0x181/0x1a2
> >> > (XEN)    [<ffff82c4801963f0>] x86_emulate+0xcad3/0xfbdf
> >> > (XEN)    [<ffff82c4801a9d2e>] hvm_emulate_one+0x120/0x1af
> >> > (XEN)    [<ffff82c4801b63cb>] handle_mmio+0x4e/0x1d1
> >> > (XEN)    [<ffff82c4801afd72>]
> hvm_hap_nested_page_fault+0x210/0x37f
> >> > (XEN)    [<ffff82c4801d2419>] vmx_vmexit_handler+0x1523/0x17d0
> >> >
> >> > Thanks,
> >> > -Xudong
> >> >
> >> >> -----Original Message-----
> >> >> From: Tim Deegan [mailto:tim@xxxxxxx]
> >> >> Sent: Saturday, March 10, 2012 12:56 AM
> >> >> To: Andres Lagar-Cavilla
> >> >> Cc: Hao, Xudong; Keir Fraser; xen-devel@xxxxxxxxxxxxxxxxxxx;
> >> >> Zhang, Xiantao; JBeulich@xxxxxxxx
> >> >> Subject: Re: [Xen-devel] Deadlocks by p2m_lock and event_lock
> >> >>
> >> >> At 08:29 -0800 on 09 Mar (1331281767), Andres Lagar-Cavilla wrote:
> >> >> > >> I don't know about the event lock, but it seems unwise to
> >> >> > >> call in to handle_mmio with a gfn lock held.  How about
> >> >> > >> fixing the other
> >> >> path?
> >> >> > >>
> >> >> > >> diff -r 04673ecb9d78 xen/arch/x86/hvm/hvm.c
> >> >> > >> --- a/xen/arch/x86/hvm/hvm.c       Thu Mar 08 16:40:05 2012 +0000
> >> >> > >> +++ b/xen/arch/x86/hvm/hvm.c       Fri Mar 09 11:15:25 2012 +0000
> >> >> > >> @@ -1324,10 +1324,11 @@ int
> >> hvm_hap_nested_page_fault(unsigned l
> >> >> > >>      if ( (p2mt == p2m_mmio_dm) ||
> >> >> > >>           (access_w && (p2mt == p2m_ram_ro)) )
> >> >> > >>      {
> >> >> > >> +        put_gfn(p2m->domain, gfn);
> >> >> > >>          if ( !handle_mmio() )
> >> >> > >>              hvm_inject_exception(TRAP_gp_fault, 0, 0);
> >> >> > >>          rc = 1;
> >> >> > >> -        goto out_put_gfn;
> >> >> > >> +        goto out;
> >> >> > >>      }
> >> >> > >>
> >> >> > >>  #ifdef __x86_64__
> >> >> > >> @@ -1379,6 +1380,7 @@ int
> hvm_hap_nested_page_fault(unsigned
> >> >> > >> l
> >> >> > >>
> >> >> > >>  out_put_gfn:
> >> >> > >>      put_gfn(p2m->domain, gfn);
> >> >> > >> +out:
> >> >> > >>      if ( paged )
> >> >> > >>          p2m_mem_paging_populate(v->domain, gfn);
> >> >> > >>      if ( req_ptr )
> >> >> > >
> >> >> > > Yes, that's fine to release the p2m lock earlier than
> >> handle_mmio.
> >> >> >
> >> >> > Ack
> >> >>
> >> >> OK, applied.
> >> >>
> >> >> Tim.
> >> >
> >>
> >
> >
> 


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.