[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] Bug on shadow page mode



> -----Original Message-----
> From: Tim Deegan [mailto:tim@xxxxxxx]
> Sent: Thursday, April 04, 2013 6:18 PM
> To: Jan Beulich
> Cc: Hao, Xudong; xen-devel (xen-devel@xxxxxxxxxxxxx)
> Subject: Re: [Xen-devel] Bug on shadow page mode
> 
> Hi,
> 
> At 12:50 +0100 on 02 Apr (1364907054), Jan Beulich wrote:
> > > (XEN) Xen call trace:
> > > (XEN)    [<ffff82c4c01e637f>] guest_walk_tables_4_levels+0x135/0x6a6
> > > (XEN)    [<ffff82c4c020d8cc>] sh_page_fault__guest_4+0x505/0x2015
> > > (XEN)    [<ffff82c4c01d2135>] vmx_vmexit_handler+0x86c/0x1748
> > > (XEN)
> > > (XEN) Pagetable walk from ffff82c406a00000:
> > > (XEN)  L4[0x105] = 000000007f26e063 ffffffffffffffff
> > > (XEN)  L3[0x110] = 000000005ce30063 ffffffffffffffff
> > > (XEN)  L2[0x035] = 0000000014aab063 ffffffffffffffff
> > > (XEN)  L1[0x000] = 0000000000000000 ffffffffffffffff
> >
> > Tim,
> >
> > I'm afraid this is something for you. From what I can tell, despite
> > sh_walk_guest_tables() being called from sh_page_fault() without
> > the paging lock held, there doesn't appear to be a way for this to
> > race sh_update_cr3(). And with the way the latter updates
> > guest_vtable, the only way for a page fault to happen upon use
> > of that cached mapping would be between the call to
> > sh_unmap_domain_page_global() and the immediately following
> > one to sh_map_domain_page_global() (i.e. while the pointer is
> > stale).
> 
> Hmmm.  So the only way I can see that happening is if some foreign agent
> resets the vcpu's state while it's actually running, which AFAICT
> shouldn't happen.  I looked at reversing the unmap and map, but realised
> that won't solve the problem -- in such a race sh_walk_guest_tables()
> could cache the old value.
> 
> For testing, here's a patch that gets rid of guest_vtable altogether.
> I'm not entirely happy with it, partly because it will have a perf hit
> on large-RAM machines, and partly because I'm worried that whatever path
> caused this crash might cause other problems.  But it would be good to
> confirm that it stops the crashes.
> 

This fixed the crash too, but I think you will use the other fix. :)

> Cheers,
> 
> Tim.
> 
> commit 5469971c30eca85f270cbd9bc46f63ec0fd543c0
> Author: Tim Deegan <tim@xxxxxxx>
> Date:   Thu Apr 4 11:16:28 2013 +0100
> 
>     x86/mm/shadow: Don't keep a permanent mapping of the top-level guest
> table.
> 
>     This addresses two issues.  First, a crash seen where
>     sh_walk_guest_tables() found that guest_vtable was not a valid
>     mapping.  Second, the lack of any error handling if
>     map_domain_page_global() were to fail.
> 
>     Signed-off-by: Tim Deegan <tim@xxxxxxx>
> 
> diff --git a/xen/arch/x86/mm/shadow/multi.c
> b/xen/arch/x86/mm/shadow/multi.c
> index a593f76..8ca4b9b 100644
> --- a/xen/arch/x86/mm/shadow/multi.c
> +++ b/xen/arch/x86/mm/shadow/multi.c
> @@ -174,15 +174,26 @@ static inline uint32_t
>  sh_walk_guest_tables(struct vcpu *v, unsigned long va, walk_t *gw,
>                       uint32_t pfec)
>  {
> -    return guest_walk_tables(v, p2m_get_hostp2m(v->domain), va, gw, pfec,
> +    uint32_t rc;
> +    mfn_t top_mfn;
> +    void *top_map;
> +
>  #if GUEST_PAGING_LEVELS == 3 /* PAE */
> -                             _mfn(INVALID_MFN),
> -                             v->arch.paging.shadow.gl3e
> +    top_mfn = _mfn(INVALID_MFN);
> +    top_map = v->arch.paging.shadow.gl3e;
>  #else /* 32 or 64 */
> -                             pagetable_get_mfn(v->arch.guest_table),
> -                             v->arch.paging.shadow.guest_vtable
> +    top_mfn = pagetable_get_mfn(v->arch.guest_table);
> +    top_map = sh_map_domain_page(top_mfn);
> +#endif
> +
> +    rc = guest_walk_tables(v, p2m_get_hostp2m(v->domain), va, gw, pfec,
> +                           top_mfn, top_map);
> +
> +#if GUEST_PAGING_LEVELS != 3
> +    sh_unmap_domain_page(top_map);
>  #endif
> -                             );
> +
> +    return rc;
>  }
> 
>  /* This validation is called with lock held, and after write permission
> @@ -219,24 +230,20 @@ shadow_check_gwalk(struct vcpu *v, unsigned long
> va, walk_t *gw, int version)
>       * logic simple.
>       */
>      perfc_incr(shadow_check_gwalk);
> -#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
> -#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
> -    l4p = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable;
> +#if GUEST_PAGING_LEVELS >= 4 /* 64-bit... */
> +    l4p = sh_map_domain_page(gw->l4mfn);
>      mismatch |= (gw->l4e.l4 != l4p[guest_l4_table_offset(va)].l4);
> +    sh_unmap_domain_page(l4p);
>      l3p = sh_map_domain_page(gw->l3mfn);
>      mismatch |= (gw->l3e.l3 != l3p[guest_l3_table_offset(va)].l3);
>      sh_unmap_domain_page(l3p);
> -#else
> +#elif GUEST_PAGING_LEVELS >= 3 /* PAE... */
>      mismatch |= (gw->l3e.l3 !=
> 
> v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)].l3);
>  #endif
>      l2p = sh_map_domain_page(gw->l2mfn);
>      mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
>      sh_unmap_domain_page(l2p);
> -#else
> -    l2p = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable;
> -    mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
> -#endif
>      if ( !(guest_supports_superpages(v) &&
>             (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
>      {
> @@ -3784,7 +3791,7 @@ sh_update_linear_entries(struct vcpu *v)
>  }
> 
> 
> -/* Removes vcpu->arch.paging.shadow.guest_vtable and
> vcpu->arch.shadow_table[].
> +/* Removes vcpu->arch.shadow_table[].
>   * Does all appropriate management/bookkeeping/refcounting/etc...
>   */
>  static void
> @@ -3794,24 +3801,6 @@ sh_detach_old_tables(struct vcpu *v)
>      int i = 0;
> 
>      ////
> -    //// vcpu->arch.paging.shadow.guest_vtable
> -    ////
> -
> -#if GUEST_PAGING_LEVELS == 3
> -    /* PAE guests don't have a mapping of the guest top-level table */
> -    ASSERT(v->arch.paging.shadow.guest_vtable == NULL);
> -#else
> -    if ( v->arch.paging.shadow.guest_vtable )
> -    {
> -        struct domain *d = v->domain;
> -        if ( shadow_mode_external(d) || shadow_mode_translate(d) )
> -
> sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
> -        v->arch.paging.shadow.guest_vtable = NULL;
> -    }
> -#endif // !NDEBUG
> -
> -
> -    ////
>      //// vcpu->arch.shadow_table[]
>      ////
> 
> @@ -3970,26 +3959,12 @@ sh_update_cr3(struct vcpu *v, int do_locking)
> 
> 
>      ////
> -    //// vcpu->arch.paging.shadow.guest_vtable
> +    //// vcpu->arch.paging.shadow.gl3e[]
>      ////
> -#if GUEST_PAGING_LEVELS == 4
> -    if ( shadow_mode_external(d) || shadow_mode_translate(d) )
> -    {
> -        if ( v->arch.paging.shadow.guest_vtable )
> -
> sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
> -        v->arch.paging.shadow.guest_vtable =
> sh_map_domain_page_global(gmfn);
> -        /* PAGING_LEVELS==4 implies 64-bit, which means that
> -         * map_domain_page_global can't fail */
> -        BUG_ON(v->arch.paging.shadow.guest_vtable == NULL);
> -    }
> -    else
> -        v->arch.paging.shadow.guest_vtable = __linear_l4_table;
> -#elif GUEST_PAGING_LEVELS == 3
> +#if GUEST_PAGING_LEVELS == 3
>       /* On PAE guests we don't use a mapping of the guest's own top-level
>        * table.  We cache the current state of that table and shadow that,
>        * until the next CR3 write makes us refresh our cache. */
> -     ASSERT(v->arch.paging.shadow.guest_vtable == NULL);
> -
>       if ( shadow_mode_external(d) )
>           /* Find where in the page the l3 table is */
>           guest_idx = guest_index((void *)v->arch.hvm_vcpu.guest_cr[3]);
> @@ -4005,20 +3980,6 @@ sh_update_cr3(struct vcpu *v, int do_locking)
>       for ( i = 0; i < 4 ; i++ )
>           v->arch.paging.shadow.gl3e[i] = gl3e[i];
>       sh_unmap_domain_page(gl3e);
> -#elif GUEST_PAGING_LEVELS == 2
> -    if ( shadow_mode_external(d) || shadow_mode_translate(d) )
> -    {
> -        if ( v->arch.paging.shadow.guest_vtable )
> -
> sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
> -        v->arch.paging.shadow.guest_vtable =
> sh_map_domain_page_global(gmfn);
> -        /* Does this really need map_domain_page_global?  Handle the
> -         * error properly if so. */
> -        BUG_ON(v->arch.paging.shadow.guest_vtable == NULL); /* XXX */
> -    }
> -    else
> -        v->arch.paging.shadow.guest_vtable = __linear_l2_table;
> -#else
> -#error this should never happen
>  #endif
> 
> 
> diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
> index 6f9744a..4921c99 100644
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -122,8 +122,6 @@ struct shadow_vcpu {
>      l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
>      /* PAE guests: per-vcpu cache of the top-level *guest* entries */
>      l3_pgentry_t gl3e[4] __attribute__((__aligned__(32)));
> -    /* Non-PAE guests: pointer to guest top-level pagetable */
> -    void *guest_vtable;
>      /* Last MFN that we emulated a write to as unshadow heuristics. */
>      unsigned long last_emulated_mfn_for_unshadow;
>      /* MFN of the last shadow that we shot a writeable mapping in */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.