[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH V2] X86/vMCE: handle broken page with regard to migration



Ian Campbell wrote:
> On Thu, 2012-11-15 at 01:52 +0000, Liu Jinsong wrote:
>> Generally, there are 2 cases:
>> 1. broken page occurs before migration
>> 2. broken page occurs during migration
>> 
>> At the sender
>>   For case 1, the broken page would be mapped but not copied to
>> target 
> 
> Do you mean "will be mapped"? "would be" is past tense which suggests
> it was the behaviour before this patch, which I don't think is what
> you meant (here and elsewhere in the commit message).

Yes.

> 
>>   (otherwise it may trigger more serious error, say, SRAR error).
>>   While its pfn_type and pfn number would be transferred to target
>>   so that target take appropriate action.
>> 
>>   For case 2, mce handler marks the broken page to dirty bitmap, so
>>   that at copypages stage of migration, its pfn_type and pfn number
>>   would be transferred to target and then take appropriate action.
> 
> What happens if the page is marked broken during the final pass?
> 

Have updated the patch to handle this case, by doing one more iteration so that 
the broken page's pfn_type and pfn number got chance to be transferred to the 
target. Will send out soon.

>> At the target
>>   When migration target would populate pages for guest. As for the
>>   case of broken page wrt migration, we prefer keep the
>>   corresponding page, for the sake of seamless migration.
> 
> I don't understanding this paragraph, what do you mean by "keep the
> corresponding page"? My understanding is the patch explicitly doesn't
> keep the page, it marks it as broken.
> 
> Did you perhaps mean "... we prefer to keep the type of the page for
> the sake..." ?
> 

Yes.

Thanks,
Jinsong

>>   At target it would set p2m as p2m_ram_broken for broken page.
>>   Guest MCE may have good chance to handle its broken page, while if
>>   guest access the broken page again it would kill itself as
>> expected. 
>> 
>> Suggested-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
>> Signed-off-by: Liu Jinsong <jinsong.liu@xxxxxxxxx>
>> Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
>> Acked-by: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
>> Acked-by: Jan Beulich <JBeulich@xxxxxxxx>
>> 
>> diff -r 8b93ac0c93f3 -r da7faf53790e tools/libxc/xc_domain.c
>> --- a/tools/libxc/xc_domain.c        Tue Nov 13 11:19:17 2012 +0000
>> +++ b/tools/libxc/xc_domain.c        Thu Nov 15 09:42:24 2012 +0800 @@
>>      -283,6 +283,22 @@ return ret;
>>  }
>> 
>> +/* set broken page p2m */
>> +int xc_set_broken_page_p2m(xc_interface *xch,
>> +                           uint32_t domid,
>> +                           unsigned long pfn)
>> +{
>> +    int ret;
>> +    DECLARE_DOMCTL;
>> +
>> +    domctl.cmd = XEN_DOMCTL_set_broken_page_p2m;
>> +    domctl.domain = (domid_t)domid;
>> +    domctl.u.set_broken_page_p2m.pfn = pfn;
>> +    ret = do_domctl(xch, &domctl);
>> +
>> +    return ret ? -1 : 0;
>> +}
>> +
>>  /* get info from hvm guest for save */
>>  int xc_domain_hvm_getcontext(xc_interface *xch,
>>                               uint32_t domid,
>> diff -r 8b93ac0c93f3 -r da7faf53790e tools/libxc/xc_domain_restore.c
>> --- a/tools/libxc/xc_domain_restore.c        Tue Nov 13 11:19:17 2012 +0000
>> +++ b/tools/libxc/xc_domain_restore.c        Thu Nov 15 09:42:24 2012 +0800
>> @@ -1023,9 +1023,15 @@ 
>> 
>>      countpages = count;
>>      for (i = oldcount; i < buf->nr_pages; ++i)
>> -        if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
>> XEN_DOMCTL_PFINFO_XTAB 
>> -            ||(buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
>> XEN_DOMCTL_PFINFO_XALLOC) +    { +        unsigned long pagetype;
>> +
>> +        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
>> +        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
>> +             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
>> +             pagetype == XEN_DOMCTL_PFINFO_XALLOC )             
>> --countpages; +    }
>> 
>>      if (!countpages)
>>          return count;
>> @@ -1267,6 +1273,17 @@
>>              /* a bogus/unmapped/allocate-only page: skip it */     
>> continue; 
>> 
>> +        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN ) +        {
>> +            if ( xc_set_broken_page_p2m(xch, dom, pfn) ) +         
>> { +                ERROR("Set p2m for broken page failed, "
>> +                      "dom=%d, pfn=%lx\n", dom, pfn);
>> +                goto err_mapped;
>> +            }
>> +            continue;
>> +        }
>> +
>>          if (pfn_err[i])
>>          {
>>              ERROR("unexpected PFN mapping failure pfn %lx map_mfn
>> %lx p2m_mfn %lx", 
>> diff -r 8b93ac0c93f3 -r da7faf53790e tools/libxc/xc_domain_save.c
>> --- a/tools/libxc/xc_domain_save.c   Tue Nov 13 11:19:17 2012 +0000
>> +++ b/tools/libxc/xc_domain_save.c   Thu Nov 15 09:42:24 2012 +0800 @@
>>                  -1277,6 +1277,13 @@ if ( !hvm )
>>                      gmfn = pfn_to_mfn(gmfn);
>> 
>> +                if ( pfn_type[j] == XEN_DOMCTL_PFINFO_BROKEN ) +   
>> { +                    pfn_type[j] |= pfn_batch[j];
>> +                    ++run;
>> +                    continue;
>> +                }
>> +
>>                  if ( pfn_err[j] )
>>                  {
>>                      if ( pfn_type[j] == XEN_DOMCTL_PFINFO_XTAB ) @@
>>                      -1371,8 +1378,12 @@ }
>>                  }
>> 
>> -                /* skip pages that aren't present or are alloc-only
>> */ +                /* +                 * skip pages that aren't
>> present, +                 * or are broken, or are alloc-only +     
>>                  */ if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
>> +                    || pagetype == XEN_DOMCTL_PFINFO_BROKEN
>>                      || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
>>                      continue;
>> 
>> diff -r 8b93ac0c93f3 -r da7faf53790e tools/libxc/xenctrl.h
>> --- a/tools/libxc/xenctrl.h  Tue Nov 13 11:19:17 2012 +0000
>> +++ b/tools/libxc/xenctrl.h  Thu Nov 15 09:42:24 2012 +0800 @@ -575,6
>>                            +575,17 @@ xc_domaininfo_t *info);
>> 
>>  /**
>> + * This function set p2m for broken page
>> + * &parm xch a handle to an open hypervisor interface
>> + * @parm domid the domain id which broken page belong to
>> + * @parm pfn the pfn number of the broken page
>> + * @return 0 on success, -1 on failure
>> + */
>> +int xc_set_broken_page_p2m(xc_interface *xch,
>> +                           uint32_t domid,
>> +                           unsigned long pfn);
>> +
>> +/**
>>   * This function returns information about the context of a hvm
>> domain 
>>   * @parm xch a handle to an open hypervisor interface
>>   * @parm domid the domain to get information from
>> diff -r 8b93ac0c93f3 -r da7faf53790e
>> xen/arch/x86/cpu/mcheck/mcaction.c ---
>> a/xen/arch/x86/cpu/mcheck/mcaction.c Tue Nov 13 11:19:17 2012 +0000
>>  +++ b/xen/arch/x86/cpu/mcheck/mcaction.c    Thu Nov 15 09:42:24 2012
>>  +0800 @@ -1,5 +1,6 @@ #include <xen/types.h> #include <xen/sched.h>
>> +#include <asm/p2m.h>
>>  #include "mcaction.h"
>>  #include "vmce.h"
>>  #include "mce.h"
>> @@ -91,6 +92,24 @@
>>                      goto vmce_failed;
>>                  }
>> 
>> +                if ( is_hvm_domain(d) &&
>> !d->arch.hvm_domain.dirty_vram && +                    
>> paging_mode_log_dirty(d) ) +                {
>> +                    /*
>> +                     * vMCE occur during migration +               
>> * +                     *   At sender, it marks broken page to dirty
>> bitmap, +                     *   so that at copypages stage of
>> migration, broken +                     *   page's pfn_type and pfn
>> number would be transferred +                     *   to target and
>> then take appropriate action. +                     * +             
>> *   At target, it would set p2m as p2m_ram_broken for +             
>> *   broken page, so that if guest access the broken page +          
>> *   again, it would kill itself as expected. +                    
>> */ +                    paging_mark_dirty(d, mfn); +                }
>> +
>>                  if ( unmmap_broken_page(d, _mfn(mfn), gfn) )       
>>                      { printk("Unmap broken memory %lx for DOM%d
>> failed\n", 
>> diff -r 8b93ac0c93f3 -r da7faf53790e xen/arch/x86/domctl.c
>> --- a/xen/arch/x86/domctl.c  Tue Nov 13 11:19:17 2012 +0000
>> +++ b/xen/arch/x86/domctl.c  Thu Nov 15 09:42:24 2012 +0800 @@
>>                  -209,12 +209,18 @@ for ( j = 0; j < k; j++ )
>>                  {
>>                      unsigned long type = 0;
>> +                    p2m_type_t t;
>> 
>> -                    page = get_page_from_gfn(d, arr[j], NULL,
>> P2M_ALLOC); +                    page = get_page_from_gfn(d, arr[j],
>> &t, P2M_ALLOC); 
>> 
>>                      if ( unlikely(!page) ||
>>                           unlikely(is_xen_heap_page(page)) )
>> -                        type = XEN_DOMCTL_PFINFO_XTAB; +           
>> { +                        if ( p2m_is_broken(t) )
>> +                            type = XEN_DOMCTL_PFINFO_BROKEN; +     
>> else +                            type = XEN_DOMCTL_PFINFO_XTAB; +  
>>                      } else
>>                      {
>>                          switch( page->u.inuse.type_info &
>> PGT_type_mask ) @@ -235,6 +241,9 @@ 
>> 
>>                          if ( page->u.inuse.type_info & PGT_pinned )
>>                              type |= XEN_DOMCTL_PFINFO_LPINTAB; +
>> +                        if ( page->count_info & PGC_broken )
>> +                            type = XEN_DOMCTL_PFINFO_BROKEN;       
>> } 
>> 
>>                      if ( page )
>> @@ -1568,6 +1577,29 @@
>>      }
>>      break;
>> 
>> +    case XEN_DOMCTL_set_broken_page_p2m:
>> +    {
>> +        struct domain *d;
>> +
>> +        d = rcu_lock_domain_by_id(domctl->domain); +        if ( d
>> != NULL ) +        {
>> +            p2m_type_t pt;
>> +            unsigned long pfn = domctl->u.set_broken_page_p2m.pfn;
>> +            mfn_t mfn = get_gfn_query(d, pfn, &pt); +
>> +            if ( unlikely(!mfn_valid(mfn_x(mfn)) || !p2m_is_ram(pt)
>> || +                         (p2m_change_type(d, pfn, pt,
>> p2m_ram_broken) != pt)) ) +                ret = -EINVAL;
>> +
>> +            put_gfn(d, pfn);
>> +            rcu_unlock_domain(d);
>> +        }
>> +        else
>> +            ret = -ESRCH;
>> +    }
>> +    break;
>> +
>>      default:
>>          ret = iommu_do_domctl(domctl, u_domctl);
>>          break;
>> diff -r 8b93ac0c93f3 -r da7faf53790e xen/include/public/domctl.h
>> --- a/xen/include/public/domctl.h    Tue Nov 13 11:19:17 2012 +0000
>> +++ b/xen/include/public/domctl.h    Thu Nov 15 09:42:24 2012 +0800 @@
>>  -136,6 +136,7 @@ #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
>>  #define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
>>  #define XEN_DOMCTL_PFINFO_XALLOC  (0xeU<<28) /* allocate-only page
>> */ +#define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
>>  #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
>> 
>>  struct xen_domctl_getpageframeinfo {
>> @@ -834,6 +835,12 @@
>>  typedef struct xen_domctl_set_access_required
>>  xen_domctl_set_access_required_t;
>> DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t); 
>> 
>> +struct xen_domctl_set_broken_page_p2m {
>> +    uint64_aligned_t pfn;
>> +};
>> +typedef struct xen_domctl_set_broken_page_p2m
>> xen_domctl_set_broken_page_p2m_t;
>>  +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t); +
>>      struct xen_domctl { uint32_t cmd;
>>  #define XEN_DOMCTL_createdomain                   1 @@ -899,6
>>  +906,7 @@ #define XEN_DOMCTL_set_access_required           64
>>  #define XEN_DOMCTL_audit_p2m                     65
>>  #define XEN_DOMCTL_set_virq_handler              66
>> +#define XEN_DOMCTL_set_broken_page_p2m           67
>>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002 @@ -954,6
>>          +962,7 @@ struct xen_domctl_audit_p2m         audit_p2m;
>>          struct xen_domctl_set_virq_handler  set_virq_handler;
>>          struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
>> +        struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
>>          struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
>>          uint8_t                             pad[128];


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.