[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [Patch 7] Xen/MCE: Abort live migration when vMCE occur



>>> On 27.07.12 at 17:24, "Liu, Jinsong" <jinsong.liu@xxxxxxxxx> wrote:
> Xen/MCE: Abort live migration when vMCE occur
> 
> This patch monitors the critical area of live migration (from the vMCE point 
> of view,
> the copypages stage of migration is the critical area while other areas are 
> not).
> 
> If a vMCE occurs in the critical area of live migration, abort and retry the 
> migration later.
> 
> Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
> 
> diff -r 8869ba37b577 tools/libxc/xc_domain.c
> --- a/tools/libxc/xc_domain.c Thu Jul 19 22:14:08 2012 +0800
> +++ b/tools/libxc/xc_domain.c Thu Jul 26 22:52:09 2012 +0800
> @@ -283,6 +283,37 @@
>      return ret;
>  }
>  
> +/* Start vmce monitor */
> +int xc_domain_vmce_monitor_strat(xc_interface *xch,
> +                                 uint32_t domid)
> +{
> +    int ret;
> +    DECLARE_DOMCTL;
> +
> +    domctl.cmd = XEN_DOMCTL_vmce_monitor_start;
> +    domctl.domain = (domid_t)domid;
> +    ret = do_domctl(xch, &domctl);
> +
> +    return ret ? -1 : 0;
> +}
> +
> +/* End vmce monitor */
> +int xc_domain_vmce_monitor_end(xc_interface *xch,
> +                               uint32_t domid,
> +                               int *vmce_while_migrate)
> +{
> +    int ret;
> +    DECLARE_DOMCTL;
> +
> +    domctl.cmd = XEN_DOMCTL_vmce_monitor_end;
> +    domctl.domain = (domid_t)domid;
> +    ret = do_domctl(xch, &domctl);
> +    if ( !ret )
> +        *vmce_while_migrate = domctl.u.vmce_monitor.vmce_while_migrate;
> +
> +    return ret ? -1 : 0;
> +}
> +
>  /* get info from hvm guest for save */
>  int xc_domain_hvm_getcontext(xc_interface *xch,
>                               uint32_t domid,
> diff -r 8869ba37b577 tools/libxc/xc_domain_save.c
> --- a/tools/libxc/xc_domain_save.c    Thu Jul 19 22:14:08 2012 +0800
> +++ b/tools/libxc/xc_domain_save.c    Thu Jul 26 22:52:09 2012 +0800
> @@ -895,6 +895,8 @@
>       */
>      int compressing = 0;
>  
> +    int vmce_while_migrate = 0;
> +
>      int completed = 0;
>  
>      if ( hvm && !callbacks->switch_qemu_logdirty )
> @@ -1109,6 +1111,12 @@
>          goto out;
>      }
>  
> +    if ( xc_domain_vmce_monitor_strat(xch, dom) )
> +    {
> +        PERROR("Error when start vmce monitor\n");
> +        goto out;
> +    }
> +
>    copypages:
>  #define wrexact(fd, buf, len) write_buffer(xch, last_iter, ob, (fd), (buf), 
> (len))
>  #define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, ob, 
> (fd), (buf), (len))
> @@ -1571,6 +1579,17 @@
>  
>      DPRINTF("All memory is saved\n");
>  
> +    if ( xc_domain_vmce_monitor_end(xch, dom, &vmce_while_migrate) )
> +    {
> +        PERROR("Error when end vmce monitor\n");
> +        goto out;
> +    }
> +    else if ( vmce_while_migrate )
> +    {
> +        fprintf(stderr, "vMCE occurred, abort this time and try later.\n");
> +        goto out;
> +    }
> +
>      /* After last_iter, buffer the rest of pagebuf & tailbuf data into a
>       * separate output buffer and flush it after the compressed page 
> chunks.
>       */
> diff -r 8869ba37b577 tools/libxc/xenctrl.h
> --- a/tools/libxc/xenctrl.h   Thu Jul 19 22:14:08 2012 +0800
> +++ b/tools/libxc/xenctrl.h   Thu Jul 26 22:52:09 2012 +0800
> @@ -568,6 +568,26 @@
>                            xc_domaininfo_t *info);
>  
>  /**
> + * This function start monitor vmce event.
> + * @parm xch a handle to an open hypervisor interface
> + * @parm domid the domain id monitored
> + * @return 0 on success, -1 on failure
> + */
> +int xc_domain_vmce_monitor_strat(xc_interface *xch,
> +                                 uint32_t domid);
> +
> +/**
> + * This function end monitor vmce event
> + * @parm xch a handle to an open hypervisor interface
> + * @parm domid the domain id monitored
> + * @parm vmce_while_migrate a pointer return whether vMCE occur when 
> migrate 
> + * @return 0 on success, -1 on failure
> + */
> +int xc_domain_vmce_monitor_end(xc_interface *xch,
> +                               uint32_t domid,
> +                               int *vmce_while_migrate);
> +
> +/**
>   * This function returns information about the context of a hvm domain
>   * @parm xch a handle to an open hypervisor interface
>   * @parm domid the domain to get information from
> diff -r 8869ba37b577 xen/arch/x86/cpu/mcheck/mce_intel.c
> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c     Thu Jul 19 22:14:08 2012 +0800
> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c     Thu Jul 26 22:52:09 2012 +0800
> @@ -688,6 +688,12 @@
>                      goto vmce_failed;
>                  }
>  
> +                if ( unlikely(d->arch.vmce_monitor) )
> +                {
> +                    /* vMCE occur when guest migration */
> +                    d->arch.vmce_while_migrate = 1;
> +                }
> +
>                  /* We will inject vMCE to DOMU*/
>                  if ( inject_vmce(d) < 0 )
>                  {
> diff -r 8869ba37b577 xen/arch/x86/domctl.c
> --- a/xen/arch/x86/domctl.c   Thu Jul 19 22:14:08 2012 +0800
> +++ b/xen/arch/x86/domctl.c   Thu Jul 26 22:52:09 2012 +0800
> @@ -1517,6 +1517,41 @@
>      }
>      break;
>  
> +    case XEN_DOMCTL_vmce_monitor_start:
> +    {
> +        struct domain *d;
> +
> +        d = rcu_lock_domain_by_id(domctl->domain);
> +        if ( d != NULL )
> +        {
> +            d->arch.vmce_while_migrate = 0;
> +            d->arch.vmce_monitor = 1;
> +            rcu_unlock_domain(d);
> +        }
> +        else
> +            ret = -ESRCH;
> +    }
> +    break;
> +
> +    case XEN_DOMCTL_vmce_monitor_end:
> +    {
> +        struct domain *d;
> +
> +        d = rcu_lock_domain_by_id(domctl->domain);
> +        if ( d != NULL)
> +        {
> +            d->arch.vmce_monitor = 0;
> +            domctl->u.vmce_monitor.vmce_while_migrate =
> +                                      d->arch.vmce_while_migrate;
> +            rcu_unlock_domain(d);
> +            if ( copy_to_guest(u_domctl, domctl, 1) )
> +                ret = -EFAULT;
> +        }
> +        else
> +            ret = -ESRCH;
> +    }
> +    break;
> +
>      default:
>          ret = iommu_do_domctl(domctl, u_domctl);
>          break;
> diff -r 8869ba37b577 xen/include/asm-x86/domain.h
> --- a/xen/include/asm-x86/domain.h    Thu Jul 19 22:14:08 2012 +0800
> +++ b/xen/include/asm-x86/domain.h    Thu Jul 26 22:52:09 2012 +0800
> @@ -292,6 +292,10 @@
>      bool_t has_32bit_shinfo;
>      /* Domain cannot handle spurious page faults? */
>      bool_t suppress_spurious_page_faults;
> +    /* Monitoring guest memory copy of migration */
> +    bool_t vmce_monitor;
> +    /* Whether vMCE occur during guest memory copy of migration */
> +    bool_t vmce_while_migrate;

Let's name this what it is - e.g. vmce_while_monitor - at the
hypervisor level this has nothing to do with migration (it's only
the tools that want to use it for this purpose).

Given the constant growth of struct domain, I also wonder
whether a single s8 variable wouldn't suffice: e.g. 0 - not
monitoring, > 0 - monitoring, < 0 - vMCE occurred while
monitoring.

Jan

>  
>      /* Continuable domain_relinquish_resources(). */
>      enum {
> diff -r 8869ba37b577 xen/include/public/domctl.h
> --- a/xen/include/public/domctl.h     Thu Jul 19 22:14:08 2012 +0800
> +++ b/xen/include/public/domctl.h     Thu Jul 26 22:52:09 2012 +0800
> @@ -850,6 +850,12 @@
>  typedef struct xen_domctl_set_access_required 
> xen_domctl_set_access_required_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);
>  
> +struct xen_domctl_vmce_monitor {
> +    uint8_t vmce_while_migrate;
> +};
> +typedef struct xen_domctl_vmce_monitor xen_domctl_vmce_monitor_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmce_monitor_t);
> +
>  struct xen_domctl {
>      uint32_t cmd;
>  #define XEN_DOMCTL_createdomain                   1
> @@ -915,6 +921,8 @@
>  #define XEN_DOMCTL_set_access_required           64
>  #define XEN_DOMCTL_audit_p2m                     65
>  #define XEN_DOMCTL_set_virq_handler              66
> +#define XEN_DOMCTL_vmce_monitor_start            67
> +#define XEN_DOMCTL_vmce_monitor_end              68
>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -970,6 +978,7 @@
>          struct xen_domctl_set_access_required access_required;
>          struct xen_domctl_audit_p2m         audit_p2m;
>          struct xen_domctl_set_virq_handler  set_virq_handler;
> +        struct xen_domctl_vmce_monitor      vmce_monitor;
>          struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
>          struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.