[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86 mce: Clean Intel's MCE handler code
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1276154326 -3600 # Node ID 704bcd622dc2ee4acb799d1bbd08ca1b28af0552 # Parent 2d2812de6792e51c722e51baf6b16e4b776f41b3 x86 mce: Clean Intel's MCE handler code Add intel_mce_type check according to Intel's SDM. Reduce intel_memerr_dhandler()'s indentation to make the code easier to read. Also add a page_off action when we offline the page, so that dom0 knows about the action taken by the Xen hypervisor. Add a default delayed MCE handler, which will crash on an unknown SRAR error or a fatal error; otherwise, the system continues. Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx> --- xen/arch/x86/cpu/mcheck/mce_intel.c | 239 +++++++++++++++++++++++++----------- xen/arch/x86/cpu/mcheck/x86_mca.h | 3 2 files changed, 174 insertions(+), 68 deletions(-) diff -r 2d2812de6792 -r 704bcd622dc2 xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Jun 10 08:18:11 2010 +0100 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Jun 10 08:18:46 2010 +0100 @@ -501,10 +501,76 @@ intel_get_extended_msrs(struct mcinfo_gl return mc_ext; } -#define INTEL_MAX_RECOVERY 2 +enum intel_mce_type +{ + intel_mce_invalid, + intel_mce_fatal, + intel_mce_corrected, + intel_mce_ucr_ucna, + intel_mce_ucr_srao, + intel_mce_ucr_srar, +}; + +static enum intel_mce_type intel_check_mce_type(uint64_t status) +{ + if (!(status & MCi_STATUS_VAL)) + return intel_mce_invalid; + + if (status & MCi_STATUS_PCC) + return intel_mce_fatal; + + /* Corrected error? */ + if (!(status & MCi_STATUS_UC)) + return intel_mce_corrected; + + if (!ser_support) + return intel_mce_fatal; + + if (status & MCi_STATUS_S) + { + if (status & MCi_STATUS_AR) + { + if (status & MCi_STATUS_OVER) + return intel_mce_fatal; + else + return intel_mce_ucr_srar; + } else + return intel_mce_ucr_srao; + } + else + return intel_mce_ucr_ucna; + + /* Any type not included abovoe ? 
*/ + return intel_mce_fatal; +} + static int is_async_memerr(uint64_t status) { return (status & 0xFFFF) == 0x17A || (status & 0xFFF0) == 0xC0; +} + +struct mcinfo_recovery *mci_add_pageoff_action(int bank, struct mc_info *mi, + uint64_t mfn, uint32_t status) +{ + struct mcinfo_recovery *rec; + + if (!mi) + return NULL; + + rec = x86_mcinfo_reserve(mi, sizeof(struct mcinfo_recovery)); + if (!rec) + { + mi->flags |= MCINFO_FLAGS_UNCOMPLETE; + return NULL; + } + + memset(rec, 0, sizeof(struct mcinfo_recovery)); + + rec->mc_bank = bank; + rec->action_types = MC_ACTION_PAGE_OFFLINE; + rec->action_info.page_retire.mfn = mfn; + rec->action_info.page_retire.status = status; + return rec; } static void intel_memerr_dhandler(int bnum, @@ -516,79 +582,116 @@ static void intel_memerr_dhandler(int bn struct domain *d; unsigned long mfn, gfn; uint32_t status; + uint64_t mc_status, mc_misc; mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); result->result = MCA_NEED_RESET; - if (bank->mc_addr != 0) { - mfn = bank->mc_addr >> PAGE_SHIFT; - if (!offline_page(mfn, 1, &status)) { - /* This is free page */ - if (status & PG_OFFLINE_OFFLINED) - result->result = MCA_RECOVERED; - else if (status & PG_OFFLINE_PENDING) { - /* This page has owner */ - if (status & PG_OFFLINE_OWNED) { - result->result |= MCA_OWNER; - result->owner = status >> PG_OFFLINE_OWNER_SHIFT; - mce_printk(MCE_QUIET, "MCE: This error page is ownded" - " by DOM %d\n", result->owner); - /* Fill vMCE# injection and vMCE# MSR virtualization " - * "related data */ - bank->mc_domid = result->owner; - /* XXX: Cannot handle shared pages yet - * (this should identify all domains and gfn mapping to - * the mfn in question) */ - BUG_ON( result->owner == DOMID_COW ); - if ( result->owner != DOMID_XEN ) { - - d = get_domain_by_id(result->owner); - if ( mca_ctl_conflict(bank, d) ) - { - /* Guest has different MCE ctl with hypervisor */ - if ( d ) - put_domain(d); - return; - } - - ASSERT(d); - gfn = - 
get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT); - bank->mc_addr = gfn << PAGE_SHIFT | - (bank->mc_addr & (PAGE_SIZE -1 )); - if ( fill_vmsr_data(bank, d, - global->mc_gstatus) == -1 ) - { - mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d " - "failed\n", result->owner); - put_domain(d); - domain_crash(d); - return; - } - /* We will inject vMCE to DOMU*/ - if ( inject_vmce(d) < 0 ) - { - mce_printk(MCE_QUIET, "inject vMCE to DOM%d" - " failed\n", d->domain_id); - put_domain(d); - domain_crash(d); - return; - } - /* Impacted domain go on with domain's recovery job - * if the domain has its own MCA handler. - * For xen, it has contained the error and finished - * its own recovery job. - */ - result->result = MCA_RECOVERED; - put_domain(d); - } - } - } - } - } + + mc_status = bank->mc_status; + mc_misc = bank->mc_misc; + if (!(mc_status & MCi_STATUS_ADDRV) || + !(mc_status & MCi_STATUS_MISCV) || + ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) + { + result->result |= MCA_NO_ACTION; + dprintk(XENLOG_WARNING, + "No physical address provided for memory error\n"); + return; + } + + mfn = bank->mc_addr >> PAGE_SHIFT; + if (offline_page(mfn, 1, &status)) + { + dprintk(XENLOG_WARNING, + "Failed to offline page %lx for MCE error\n", mfn); + return; + } + + mci_add_pageoff_action(bnum, binfo->mi, mfn, status); + + /* This is free page */ + if (status & PG_OFFLINE_OFFLINED) + result->result = MCA_RECOVERED; + else if (status & PG_OFFLINE_PENDING) { + /* This page has owner */ + if (status & PG_OFFLINE_OWNED) { + result->result |= MCA_OWNER; + result->owner = status >> PG_OFFLINE_OWNER_SHIFT; + mce_printk(MCE_QUIET, "MCE: This error page is ownded" + " by DOM %d\n", result->owner); + /* Fill vMCE# injection and vMCE# MSR virtualization " + * "related data */ + bank->mc_domid = result->owner; + /* XXX: Cannot handle shared pages yet + * (this should identify all domains and gfn mapping to + * the mfn in question) */ + BUG_ON( result->owner == DOMID_COW ); + if ( 
result->owner != DOMID_XEN ) { + d = get_domain_by_id(result->owner); + if ( mca_ctl_conflict(bank, d) ) + { + /* Guest has different MCE ctl with hypervisor */ + if ( d ) + put_domain(d); + return; + } + + ASSERT(d); + gfn = get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT); + bank->mc_addr = gfn << PAGE_SHIFT | + (bank->mc_addr & (PAGE_SIZE -1 )); + if ( fill_vmsr_data(bank, d, + global->mc_gstatus) == -1 ) + { + mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d " + "failed\n", result->owner); + put_domain(d); + domain_crash(d); + return; + } + /* We will inject vMCE to DOMU*/ + if ( inject_vmce(d) < 0 ) + { + mce_printk(MCE_QUIET, "inject vMCE to DOM%d" + " failed\n", d->domain_id); + put_domain(d); + domain_crash(d); + return; + } + /* Impacted domain go on with domain's recovery job + * if the domain has its own MCA handler. + * For xen, it has contained the error and finished + * its own recovery job. + */ + result->result = MCA_RECOVERED; + put_domain(d); + } + } + } +} + +static int default_check(uint64_t status) +{ + return 1; +} + +static void intel_default_dhandler(int bnum, + struct mca_binfo *binfo, + struct mca_handle_result *result) +{ + uint64_t status = binfo->mib->mc_status; + enum intel_mce_type type; + + type = intel_check_mce_type(status); + + if (type == intel_mce_fatal || type == intel_mce_ucr_srar) + result->result = MCA_RESET; + else if (type == intel_mce_ucr_srao) + result->result = MCA_NO_ACTION; } struct mca_error_handler intel_mce_dhandlers[] = - {{is_async_memerr, intel_memerr_dhandler}}; + {{is_async_memerr, intel_memerr_dhandler}, {default_check, intel_default_dhandler}}; static void intel_machine_check(struct cpu_user_regs * regs, long error_code) { diff -r 2d2812de6792 -r 704bcd622dc2 xen/arch/x86/cpu/mcheck/x86_mca.h --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Thu Jun 10 08:18:11 2010 +0100 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Thu Jun 10 08:18:46 2010 +0100 @@ -87,6 +87,9 @@ /*Intel Specific bitfield*/ #define CMCI_THRESHOLD 0x2 + 
+#define MCi_MISC_ADDRMOD_MASK (0x7UL << 6) +#define MCi_MISC_PHYSMOD (0x2UL << 6) #include <asm/domain.h> _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |