[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
I don't seem to have this one in my queue. You should resend. -- Keir On 13/05/2011 15:34, "Liu, Jinsong" <jinsong.liu@xxxxxxxxx> wrote: > Jan and Keir, > > Any comments? > > Thanks > Jinsong > > > Liu, Jinsong wrote: >> MCA: simplify mce actoin, and some update of mem err handler for >> future SRAR extension >> >> This patch simplify mce_action(). Basically the 2 set of mce status >> flags MCA_... and MCER_... are highly functional similar. This patch >> remove the redundant middle-level flag MCA_..., and its related data >> structure and function, so that mce_action() logic is more clean and >> simpler. >> This patch also update mem err handler. intel_memerr_dhandler() will >> be commonly used by SRAO and SRAR, so for the extension of future >> SRAR case, this patch remove the default fail flag from >> intel_memerr_dhandler() outside to SRAO/SRAR level, since only >> SRAO/SRAR handler itself has the knowledge of how to handle the >> failure when mem err handler fail. >> >> Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx> >> >> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c >> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011 >> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13 >> 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly >> mce_dh static unsigned int __read_mostly mce_dhandler_num; >> static unsigned int __read_mostly mce_uhandler_num; >> >> -enum mce_result >> -{ >> - MCER_NOERROR, >> - MCER_RECOVERED, >> - /* Not recoverd, but can continue */ >> - MCER_CONTINUE, >> - MCER_RESET, >> -}; >> - >> /* Maybe called in MCE context, no lock, no printk */ >> static enum mce_result mce_action(struct cpu_user_regs *regs, >> mctelem_cookie_t mctc) >> { >> struct mc_info *local_mi; >> - enum mce_result ret = MCER_NOERROR; >> + enum mce_result bank_result = MCER_NOERROR; >> + enum mce_result worst_result = MCER_NOERROR; >> struct mcinfo_common *mic = NULL; >> - struct mca_handle_result mca_res; >> struct mca_binfo binfo; >> const struct mca_error_handler *handlers = mce_dhandlers; >> unsigned int i, handler_num = mce_dhandler_num; >> @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct >> /* Processing bank information */ >> x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); >> >> - for ( ; ret != MCER_RESET && mic && mic->size; >> + for ( ; bank_result != MCER_RESET && mic && mic->size; >> mic = x86_mcinfo_next(mic) ) >> { >> if (mic->type != MC_TYPE_BANK) { >> @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct >> } >> binfo.mib = (struct mcinfo_bank*)mic; >> binfo.bank = binfo.mib->mc_bank; >> - memset(&mca_res, 0x0f, sizeof(mca_res)); >> + bank_result = MCER_NOERROR; >> for ( i = 0; i < handler_num; i++ ) { >> if (handlers[i].owned_error(binfo.mib->mc_status)) >> { >> - handlers[i].recovery_handler(&binfo, &mca_res); >> - >> - if (mca_res.result & MCA_OWNER) >> - binfo.mib->mc_domid = mca_res.owner; >> - >> - if (mca_res.result == MCA_NEED_RESET) >> - ret = MCER_RESET; >> - else if (mca_res.result == MCA_RECOVERED) >> - { >> - if (ret < MCER_RECOVERED) >> - ret = MCER_RECOVERED; >> - } >> - else if (mca_res.result == MCA_NO_ACTION) >> - { >> - if (ret < MCER_CONTINUE) >> - ret = MCER_CONTINUE; >> - } >> + handlers[i].recovery_handler(&binfo, &bank_result); >> + if (worst_result < bank_result) >> + worst_result = bank_result; >> break; >> } >> } >> ASSERT(i != handler_num); >> } >> >> - return ret; >> + return worst_result; >> } >> >> /* >> @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_ >> >> static void intel_memerr_dhandler( >> struct mca_binfo *binfo, >> - struct mca_handle_result *result) >> + enum mce_result *result) >> { >> struct mcinfo_bank *bank = binfo->mib; >> struct mcinfo_global *global = binfo->mig; >> @@ -618,7 +595,6 @@ static void intel_memerr_dhandler( >> uint64_t mc_status, mc_misc; >> >> mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); >> - result->result = MCA_NEED_RESET; >> >> mc_status = bank->mc_status; >> mc_misc = bank->mc_misc; >> @@ -626,7 +602,6 @@ static void intel_memerr_dhandler( >> !(mc_status & MCi_STATUS_MISCV) || >> ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) >> { >> - result->result |= MCA_NO_ACTION; >> dprintk(XENLOG_WARNING, >> "No physical address provided for memory error\n"); >> return; >> @@ -644,23 +619,19 @@ static void intel_memerr_dhandler( >> >> /* This is free page */ >> if (status & PG_OFFLINE_OFFLINED) >> - result->result = MCA_RECOVERED; >> + *result = MCER_RECOVERED; >> else if (status & PG_OFFLINE_PENDING) { >> /* This page has owner */ >> if (status & PG_OFFLINE_OWNED) { >> - result->result |= MCA_OWNER; >> - result->owner = status >> PG_OFFLINE_OWNER_SHIFT; >> + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT; >> mce_printk(MCE_QUIET, "MCE: This error page is ownded" >> - " by DOM %d\n", result->owner); >> - /* Fill vMCE# injection and vMCE# MSR virtualization " >> - * "related data */ >> - bank->mc_domid = result->owner; >> + " by DOM %d\n", bank->mc_domid); >> /* XXX: Cannot handle shared pages yet >> * (this should identify all domains and gfn mapping to >> * the mfn in question) */ >> - BUG_ON( result->owner == DOMID_COW ); >> - if ( result->owner != DOMID_XEN ) { >> - d = get_domain_by_id(result->owner); >> + BUG_ON( bank->mc_domid == DOMID_COW ); >> + if ( bank->mc_domid != DOMID_XEN ) { >> + d = get_domain_by_id(bank->mc_domid); >> ASSERT(d); >> gfn = get_gpfn_from_mfn((bank->mc_addr) >> >> PAGE_SHIFT); >> >> @@ -683,7 +654,7 @@ static void intel_memerr_dhandler( >> global->mc_gstatus) == -1 ) >> { >> mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d >> " - "failed\n", result->owner); >> + "failed\n", bank->mc_domid); >> goto vmce_failed; >> } >> >> @@ -699,7 +670,7 @@ static void intel_memerr_dhandler( >> * For xen, it has contained the error and finished >> * its own recovery job. >> */ >> - result->result = MCA_RECOVERED; >> + *result = MCER_RECOVERED; >> put_domain(d); >> >> return; >> @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta >> >> static void intel_srao_dhandler( >> struct mca_binfo *binfo, >> - struct mca_handle_result *result) >> + enum mce_result *result) >> { >> uint64_t status = binfo->mib->mc_status; >> >> /* For unknown srao error code, no action required */ >> + *result = MCER_CONTINUE; >> + >> if ( status & MCi_STATUS_VAL ) >> { >> switch ( status & INTEL_MCCOD_MASK ) >> @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t >> >> static void intel_default_mce_dhandler( >> struct mca_binfo *binfo, >> - struct mca_handle_result *result) >> + enum mce_result *result) >> { >> uint64_t status = binfo->mib->mc_status; >> enum intel_mce_type type; >> @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler( >> type = intel_check_mce_type(status); >> >> if (type == intel_mce_fatal || type == intel_mce_ucr_srar) >> - result->result = MCA_NEED_RESET; >> - else if (type == intel_mce_ucr_srao) >> - result->result = MCA_NO_ACTION; >> + *result = MCER_RESET; >> + else >> + *result = MCER_CONTINUE; >> } >> >> static const struct mca_error_handler intel_mce_dhandlers[] = { >> @@ -764,7 +737,7 @@ static const struct mca_error_handler in >> >> static void intel_default_mce_uhandler( >> struct mca_binfo *binfo, >> - struct mca_handle_result *result) >> + enum mce_result *result) >> { >> uint64_t status = binfo->mib->mc_status; >> enum intel_mce_type type; >> @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler( >> /* Panic if no handler for SRAR error */ >> case intel_mce_ucr_srar: >> case intel_mce_fatal: >> - result->result = MCA_NEED_RESET; >> + *result = MCER_RESET; >> break; >> default: >> - result->result = MCA_NO_ACTION; >> + *result = MCER_CONTINUE; >> break; >> } >> } >> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h >> --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800 >> +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800 >> @@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban >> void mcabanks_free(struct mca_banks *banks); >> extern struct mca_banks *mca_allbanks; >> >> -/* Below interfaces are defined for MCA internal processing: >> - * a. pre_handler will be called early in MCA ISR context, mainly >> for early >> - * need_reset detection for avoiding log missing. Also, it is >> used to judge >> - * impacted DOMAIN if possible. >> - * b. mca_error_handler is actually a (error_action_index, >> - * recovery_hanlder pointer) pair. The defined recovery_handler >> - * performs the actual recovery operations such as page_offline, >> cpu_offline >> - * in softIRQ context when the per_bank MCA error matching the >> corresponding >> - * mca_code index. If pre_handler can't judge the impacted domain, >> - * recovery_handler must figure it out. >> -*/ >> - >> -/* MCA error has been recovered successfully by the recovery action*/ >> -#define MCA_RECOVERED (0x1 << 0) >> -/* MCA error impact the specified DOMAIN in owner field below */ >> -#define MCA_OWNER (0x1 << 1) >> -/* MCA error can't be recovered and need reset */ >> -#define MCA_NEED_RESET (0x1 << 2) >> -/* MCA error did not have any action yet */ >> -#define MCA_NO_ACTION (0x1 << 3) >> - >> -struct mca_handle_result >> -{ >> - uint32_t result; >> - /* Used one result & MCA_OWNER */ >> - domid_t owner; >> - /* Used by mca_error_handler, result & MCA_RECOVRED */ >> - struct recovery_action *action; >> -}; >> - >> /*Keep bank so that we can get staus even if mib is NULL */ >> struct mca_binfo { >> int bank; >> @@ -163,8 +133,14 @@ struct mca_binfo { >> struct cpu_user_regs *regs; >> }; >> >> -extern void (*mca_prehandler)( struct cpu_user_regs *regs, >> - struct mca_handle_result *result); >> +enum mce_result >> +{ >> + MCER_NOERROR, >> + MCER_RECOVERED, >> + /* Not recoverd, but can continue */ >> + MCER_CONTINUE, >> + MCER_RESET, >> +}; >> >> struct mca_error_handler >> { >> @@ -175,7 +151,7 @@ struct mca_error_handler >> */ >> int (*owned_error)(uint64_t status); >> void (*recovery_handler)(struct mca_binfo *binfo, >> - struct mca_handle_result *result); >> + enum mce_result *result); >> }; >> >> /* Global variables */ > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |