[Xen-devel] [Patch 3/6] Xen/MCE: vMCE emulation
Xen/MCE: vMCE emulation

This patch provides virtual MCE support to the guest. It emulates a simple
and clean MCE MSR interface for the guest, faking capabilities where needed
and masking those that are unnecessary:
1. Provide a well-defined MCG_CAP to the guest: filter out unnecessary
   capabilities and expose only those the guest needs;
2. Disable MCG_CTL to avoid exposing anything model specific;
3. Stick all 1's into MCi_CTL as seen by the guest, again to avoid anything
   model specific;
4. Advertise the CMCI capability but never actually inject CMCI, so the
   guest does not poll periodically;
5. Mask the MSCOD field of MCi_STATUS, which is model specific;
6. Keep natural semantics by using per-vcpu instead of per-domain variables;
7. Use bank1 and reserve bank0 to work around the 'bank0 quirk' of some
   very old processors;
8. Clean up vMCE# injection logic that was shared by Intel and AMD but is
   useless under the new vMCE implementation;
9. Stay compatible with the old vMCE interface that has been backported to
   SLES11 SP2, so that guests migrating from the old vMCE to the new vMCE
   are not blocked.

Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>

diff -r 585fe3e5a608 xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h	Thu Jul 19 21:38:15 2012 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce.h	Thu Jul 19 21:42:55 2012 +0800
@@ -55,8 +55,8 @@
 u64 mce_cap_init(void);
 extern unsigned int firstbank;
 
-int intel_mce_rdmsr(const struct vcpu *, uint32_t msr, uint64_t *val);
-int intel_mce_wrmsr(struct vcpu *, uint32_t msr, uint64_t val);
+void intel_vmce_mci_ctl2_rdmsr(const struct vcpu *, uint32_t msr, uint64_t *val);
+void intel_vmce_mci_ctl2_wrmsr(struct vcpu *, uint32_t msr, uint64_t val);
 
 struct mcinfo_extended *intel_get_extended_msrs(
     struct mcinfo_global *mig, struct mc_info *mi);
@@ -168,13 +168,12 @@
 int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
     uint64_t gstatus);
 int inject_vmce(struct domain *d);
-int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct mcinfo_global *global);
 
 static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr)
 {
     if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
          msr >= MSR_IA32_MC0_CTL2 &&
-         msr < MSR_IA32_MCx_CTL2(v->arch.mcg_cap & MCG_CAP_COUNT) )
+         msr < MSR_IA32_MCx_CTL2(v->arch.vmce.mcg_cap & MCG_CAP_COUNT) )
         return 1;
     return 0;
 }
@@ -182,7 +181,7 @@
 static inline int mce_bank_msr(const struct vcpu *v, uint32_t msr)
 {
     if ( (msr >= MSR_IA32_MC0_CTL &&
-          msr < MSR_IA32_MCx_CTL(v->arch.mcg_cap & MCG_CAP_COUNT)) ||
+          msr < MSR_IA32_MCx_CTL(v->arch.vmce.mcg_cap & MCG_CAP_COUNT)) ||
          mce_vendor_bank_msr(v, msr) )
         return 1;
     return 0;
diff -r 585fe3e5a608 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Thu Jul 19 21:38:15 2012 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Thu Jul 19 21:42:55 2012 +0800
@@ -1396,33 +1396,26 @@
 }
 
 /* intel specific MCA MSR */
-int intel_mce_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
+void intel_vmce_mci_ctl2_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
 {
-    int ret = 0;
+    int bank = msr - MSR_IA32_MC0_CTL2;
 
-    if ( msr >= MSR_IA32_MC0_CTL2 &&
-         msr < MSR_IA32_MCx_CTL2(v->arch.mcg_cap & MCG_CAP_COUNT) )
+    if ( (bank >= 0) && (bank < GUEST_BANK_NUM) )
     {
-        mce_printk(MCE_QUIET, "We have disabled CMCI capability, "
-                   "Guest should not write this MSR!\n");
-        ret = 1;
+        v->arch.vmce.bank[bank].mci_ctl2 = val;
+        mce_printk(MCE_VERBOSE, "MCE: wr MC%u_CTL2 %"PRIx64"\n",
+                   bank, val);
     }
-
-    return ret;
 }
 
-int intel_mce_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
+void intel_vmce_mci_ctl2_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
 {
-    int ret = 0;
+    int bank = msr - MSR_IA32_MC0_CTL2;
 
-    if ( msr >= MSR_IA32_MC0_CTL2 &&
-         msr < MSR_IA32_MCx_CTL2(v->arch.mcg_cap & MCG_CAP_COUNT) )
+    if ( (bank >= 0) && (bank < GUEST_BANK_NUM) )
    {
-        mce_printk(MCE_QUIET, "We have disabled CMCI capability, "
-                   "Guest should not read this MSR!\n");
-        ret = 1;
+        *val = v->arch.vmce.bank[bank].mci_ctl2;
+        mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL2 0x%"PRIx64"\n",
+                   bank, *val);
     }
-
-    return ret;
 }
-
diff -r 585fe3e5a608 xen/arch/x86/cpu/mcheck/vmce.c
--- a/xen/arch/x86/cpu/mcheck/vmce.c	Thu Jul 19 21:38:15 2012 +0800
+++ b/xen/arch/x86/cpu/mcheck/vmce.c	Thu Jul 19 21:42:55 2012 +0800
@@ -1,5 +1,22 @@
 /*
- * vmce.c - virtual MCE support
+ * vmce.c - provide software emulated vMCE support to guest
+ *
+ * Copyright (C) 2010, 2011 Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>
+ * Copyright (C) 2012, 2013 Liu, Jinsong <jinsong.liu@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #include <xen/init.h>
@@ -19,44 +36,24 @@
 #include "mce.h"
 #include "x86_mca.h"
 
-/*
- * Emulalte 2 banks for guest
- * Bank0: reserved for 'bank0 quirk' occur at some very old processors:
- *   1). Intel cpu whose family-model value < 06-1A;
- *   2). AMD K7
- * Bank1: used to transfer error info to guest
- */
-#define GUEST_BANK_NUM 2
-#define GUEST_MCG_CAP (MCG_TES_P | MCG_SER_P | GUEST_BANK_NUM)
-
-#define dom_vmce(x) ((x)->arch.vmca_msrs)
-
-int vmce_init_msr(struct domain *d)
-{
-    dom_vmce(d) = xmalloc(struct domain_mca_msrs);
-    if ( !dom_vmce(d) )
-        return -ENOMEM;
-
-    dom_vmce(d)->mcg_status = 0x0;
-    dom_vmce(d)->nr_injection = 0;
-
-    INIT_LIST_HEAD(&dom_vmce(d)->impact_header);
-    spin_lock_init(&dom_vmce(d)->lock);
-
-    return 0;
-}
-
-void vmce_destroy_msr(struct domain *d)
-{
-    if ( !dom_vmce(d) )
-        return;
-    xfree(dom_vmce(d));
-    dom_vmce(d) = NULL;
-}
-
 void vmce_init_vcpu(struct vcpu *v)
 {
-    v->arch.mcg_cap = GUEST_MCG_CAP;
+    int i;
+
+    /* global MCA MSRs init */
+    v->arch.vmce.mcg_cap = GUEST_MCG_CAP;
+    v->arch.vmce.mcg_status = 0;
+
+    /* per-bank MCA MSRs init */
+    for ( i = 0; i < GUEST_BANK_NUM; i++ )
+    {
+        v->arch.vmce.bank[i].mci_status = 0;
+        v->arch.vmce.bank[i].mci_addr = 0;
+        v->arch.vmce.bank[i].mci_misc = 0;
+        v->arch.vmce.bank[i].mci_ctl2 = 0;
+    }
+
+    spin_lock_init(&v->arch.vmce.lock);
 }
 
 int vmce_restore_vcpu(struct vcpu *v, uint64_t caps)
@@ -70,16 +67,18 @@
         return -EPERM;
     }
 
-    v->arch.mcg_cap = caps;
+    v->arch.vmce.mcg_cap = caps;
     return 0;
 }
 
-static int bank_mce_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
+/*
+ * For historical reasons, the bank number may be greater than GUEST_BANK_NUM
+ * when migrating from an old vMCE version to the new vMCE.
+ */
+static int bank_mce_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
 {
     int ret = 1;
     unsigned int bank = (msr - MSR_IA32_MC0_CTL) / 4;
-    struct domain_mca_msrs *vmce = dom_vmce(v->domain);
-    struct bank_entry *entry;
 
     *val = 0;
@@ -92,53 +91,40 @@
                    bank, *val);
         break;
     case MSR_IA32_MC0_STATUS:
-        /* Only error bank is read. Non-error banks simply return. */
-        if ( !list_empty(&vmce->impact_header) )
+        if ( bank < GUEST_BANK_NUM )
         {
-            entry = list_entry(vmce->impact_header.next,
-                               struct bank_entry, list);
-            if ( entry->bank == bank )
-            {
-                *val = entry->mci_status;
+            *val = v->arch.vmce.bank[bank].mci_status;
+            if ( *val )
                 mce_printk(MCE_VERBOSE,
-                           "MCE: rd MC%u_STATUS in vMCE# context "
-                           "value 0x%"PRIx64"\n", bank, *val);
-            }
+                           "MCE: rdmsr MC%u_STATUS in vMCE# context "
+                           "0x%"PRIx64"\n", bank, *val);
         }
         break;
     case MSR_IA32_MC0_ADDR:
-        if ( !list_empty(&vmce->impact_header) )
+        if ( bank < GUEST_BANK_NUM )
        {
-            entry = list_entry(vmce->impact_header.next,
-                               struct bank_entry, list);
-            if ( entry->bank == bank )
-            {
-                *val = entry->mci_addr;
+            *val = v->arch.vmce.bank[bank].mci_addr;
+            if ( *val )
                 mce_printk(MCE_VERBOSE,
                            "MCE: rdmsr MC%u_ADDR in vMCE# context "
                            "0x%"PRIx64"\n", bank, *val);
-            }
         }
         break;
     case MSR_IA32_MC0_MISC:
-        if ( !list_empty(&vmce->impact_header) )
+        if ( bank < GUEST_BANK_NUM )
        {
-            entry = list_entry(vmce->impact_header.next,
-                               struct bank_entry, list);
-            if ( entry->bank == bank )
-            {
-                *val = entry->mci_misc;
+            *val = v->arch.vmce.bank[bank].mci_misc;
+            if ( *val )
                 mce_printk(MCE_VERBOSE,
-                           "MCE: rd MC%u_MISC in vMCE# context "
+                           "MCE: rdmsr MC%u_MISC in vMCE# context "
                            "0x%"PRIx64"\n", bank, *val);
-            }
         }
         break;
     default:
         switch ( boot_cpu_data.x86_vendor )
         {
         case X86_VENDOR_INTEL:
-            ret = intel_mce_rdmsr(v, msr, val);
+            intel_vmce_mci_ctl2_rdmsr(v, msr, val);
             break;
         default:
             ret = 0;
@@ -157,29 +143,27 @@
  */
 int vmce_rdmsr(uint32_t msr, uint64_t *val)
 {
-    const struct vcpu *cur = current;
-    struct domain_mca_msrs *vmce = dom_vmce(cur->domain);
+    struct vcpu *cur = current;
     int ret = 1;
 
     *val = 0;
 
-    spin_lock(&vmce->lock);
+    spin_lock(&cur->arch.vmce.lock);
 
     switch ( msr )
     {
     case MSR_IA32_MCG_STATUS:
-        *val = vmce->mcg_status;
+        *val = cur->arch.vmce.mcg_status;
         if (*val)
             mce_printk(MCE_VERBOSE,
                        "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val);
         break;
     case MSR_IA32_MCG_CAP:
-        *val = cur->arch.mcg_cap;
-        mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n",
-                   *val);
+        *val = cur->arch.vmce.mcg_cap;
+        mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n", *val);
        break;
     case MSR_IA32_MCG_CTL:
-        if ( cur->arch.mcg_cap & MCG_CTL_P )
+        if ( cur->arch.vmce.mcg_cap & MCG_CTL_P )
        {
             *val = ~0UL;
             mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", *val);
@@ -195,24 +179,19 @@
         break;
     }
 
-    spin_unlock(&vmce->lock);
+    spin_unlock(&cur->arch.vmce.lock);
+
     return ret;
 }
 
+/*
+ * For historical reasons, the bank number may be greater than GUEST_BANK_NUM
+ * when migrating from an old vMCE version to the new vMCE.
+ */
 static int bank_mce_wrmsr(struct vcpu *v, u32 msr, u64 val)
 {
     int ret = 1;
     unsigned int bank = (msr - MSR_IA32_MC0_CTL) / 4;
-    struct domain_mca_msrs *vmce = dom_vmce(v->domain);
-    struct bank_entry *entry = NULL;
-
-    /* Give the first entry of the list, it corresponds to current
-     * vMCE# injection. When vMCE# is finished processing by the
-     * the guest, this node will be deleted.
-     * Only error bank is written. Non-error banks simply return.
-     */
-    if ( !list_empty(&vmce->impact_header) )
-        entry = list_entry(vmce->impact_header.next, struct bank_entry, list);
 
     switch ( msr & (MSR_IA32_MC0_CTL | 3) )
     {
@@ -223,56 +202,52 @@
          */
         break;
     case MSR_IA32_MC0_STATUS:
-        if ( entry && (entry->bank == bank) )
+        if ( val )
         {
-            entry->mci_status = val;
-            mce_printk(MCE_VERBOSE,
-                       "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
+            mce_printk(MCE_QUIET,
+                       "MCE: wr MC%u_STATUS w/ non-zero cause #GP\n", bank);
+            ret = -1;
+        }
+        if ( bank < GUEST_BANK_NUM )
+        {
+            v->arch.vmce.bank[bank].mci_status = val;
+            mce_printk(MCE_VERBOSE, "MCE: wr MC%u_STATUS %"PRIx64"\n",
                        bank, val);
         }
-        else
-            mce_printk(MCE_VERBOSE,
-                       "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val);
         break;
     case MSR_IA32_MC0_ADDR:
-        if ( !~val )
+        if ( val )
         {
             mce_printk(MCE_QUIET,
-                       "MCE: wr MC%u_ADDR with all 1s will cause #GP\n", bank);
+                       "MCE: wr MC%u_ADDR w/ non-zero cause #GP\n", bank);
             ret = -1;
         }
-        else if ( entry && (entry->bank == bank) )
+        else if ( bank < GUEST_BANK_NUM )
        {
-            entry->mci_addr = val;
-            mce_printk(MCE_VERBOSE,
-                       "MCE: wr MC%u_ADDR %"PRIx64" in vMCE#\n", bank, val);
-        }
-        else
+            v->arch.vmce.bank[bank].mci_addr = val;
             mce_printk(MCE_VERBOSE,
                        "MCE: wr MC%u_ADDR %"PRIx64"\n", bank, val);
+        }
         break;
     case MSR_IA32_MC0_MISC:
-        if ( !~val )
+        if ( val )
        {
             mce_printk(MCE_QUIET,
-                       "MCE: wr MC%u_MISC with all 1s will cause #GP\n", bank);
+                       "MCE: wr MC%u_MISC w/ non-zero cause #GP\n", bank);
             ret = -1;
         }
-        else if ( entry && (entry->bank == bank) )
+        else if ( bank < GUEST_BANK_NUM )
        {
-            entry->mci_misc = val;
-            mce_printk(MCE_VERBOSE,
-                       "MCE: wr MC%u_MISC %"PRIx64" in vMCE#\n", bank, val);
-        }
-        else
+            v->arch.vmce.bank[bank].mci_misc = val;
             mce_printk(MCE_VERBOSE, "MCE: wr MC%u_MISC %"PRIx64"\n", bank, val);
+        }
         break;
     default:
         switch ( boot_cpu_data.x86_vendor )
         {
         case X86_VENDOR_INTEL:
-            ret = intel_mce_wrmsr(v, msr, val);
+            intel_vmce_mci_ctl2_wrmsr(v, msr, val);
             break;
         default:
             ret = 0;
@@ -292,57 +267,38 @@
 int vmce_wrmsr(u32 msr, u64 val)
 {
     struct vcpu *cur = current;
-    struct bank_entry *entry = NULL;
-    struct domain_mca_msrs *vmce = dom_vmce(cur->domain);
     int ret = 1;
 
-    spin_lock(&vmce->lock);
+    spin_lock(&cur->arch.vmce.lock);
 
     switch ( msr )
     {
     case MSR_IA32_MCG_CTL:
-        if ( !(cur->arch.mcg_cap & MCG_CTL_P) )
+        /* If MCG_CTL exists, it is stuck at all 1's; if it does not, #GP */
+        if ( !(cur->arch.vmce.mcg_cap & MCG_CTL_P) )
         {
             mce_printk(MCE_QUIET, "MCE: no MCG_CTL\n");
             ret = -1;
         }
         break;
     case MSR_IA32_MCG_STATUS:
-        vmce->mcg_status = val;
+        cur->arch.vmce.mcg_status = val;
         mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", val);
-        /* For HVM guest, this is the point for deleting vMCE injection node */
-        if ( is_hvm_vcpu(cur) && (vmce->nr_injection > 0) )
-        {
-            vmce->nr_injection--; /* Should be 0 */
-            if ( !list_empty(&vmce->impact_header) )
-            {
-                entry = list_entry(vmce->impact_header.next,
-                                   struct bank_entry, list);
-                if ( entry->mci_status & MCi_STATUS_VAL )
-                    mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have "
-                               "been cleared before write MCG_STATUS MSR\n");
-
-                mce_printk(MCE_QUIET, "MCE: Delete HVM last injection "
-                           "Node, nr_injection %u\n",
-                           vmce->nr_injection);
-                list_del(&entry->list);
-                xfree(entry);
-            }
-            else
-                mce_printk(MCE_QUIET, "MCE: Not found HVM guest"
-                           " last injection Node, something Wrong!\n");
-        }
         break;
     case MSR_IA32_MCG_CAP:
-        mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n");
-        ret = -1;
+        /*
+         * According to the Intel SDM, IA32_MCG_CAP is a read-only register
+         * and the effect of writing to it is undefined.  Here we treat a
+         * write as 'write does not change', so the guest is not surprised.
+         */
+        mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only, write ignored\n");
         break;
     default:
         ret = mce_bank_msr(cur, msr) ? bank_mce_wrmsr(cur, msr, val) : 0;
         break;
     }
 
-    spin_unlock(&vmce->lock);
+    spin_unlock(&cur->arch.vmce.lock);
     return ret;
 }
@@ -353,7 +309,7 @@
     for_each_vcpu( d, v )
     {
         struct hvm_vmce_vcpu ctxt = {
-            .caps = v->arch.mcg_cap
+            .caps = v->arch.vmce.mcg_cap
         };
 
         err = hvm_save_entry(VMCE_VCPU, v->vcpu_id, h, &ctxt);
@@ -433,93 +389,38 @@
     return 0;
 }
 
-/* This node list records errors impacting a domain. when one
- * MCE# happens, one error bank impacts a domain. This error node
- * will be inserted to the tail of the per_dom data for vMCE# MSR
- * virtualization. When one vMCE# injection is finished processing
- * processed by guest, the corresponding node will be deleted.
- * This node list is for GUEST vMCE# MSRS virtualization.
- */
-static struct bank_entry* alloc_bank_entry(void)
+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
+                   uint64_t gstatus)
 {
-    struct bank_entry *entry;
+    struct vcpu *v = d->vcpu[0];
 
-    entry = xzalloc(struct bank_entry);
-    if ( entry == NULL )
-    {
-        printk(KERN_ERR "MCE: malloc bank_entry failed\n");
-        return NULL;
-    }
-
-    INIT_LIST_HEAD(&entry->list);
-    return entry;
-}
-
-/* Fill error bank info for #vMCE injection and GUEST vMCE#
- * MSR virtualization data
- * 1) Log down how many nr_injections of the impacted.
- * 2) Copy MCE# error bank to impacted DOM node list,
- *    for vMCE# MSRs virtualization
- */
-int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
-                   uint64_t gstatus) {
-    struct bank_entry *entry;
-
-    /* This error bank impacts one domain, we need to fill domain related
-     * data for vMCE MSRs virtualization and vMCE# injection */
     if ( mc_bank->mc_domid != (uint16_t)~0 )
     {
-        /* For HVM guest, Only when first vMCE is consumed by HVM guest
-         * successfully, will we generete another node and inject another vMCE.
-         */
-        if ( d->is_hvm && (dom_vmce(d)->nr_injection > 0) )
+        if ( v->arch.vmce.mcg_status & MCG_STATUS_MCIP )
         {
-            mce_printk(MCE_QUIET, "MCE: HVM guest has not handled previous"
+            mce_printk(MCE_QUIET, "MCE: guest has not handled previous"
                        " vMCE yet!\n");
             return -1;
         }
 
-        entry = alloc_bank_entry();
-        if ( entry == NULL )
-            return -1;
+        spin_lock(&v->arch.vmce.lock);
 
-        entry->mci_status = mc_bank->mc_status;
-        entry->mci_addr = mc_bank->mc_addr;
-        entry->mci_misc = mc_bank->mc_misc;
-        entry->bank = mc_bank->mc_bank;
+        v->arch.vmce.mcg_status = gstatus;
+        /*
+         * 1. Skip BANK0 to avoid the 'bank0 quirk' of old processors
+         * 2. Mask the model-specific MSCOD error code out of MCi_STATUS
+         *    before exposing it to the guest
+         */
+        v->arch.vmce.bank[BANK1].mci_status = mc_bank->mc_status &
+                                              MCi_STATUS_MSCOD_MASK;
+        v->arch.vmce.bank[BANK1].mci_addr = mc_bank->mc_addr;
+        v->arch.vmce.bank[BANK1].mci_misc = mc_bank->mc_misc;
 
-        spin_lock(&dom_vmce(d)->lock);
-        /* New error Node, insert to the tail of the per_dom data */
-        list_add_tail(&entry->list, &dom_vmce(d)->impact_header);
-        /* Fill MSR global status */
-        dom_vmce(d)->mcg_status = gstatus;
-        /* New node impact the domain, need another vMCE# injection*/
-        dom_vmce(d)->nr_injection++;
-        spin_unlock(&dom_vmce(d)->lock);
-
-        mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d "
-                   "status %"PRIx64" addr %"PRIx64" domid %d]\n ",
-                   mc_bank->mc_bank, mc_bank->mc_status, mc_bank->mc_addr,
-                   mc_bank->mc_domid);
+        spin_unlock(&v->arch.vmce.lock);
     }
 
     return 0;
 }
 
-#if 0 /* currently unused */
-int vmce_domain_inject(
-    struct mcinfo_bank *bank, struct domain *d, struct mcinfo_global *global)
-{
-    int ret;
-
-    ret = fill_vmsr_data(bank, d, global->mc_gstatus);
-    if ( ret < 0 )
-        return ret;
-
-    return inject_vmce(d);
-}
-#endif
-
 static int is_hvm_vmce_ready(struct mcinfo_bank *bank, struct domain *d)
 {
     struct vcpu *v;
diff -r 585fe3e5a608 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Thu Jul 19 21:38:15 2012 +0800
+++ b/xen/arch/x86/domain.c	Thu Jul 19 21:42:55 2012 +0800
@@ -597,9 +597,6 @@
 
         if ( (rc = iommu_domain_init(d)) != 0 )
             goto fail;
-
-        /* For Guest vMCE MSRs virtualization */
-        vmce_init_msr(d);
     }
 
     if ( is_hvm_domain(d) )
@@ -627,7 +624,6 @@
  fail:
     d->is_dying = DOMDYING_dead;
-    vmce_destroy_msr(d);
     cleanup_domain_irq_mapping(d);
     free_xenheap_page(d->shared_info);
     if ( paging_initialised )
@@ -656,7 +652,6 @@
     else
         xfree(d->arch.pv_domain.e820);
 
-    vmce_destroy_msr(d);
     free_domain_pirqs(d);
     if ( !is_idle_domain(d) )
         iommu_domain_destroy(d);
diff -r 585fe3e5a608 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Thu Jul 19 21:38:15 2012 +0800
+++ b/xen/arch/x86/domctl.c	Thu Jul 19 21:42:55 2012 +0800
@@ -1023,7 +1023,7 @@
             evc->syscall32_callback_eip = 0;
             evc->syscall32_disables_events = 0;
         }
-        evc->mcg_cap = v->arch.mcg_cap;
+        evc->mcg_cap = v->arch.vmce.mcg_cap;
     }
     else
     {
diff -r 585fe3e5a608 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c	Thu Jul 19 21:38:15 2012 +0800
+++ b/xen/arch/x86/traps.c	Thu Jul 19 21:42:55 2012 +0800
@@ -3227,50 +3227,6 @@
         break;
     ASSERT(trap <= VCPU_TRAP_LAST);
 
-    /* inject vMCE to PV_Guest including DOM0. */
-    if ( trap == VCPU_TRAP_MCE )
-    {
-        gdprintk(XENLOG_DEBUG, "MCE: Return from vMCE# trap!\n");
-        if ( curr->vcpu_id == 0 )
-        {
-            struct domain *d = curr->domain;
-
-            if ( !d->arch.vmca_msrs->nr_injection )
-            {
-                printk(XENLOG_WARNING "MCE: ret from vMCE#, "
-                       "no injection node\n");
-                goto end;
-            }
-
-            d->arch.vmca_msrs->nr_injection--;
-            if ( !list_empty(&d->arch.vmca_msrs->impact_header) )
-            {
-                struct bank_entry *entry;
-
-                entry = list_entry(d->arch.vmca_msrs->impact_header.next,
-                                   struct bank_entry, list);
-                gdprintk(XENLOG_DEBUG, "MCE: delete last injection node\n");
-                list_del(&entry->list);
-            }
-            else
-                printk(XENLOG_ERR "MCE: didn't found last injection node\n");
-
-            /* further injection */
-            if ( d->arch.vmca_msrs->nr_injection > 0 &&
-                 guest_has_trap_callback(d, 0, TRAP_machine_check) &&
-                 !test_and_set_bool(curr->mce_pending) )
-            {
-                int cpu = smp_processor_id();
-
-                cpumask_copy(curr->cpu_affinity_tmp, curr->cpu_affinity);
-                printk(XENLOG_DEBUG "MCE: CPU%d set affinity, old %d\n",
-                       cpu, curr->processor);
-                vcpu_set_affinity(curr, cpumask_of(cpu));
-            }
-        }
-    }
-
-end:
     /* Restore previous asynchronous exception mask. */
     curr->async_exception_mask = curr->async_exception_state(trap).old_mask;
 }
diff -r 585fe3e5a608 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Thu Jul 19 21:38:15 2012 +0800
+++ b/xen/include/asm-x86/domain.h	Thu Jul 19 21:42:55 2012 +0800
@@ -309,9 +309,6 @@
 
     struct PITState vpit;
 
-    /* For Guest vMCA handling */
-    struct domain_mca_msrs *vmca_msrs;
-
     /* TSC management (emulation, pv, scaling, stats) */
     int tsc_mode;            /* see include/asm-x86/time.h */
     bool_t vtsc;             /* tsc is emulated (may change after migrate) */
@@ -490,7 +487,7 @@
      * and thus should be saved/restored. */
     bool_t nonlazy_xstate_used;
 
-    uint64_t mcg_cap;
+    struct vmce vmce;
 
     struct paging_vcpu paging;
diff -r 585fe3e5a608 xen/include/asm-x86/mce.h
--- a/xen/include/asm-x86/mce.h	Thu Jul 19 21:38:15 2012 +0800
+++ b/xen/include/asm-x86/mce.h	Thu Jul 19 21:42:55 2012 +0800
@@ -3,28 +3,46 @@
 #ifndef _XEN_X86_MCE_H
 #define _XEN_X86_MCE_H
 
-/* This entry is for recording bank nodes for the impacted domain,
- * put into impact_header list. */
-struct bank_entry {
-    struct list_head list;
-    uint16_t bank;
+/*
+ * Emulate 2 banks for the guest
+ * Bank0: reserved for the 'bank0 quirk' that occurs on some very old
+ *        processors:
+ *   1) Intel CPUs whose family-model value is < 06-1A;
+ *   2) AMD K7
+ * Bank1: used to transfer error info to the guest
+ */
+#define BANK0 0
+#define BANK1 1
+#define GUEST_BANK_NUM 2
+
+/*
+ * MCG_SER_P: software error recovery supported
+ * MCG_TES_P: avoids exposing the model-specific MCi_STATUS bits 56:53
+ * MCG_CMCI_P: expose the CMCI capability but never actually inject CMCI,
+ *             so the guest does not poll periodically (for performance)
+ */
+#define GUEST_MCG_CAP (MCG_SER_P | MCG_TES_P | MCG_CMCI_P | GUEST_BANK_NUM)
+
+/* Mask the model-specific MSCOD error code before exposing it to the guest */
+#define MCi_STATUS_MSCOD_MASK (~(0x0ffffUL << 16))
+
+/* No mci_ctl field since MCi_CTL is stuck at all 1's */
+struct vmce_bank {
     uint64_t mci_status;
     uint64_t mci_addr;
     uint64_t mci_misc;
+    uint64_t mci_ctl2;
 };
 
-struct domain_mca_msrs
-{
-    /* Guest should not change below values after DOM boot up */
+/* No mcg_ctl field since MCG_CTL is not exposed to the guest */
+struct vmce {
+    uint64_t mcg_cap;
     uint64_t mcg_status;
-    uint16_t nr_injection;
-    struct list_head impact_header;
+    struct vmce_bank bank[GUEST_BANK_NUM];
+    spinlock_t lock;
 };
 
 /* Guest vMCE MSRs virtualization */
-extern int vmce_init_msr(struct domain *d);
-extern void vmce_destroy_msr(struct domain *d);
 extern void vmce_init_vcpu(struct vcpu *);
 extern int vmce_restore_vcpu(struct vcpu *, uint64_t caps);
 extern int vmce_wrmsr(uint32_t msr, uint64_t val);
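For reference (not part of the patch): a minimal standalone sketch of how the GUEST_MCG_CAP value advertised above decomposes, using the IA32_MCG_CAP bit layout from the Intel SDM (bank count in bits 7:0, MCG_CTL_P bit 8, CMCI_P bit 10, TES_P bit 11, SER_P bit 24). The macros below are restated locally for illustration rather than taken from Xen headers.

/* Illustrative only: decode the MCG_CAP value this patch exposes to the guest. */
#include <stdint.h>
#include <stdio.h>

#define MCG_CAP_COUNT  0xffULL        /* bits 7:0: number of banks */
#define MCG_CTL_P      (1ULL << 8)    /* MCG_CTL register present */
#define MCG_CMCI_P     (1ULL << 10)   /* CMCI capability */
#define MCG_TES_P      (1ULL << 11)   /* threshold-based error status */
#define MCG_SER_P      (1ULL << 24)   /* software error recovery */
#define GUEST_BANK_NUM 2

int main(void)
{
    /* Same composition as GUEST_MCG_CAP in the patch. */
    uint64_t cap = MCG_SER_P | MCG_TES_P | MCG_CMCI_P | GUEST_BANK_NUM;

    printf("MCG_CAP = 0x%llx: %llu banks, MCG_CTL %s, CMCI %s, TES %s, SER %s\n",
           (unsigned long long)cap,
           (unsigned long long)(cap & MCG_CAP_COUNT),
           (cap & MCG_CTL_P) ? "present" : "absent",
           (cap & MCG_CMCI_P) ? "yes" : "no",
           (cap & MCG_TES_P) ? "yes" : "no",
           (cap & MCG_SER_P) ? "yes" : "no");
    return 0;
}

Compiled and run as-is, it reports two banks with CMCI, TES and SER set and MCG_CTL absent, matching points 1-4 of the description above.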
Attachment: 3_vmce_emulation.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel