[Xen-devel] [PATCH 1/5] Xen/MCE: vMCE emulation
Xen/MCE: vMCE emulation
This patch provides virtual MCE support to the guest. It emulates a simple
and clean MCE MSR interface for the guest by faking capabilities where needed
and masking them where unnecessary:
1. Provide a well-defined MCG_CAP to the guest, filtering out unnecessary
capabilities and exposing only those the guest needs (see the sketch after
this list);
2. Disable MCG_CTL to avoid exposing model-specific behaviour;
3. Return all 1's for guest reads of MCi_CTL, again to avoid exposing
model-specific behaviour;
4. Advertise the CMCI capability but never actually inject CMCI, so the guest
does not have to poll periodically;
5. Mask the MSCOD field of MCi_STATUS to hide model-specific error codes;
6. Keep natural semantics by using per-vcpu instead of per-domain variables;
7. Use bank1 and reserve bank0 to work around the 'bank0 quirk' of some very
old processors;
8. Remove vMCE# injection logic shared by Intel and AMD that is useless under
the new vMCE implementation;
9. Stay compatible with the old Xen version that has been backported to SLES11
SP2, so that the old vMCE is not blocked when migrating to the new vMCE;
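
For illustration only (not part of the patch): a minimal, self-contained
sketch of the guest-visible MCG_CAP composition (point 1) and the MCi_STATUS
MSCOD filtering (point 5). The MCG_* bit positions are the architectural
IA32_MCG_CAP bits; the GUEST_MC_BANK_NUM, *_GUEST_MCG_CAP and
MCi_STATUS_MSCOD_MASK constants mirror the ones this patch adds to
xen/include/asm-x86/mce.h, and the host_status value is an arbitrary example.

/*
 * Illustrative sketch only -- not part of the patch.
 */
#include <stdint.h>
#include <stdio.h>

#define MCG_CMCI_P            (1ULL << 10)  /* CMCI capability */
#define MCG_TES_P             (1ULL << 11)  /* threshold-based error status */
#define MCG_SER_P             (1ULL << 24)  /* software error recovery */

#define GUEST_MC_BANK_NUM     2             /* bank0 reserved, bank1 carries errors */
#define INTEL_GUEST_MCG_CAP   (MCG_SER_P | MCG_TES_P | MCG_CMCI_P | GUEST_MC_BANK_NUM)
#define AMD_GUEST_MCG_CAP     (MCG_SER_P | GUEST_MC_BANK_NUM)

/* Mask that drops the model-specific MSCOD field (bits 31:16) of MCi_STATUS */
#define MCi_STATUS_MSCOD_MASK (~(0xffffULL << 16))

int main(void)
{
    uint64_t host_status = 0xbd000000000c017aULL;  /* arbitrary example value */

    printf("guest MCG_CAP (Intel): %#llx\n",
           (unsigned long long)INTEL_GUEST_MCG_CAP);
    printf("guest MCG_CAP (AMD):   %#llx\n",
           (unsigned long long)AMD_GUEST_MCG_CAP);
    printf("guest MC1_STATUS:      %#llx\n",
           (unsigned long long)(host_status & MCi_STATUS_MSCOD_MASK));
    return 0;
}

Built standalone, this prints 0x1000c02 for the Intel guest MCG_CAP (SER, TES
and CMCI plus a bank count of 2) and shows bits 31:16 of the example status
cleared before such a value would be handed to the guest.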
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
diff -r fbd9e864c047 xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h Mon Sep 17 18:02:59 2012 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce.h Tue Sep 18 22:39:10 2012 +0800
@@ -168,13 +168,12 @@
int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
uint64_t gstatus);
int inject_vmce(struct domain *d);
-int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct mcinfo_global *global);
static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr)
{
if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
msr >= MSR_IA32_MC0_CTL2 &&
- msr < MSR_IA32_MCx_CTL2(v->arch.mcg_cap & MCG_CAP_COUNT) )
+ msr < MSR_IA32_MCx_CTL2(v->arch.vmce.mcg_cap & MCG_CAP_COUNT) )
return 1;
return 0;
}
@@ -182,7 +181,7 @@
static inline int mce_bank_msr(const struct vcpu *v, uint32_t msr)
{
if ( (msr >= MSR_IA32_MC0_CTL &&
- msr < MSR_IA32_MCx_CTL(v->arch.mcg_cap & MCG_CAP_COUNT)) ||
+ msr < MSR_IA32_MCx_CTL(v->arch.vmce.mcg_cap & MCG_CAP_COUNT)) ||
mce_vendor_bank_msr(v, msr) )
return 1;
return 0;
diff -r fbd9e864c047 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Sep 17 18:02:59 2012 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Sep 18 22:39:10 2012 +0800
@@ -1300,14 +1300,15 @@
/* intel specific MCA MSR */
int intel_mce_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
{
+ unsigned int bank = msr - MSR_IA32_MC0_CTL2;
int ret = 0;
- if ( msr >= MSR_IA32_MC0_CTL2 &&
- msr < MSR_IA32_MCx_CTL2(v->arch.mcg_cap & MCG_CAP_COUNT) )
+ if ( bank < GUEST_MC_BANK_NUM )
{
- mce_printk(MCE_QUIET, "We have disabled CMCI capability, "
- "Guest should not write this MSR!\n");
- ret = 1;
+ v->arch.vmce.bank[bank].mci_ctl2 = val;
+ mce_printk(MCE_VERBOSE, "MCE: wr MC%u_CTL2 %"PRIx64"\n",
+ bank, val);
+ ret = 1;
}
return ret;
@@ -1315,13 +1316,14 @@
int intel_mce_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
{
+ unsigned int bank = msr - MSR_IA32_MC0_CTL2;
int ret = 0;
- if ( msr >= MSR_IA32_MC0_CTL2 &&
- msr < MSR_IA32_MCx_CTL2(v->arch.mcg_cap & MCG_CAP_COUNT) )
+ if ( bank < GUEST_MC_BANK_NUM )
{
- mce_printk(MCE_QUIET, "We have disabled CMCI capability, "
- "Guest should not read this MSR!\n");
+ *val = v->arch.vmce.bank[bank].mci_ctl2;
+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL2 0x%"PRIx64"\n",
+ bank, *val);
ret = 1;
}
diff -r fbd9e864c047 xen/arch/x86/cpu/mcheck/vmce.c
--- a/xen/arch/x86/cpu/mcheck/vmce.c Mon Sep 17 18:02:59 2012 +0800
+++ b/xen/arch/x86/cpu/mcheck/vmce.c Tue Sep 18 22:39:10 2012 +0800
@@ -1,5 +1,22 @@
/*
- * vmce.c - virtual MCE support
+ * vmce.c - provide software emulated vMCE support to guest
+ *
+ * Copyright (C) 2010, 2011 Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>
+ * Copyright (C) 2012, 2013 Liu, Jinsong <jinsong.liu@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <xen/init.h>
@@ -19,67 +36,55 @@
#include "mce.h"
#include "x86_mca.h"
-/*
- * Emulate 2 banks for guest
- * Bank0: reserved for 'bank0 quirk' occur at some very old processors:
- * 1). Intel cpu whose family-model value < 06-1A;
- * 2). AMD K7
- * Bank1: used to transfer error info to guest
- */
-#define GUEST_BANK_NUM 2
-#define GUEST_MCG_CAP (MCG_TES_P | MCG_SER_P | GUEST_BANK_NUM)
-
-#define dom_vmce(x) ((x)->arch.vmca_msrs)
-
-int vmce_init_msr(struct domain *d)
-{
- dom_vmce(d) = xmalloc(struct domain_mca_msrs);
- if ( !dom_vmce(d) )
- return -ENOMEM;
-
- dom_vmce(d)->mcg_status = 0x0;
- dom_vmce(d)->nr_injection = 0;
-
- INIT_LIST_HEAD(&dom_vmce(d)->impact_header);
- spin_lock_init(&dom_vmce(d)->lock);
-
- return 0;
-}
-
-void vmce_destroy_msr(struct domain *d)
-{
- if ( !dom_vmce(d) )
- return;
- xfree(dom_vmce(d));
- dom_vmce(d) = NULL;
-}
-
void vmce_init_vcpu(struct vcpu *v)
{
- v->arch.mcg_cap = GUEST_MCG_CAP;
+ int i;
+
+ /* global MCA MSRs init */
+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+ v->arch.vmce.mcg_cap = INTEL_GUEST_MCG_CAP;
+ else
+ v->arch.vmce.mcg_cap = AMD_GUEST_MCG_CAP;
+
+ v->arch.vmce.mcg_status = 0;
+
+ /* per-bank MCA MSRs init */
+ for ( i = 0; i < GUEST_MC_BANK_NUM; i++ )
+ memset(&v->arch.vmce.bank[i], 0, sizeof(struct vmce_bank));
+
+ spin_lock_init(&v->arch.vmce.lock);
}
int vmce_restore_vcpu(struct vcpu *v, uint64_t caps)
{
- if ( caps & ~GUEST_MCG_CAP & ~MCG_CAP_COUNT & ~MCG_CTL_P )
+ uint64_t guest_mcg_cap;
+
+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+ guest_mcg_cap = INTEL_GUEST_MCG_CAP;
+ else
+ guest_mcg_cap = AMD_GUEST_MCG_CAP;
+
+ if ( caps & ~guest_mcg_cap & ~MCG_CAP_COUNT & ~MCG_CTL_P )
{
dprintk(XENLOG_G_ERR, "%s restore: unsupported MCA capabilities"
" %#" PRIx64 " for d%d:v%u (supported: %#Lx)\n",
is_hvm_vcpu(v) ? "HVM" : "PV", caps, v->domain->domain_id,
- v->vcpu_id, GUEST_MCG_CAP & ~MCG_CAP_COUNT);
+ v->vcpu_id, guest_mcg_cap & ~MCG_CAP_COUNT);
return -EPERM;
}
- v->arch.mcg_cap = caps;
+ v->arch.vmce.mcg_cap = caps;
return 0;
}
-static int bank_mce_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
+/*
+ * For historical reasons the bank number may be greater than
+ * GUEST_MC_BANK_NUM when migrating from an old vMCE version to the new one.
+ */
+static int bank_mce_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
{
int ret = 1;
unsigned int bank = (msr - MSR_IA32_MC0_CTL) / 4;
- struct domain_mca_msrs *vmce = dom_vmce(v->domain);
- struct bank_entry *entry;
*val = 0;
@@ -92,46 +97,33 @@
bank, *val);
break;
case MSR_IA32_MC0_STATUS:
- /* Only error bank is read. Non-error banks simply return. */
- if ( !list_empty(&vmce->impact_header) )
+ if ( bank < GUEST_MC_BANK_NUM )
{
- entry = list_entry(vmce->impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == bank )
- {
- *val = entry->mci_status;
+ *val = v->arch.vmce.bank[bank].mci_status;
+ if ( *val )
mce_printk(MCE_VERBOSE,
- "MCE: rd MC%u_STATUS in vMCE# context "
- "value 0x%"PRIx64"\n", bank, *val);
- }
+ "MCE: rdmsr MC%u_STATUS in vMCE# context "
+ "0x%"PRIx64"\n", bank, *val);
}
break;
case MSR_IA32_MC0_ADDR:
- if ( !list_empty(&vmce->impact_header) )
+ if ( bank < GUEST_MC_BANK_NUM )
{
- entry = list_entry(vmce->impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == bank )
- {
- *val = entry->mci_addr;
+ *val = v->arch.vmce.bank[bank].mci_addr;
+ if ( *val )
mce_printk(MCE_VERBOSE,
"MCE: rdmsr MC%u_ADDR in vMCE# context "
"0x%"PRIx64"\n", bank, *val);
- }
}
break;
case MSR_IA32_MC0_MISC:
- if ( !list_empty(&vmce->impact_header) )
+ if ( bank < GUEST_MC_BANK_NUM )
{
- entry = list_entry(vmce->impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == bank )
- {
- *val = entry->mci_misc;
+ *val = v->arch.vmce.bank[bank].mci_misc;
+ if ( *val )
mce_printk(MCE_VERBOSE,
- "MCE: rd MC%u_MISC in vMCE# context "
+ "MCE: rdmsr MC%u_MISC in vMCE# context "
"0x%"PRIx64"\n", bank, *val);
- }
}
break;
default:
@@ -157,56 +149,50 @@
*/
int vmce_rdmsr(uint32_t msr, uint64_t *val)
{
- const struct vcpu *cur = current;
- struct domain_mca_msrs *vmce = dom_vmce(cur->domain);
+ struct vcpu *cur = current;
int ret = 1;
*val = 0;
- spin_lock(&vmce->lock);
+ spin_lock(&cur->arch.vmce.lock);
switch ( msr )
{
case MSR_IA32_MCG_STATUS:
- *val = vmce->mcg_status;
+ *val = cur->arch.vmce.mcg_status;
if (*val)
mce_printk(MCE_VERBOSE,
"MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val);
break;
case MSR_IA32_MCG_CAP:
- *val = cur->arch.mcg_cap;
- mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n",
- *val);
+ *val = cur->arch.vmce.mcg_cap;
+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n", *val);
break;
case MSR_IA32_MCG_CTL:
- /* Stick all 1's when CTL support, and 0's when no CTL support */
- if ( cur->arch.mcg_cap & MCG_CTL_P )
- *val = ~0ULL;
- mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", *val);
+ if ( cur->arch.vmce.mcg_cap & MCG_CTL_P )
+ {
+ *val = ~0UL;
+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", *val);
+ }
break;
default:
ret = mce_bank_msr(cur, msr) ? bank_mce_rdmsr(cur, msr, val) : 0;
break;
}
- spin_unlock(&vmce->lock);
+ spin_unlock(&cur->arch.vmce.lock);
+
return ret;
}
+/*
+ * For historical reasons the bank number may be greater than
+ * GUEST_MC_BANK_NUM when migrating from an old vMCE version to the new one.
+ */
static int bank_mce_wrmsr(struct vcpu *v, u32 msr, u64 val)
{
int ret = 1;
unsigned int bank = (msr - MSR_IA32_MC0_CTL) / 4;
- struct domain_mca_msrs *vmce = dom_vmce(v->domain);
- struct bank_entry *entry = NULL;
-
- /* Give the first entry of the list, it corresponds to current
- * vMCE# injection. When vMCE# is finished processing by the
- * the guest, this node will be deleted.
- * Only error bank is written. Non-error banks simply return.
- */
- if ( !list_empty(&vmce->impact_header) )
- entry = list_entry(vmce->impact_header.next, struct bank_entry, list);
switch ( msr & (MSR_IA32_MC0_CTL | 3) )
{
@@ -217,50 +203,46 @@
*/
break;
case MSR_IA32_MC0_STATUS:
- if ( entry && (entry->bank == bank) )
+ if ( val )
{
- entry->mci_status = val;
- mce_printk(MCE_VERBOSE,
- "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
+ mce_printk(MCE_QUIET,
+ "MCE: wr MC%u_STATUS w/ non-zero cause #GP\n", bank);
+ ret = -1;
+ }
+ if ( bank < GUEST_MC_BANK_NUM )
+ {
+ v->arch.vmce.bank[bank].mci_status = val;
+ mce_printk(MCE_VERBOSE, "MCE: wr MC%u_STATUS %"PRIx64"\n",
bank, val);
}
- else
- mce_printk(MCE_VERBOSE,
- "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val);
break;
case MSR_IA32_MC0_ADDR:
- if ( !~val )
+ if ( val )
{
mce_printk(MCE_QUIET,
- "MCE: wr MC%u_ADDR with all 1s will cause #GP\n", bank);
+ "MCE: wr MC%u_ADDR w/ non-zero cause #GP\n", bank);
ret = -1;
}
- else if ( entry && (entry->bank == bank) )
+ else if ( bank < GUEST_MC_BANK_NUM )
{
- entry->mci_addr = val;
- mce_printk(MCE_VERBOSE,
- "MCE: wr MC%u_ADDR %"PRIx64" in vMCE#\n", bank, val);
- }
- else
+ v->arch.vmce.bank[bank].mci_addr = val;
mce_printk(MCE_VERBOSE,
"MCE: wr MC%u_ADDR %"PRIx64"\n", bank, val);
+ }
break;
case MSR_IA32_MC0_MISC:
- if ( !~val )
+ if ( val )
{
mce_printk(MCE_QUIET,
- "MCE: wr MC%u_MISC with all 1s will cause #GP\n", bank);
+ "MCE: wr MC%u_MISC w/ non-zero cause #GP\n", bank);
ret = -1;
}
- else if ( entry && (entry->bank == bank) )
+ else if ( bank < GUEST_MC_BANK_NUM )
{
- entry->mci_misc = val;
- mce_printk(MCE_VERBOSE,
- "MCE: wr MC%u_MISC %"PRIx64" in vMCE#\n", bank, val);
- }
- else
+ v->arch.vmce.bank[bank].mci_misc = val;
mce_printk(MCE_VERBOSE,
"MCE: wr MC%u_MISC %"PRIx64"\n", bank, val);
+ }
break;
default:
switch ( boot_cpu_data.x86_vendor )
@@ -286,52 +268,33 @@
int vmce_wrmsr(u32 msr, u64 val)
{
struct vcpu *cur = current;
- struct bank_entry *entry = NULL;
- struct domain_mca_msrs *vmce = dom_vmce(cur->domain);
int ret = 1;
- spin_lock(&vmce->lock);
+ spin_lock(&cur->arch.vmce.lock);
switch ( msr )
{
case MSR_IA32_MCG_CTL:
+ /* If MCG_CTL exists it is stuck at all 1's; if it does not exist, ignore the write */
break;
case MSR_IA32_MCG_STATUS:
- vmce->mcg_status = val;
+ cur->arch.vmce.mcg_status = val;
mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", val);
- /* For HVM guest, this is the point for deleting vMCE injection node */
- if ( is_hvm_vcpu(cur) && (vmce->nr_injection > 0) )
- {
- vmce->nr_injection--; /* Should be 0 */
- if ( !list_empty(&vmce->impact_header) )
- {
- entry = list_entry(vmce->impact_header.next,
- struct bank_entry, list);
- if ( entry->mci_status & MCi_STATUS_VAL )
- mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have "
- "been cleared before write MCG_STATUS MSR\n");
-
- mce_printk(MCE_QUIET, "MCE: Delete HVM last injection "
- "Node, nr_injection %u\n",
- vmce->nr_injection);
- list_del(&entry->list);
- xfree(entry);
- }
- else
- mce_printk(MCE_QUIET, "MCE: Not found HVM guest"
- " last injection Node, something Wrong!\n");
- }
break;
case MSR_IA32_MCG_CAP:
- mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n");
- ret = -1;
+ /*
+ * According to the Intel SDM, IA32_MCG_CAP is a read-only register and
+ * the effect of writing to it is undefined. Here we treat a write as
+ * 'write does not change', which will not surprise the guest.
+ */
+ mce_printk(MCE_QUIET, "MCE: MCG_CAP is read only and write not change\n");
break;
default:
ret = mce_bank_msr(cur, msr) ? bank_mce_wrmsr(cur, msr, val) : 0;
break;
}
- spin_unlock(&vmce->lock);
+ spin_unlock(&cur->arch.vmce.lock);
return ret;
}
@@ -342,7 +305,7 @@
for_each_vcpu( d, v ) {
struct hvm_vmce_vcpu ctxt = {
- .caps = v->arch.mcg_cap
+ .caps = v->arch.vmce.mcg_cap
};
err = hvm_save_entry(VMCE_VCPU, v->vcpu_id, h, &ctxt);
@@ -422,93 +385,38 @@
return 0;
}
-/* This node list records errors impacting a domain. when one
- * MCE# happens, one error bank impacts a domain. This error node
- * will be inserted to the tail of the per_dom data for vMCE# MSR
- * virtualization. When one vMCE# injection is finished processing
- * processed by guest, the corresponding node will be deleted.
- * This node list is for GUEST vMCE# MSRS virtualization.
- */
-static struct bank_entry* alloc_bank_entry(void)
+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
+ uint64_t gstatus)
{
- struct bank_entry *entry;
+ struct vcpu *v = d->vcpu[0];
- entry = xzalloc(struct bank_entry);
- if ( entry == NULL )
- {
- printk(KERN_ERR "MCE: malloc bank_entry failed\n");
- return NULL;
- }
-
- INIT_LIST_HEAD(&entry->list);
- return entry;
-}
-
-/* Fill error bank info for #vMCE injection and GUEST vMCE#
- * MSR virtualization data
- * 1) Log down how many nr_injections of the impacted.
- * 2) Copy MCE# error bank to impacted DOM node list,
- * for vMCE# MSRs virtualization
- */
-int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
- uint64_t gstatus) {
- struct bank_entry *entry;
-
- /* This error bank impacts one domain, we need to fill domain related
- * data for vMCE MSRs virtualization and vMCE# injection */
if ( mc_bank->mc_domid != (uint16_t)~0 )
{
- /* For HVM guest, Only when first vMCE is consumed by HVM guest
- * successfully, will we generete another node and inject another vMCE.
- */
- if ( d->is_hvm && (dom_vmce(d)->nr_injection > 0) )
+ if ( v->arch.vmce.mcg_status & MCG_STATUS_MCIP )
{
- mce_printk(MCE_QUIET, "MCE: HVM guest has not handled previous"
+ mce_printk(MCE_QUIET, "MCE: guest has not handled previous"
" vMCE yet!\n");
return -1;
}
- entry = alloc_bank_entry();
- if ( entry == NULL )
- return -1;
+ spin_lock(&v->arch.vmce.lock);
- entry->mci_status = mc_bank->mc_status;
- entry->mci_addr = mc_bank->mc_addr;
- entry->mci_misc = mc_bank->mc_misc;
- entry->bank = mc_bank->mc_bank;
+ v->arch.vmce.mcg_status = gstatus;
+ /*
+ * 1. Skip bank 0 to avoid the 'bank 0 quirk' of old processors
+ * 2. Filter the model-specific MSCOD error code out of MCi_STATUS for the guest
+ */
+ v->arch.vmce.bank[1].mci_status = mc_bank->mc_status &
+ MCi_STATUS_MSCOD_MASK;
+ v->arch.vmce.bank[1].mci_addr = mc_bank->mc_addr;
+ v->arch.vmce.bank[1].mci_misc = mc_bank->mc_misc;
- spin_lock(&dom_vmce(d)->lock);
- /* New error Node, insert to the tail of the per_dom data */
- list_add_tail(&entry->list, &dom_vmce(d)->impact_header);
- /* Fill MSR global status */
- dom_vmce(d)->mcg_status = gstatus;
- /* New node impact the domain, need another vMCE# injection*/
- dom_vmce(d)->nr_injection++;
- spin_unlock(&dom_vmce(d)->lock);
-
- mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d "
- "status %"PRIx64" addr %"PRIx64" domid %d]\n ",
- mc_bank->mc_bank, mc_bank->mc_status, mc_bank->mc_addr,
- mc_bank->mc_domid);
+ spin_unlock(&v->arch.vmce.lock);
}
return 0;
}
-#if 0 /* currently unused */
-int vmce_domain_inject(
- struct mcinfo_bank *bank, struct domain *d, struct mcinfo_global *global)
-{
- int ret;
-
- ret = fill_vmsr_data(bank, d, global->mc_gstatus);
- if ( ret < 0 )
- return ret;
-
- return inject_vmce(d);
-}
-#endif
-
static int is_hvm_vmce_ready(struct mcinfo_bank *bank, struct domain *d)
{
struct vcpu *v;
diff -r fbd9e864c047 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Mon Sep 17 18:02:59 2012 +0800
+++ b/xen/arch/x86/domain.c Tue Sep 18 22:39:10 2012 +0800
@@ -571,9 +571,6 @@
if ( (rc = iommu_domain_init(d)) != 0 )
goto fail;
-
- /* For Guest vMCE MSRs virtualization */
- vmce_init_msr(d);
}
if ( is_hvm_domain(d) )
@@ -600,7 +597,6 @@
fail:
d->is_dying = DOMDYING_dead;
- vmce_destroy_msr(d);
cleanup_domain_irq_mapping(d);
free_xenheap_page(d->shared_info);
if ( paging_initialised )
@@ -623,7 +619,6 @@
else
xfree(d->arch.pv_domain.e820);
- vmce_destroy_msr(d);
free_domain_pirqs(d);
if ( !is_idle_domain(d) )
iommu_domain_destroy(d);
diff -r fbd9e864c047 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Mon Sep 17 18:02:59 2012 +0800
+++ b/xen/arch/x86/domctl.c Tue Sep 18 22:39:10 2012 +0800
@@ -1024,7 +1024,7 @@
evc->syscall32_callback_eip = 0;
evc->syscall32_disables_events = 0;
}
- evc->mcg_cap = v->arch.mcg_cap;
+ evc->mcg_cap = v->arch.vmce.mcg_cap;
}
else
{
diff -r fbd9e864c047 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Mon Sep 17 18:02:59 2012 +0800
+++ b/xen/arch/x86/traps.c Tue Sep 18 22:39:10 2012 +0800
@@ -3133,50 +3133,6 @@
break;
ASSERT(trap <= VCPU_TRAP_LAST);
- /* inject vMCE to PV_Guest including DOM0. */
- if ( trap == VCPU_TRAP_MCE )
- {
- gdprintk(XENLOG_DEBUG, "MCE: Return from vMCE# trap!\n");
- if ( curr->vcpu_id == 0 )
- {
- struct domain *d = curr->domain;
-
- if ( !d->arch.vmca_msrs->nr_injection )
- {
- printk(XENLOG_WARNING "MCE: ret from vMCE#, "
- "no injection node\n");
- goto end;
- }
-
- d->arch.vmca_msrs->nr_injection--;
- if ( !list_empty(&d->arch.vmca_msrs->impact_header) )
- {
- struct bank_entry *entry;
-
- entry = list_entry(d->arch.vmca_msrs->impact_header.next,
- struct bank_entry, list);
- gdprintk(XENLOG_DEBUG, "MCE: delete last injection node\n");
- list_del(&entry->list);
- }
- else
- printk(XENLOG_ERR "MCE: didn't found last injection node\n");
-
- /* further injection */
- if ( d->arch.vmca_msrs->nr_injection > 0 &&
- guest_has_trap_callback(d, 0, TRAP_machine_check) &&
- !test_and_set_bool(curr->mce_pending) )
- {
- int cpu = smp_processor_id();
-
- cpumask_copy(curr->cpu_affinity_tmp, curr->cpu_affinity);
- printk(XENLOG_DEBUG "MCE: CPU%d set affinity, old %d\n",
- cpu, curr->processor);
- vcpu_set_affinity(curr, cpumask_of(cpu));
- }
- }
- }
-
-end:
/* Restore previous asynchronous exception mask. */
curr->async_exception_mask = curr->async_exception_state(trap).old_mask;
}
diff -r fbd9e864c047 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Mon Sep 17 18:02:59 2012 +0800
+++ b/xen/include/asm-x86/domain.h Tue Sep 18 22:39:10 2012 +0800
@@ -296,9 +296,6 @@
struct PITState vpit;
- /* For Guest vMCA handling */
- struct domain_mca_msrs *vmca_msrs;
-
/* TSC management (emulation, pv, scaling, stats) */
int tsc_mode; /* see include/asm-x86/time.h */
bool_t vtsc; /* tsc is emulated (may change after migrate) */
@@ -438,8 +435,8 @@
* and thus should be saved/restored. */
bool_t nonlazy_xstate_used;
- uint64_t mcg_cap;
-
+ struct vmce vmce;
+
struct paging_vcpu paging;
uint32_t gdbsx_vcpu_event;
diff -r fbd9e864c047 xen/include/asm-x86/mce.h
--- a/xen/include/asm-x86/mce.h Mon Sep 17 18:02:59 2012 +0800
+++ b/xen/include/asm-x86/mce.h Tue Sep 18 22:39:10 2012 +0800
@@ -3,28 +3,50 @@
#ifndef _XEN_X86_MCE_H
#define _XEN_X86_MCE_H
-/* This entry is for recording bank nodes for the impacted domain,
- * put into impact_header list. */
-struct bank_entry {
- struct list_head list;
- uint16_t bank;
+/*
+ * Emulate 2 banks for the guest
+ * Bank0: reserved for the 'bank0 quirk' that occurs on some very old processors:
+ * 1). Intel cpu whose family-model value < 06-1A;
+ * 2). AMD K7
+ * Bank1: used to transfer error info to guest
+ */
+#define GUEST_MC_BANK_NUM 2
+
+/*
+ * MCG_SER_P: software error recovery supported
+ * MCG_TES_P: to avoid MCi_STATUS bits 56:53 being model specific
+ * MCG_CMCI_P: expose the CMCI capability but never really inject it to the
+ * guest, for the sake of performance, so the guest does not poll periodically
+ */
+#define INTEL_GUEST_MCG_CAP (MCG_SER_P | \
+ MCG_TES_P | \
+ MCG_CMCI_P | \
+ GUEST_MC_BANK_NUM)
+
+#define AMD_GUEST_MCG_CAP (MCG_SER_P | \
+ GUEST_MC_BANK_NUM)
+
+/* Filter the model-specific MSCOD error code out of guest MCi_STATUS */
+#define MCi_STATUS_MSCOD_MASK (~(0xffffULL << 16))
+
+/* No mci_ctl since it is stuck at all 1's */
+struct vmce_bank {
uint64_t mci_status;
uint64_t mci_addr;
uint64_t mci_misc;
+ uint64_t mci_ctl2;
};
-struct domain_mca_msrs
-{
- /* Guest should not change below values after DOM boot up */
+/* No mcg_ctl since it is not exposed to the guest */
+struct vmce {
+ uint64_t mcg_cap;
uint64_t mcg_status;
- uint16_t nr_injection;
- struct list_head impact_header;
+ struct vmce_bank bank[GUEST_MC_BANK_NUM];
+
spinlock_t lock;
};
/* Guest vMCE MSRs virtualization */
-extern int vmce_init_msr(struct domain *d);
-extern void vmce_destroy_msr(struct domain *d);
extern void vmce_init_vcpu(struct vcpu *);
extern int vmce_restore_vcpu(struct vcpu *, uint64_t caps);
extern int vmce_wrmsr(uint32_t msr, uint64_t val);

Attachment: 1_vmce_emulation.patch