[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] tools/xen-mceinj: support AMD



On Fri, 2012-10-19 at 16:05 +0100, Jan Beulich wrote:
> >>> On 19.10.12 at 17:01, Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx> wrote:
> > Jan Beulich writes ("Re: [Xen-devel] [PATCH] tools/xen-mceinj: support 
> > AMD"):
> >> >>> On 19.10.12 at 15:10, Christoph Egger <Christoph.Egger@xxxxxxx> wrote:
> >> > Ping?
> >> 
> >> I'm afraid it's not really clear who should commit this - it's tools
> >> side code, so IanJ or IanC would normally be the ones, but otoh
> >> it's code requiring low level hardware knowledge to review the
> >> patch, so both of them might want to rather not do the review.
> >> In the past it was usually Keir who eventually committed such
> >> patches, but I don't know whether he put this on his to-look-at-
> >> and-eventually-commit list.
> > 
> > My view is that I would like an ack from someone who understands
> > what's going on ...
> 
> Which would ideally be those who introduced the code, i.e.
> Intel folks if I'm not mistaken...

Let's CC some of them then.

Intel folks -- any opinion on the patch below from Christoph?

8<----------------

# User Christoph Egger
# Date 1349437062 -7200
xen mceinj: support AMD.

Signed-off-by: Christoph Egger <Christoph.Egger@xxxxxxx>

diff -r 21704bc429b4 -r 1a3eea784e09 tools/tests/mce-test/tools/xen-mceinj.c
--- a/tools/tests/mce-test/tools/xen-mceinj.c
+++ b/tools/tests/mce-test/tools/xen-mceinj.c
@@ -1,6 +1,7 @@
 /*
  * xen-mceinj.c: utilities to inject fake MCE for x86.
  * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2012, AMD Cooperation Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +19,7 @@
  * Authors: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
  *          Haicheng Li <haicheng.li@xxxxxxxxx>
  *          Xudong Hao <xudong.hao@xxxxxxxxx>
+ *          Christoph Egger <Christoph.Egger@xxxxxxx>
  */
 
 
@@ -44,11 +46,14 @@
 #define MCi_type_STATUS     0x1
 #define MCi_type_ADDR       0x2
 #define MCi_type_MISC       0x3
-#define MCi_type_CTL2       0x4
+#define MC4_type_MISC1      0x4
+#define MC4_type_MISC2      0x5
+#define MC4_type_MISC3      0x6
+#define MCi_type_CTL2       0x7
 
 #define INVALID_MSR         ~0UL
 
-/* Intel MSRs */
+/* X86 machine check MSRs */
 #define MSR_IA32_MCG_CAP         0x00000179
 #define MSR_IA32_MCG_STATUS      0x0000017a
 #define MSR_IA32_MCG_CTL         0x0000017b
@@ -56,35 +61,66 @@
 #define MSR_IA32_MC0_STATUS      0x00000401
 #define MSR_IA32_MC0_ADDR        0x00000402
 #define MSR_IA32_MC0_MISC        0x00000403
+
+/* Intel MSRs */
 #define MSR_IA32_MC0_CTL2        0x00000280
 
-/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */
+/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */
 #define MCG_STATUS_SRAO_LLC_VAL  0x5
 #define MCE_SRAO_LLC_BANK        0x7
 #define MCi_STATUS_SRAO_LLC_VAL  0xBD2000008000017AUL
 #define MCi_MISC_SRAO_LLC_VAL    0x86UL
 
-/* Memory Patrol Scrub SRAO MCE */
+/* Intel: Memory Patrol Scrub SRAO MCE */
 #define MCG_STATUS_SRAO_MEM_VAL  0x5
 #define MCE_SRAO_MEM_BANK        0x8
 #define MCi_STATUS_SRAO_MEM_VAL  0xBD000000004000CFUL
 #define MCi_MISC_SRAO_MEM_VAL    0x86UL
 
-/* LLC EWB UCNA Error */
+/* Intel: LLC EWB UCNA Error */
 #define MCG_STATUS_UCNA_LLC_VAL  0x0
 #define CMCI_UCNA_LLC_BANK       0x9
 #define MCi_STATUS_UCNA_LLC_VAL  0xBC20000080000136UL
 #define MCi_MISC_UCNA_LLC_VAL    0x86UL
 
-/* Error Types */
-#define MCE_SRAO_MEM        0x0
-#define MCE_SRAO_LLC        0x1
-#define CMCI_UCNA_LLC       0x2
+/* Intel: Error Types */
+#define INTEL_MCE_SRAO_MEM        0x0
+#define INTEL_MCE_SRAO_LLC        0x1
+#define INTEL_CMCI_UCNA_LLC       0x2
+
+/* AMD: Memory Error */
+#define MCG_STATUS_MEM_VAL        0x5
+#define MCE_MEM_BANK              0x4
+#define MCi_STATUS_MEM_VAL        0xb4000000001c0100UL
+//#define MCi_STATUS_MEM_VAL        0xb600000000000100UL
+#define MCi_MISC_MEM_VAL          0x0
+
+/* AMD: L3 Cache Error */
+#define MCG_STATUS_L3_VAL         0x5
+#define MCE_L3_BANK               0x4
+#define MCi_STATUS_L3_VAL         0xbc000400001c010bULL
+#define MC4_MISC0_VAL             0x0
+#define MC4_MISC1_VAL             0x0
+#define MC4_MISC2_L3_VAL          0xc008000000000003ULL
+
+/* AMD: CPU corruption error */
+#define MCG_STATUS_CPU_VAL        0x5
+#define MCE_CPU_BANK              0x2
+#define MCi_STATUS_CPU_VAL        0x9200000000000000ULL
+//#define MCi_STATUS_CPU_VAL        0xb200000000000000ULL
+
+/* AMD: Error Types */
+#define AMD_MCE_MEM               0x20 /* memory error */
+#define AMD_MCE_L3                0x21 /* l3 cache */
 
 #define LOGFILE stdout
 
 int dump;
+int opt_exception;
 struct xen_mc_msrinject msr_inj;
+int cpu_is_amd;
+int cpu_is_intel;
+
 
 static void Lprintf(const char *fmt, ...)
 {
@@ -145,7 +181,7 @@ static int mca_cpuinfo(xc_interface *xc_
         return 0;
 }
 
-static int inject_cmci(xc_interface *xc_handle, int cpu_nr)
+static int intel_inject_cmci(xc_interface *xc_handle)
 {
     struct xen_mc mc;
     int nr_cpus;
@@ -191,6 +227,15 @@ static uint64_t bank_addr(int bank, int 
         case MCi_type_MISC:
             addr = MSR_IA32_MC0_CTL + (bank * 4) + type;
             break;
+        case MC4_type_MISC1:
+            addr = 0xc0000408;
+            break;
+        case MC4_type_MISC2:
+            addr = 0xc0000409;
+            break;
+        case MC4_type_MISC3:
+            addr = 0xc000040a;
+            break;
         case MCi_type_CTL2:
             addr = MSR_IA32_MC0_CTL2 + bank;
             break;
@@ -356,12 +401,11 @@ static int inject_mci_status(xc_interfac
 }
 
 static int inject_mci_misc(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint64_t bank,
-                             uint64_t val)
+                             uint32_t cpu_nr, uint32_t misctype,
+                             uint64_t bank, uint64_t val)
 {
     return add_msr_bank_intpose(xc_handle, cpu_nr, MC_MSRINJ_F_INTERPOSE,
-                                    MCi_type_MISC, bank, val); 
+                                    MCi_type_MISC + misctype, bank, val); 
 }
 
 static int inject_mci_addr(xc_interface *xc_handle,
@@ -373,10 +417,8 @@ static int inject_mci_addr(xc_interface 
                                     MCi_type_ADDR, bank, val); 
 }
 
-static int inject_llc_srao(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_llc_srao(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -390,7 +432,7 @@ static int inject_llc_srao(xc_interface 
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           MCE_SRAO_LLC_BANK, MCi_MISC_SRAO_LLC_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -407,17 +449,17 @@ static int inject_llc_srao(xc_interface 
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_mce(xc_handle, cpu_nr);
-    if ( ret )
-        err(xc_handle, "Failed to inject MCE error\n");
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
 
     return 0;
 }
 
-static int inject_mem_srao(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_mem_srao(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -431,7 +473,7 @@ static int inject_mem_srao(xc_interface 
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           MCE_SRAO_MEM_BANK, MCi_MISC_SRAO_MEM_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -448,17 +490,17 @@ static int inject_mem_srao(xc_interface 
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_mce(xc_handle, cpu_nr);
-    if ( ret )
-        err(xc_handle, "Failed to inject MCE error\n");
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
 
     return 0;
 }
 
-static int inject_llc_ucna(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_llc_ucna(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -472,7 +514,7 @@ static int inject_llc_ucna(xc_interface 
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           CMCI_UCNA_LLC_BANK, MCi_MISC_UCNA_LLC_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -489,13 +531,108 @@ static int inject_llc_ucna(xc_interface 
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_cmci(xc_handle, cpu_nr);
+    ret = intel_inject_cmci(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MCE error\n");
 
     return 0;
 }
 
+static int amd_inject_mem(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
+{
+    uint64_t gpfn, mfn, haddr;
+    int ret = 0;
+
+    ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCG_STATUS MSR\n");
+
+    ret = inject_mci_status(xc_handle, cpu_nr,
+                            MCE_MEM_BANK, MCi_STATUS_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
+                          MCE_MEM_BANK, MCi_MISC_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_MISC MSR\n");
+
+    gpfn = gaddr >> PAGE_SHIFT;
+    mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn);
+    if (!mfn_valid(mfn))
+        err(xc_handle, "The MFN is not valid\n");
+    haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1));
+    ret = inject_mci_addr(xc_handle, cpu_nr, MCE_MEM_BANK, haddr);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_ADDR MSR\n");
+
+    ret = flush_msr_inj(xc_handle);
+    if ( ret )
+        err(xc_handle, "Failed to inject MSR\n");
+
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
+
+    return 0;
+}
+
+static int amd_inject_l3(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
+{
+    uint64_t gpfn, mfn, haddr;
+    int ret = 0;
+
+    ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCG_STATUS MSR\n");
+
+    ret = inject_mci_status(xc_handle, cpu_nr,
+                            MCE_L3_BANK, MCi_STATUS_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
+                          MCE_L3_BANK, MC4_MISC0_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC0 MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 1,
+                          MCE_L3_BANK, MC4_MISC1_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC1 MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 2,
+                          MCE_L3_BANK, MC4_MISC2_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC2 MSR\n");
+
+    gpfn = gaddr >> PAGE_SHIFT;
+    mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn);
+    if (!mfn_valid(mfn))
+        err(xc_handle, "The MFN is not valid\n");
+    haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1));
+    ret = inject_mci_addr(xc_handle, cpu_nr, MCE_L3_BANK, haddr);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_ADDR MSR\n");
+
+    ret = flush_msr_inj(xc_handle);
+    if ( ret )
+        err(xc_handle, "Failed to inject MSR\n");
+
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
+
+    return 0;
+}
+
+
 static long xs_get_dom_mem(int domid)
 {
     char path[128];
@@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid)
     if (!xs)
         return -1;
 
-    sprintf(path, "/local/domain/%d/memory/target", domid);
+    snprintf(path, sizeof(path), "/local/domain/%d/memory/target", domid);
     memstr = xs_read(xs, XBT_NULL, path, &plen);
     xs_daemon_close(xs);
 
@@ -540,30 +677,80 @@ static void help(void)
            "  -D, --dump           dump addr info without error injection\n"
            "  -c, --cpu=CPU_ID     target CPU\n"
            "  -d, --domain=DomID   target domain, the default is Xen itself\n"
+           "  -e                   raise MCE exception\n"
            "  -h, --help           print this page\n"
            "  -p, --phyaddr        physical address\n"
            "  -t, --type=error     error type\n"
-           "                        0 : MCE_SRAO_MEM\n"
-           "                        1 : MCE_SRAO_LLC\n"
-           "                        2 : CMCI_UCNA_LLC\n"
+           "                        0x0 : MCE_SRAO_MEM (Intel only)\n"
+           "                        0x1 : MCE_SRAO_LLC (Intel only)\n"
+           "                        0x2 : CMCI_UCNA_LLC (Intel only)\n"
+           "                        0x20: DRAM error (AMD only)\n"
+           "                        0x21: L3 cache error (AMD only)\n"
            "\n"
            );
 }
 
+static void cpuid(const unsigned int *input, unsigned int *regs)
+{
+    unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
+    asm (
+#ifdef __i386__
+        "push %%ebx; push %%edx\n\t"
+#else
+        "push %%rbx; push %%rdx\n\t"
+#endif
+        "cpuid\n\t"
+        "mov %%ebx,4(%4)\n\t"
+        "mov %%edx,12(%4)\n\t"
+#ifdef __i386__
+        "pop %%edx; pop %%ebx\n\t"
+#else
+        "pop %%rdx; pop %%rbx\n\t"
+#endif
+        : "=a" (regs[0]), "=c" (regs[2])
+        : "0" (input[0]), "1" (count), "S" (regs)
+        : "memory" );
+}
+
+/* Get the manufacturer brand name of the host processor. */
+static void cpuid_brand_get(char *str)
+{
+    unsigned int input[2] = { 0, 0 };
+    unsigned int regs[4];
+
+    cpuid(input, regs);
+
+    *(uint32_t *)(str + 0) = regs[1];
+    *(uint32_t *)(str + 4) = regs[3];
+    *(uint32_t *)(str + 8) = regs[2];
+    str[12] = '\0';
+}
+
 int main(int argc, char *argv[])
 {
-    int type = MCE_SRAO_MEM;
+    int type;
     int c, opt_index;
     uint32_t domid;
     xc_interface *xc_handle;
-    int cpu_nr;
-    int64_t gaddr, gpfn, mfn, haddr, max_gpa;
+    unsigned int cpu_nr;
+    uint64_t gaddr, gpfn, mfn, haddr, max_gpa;
+    char cpu_brand[13];
 
     /* Default Value */
     domid = DOMID_XEN;
     gaddr = 0x180020;
     cpu_nr = 0;
 
+    cpu_is_amd = cpu_is_intel = 0;
+    cpuid_brand_get(cpu_brand);
+    if (strstr(cpu_brand, "AMD"))
+        cpu_is_amd = 1;
+    else
+        cpu_is_intel = 1;
+
+    if (cpu_is_intel)
+        type = INTEL_MCE_SRAO_MEM;
+
     init_msr_inj();
     xc_handle = xc_interface_open(0, 0, 0);
     if ( !xc_handle ) {
@@ -571,8 +758,8 @@ int main(int argc, char *argv[])
         exit(EXIT_FAILURE);
     }
 
-    while ( 1 ) {
-        c = getopt_long(argc, argv, "c:Dd:t:hp:r", opts, &opt_index);
+    for (;;) {
+        c = getopt_long(argc, argv, "c:Dd:t:hp:r:e", opts, &opt_index);
         if ( c == -1 )
             break;
         switch ( c ) {
@@ -580,23 +767,26 @@ int main(int argc, char *argv[])
             dump=1;
             break;
         case 'c':
-            cpu_nr = strtol(optarg, &optarg, 10);
+            cpu_nr = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input a digit parameter for CPU\n");
             break;
         case 'd':
-            domid = strtol(optarg, &optarg, 10);
+            domid = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input a digit parameter for domain\n");
             break;
         case 'p':
-            gaddr = strtol(optarg, &optarg, 0);
+            gaddr = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input correct page address\n");
             break;
         case 't':
             type = strtol(optarg, NULL, 0);
             break;
+        case 'e':
+            opt_exception = 1;
+            break;
         case 'h':
         default:
             help();
@@ -627,16 +817,26 @@ int main(int argc, char *argv[])
         goto out;
     }
 
-    switch ( type )
-    {
-    case MCE_SRAO_MEM:
-        inject_mem_srao(xc_handle, cpu_nr, domid, gaddr);
+    switch ( type ) {
+    case INTEL_MCE_SRAO_MEM:
+        if ( cpu_is_intel )
+            intel_inject_mem_srao(xc_handle, cpu_nr, domid, gaddr);
         break;
-    case MCE_SRAO_LLC:
-        inject_llc_srao(xc_handle, cpu_nr, domid, gaddr);
+    case INTEL_MCE_SRAO_LLC:
+        if ( cpu_is_intel )
+            intel_inject_llc_srao(xc_handle, cpu_nr, domid, gaddr);
         break;
-    case CMCI_UCNA_LLC:
-        inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr);
+    case INTEL_CMCI_UCNA_LLC:
+        if ( cpu_is_intel )
+            intel_inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr);
+        break;
+    case AMD_MCE_MEM:
+        if ( cpu_is_amd )
+            amd_inject_mem(xc_handle, cpu_nr, domid, gaddr);
+        break;
+    case AMD_MCE_L3:
+        if ( cpu_is_amd )
+            amd_inject_l3(xc_handle, cpu_nr, domid, gaddr);
         break;
     default:
         err(xc_handle, "Unsupported error type\n");


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.