Re: [Xen-devel] [PATCH] tools/xen-mceinj: support AMD
On Fri, 2012-10-19 at 16:05 +0100, Jan Beulich wrote: > >>> On 19.10.12 at 17:01, Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx> wrote: > > Jan Beulich writes ("Re: [Xen-devel] [PATCH] tools/xen-mceinj: support > > AMD"): > >> >>> On 19.10.12 at 15:10, Christoph Egger <Christoph.Egger@xxxxxxx> wrote: > >> > Ping? > >> > >> I'm afraid it's not really clear who should commit this - it's tools > >> side code, so IanJ or IanC would normally be the ones, but otoh > >> it's code requiring low level hardware knowledge to review the > >> patch, so both of them might want to rather not do the review. > >> In the past it was usually Keir who eventually committed such > >> patches, but I don't know whether he put this on his to-look-at- > >> and-eventually-commit list. > > > > My view is that I would like an ack from someone who understands > > what's going on ... > > Which would ideally be those who introduced the code, i.e. > Intel folks if I'm not mistaken... Let's CC some of them then. Intel folks -- any opinion on the patch below from Christoph? 8<---------------- # User Christoph Egger # Date 1349437062 -7200 xen mceinj: support AMD. Signed-off-by: Christoph Egger <Christoph.Egger@xxxxxxx> diff -r 21704bc429b4 -r 1a3eea784e09 tools/tests/mce-test/tools/xen-mceinj.c --- a/tools/tests/mce-test/tools/xen-mceinj.c +++ b/tools/tests/mce-test/tools/xen-mceinj.c @@ -1,6 +1,7 @@ /* * xen-mceinj.c: utilities to inject fake MCE for x86. * Copyright (c) 2010, Intel Corporation. + * Copyright (c) 2012, AMD Cooperation Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -18,6 +19,7 @@ * Authors: Yunhong Jiang <yunhong.jiang@xxxxxxxxx> * Haicheng Li <haicheng.li@xxxxxxxxx> * Xudong Hao <xudong.hao@xxxxxxxxx> + * Christoph Egger <Christoph.Egger@xxxxxxx> */ @@ -44,11 +46,14 @@ #define MCi_type_STATUS 0x1 #define MCi_type_ADDR 0x2 #define MCi_type_MISC 0x3 -#define MCi_type_CTL2 0x4 +#define MC4_type_MISC1 0x4 +#define MC4_type_MISC2 0x5 +#define MC4_type_MISC3 0x6 +#define MCi_type_CTL2 0x7 #define INVALID_MSR ~0UL -/* Intel MSRs */ +/* X86 machine check MSRs */ #define MSR_IA32_MCG_CAP 0x00000179 #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b @@ -56,35 +61,66 @@ #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 #define MSR_IA32_MC0_MISC 0x00000403 + +/* Intel MSRs */ #define MSR_IA32_MC0_CTL2 0x00000280 -/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */ +/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */ #define MCG_STATUS_SRAO_LLC_VAL 0x5 #define MCE_SRAO_LLC_BANK 0x7 #define MCi_STATUS_SRAO_LLC_VAL 0xBD2000008000017AUL #define MCi_MISC_SRAO_LLC_VAL 0x86UL -/* Memory Patrol Scrub SRAO MCE */ +/* Intel: Memory Patrol Scrub SRAO MCE */ #define MCG_STATUS_SRAO_MEM_VAL 0x5 #define MCE_SRAO_MEM_BANK 0x8 #define MCi_STATUS_SRAO_MEM_VAL 0xBD000000004000CFUL #define MCi_MISC_SRAO_MEM_VAL 0x86UL -/* LLC EWB UCNA Error */ +/* Intel: LLC EWB UCNA Error */ #define MCG_STATUS_UCNA_LLC_VAL 0x0 #define CMCI_UCNA_LLC_BANK 0x9 #define MCi_STATUS_UCNA_LLC_VAL 0xBC20000080000136UL #define MCi_MISC_UCNA_LLC_VAL 0x86UL -/* Error Types */ -#define MCE_SRAO_MEM 0x0 -#define MCE_SRAO_LLC 0x1 -#define CMCI_UCNA_LLC 0x2 +/* Intel: Error Types */ +#define INTEL_MCE_SRAO_MEM 0x0 +#define INTEL_MCE_SRAO_LLC 0x1 +#define INTEL_CMCI_UCNA_LLC 0x2 + +/* AMD: Memory Error */ +#define MCG_STATUS_MEM_VAL 0x5 +#define MCE_MEM_BANK 0x4 
+#define MCi_STATUS_MEM_VAL 0xb4000000001c0100UL +//#define MCi_STATUS_MEM_VAL 0xb600000000000100UL +#define MCi_MISC_MEM_VAL 0x0 + +/* AMD: L3 Cache Error */ +#define MCG_STATUS_L3_VAL 0x5 +#define MCE_L3_BANK 0x4 +#define MCi_STATUS_L3_VAL 0xbc000400001c010bULL +#define MC4_MISC0_VAL 0x0 +#define MC4_MISC1_VAL 0x0 +#define MC4_MISC2_L3_VAL 0xc008000000000003ULL + +/* AMD: CPU corruption error */ +#define MCG_STATUS_CPU_VAL 0x5 +#define MCE_CPU_BANK 0x2 +#define MCi_STATUS_CPU_VAL 0x9200000000000000ULL +//#define MCi_STATUS_CPU_VAL 0xb200000000000000ULL + +/* AMD: Error Types */ +#define AMD_MCE_MEM 0x20 /* memory error */ +#define AMD_MCE_L3 0x21 /* l3 cache */ #define LOGFILE stdout int dump; +int opt_exception; struct xen_mc_msrinject msr_inj; +int cpu_is_amd; +int cpu_is_intel; + static void Lprintf(const char *fmt, ...) { @@ -145,7 +181,7 @@ static int mca_cpuinfo(xc_interface *xc_ return 0; } -static int inject_cmci(xc_interface *xc_handle, int cpu_nr) +static int intel_inject_cmci(xc_interface *xc_handle) { struct xen_mc mc; int nr_cpus; @@ -191,6 +227,15 @@ static uint64_t bank_addr(int bank, int case MCi_type_MISC: addr = MSR_IA32_MC0_CTL + (bank * 4) + type; break; + case MC4_type_MISC1: + addr = 0xc0000408; + break; + case MC4_type_MISC2: + addr = 0xc0000409; + break; + case MC4_type_MISC3: + addr = 0xc000040a; + break; case MCi_type_CTL2: addr = MSR_IA32_MC0_CTL2 + bank; break; @@ -356,12 +401,11 @@ static int inject_mci_status(xc_interfac } static int inject_mci_misc(xc_interface *xc_handle, - uint32_t cpu_nr, - uint64_t bank, - uint64_t val) + uint32_t cpu_nr, uint32_t misctype, + uint64_t bank, uint64_t val) { return add_msr_bank_intpose(xc_handle, cpu_nr, MC_MSRINJ_F_INTERPOSE, - MCi_type_MISC, bank, val); + MCi_type_MISC + misctype, bank, val); } static int inject_mci_addr(xc_interface *xc_handle, @@ -373,10 +417,8 @@ static int inject_mci_addr(xc_interface MCi_type_ADDR, bank, val); } -static int inject_llc_srao(xc_interface *xc_handle, - 
uint32_t cpu_nr, - uint32_t domain, - uint64_t gaddr) +static int intel_inject_llc_srao(xc_interface *xc_handle, + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { uint64_t gpfn, mfn, haddr; int ret = 0; @@ -390,7 +432,7 @@ static int inject_llc_srao(xc_interface if ( ret ) err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); - ret = inject_mci_misc(xc_handle, cpu_nr, + ret = inject_mci_misc(xc_handle, cpu_nr, 0, MCE_SRAO_LLC_BANK, MCi_MISC_SRAO_LLC_VAL); if ( ret ) err(xc_handle, "Failed to inject MCi_MISC MSR\n"); @@ -407,17 +449,17 @@ static int inject_llc_srao(xc_interface ret = flush_msr_inj(xc_handle); if ( ret ) err(xc_handle, "Failed to inject MSR\n"); - ret = inject_mce(xc_handle, cpu_nr); - if ( ret ) - err(xc_handle, "Failed to inject MCE error\n"); + if (opt_exception) { + ret = inject_mce(xc_handle, cpu_nr); + if ( ret ) + err(xc_handle, "Failed to inject MCE error\n"); + } return 0; } -static int inject_mem_srao(xc_interface *xc_handle, - uint32_t cpu_nr, - uint32_t domain, - uint64_t gaddr) +static int intel_inject_mem_srao(xc_interface *xc_handle, + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { uint64_t gpfn, mfn, haddr; int ret = 0; @@ -431,7 +473,7 @@ static int inject_mem_srao(xc_interface if ( ret ) err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); - ret = inject_mci_misc(xc_handle, cpu_nr, + ret = inject_mci_misc(xc_handle, cpu_nr, 0, MCE_SRAO_MEM_BANK, MCi_MISC_SRAO_MEM_VAL); if ( ret ) err(xc_handle, "Failed to inject MCi_MISC MSR\n"); @@ -448,17 +490,17 @@ static int inject_mem_srao(xc_interface ret = flush_msr_inj(xc_handle); if ( ret ) err(xc_handle, "Failed to inject MSR\n"); - ret = inject_mce(xc_handle, cpu_nr); - if ( ret ) - err(xc_handle, "Failed to inject MCE error\n"); + if (opt_exception) { + ret = inject_mce(xc_handle, cpu_nr); + if ( ret ) + err(xc_handle, "Failed to inject MCE error\n"); + } return 0; } -static int inject_llc_ucna(xc_interface *xc_handle, - uint32_t cpu_nr, - uint32_t domain, - uint64_t gaddr) 
+static int intel_inject_llc_ucna(xc_interface *xc_handle, + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { uint64_t gpfn, mfn, haddr; int ret = 0; @@ -472,7 +514,7 @@ static int inject_llc_ucna(xc_interface if ( ret ) err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); - ret = inject_mci_misc(xc_handle, cpu_nr, + ret = inject_mci_misc(xc_handle, cpu_nr, 0, CMCI_UCNA_LLC_BANK, MCi_MISC_UCNA_LLC_VAL); if ( ret ) err(xc_handle, "Failed to inject MCi_MISC MSR\n"); @@ -489,13 +531,108 @@ static int inject_llc_ucna(xc_interface ret = flush_msr_inj(xc_handle); if ( ret ) err(xc_handle, "Failed to inject MSR\n"); - ret = inject_cmci(xc_handle, cpu_nr); + ret = intel_inject_cmci(xc_handle); if ( ret ) err(xc_handle, "Failed to inject MCE error\n"); return 0; } +static int amd_inject_mem(xc_interface *xc_handle, + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) +{ + uint64_t gpfn, mfn, haddr; + int ret = 0; + + ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_MEM_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MCG_STATUS MSR\n"); + + ret = inject_mci_status(xc_handle, cpu_nr, + MCE_MEM_BANK, MCi_STATUS_MEM_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); + + ret = inject_mci_misc(xc_handle, cpu_nr, 0, + MCE_MEM_BANK, MCi_MISC_MEM_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MCi_MISC MSR\n"); + + gpfn = gaddr >> PAGE_SHIFT; + mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn); + if (!mfn_valid(mfn)) + err(xc_handle, "The MFN is not valid\n"); + haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1)); + ret = inject_mci_addr(xc_handle, cpu_nr, MCE_MEM_BANK, haddr); + if ( ret ) + err(xc_handle, "Failed to inject MCi_ADDR MSR\n"); + + ret = flush_msr_inj(xc_handle); + if ( ret ) + err(xc_handle, "Failed to inject MSR\n"); + + if (opt_exception) { + ret = inject_mce(xc_handle, cpu_nr); + if ( ret ) + err(xc_handle, "Failed to inject MCE error\n"); + } + + return 0; +} + +static int amd_inject_l3(xc_interface *xc_handle, + 
uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) +{ + uint64_t gpfn, mfn, haddr; + int ret = 0; + + ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_L3_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MCG_STATUS MSR\n"); + + ret = inject_mci_status(xc_handle, cpu_nr, + MCE_L3_BANK, MCi_STATUS_L3_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); + + ret = inject_mci_misc(xc_handle, cpu_nr, 0, + MCE_L3_BANK, MC4_MISC0_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MC4_MISC0 MSR\n"); + + ret = inject_mci_misc(xc_handle, cpu_nr, 1, + MCE_L3_BANK, MC4_MISC1_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MC4_MISC1 MSR\n"); + + ret = inject_mci_misc(xc_handle, cpu_nr, 2, + MCE_L3_BANK, MC4_MISC2_L3_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MC4_MISC2 MSR\n"); + + gpfn = gaddr >> PAGE_SHIFT; + mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn); + if (!mfn_valid(mfn)) + err(xc_handle, "The MFN is not valid\n"); + haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1)); + ret = inject_mci_addr(xc_handle, cpu_nr, MCE_L3_BANK, haddr); + if ( ret ) + err(xc_handle, "Failed to inject MCi_ADDR MSR\n"); + + ret = flush_msr_inj(xc_handle); + if ( ret ) + err(xc_handle, "Failed to inject MSR\n"); + + if (opt_exception) { + ret = inject_mce(xc_handle, cpu_nr); + if ( ret ) + err(xc_handle, "Failed to inject MCE error\n"); + } + + return 0; +} + + static long xs_get_dom_mem(int domid) { char path[128]; @@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid) if (!xs) return -1; - sprintf(path, "/local/domain/%d/memory/target", domid); + snprintf(path, sizeof(path), "/local/domain/%d/memory/target", domid); memstr = xs_read(xs, XBT_NULL, path, &plen); xs_daemon_close(xs); @@ -540,30 +677,80 @@ static void help(void) " -D, --dump dump addr info without error injection\n" " -c, --cpu=CPU_ID target CPU\n" " -d, --domain=DomID target domain, the default is Xen itself\n" + " -e raise MCE exception\n" " -h, --help print this page\n" " 
-p, --phyaddr physical address\n" " -t, --type=error error type\n" - " 0 : MCE_SRAO_MEM\n" - " 1 : MCE_SRAO_LLC\n" - " 2 : CMCI_UCNA_LLC\n" + " 0x0 : MCE_SRAO_MEM (Intel only)\n" + " 0x1 : MCE_SRAO_LLC (Intel only)\n" + " 0x2 : CMCI_UCNA_LLC (Intel only)\n" + " 0x20: DRAM error (AMD only)\n" + " 0x21: L3 cache error (AMD only)\n" "\n" ); } +static void cpuid(const unsigned int *input, unsigned int *regs) +{ + unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1]; + asm ( +#ifdef __i386__ + "push %%ebx; push %%edx\n\t" +#else + "push %%rbx; push %%rdx\n\t" +#endif + "cpuid\n\t" + "mov %%ebx,4(%4)\n\t" + "mov %%edx,12(%4)\n\t" +#ifdef __i386__ + "pop %%edx; pop %%ebx\n\t" +#else + "pop %%rdx; pop %%rbx\n\t" +#endif + : "=a" (regs[0]), "=c" (regs[2]) + : "0" (input[0]), "1" (count), "S" (regs) + : "memory" ); +} + +/* Get the manufacturer brand name of the host processor. */ +static void cpuid_brand_get(char *str) +{ + unsigned int input[2] = { 0, 0 }; + unsigned int regs[4]; + + cpuid(input, regs); + + *(uint32_t *)(str + 0) = regs[1]; + *(uint32_t *)(str + 4) = regs[3]; + *(uint32_t *)(str + 8) = regs[2]; + str[12] = '\0'; +} + int main(int argc, char *argv[]) { - int type = MCE_SRAO_MEM; + int type; int c, opt_index; uint32_t domid; xc_interface *xc_handle; - int cpu_nr; - int64_t gaddr, gpfn, mfn, haddr, max_gpa; + unsigned int cpu_nr; + uint64_t gaddr, gpfn, mfn, haddr, max_gpa; + char cpu_brand[13]; /* Default Value */ domid = DOMID_XEN; gaddr = 0x180020; cpu_nr = 0; + cpu_is_amd = cpu_is_intel = 0; + cpuid_brand_get(cpu_brand); + if (strstr(cpu_brand, "AMD")) + cpu_is_amd = 1; + else + cpu_is_intel = 1; + + if (cpu_is_intel) + type = INTEL_MCE_SRAO_MEM; + init_msr_inj(); xc_handle = xc_interface_open(0, 0, 0); if ( !xc_handle ) { @@ -571,8 +758,8 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - while ( 1 ) { - c = getopt_long(argc, argv, "c:Dd:t:hp:r", opts, &opt_index); + for (;;) { + c = getopt_long(argc, argv, "c:Dd:t:hp:r:e", 
opts, &opt_index); if ( c == -1 ) break; switch ( c ) { @@ -580,23 +767,26 @@ int main(int argc, char *argv[]) dump=1; break; case 'c': - cpu_nr = strtol(optarg, &optarg, 10); + cpu_nr = strtoul(optarg, &optarg, 0); if ( strlen(optarg) != 0 ) err(xc_handle, "Please input a digit parameter for CPU\n"); break; case 'd': - domid = strtol(optarg, &optarg, 10); + domid = strtoul(optarg, &optarg, 0); if ( strlen(optarg) != 0 ) err(xc_handle, "Please input a digit parameter for domain\n"); break; case 'p': - gaddr = strtol(optarg, &optarg, 0); + gaddr = strtoul(optarg, &optarg, 0); if ( strlen(optarg) != 0 ) err(xc_handle, "Please input correct page address\n"); break; case 't': type = strtol(optarg, NULL, 0); break; + case 'e': + opt_exception = 1; + break; case 'h': default: help(); @@ -627,16 +817,26 @@ int main(int argc, char *argv[]) goto out; } - switch ( type ) - { - case MCE_SRAO_MEM: - inject_mem_srao(xc_handle, cpu_nr, domid, gaddr); + switch ( type ) { + case INTEL_MCE_SRAO_MEM: + if ( cpu_is_intel ) + intel_inject_mem_srao(xc_handle, cpu_nr, domid, gaddr); break; - case MCE_SRAO_LLC: - inject_llc_srao(xc_handle, cpu_nr, domid, gaddr); + case INTEL_MCE_SRAO_LLC: + if ( cpu_is_intel ) + intel_inject_llc_srao(xc_handle, cpu_nr, domid, gaddr); break; - case CMCI_UCNA_LLC: - inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr); + case INTEL_CMCI_UCNA_LLC: + if ( cpu_is_intel ) + intel_inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr); + break; + case AMD_MCE_MEM: + if ( cpu_is_amd ) + amd_inject_mem(xc_handle, cpu_nr, domid, gaddr); + break; + case AMD_MCE_L3: + if ( cpu_is_amd ) + amd_inject_l3(xc_handle, cpu_nr, domid, gaddr); break; default: err(xc_handle, "Unsupported error type\n"); _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |