[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] VT-d: honor APEI firmware-first mode in XSA-59 workaround code
On 04/06/14 09:29, Jan Beulich wrote: > When firmware-first mode is being indicated by firmware, we shouldn't > be modifying AER registers - these are considered to be owned by > firmware in that case. Violating this is being reported to result in > SMI storms. While circumventing the workaround means re-exposing > affected hosts to the XSA-59 issues, this in any event seems better > than not booting at all. Respective messages are being issued to the > log, so the situation can be diagnosed. > > The basic building blocks were taken from Linux 3.15-rc. Note that > this includes a block of code enclosed in #ifdef CONFIG_X86_MCE - we > don't define that symbol, and that code also wouldn't build without > suitable machine check side code added; that should happen eventually, > but isn't subject of this change. > > Reported-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> > Reported-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx> > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> > Tested-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx> Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> > --- > This is unchanged (other than the Tested-by tag added) from the v2 RFC > posting. > > --- a/xen/arch/x86/acpi/boot.c > +++ b/xen/arch/x86/acpi/boot.c > @@ -754,6 +754,8 @@ int __init acpi_boot_init(void) > > erst_init(); > > + acpi_hest_init(); > + > acpi_table_parse(ACPI_SIG_BGRT, acpi_invalidate_bgrt); > > return 0; > --- a/xen/drivers/acpi/apei/Makefile > +++ b/xen/drivers/acpi/apei/Makefile > @@ -1,3 +1,4 @@ > obj-y += erst.o > +obj-y += hest.o > obj-y += apei-base.o > obj-y += apei-io.o > --- /dev/null > +++ b/xen/drivers/acpi/apei/hest.c > @@ -0,0 +1,200 @@ > +/* > + * APEI Hardware Error Souce Table support > + * > + * HEST describes error sources in detail; communicates operational > + * parameters (i.e. severity levels, masking bits, and threshold > + * values) to Linux as necessary. It also allows the BIOS to report > + * non-standard error sources to Linux (for example, chipset-specific > + * error registers). > + * > + * For more information about HEST, please refer to ACPI Specification > + * version 4.0, section 17.3.2. > + * > + * Copyright 2009 Intel Corp. > + * Author: Huang Ying <ying.huang@xxxxxxxxx> > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License version > + * 2 as published by the Free Software Foundation; > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + */ > + > +#include <xen/errno.h> > +#include <xen/init.h> > +#include <xen/kernel.h> > +#include <xen/mm.h> > +#include <xen/pfn.h> > +#include <acpi/acpi.h> > +#include <acpi/apei.h> > + > +#include "apei-internal.h" > + > +#define HEST_PFX "HEST: " > + > +static bool_t hest_disable; > +boolean_param("hest_disable", hest_disable); > + > +/* HEST table parsing */ > + > +static struct acpi_table_hest *__read_mostly hest_tab; > + > +static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { > + [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */ > + [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1, > + [ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi), > + [ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root), > + [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer), > + [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge), > + [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), > +}; > + > +static int hest_esrc_len(const struct acpi_hest_header *hest_hdr) > +{ > + u16 hest_type = hest_hdr->type; > + int len; > + > + if (hest_type >= ACPI_HEST_TYPE_RESERVED) > + return 0; > + > + len = hest_esrc_len_tab[hest_type]; > + > + if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) { > + const struct acpi_hest_ia_corrected *cmc = > + container_of(hest_hdr, > + const struct acpi_hest_ia_corrected, > + header); > + > + len = sizeof(*cmc) + cmc->num_hardware_banks * > + sizeof(struct acpi_hest_ia_error_bank); > + } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) { > + const struct acpi_hest_ia_machine_check *mc = > + container_of(hest_hdr, > + const struct acpi_hest_ia_machine_check, > + header); > + > + len = sizeof(*mc) + mc->num_hardware_banks * > + sizeof(struct acpi_hest_ia_error_bank); > + } > + BUG_ON(len == -1); > + > + return len; > +}; > + > +int apei_hest_parse(apei_hest_func_t func, void *data) > +{ > + struct acpi_hest_header *hest_hdr; > + int i, rc, len; > + > + if (hest_disable || !hest_tab) > + return -EINVAL; > + > + hest_hdr = (struct acpi_hest_header *)(hest_tab + 1); > + for (i = 0; i < hest_tab->error_source_count; i++) { > + len = hest_esrc_len(hest_hdr); > + if (!len) { > + printk(XENLOG_WARNING HEST_PFX > + "Unknown or unused hardware error source " > + "type: %d for hardware error source: %d\n", > + hest_hdr->type, hest_hdr->source_id); > + return -EINVAL; > + } > + if ((void *)hest_hdr + len > > + (void *)hest_tab + hest_tab->header.length) { > + printk(XENLOG_WARNING HEST_PFX > + "Table contents overflow for hardware error > source: %d\n", > + hest_hdr->source_id); > + return -EINVAL; > + } > + > + rc = func(hest_hdr, data); > + if (rc) > + return rc; > + > + hest_hdr = (void *)hest_hdr + len; > + } > + > + return 0; > +} > + > +/* > + * Check if firmware advertises firmware first mode. We need FF bit to be set > + * along with a set of MC banks which work in FF mode. > + */ > +static int __init hest_parse_cmc(const struct acpi_hest_header *hest_hdr, > + void *data) > +{ > +#ifdef CONFIG_X86_MCE > + unsigned int i; > + const struct acpi_hest_ia_corrected *cmc; > + const struct acpi_hest_ia_error_bank *mc_bank; > + > + if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) > + return 0; > + > + cmc = container_of(hest_hdr, const struct acpi_hest_ia_corrected, > header); > + if (!cmc->enabled) > + return 0; > + > + /* > + * We expect HEST to provide a list of MC banks that report errors > + * in firmware first mode. Otherwise, return non-zero value to > + * indicate that we are done parsing HEST. > + */ > + if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || > !cmc->num_hardware_banks) > + return 1; > + > + printk(XENLOG_INFO HEST_PFX "Enabling Firmware First mode for corrected > errors.\n"); > + > + mc_bank = (const struct acpi_hest_ia_error_bank *)(cmc + 1); > + for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++) > + mce_disable_bank(mc_bank->bank_number); > +#else > +# define acpi_disable_cmcff 1 > +#endif > + > + return 1; > +} > + > +void __init acpi_hest_init(void) > +{ > + acpi_status status; > + acpi_physical_address hest_addr; > + acpi_native_uint hest_len; > + > + if (acpi_disabled) > + return; > + > + if (hest_disable) { > + printk(XENLOG_INFO HEST_PFX "Table parsing disabled.\n"); > + return; > + } > + > + status = acpi_get_table_phys(ACPI_SIG_HEST, 0, &hest_addr, &hest_len); > + if (status == AE_NOT_FOUND) > + goto err; > + if (ACPI_FAILURE(status)) { > + printk(XENLOG_ERR HEST_PFX "Failed to get table, %s\n", > + acpi_format_exception(status)); > + goto err; > + } > + map_pages_to_xen((unsigned long)__va(hest_addr), PFN_DOWN(hest_addr), > + PFN_UP(hest_addr + hest_len) - PFN_DOWN(hest_addr), > + PAGE_HYPERVISOR); > + hest_tab = __va(hest_addr); > + > + if (!acpi_disable_cmcff) > + apei_hest_parse(hest_parse_cmc, NULL); > + > + printk(XENLOG_INFO HEST_PFX "Table parsing has been initialized\n"); > + return; > +err: > + hest_disable = 1; > +} > --- a/xen/drivers/passthrough/pci.c > +++ b/xen/drivers/passthrough/pci.c > @@ -1069,6 +1069,106 @@ void __hwdom_init setup_hwdom_pci_device > spin_unlock(&pcidevs_lock); > } > > +#ifdef CONFIG_ACPI > +#include <acpi/acpi.h> > +#include <acpi/apei.h> > + > +static int hest_match_pci(const struct acpi_hest_aer_common *p, > + const struct pci_dev *pdev) > +{ > + return ACPI_HEST_SEGMENT(p->bus) == pdev->seg && > + ACPI_HEST_BUS(p->bus) == pdev->bus && > + p->device == PCI_SLOT(pdev->devfn) && > + p->function == PCI_FUNC(pdev->devfn); > +} > + > +static bool_t hest_match_type(const struct acpi_hest_header *hest_hdr, > + const struct pci_dev *pdev) > +{ > + unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus, > + PCI_SLOT(pdev->devfn), > + PCI_FUNC(pdev->devfn), > + PCI_CAP_ID_EXP); > + u8 pcie = MASK_EXTR(pci_conf_read16(pdev->seg, pdev->bus, > + PCI_SLOT(pdev->devfn), > + PCI_FUNC(pdev->devfn), > + pos + PCI_EXP_FLAGS), > + PCI_EXP_FLAGS_TYPE); > + > + switch ( hest_hdr->type ) > + { > + case ACPI_HEST_TYPE_AER_ROOT_PORT: > + return pcie == PCI_EXP_TYPE_ROOT_PORT; > + case ACPI_HEST_TYPE_AER_ENDPOINT: > + return pcie == PCI_EXP_TYPE_ENDPOINT; > + case ACPI_HEST_TYPE_AER_BRIDGE: > + return pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), > + PCI_FUNC(pdev->devfn), PCI_CLASS_DEVICE) == > + PCI_CLASS_BRIDGE_PCI; > + } > + > + return 0; > +} > + > +struct aer_hest_parse_info { > + const struct pci_dev *pdev; > + bool_t firmware_first; > +}; > + > +static bool_t hest_source_is_pcie_aer(const struct acpi_hest_header > *hest_hdr) > +{ > + if ( hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT || > + hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT || > + hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE ) > + return 1; > + return 0; > +} > + > +static int aer_hest_parse(const struct acpi_hest_header *hest_hdr, void > *data) > +{ > + struct aer_hest_parse_info *info = data; > + const struct acpi_hest_aer_common *p; > + bool_t ff; > + > + if ( !hest_source_is_pcie_aer(hest_hdr) ) > + return 0; > + > + p = (const struct acpi_hest_aer_common *)(hest_hdr + 1); > + ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); > + > + /* > + * If no specific device is supplied, determine whether > + * FIRMWARE_FIRST is set for *any* PCIe device. > + */ > + if ( !info->pdev ) > + { > + info->firmware_first |= ff; > + return 0; > + } > + > + /* Otherwise, check the specific device */ > + if ( p->flags & ACPI_HEST_GLOBAL ? > + hest_match_type(hest_hdr, info->pdev) : > + hest_match_pci(p, info->pdev) ) > + { > + info->firmware_first = ff; > + return 1; > + } > + > + return 0; > +} > + > +bool_t pcie_aer_get_firmware_first(const struct pci_dev *pdev) > +{ > + struct aer_hest_parse_info info = { .pdev = pdev }; > + > + return pci_find_cap_offset(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), > + PCI_FUNC(pdev->devfn), PCI_CAP_ID_EXP) && > + apei_hest_parse(aer_hest_parse, &info) >= 0 && > + info.firmware_first; > +} > +#endif > + > static int _dump_pci_devices(struct pci_seg *pseg, void *arg) > { > struct pci_dev *pdev; > --- a/xen/drivers/passthrough/vtd/quirks.c > +++ b/xen/drivers/passthrough/vtd/quirks.c > @@ -386,9 +386,11 @@ void pci_vtd_quirk(const struct pci_dev > int dev = PCI_SLOT(pdev->devfn); > int func = PCI_FUNC(pdev->devfn); > int pos; > - u32 val; > + bool_t ff; > + u32 val, val2; > u64 bar; > paddr_t pa; > + const char *action; > > if ( pci_conf_read16(seg, bus, dev, func, PCI_VENDOR_ID) != > PCI_VENDOR_ID_INTEL ) > @@ -438,7 +440,10 @@ void pci_vtd_quirk(const struct pci_dev > pos = pci_find_next_ext_capability(seg, bus, pdev->devfn, > pos, > PCI_EXT_CAP_ID_VNDR); > } > + ff = 0; > } > + else > + ff = pcie_aer_get_firmware_first(pdev); > if ( !pos ) > { > printk(XENLOG_WARNING "%04x:%02x:%02x.%u without AER > capability?\n", > @@ -447,18 +452,26 @@ void pci_vtd_quirk(const struct pci_dev > } > > val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK); > - pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK, > - val | PCI_ERR_UNC_UNSUP); > - val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK); > - pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK, > - val | PCI_ERR_COR_ADV_NFAT); > + val2 = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK); > + if ( (val & PCI_ERR_UNC_UNSUP) && (val2 & PCI_ERR_COR_ADV_NFAT) ) > + action = "Found masked"; > + else if ( !ff ) > + { > + pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK, > + val | PCI_ERR_UNC_UNSUP); > + pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK, > + val2 | PCI_ERR_COR_ADV_NFAT); > + action = "Masked"; > + } > + else > + action = "Must not mask"; > > /* XPUNCERRMSK Send Completion with Unsupported Request */ > val = pci_conf_read32(seg, bus, dev, func, 0x20c); > pci_conf_write32(seg, bus, dev, func, 0x20c, val | (1 << 4)); > > - printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n", > - seg, bus, dev, func); > + printk(XENLOG_INFO "%s UR signaling on %04x:%02x:%02x.%u\n", > + action, seg, bus, dev, func); > break; > > case 0x100: case 0x104: case 0x108: /* Sandybridge */ > --- a/xen/include/acpi/actbl1.h > +++ b/xen/include/acpi/actbl1.h > @@ -445,6 +445,14 @@ struct acpi_hest_aer_common { > #define ACPI_HEST_FIRMWARE_FIRST (1) > #define ACPI_HEST_GLOBAL (1<<1) > > +/* > + * Macros to access the bus/segment numbers in Bus field above: > + * Bus number is encoded in bits 7:0 > + * Segment number is encoded in bits 23:8 > + */ > +#define ACPI_HEST_BUS(bus) ((bus) & 0xFF) > +#define ACPI_HEST_SEGMENT(bus) (((bus) >> 8) & 0xFFFF) > + > /* Hardware Error Notification */ > > struct acpi_hest_notify { > --- a/xen/include/acpi/apei.h > +++ b/xen/include/acpi/apei.h > @@ -12,6 +12,9 @@ > > #define FIX_APEI_RANGE_MAX 64 > > +typedef int (*apei_hest_func_t)(const struct acpi_hest_header *, void *); > +int apei_hest_parse(apei_hest_func_t, void *); > + > int erst_write(const struct cper_record_header *record); > ssize_t erst_get_record_count(void); > int erst_get_next_record_id(u64 *record_id); > --- a/xen/include/xen/acpi.h > +++ b/xen/include/xen/acpi.h > @@ -61,6 +61,7 @@ int acpi_boot_init (void); > int acpi_boot_table_init (void); > int acpi_numa_init (void); > int erst_init(void); > +void acpi_hest_init(void); > > int acpi_table_init (void); > int acpi_table_parse(char *id, acpi_table_handler handler); > --- a/xen/include/xen/pci.h > +++ b/xen/include/xen/pci.h > @@ -144,6 +144,8 @@ int pci_find_next_ext_capability(int seg > const char *parse_pci(const char *, unsigned int *seg, unsigned int *bus, > unsigned int *dev, unsigned int *func); > > +bool_t pcie_aer_get_firmware_first(const struct pci_dev *); > + > struct pirq; > int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable); > void msixtbl_pt_unregister(struct domain *, struct pirq *); > > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |