|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] VT-d: honor APEI firmware-first mode in XSA-59 workaround code
On 04/06/14 09:29, Jan Beulich wrote:
> When firmware-first mode is being indicated by firmware, we shouldn't
> be modifying AER registers - these are considered to be owned by
> firmware in that case. Violating this is being reported to result in
> SMI storms. While circumventing the workaround means re-exposing
> affected hosts to the XSA-59 issues, this in any event seems better
> than not booting at all. Respective messages are being issued to the
> log, so the situation can be diagnosed.
>
> The basic building blocks were taken from Linux 3.15-rc. Note that
> this includes a block of code enclosed in #ifdef CONFIG_X86_MCE - we
> don't define that symbol, and that code also wouldn't build without
> suitable machine check side code added; that should happen eventually,
> but isn't subject of this change.
>
> Reported-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> Reported-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> Tested-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> ---
> This is unchanged (other than the Tested-by tag added) from the v2 RFC
> posting.
>
> --- a/xen/arch/x86/acpi/boot.c
> +++ b/xen/arch/x86/acpi/boot.c
> @@ -754,6 +754,8 @@ int __init acpi_boot_init(void)
>
> erst_init();
>
> + acpi_hest_init();
> +
> acpi_table_parse(ACPI_SIG_BGRT, acpi_invalidate_bgrt);
>
> return 0;
> --- a/xen/drivers/acpi/apei/Makefile
> +++ b/xen/drivers/acpi/apei/Makefile
> @@ -1,3 +1,4 @@
> obj-y += erst.o
> +obj-y += hest.o
> obj-y += apei-base.o
> obj-y += apei-io.o
> --- /dev/null
> +++ b/xen/drivers/acpi/apei/hest.c
> @@ -0,0 +1,200 @@
> +/*
> + * APEI Hardware Error Source Table support
> + *
> + * HEST describes error sources in detail; communicates operational
> + * parameters (i.e. severity levels, masking bits, and threshold
> + * values) to Linux as necessary. It also allows the BIOS to report
> + * non-standard error sources to Linux (for example, chipset-specific
> + * error registers).
> + *
> + * For more information about HEST, please refer to ACPI Specification
> + * version 4.0, section 17.3.2.
> + *
> + * Copyright 2009 Intel Corp.
> + * Author: Huang Ying <ying.huang@xxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation;
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> + */
> +
> +#include <xen/errno.h>
> +#include <xen/init.h>
> +#include <xen/kernel.h>
> +#include <xen/mm.h>
> +#include <xen/pfn.h>
> +#include <acpi/acpi.h>
> +#include <acpi/apei.h>
> +
> +#include "apei-internal.h"
> +
> +#define HEST_PFX "HEST: "
> +
> +static bool_t hest_disable;
> +boolean_param("hest_disable", hest_disable);
> +
> +/* HEST table parsing */
> +
> +static struct acpi_table_hest *__read_mostly hest_tab;
> +
> +static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
> + [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */
> + [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
> + [ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi),
> + [ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root),
> + [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
> + [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
> + [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
> +};
> +
> +static int hest_esrc_len(const struct acpi_hest_header *hest_hdr)
> +{
> + u16 hest_type = hest_hdr->type;
> + int len;
> +
> + if (hest_type >= ACPI_HEST_TYPE_RESERVED)
> + return 0;
> +
> + len = hest_esrc_len_tab[hest_type];
> +
> + if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) {
> + const struct acpi_hest_ia_corrected *cmc =
> + container_of(hest_hdr,
> + const struct acpi_hest_ia_corrected,
> + header);
> +
> + len = sizeof(*cmc) + cmc->num_hardware_banks *
> + sizeof(struct acpi_hest_ia_error_bank);
> + } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
> + const struct acpi_hest_ia_machine_check *mc =
> + container_of(hest_hdr,
> + const struct acpi_hest_ia_machine_check,
> + header);
> +
> + len = sizeof(*mc) + mc->num_hardware_banks *
> + sizeof(struct acpi_hest_ia_error_bank);
> + }
> + BUG_ON(len == -1);
> +
> + return len;
> +};
> +
> +int apei_hest_parse(apei_hest_func_t func, void *data)
> +{
> + struct acpi_hest_header *hest_hdr;
> + int i, rc, len;
> +
> + if (hest_disable || !hest_tab)
> + return -EINVAL;
> +
> + hest_hdr = (struct acpi_hest_header *)(hest_tab + 1);
> + for (i = 0; i < hest_tab->error_source_count; i++) {
> + len = hest_esrc_len(hest_hdr);
> + if (!len) {
> + printk(XENLOG_WARNING HEST_PFX
> + "Unknown or unused hardware error source "
> + "type: %d for hardware error source: %d\n",
> + hest_hdr->type, hest_hdr->source_id);
> + return -EINVAL;
> + }
> + if ((void *)hest_hdr + len >
> + (void *)hest_tab + hest_tab->header.length) {
> + printk(XENLOG_WARNING HEST_PFX
> + "Table contents overflow for hardware error source: %d\n",
> + hest_hdr->source_id);
> + return -EINVAL;
> + }
> +
> + rc = func(hest_hdr, data);
> + if (rc)
> + return rc;
> +
> + hest_hdr = (void *)hest_hdr + len;
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Check if firmware advertises firmware first mode. We need FF bit to be set
> + * along with a set of MC banks which work in FF mode.
> + */
> +static int __init hest_parse_cmc(const struct acpi_hest_header *hest_hdr,
> + void *data)
> +{
> +#ifdef CONFIG_X86_MCE
> + unsigned int i;
> + const struct acpi_hest_ia_corrected *cmc;
> + const struct acpi_hest_ia_error_bank *mc_bank;
> +
> + if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
> + return 0;
> +
> + cmc = container_of(hest_hdr, const struct acpi_hest_ia_corrected, header);
> + if (!cmc->enabled)
> + return 0;
> +
> + /*
> + * We expect HEST to provide a list of MC banks that report errors
> + * in firmware first mode. Otherwise, return non-zero value to
> + * indicate that we are done parsing HEST.
> + */
> + if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks)
> + return 1;
> +
> + printk(XENLOG_INFO HEST_PFX "Enabling Firmware First mode for corrected errors.\n");
> +
> + mc_bank = (const struct acpi_hest_ia_error_bank *)(cmc + 1);
> + for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
> + mce_disable_bank(mc_bank->bank_number);
> +#else
> +# define acpi_disable_cmcff 1
> +#endif
> +
> + return 1;
> +}
> +
> +void __init acpi_hest_init(void)
> +{
> + acpi_status status;
> + acpi_physical_address hest_addr;
> + acpi_native_uint hest_len;
> +
> + if (acpi_disabled)
> + return;
> +
> + if (hest_disable) {
> + printk(XENLOG_INFO HEST_PFX "Table parsing disabled.\n");
> + return;
> + }
> +
> + status = acpi_get_table_phys(ACPI_SIG_HEST, 0, &hest_addr, &hest_len);
> + if (status == AE_NOT_FOUND)
> + goto err;
> + if (ACPI_FAILURE(status)) {
> + printk(XENLOG_ERR HEST_PFX "Failed to get table, %s\n",
> + acpi_format_exception(status));
> + goto err;
> + }
> + map_pages_to_xen((unsigned long)__va(hest_addr), PFN_DOWN(hest_addr),
> + PFN_UP(hest_addr + hest_len) - PFN_DOWN(hest_addr),
> + PAGE_HYPERVISOR);
> + hest_tab = __va(hest_addr);
> +
> + if (!acpi_disable_cmcff)
> + apei_hest_parse(hest_parse_cmc, NULL);
> +
> + printk(XENLOG_INFO HEST_PFX "Table parsing has been initialized\n");
> + return;
> +err:
> + hest_disable = 1;
> +}
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -1069,6 +1069,106 @@ void __hwdom_init setup_hwdom_pci_device
> spin_unlock(&pcidevs_lock);
> }
>
> +#ifdef CONFIG_ACPI
> +#include <acpi/acpi.h>
> +#include <acpi/apei.h>
> +
> +static int hest_match_pci(const struct acpi_hest_aer_common *p,
> + const struct pci_dev *pdev)
> +{
> + return ACPI_HEST_SEGMENT(p->bus) == pdev->seg &&
> + ACPI_HEST_BUS(p->bus) == pdev->bus &&
> + p->device == PCI_SLOT(pdev->devfn) &&
> + p->function == PCI_FUNC(pdev->devfn);
> +}
> +
> +static bool_t hest_match_type(const struct acpi_hest_header *hest_hdr,
> + const struct pci_dev *pdev)
> +{
> + unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus,
> + PCI_SLOT(pdev->devfn),
> + PCI_FUNC(pdev->devfn),
> + PCI_CAP_ID_EXP);
> + u8 pcie = MASK_EXTR(pci_conf_read16(pdev->seg, pdev->bus,
> + PCI_SLOT(pdev->devfn),
> + PCI_FUNC(pdev->devfn),
> + pos + PCI_EXP_FLAGS),
> + PCI_EXP_FLAGS_TYPE);
> +
> + switch ( hest_hdr->type )
> + {
> + case ACPI_HEST_TYPE_AER_ROOT_PORT:
> + return pcie == PCI_EXP_TYPE_ROOT_PORT;
> + case ACPI_HEST_TYPE_AER_ENDPOINT:
> + return pcie == PCI_EXP_TYPE_ENDPOINT;
> + case ACPI_HEST_TYPE_AER_BRIDGE:
> + return pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> + PCI_FUNC(pdev->devfn), PCI_CLASS_DEVICE) ==
> + PCI_CLASS_BRIDGE_PCI;
> + }
> +
> + return 0;
> +}
> +
> +struct aer_hest_parse_info {
> + const struct pci_dev *pdev;
> + bool_t firmware_first;
> +};
> +
> +static bool_t hest_source_is_pcie_aer(const struct acpi_hest_header *hest_hdr)
> +{
> + if ( hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
> + hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
> + hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE )
> + return 1;
> + return 0;
> +}
> +
> +static int aer_hest_parse(const struct acpi_hest_header *hest_hdr, void *data)
> +{
> + struct aer_hest_parse_info *info = data;
> + const struct acpi_hest_aer_common *p;
> + bool_t ff;
> +
> + if ( !hest_source_is_pcie_aer(hest_hdr) )
> + return 0;
> +
> + p = (const struct acpi_hest_aer_common *)(hest_hdr + 1);
> + ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
> +
> + /*
> + * If no specific device is supplied, determine whether
> + * FIRMWARE_FIRST is set for *any* PCIe device.
> + */
> + if ( !info->pdev )
> + {
> + info->firmware_first |= ff;
> + return 0;
> + }
> +
> + /* Otherwise, check the specific device */
> + if ( p->flags & ACPI_HEST_GLOBAL ?
> + hest_match_type(hest_hdr, info->pdev) :
> + hest_match_pci(p, info->pdev) )
> + {
> + info->firmware_first = ff;
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +bool_t pcie_aer_get_firmware_first(const struct pci_dev *pdev)
> +{
> + struct aer_hest_parse_info info = { .pdev = pdev };
> +
> + return pci_find_cap_offset(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> + PCI_FUNC(pdev->devfn), PCI_CAP_ID_EXP) &&
> + apei_hest_parse(aer_hest_parse, &info) >= 0 &&
> + info.firmware_first;
> +}
> +#endif
> +
> static int _dump_pci_devices(struct pci_seg *pseg, void *arg)
> {
> struct pci_dev *pdev;
> --- a/xen/drivers/passthrough/vtd/quirks.c
> +++ b/xen/drivers/passthrough/vtd/quirks.c
> @@ -386,9 +386,11 @@ void pci_vtd_quirk(const struct pci_dev
> int dev = PCI_SLOT(pdev->devfn);
> int func = PCI_FUNC(pdev->devfn);
> int pos;
> - u32 val;
> + bool_t ff;
> + u32 val, val2;
> u64 bar;
> paddr_t pa;
> + const char *action;
>
> if ( pci_conf_read16(seg, bus, dev, func, PCI_VENDOR_ID) !=
> PCI_VENDOR_ID_INTEL )
> @@ -438,7 +440,10 @@ void pci_vtd_quirk(const struct pci_dev
> pos = pci_find_next_ext_capability(seg, bus, pdev->devfn,
> pos,
> PCI_EXT_CAP_ID_VNDR);
> }
> + ff = 0;
> }
> + else
> + ff = pcie_aer_get_firmware_first(pdev);
> if ( !pos )
> {
> printk(XENLOG_WARNING "%04x:%02x:%02x.%u without AER capability?\n",
> @@ -447,18 +452,26 @@ void pci_vtd_quirk(const struct pci_dev
> }
>
> val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK);
> - pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK,
> - val | PCI_ERR_UNC_UNSUP);
> - val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK);
> - pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK,
> - val | PCI_ERR_COR_ADV_NFAT);
> + val2 = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK);
> + if ( (val & PCI_ERR_UNC_UNSUP) && (val2 & PCI_ERR_COR_ADV_NFAT) )
> + action = "Found masked";
> + else if ( !ff )
> + {
> + pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK,
> + val | PCI_ERR_UNC_UNSUP);
> + pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK,
> + val2 | PCI_ERR_COR_ADV_NFAT);
> + action = "Masked";
> + }
> + else
> + action = "Must not mask";
>
> /* XPUNCERRMSK Send Completion with Unsupported Request */
> val = pci_conf_read32(seg, bus, dev, func, 0x20c);
> pci_conf_write32(seg, bus, dev, func, 0x20c, val | (1 << 4));
>
> - printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n",
> - seg, bus, dev, func);
> + printk(XENLOG_INFO "%s UR signaling on %04x:%02x:%02x.%u\n",
> + action, seg, bus, dev, func);
> break;
>
> case 0x100: case 0x104: case 0x108: /* Sandybridge */
> --- a/xen/include/acpi/actbl1.h
> +++ b/xen/include/acpi/actbl1.h
> @@ -445,6 +445,14 @@ struct acpi_hest_aer_common {
> #define ACPI_HEST_FIRMWARE_FIRST (1)
> #define ACPI_HEST_GLOBAL (1<<1)
>
> +/*
> + * Macros to access the bus/segment numbers in Bus field above:
> + * Bus number is encoded in bits 7:0
> + * Segment number is encoded in bits 23:8
> + */
> +#define ACPI_HEST_BUS(bus) ((bus) & 0xFF)
> +#define ACPI_HEST_SEGMENT(bus) (((bus) >> 8) & 0xFFFF)
> +
> /* Hardware Error Notification */
>
> struct acpi_hest_notify {
> --- a/xen/include/acpi/apei.h
> +++ b/xen/include/acpi/apei.h
> @@ -12,6 +12,9 @@
>
> #define FIX_APEI_RANGE_MAX 64
>
> +typedef int (*apei_hest_func_t)(const struct acpi_hest_header *, void *);
> +int apei_hest_parse(apei_hest_func_t, void *);
> +
> int erst_write(const struct cper_record_header *record);
> ssize_t erst_get_record_count(void);
> int erst_get_next_record_id(u64 *record_id);
> --- a/xen/include/xen/acpi.h
> +++ b/xen/include/xen/acpi.h
> @@ -61,6 +61,7 @@ int acpi_boot_init (void);
> int acpi_boot_table_init (void);
> int acpi_numa_init (void);
> int erst_init(void);
> +void acpi_hest_init(void);
>
> int acpi_table_init (void);
> int acpi_table_parse(char *id, acpi_table_handler handler);
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -144,6 +144,8 @@ int pci_find_next_ext_capability(int seg
> const char *parse_pci(const char *, unsigned int *seg, unsigned int *bus,
> unsigned int *dev, unsigned int *func);
>
> +bool_t pcie_aer_get_firmware_first(const struct pci_dev *);
> +
> struct pirq;
> int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable);
> void msixtbl_pt_unregister(struct domain *, struct pirq *);
>
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |