[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] VT-d: honor APEI firmware-first mode in XSA-59 workaround code



On 04/06/14 09:29, Jan Beulich wrote:
> When firmware-first mode is being indicated by firmware, we shouldn't
> be modifying AER registers - these are considered to be owned by
> firmware in that case. Violating this is being reported to result in
> SMI storms. While circumventing the workaround means re-exposing
> affected hosts to the XSA-59 issues, this in any event seems better
> than not booting at all. Respective messages are being issued to the
> log, so the situation can be diagnosed.
>
> The basic building blocks were taken from Linux 3.15-rc. Note that
> this includes a block of code enclosed in #ifdef CONFIG_X86_MCE - we
> don't define that symbol, and that code also wouldn't build without
> suitable machine check side code added; that should happen eventually,
> but isn't subject of this change.
>
> Reported-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> Reported-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> Tested-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>

Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

> ---
> This is unchanged (other than the Tested-by tag added) from the v2 RFC
> posting.
>
> --- a/xen/arch/x86/acpi/boot.c
> +++ b/xen/arch/x86/acpi/boot.c
> @@ -754,6 +754,8 @@ int __init acpi_boot_init(void)
>  
>       erst_init();
>  
> +     acpi_hest_init();
> +
>       acpi_table_parse(ACPI_SIG_BGRT, acpi_invalidate_bgrt);
>  
>       return 0;
> --- a/xen/drivers/acpi/apei/Makefile
> +++ b/xen/drivers/acpi/apei/Makefile
> @@ -1,3 +1,4 @@
>  obj-y += erst.o
> +obj-y += hest.o
>  obj-y += apei-base.o
>  obj-y += apei-io.o
> --- /dev/null
> +++ b/xen/drivers/acpi/apei/hest.c
> @@ -0,0 +1,200 @@
> +/*
> + * APEI Hardware Error Source Table support
> + *
> + * HEST describes error sources in detail; communicates operational
> + * parameters (i.e. severity levels, masking bits, and threshold
> + * values) to Linux as necessary. It also allows the BIOS to report
> + * non-standard error sources to Linux (for example, chipset-specific
> + * error registers).
> + *
> + * For more information about HEST, please refer to ACPI Specification
> + * version 4.0, section 17.3.2.
> + *
> + * Copyright 2009 Intel Corp.
> + *   Author: Huang Ying <ying.huang@xxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation;
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +#include <xen/errno.h>
> +#include <xen/init.h>
> +#include <xen/kernel.h>
> +#include <xen/mm.h>
> +#include <xen/pfn.h>
> +#include <acpi/acpi.h>
> +#include <acpi/apei.h>
> +
> +#include "apei-internal.h"
> +
> +#define HEST_PFX "HEST: "
> +
> +static bool_t hest_disable;
> +boolean_param("hest_disable", hest_disable);
> +
> +/* HEST table parsing */
> +
> +static struct acpi_table_hest *__read_mostly hest_tab;
> +
> +static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
> +     [ACPI_HEST_TYPE_IA32_CHECK] = -1,       /* need further calculation */
> +     [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
> +     [ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi),
> +     [ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root),
> +     [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
> +     [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
> +     [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
> +};
> +
> +static int hest_esrc_len(const struct acpi_hest_header *hest_hdr)
> +{
> +     u16 hest_type = hest_hdr->type;
> +     int len;
> +
> +     if (hest_type >= ACPI_HEST_TYPE_RESERVED)
> +             return 0;
> +
> +     len = hest_esrc_len_tab[hest_type];
> +
> +     if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) {
> +             const struct acpi_hest_ia_corrected *cmc =
> +                     container_of(hest_hdr,
> +                                  const struct acpi_hest_ia_corrected,
> +                                  header);
> +
> +             len = sizeof(*cmc) + cmc->num_hardware_banks *
> +                   sizeof(struct acpi_hest_ia_error_bank);
> +     } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
> +             const struct acpi_hest_ia_machine_check *mc =
> +                     container_of(hest_hdr,
> +                                  const struct acpi_hest_ia_machine_check,
> +                                  header);
> +
> +             len = sizeof(*mc) + mc->num_hardware_banks *
> +                   sizeof(struct acpi_hest_ia_error_bank);
> +     }
> +     BUG_ON(len == -1);
> +
> +     return len;
> +};
> +
> +int apei_hest_parse(apei_hest_func_t func, void *data)
> +{
> +     struct acpi_hest_header *hest_hdr;
> +     int i, rc, len;
> +
> +     if (hest_disable || !hest_tab)
> +             return -EINVAL;
> +
> +     hest_hdr = (struct acpi_hest_header *)(hest_tab + 1);
> +     for (i = 0; i < hest_tab->error_source_count; i++) {
> +             len = hest_esrc_len(hest_hdr);
> +             if (!len) {
> +                     printk(XENLOG_WARNING HEST_PFX
> +                            "Unknown or unused hardware error source "
> +                            "type: %d for hardware error source: %d\n",
> +                            hest_hdr->type, hest_hdr->source_id);
> +                     return -EINVAL;
> +             }
> +             if ((void *)hest_hdr + len >
> +                 (void *)hest_tab + hest_tab->header.length) {
> +                     printk(XENLOG_WARNING HEST_PFX
> +                            "Table contents overflow for hardware error source: %d\n",
> +                            hest_hdr->source_id);
> +                     return -EINVAL;
> +             }
> +
> +             rc = func(hest_hdr, data);
> +             if (rc)
> +                     return rc;
> +
> +             hest_hdr = (void *)hest_hdr + len;
> +     }
> +
> +     return 0;
> +}
> +
> +/*
> + * Check if firmware advertises firmware first mode. We need FF bit to be set
> + * along with a set of MC banks which work in FF mode.
> + */
> +static int __init hest_parse_cmc(const struct acpi_hest_header *hest_hdr,
> +                              void *data)
> +{
> +#ifdef CONFIG_X86_MCE
> +     unsigned int i;
> +     const struct acpi_hest_ia_corrected *cmc;
> +     const struct acpi_hest_ia_error_bank *mc_bank;
> +
> +     if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
> +             return 0;
> +
> +     cmc = container_of(hest_hdr, const struct acpi_hest_ia_corrected, header);
> +     if (!cmc->enabled)
> +             return 0;
> +
> +     /*
> +      * We expect HEST to provide a list of MC banks that report errors
> +      * in firmware first mode. Otherwise, return non-zero value to
> +      * indicate that we are done parsing HEST.
> +      */
> +     if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks)
> +             return 1;
> +
> +     printk(XENLOG_INFO HEST_PFX "Enabling Firmware First mode for corrected errors.\n");
> +
> +     mc_bank = (const struct acpi_hest_ia_error_bank *)(cmc + 1);
> +     for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
> +             mce_disable_bank(mc_bank->bank_number);
> +#else
> +# define acpi_disable_cmcff 1
> +#endif
> +
> +     return 1;
> +}
> +
> +void __init acpi_hest_init(void)
> +{
> +     acpi_status status;
> +     acpi_physical_address hest_addr;
> +     acpi_native_uint hest_len;
> +
> +     if (acpi_disabled)
> +             return;
> +
> +     if (hest_disable) {
> +             printk(XENLOG_INFO HEST_PFX "Table parsing disabled.\n");
> +             return;
> +     }
> +
> +     status = acpi_get_table_phys(ACPI_SIG_HEST, 0, &hest_addr, &hest_len);
> +     if (status == AE_NOT_FOUND)
> +             goto err;
> +     if (ACPI_FAILURE(status)) {
> +             printk(XENLOG_ERR HEST_PFX "Failed to get table, %s\n",
> +                    acpi_format_exception(status));
> +             goto err;
> +     }
> +     map_pages_to_xen((unsigned long)__va(hest_addr), PFN_DOWN(hest_addr),
> +                      PFN_UP(hest_addr + hest_len) - PFN_DOWN(hest_addr),
> +                      PAGE_HYPERVISOR);
> +     hest_tab = __va(hest_addr);
> +
> +     if (!acpi_disable_cmcff)
> +             apei_hest_parse(hest_parse_cmc, NULL);
> +
> +     printk(XENLOG_INFO HEST_PFX "Table parsing has been initialized\n");
> +     return;
> +err:
> +     hest_disable = 1;
> +}
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -1069,6 +1069,106 @@ void __hwdom_init setup_hwdom_pci_device
>      spin_unlock(&pcidevs_lock);
>  }
>  
> +#ifdef CONFIG_ACPI
> +#include <acpi/acpi.h>
> +#include <acpi/apei.h>
> +
> +static int hest_match_pci(const struct acpi_hest_aer_common *p,
> +                          const struct pci_dev *pdev)
> +{
> +    return ACPI_HEST_SEGMENT(p->bus) == pdev->seg &&
> +           ACPI_HEST_BUS(p->bus)     == pdev->bus &&
> +           p->device                 == PCI_SLOT(pdev->devfn) &&
> +           p->function               == PCI_FUNC(pdev->devfn);
> +}
> +
> +static bool_t hest_match_type(const struct acpi_hest_header *hest_hdr,
> +                              const struct pci_dev *pdev)
> +{
> +    unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus,
> +                                           PCI_SLOT(pdev->devfn),
> +                                           PCI_FUNC(pdev->devfn),
> +                                           PCI_CAP_ID_EXP);
> +    u8 pcie = MASK_EXTR(pci_conf_read16(pdev->seg, pdev->bus,
> +                                        PCI_SLOT(pdev->devfn),
> +                                        PCI_FUNC(pdev->devfn),
> +                                        pos + PCI_EXP_FLAGS),
> +                        PCI_EXP_FLAGS_TYPE);
> +
> +    switch ( hest_hdr->type )
> +    {
> +    case ACPI_HEST_TYPE_AER_ROOT_PORT:
> +        return pcie == PCI_EXP_TYPE_ROOT_PORT;
> +    case ACPI_HEST_TYPE_AER_ENDPOINT:
> +        return pcie == PCI_EXP_TYPE_ENDPOINT;
> +    case ACPI_HEST_TYPE_AER_BRIDGE:
> +        return pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> +                               PCI_FUNC(pdev->devfn), PCI_CLASS_DEVICE) ==
> +               PCI_CLASS_BRIDGE_PCI;
> +    }
> +
> +    return 0;
> +}
> +
> +struct aer_hest_parse_info {
> +    const struct pci_dev *pdev;
> +    bool_t firmware_first;
> +};
> +
> +static bool_t hest_source_is_pcie_aer(const struct acpi_hest_header *hest_hdr)
> +{
> +    if ( hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
> +         hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
> +         hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE )
> +        return 1;
> +    return 0;
> +}
> +
> +static int aer_hest_parse(const struct acpi_hest_header *hest_hdr, void *data)
> +{
> +    struct aer_hest_parse_info *info = data;
> +    const struct acpi_hest_aer_common *p;
> +    bool_t ff;
> +
> +    if ( !hest_source_is_pcie_aer(hest_hdr) )
> +        return 0;
> +
> +    p = (const struct acpi_hest_aer_common *)(hest_hdr + 1);
> +    ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
> +
> +    /*
> +     * If no specific device is supplied, determine whether
> +     * FIRMWARE_FIRST is set for *any* PCIe device.
> +     */
> +    if ( !info->pdev )
> +    {
> +        info->firmware_first |= ff;
> +        return 0;
> +    }
> +
> +    /* Otherwise, check the specific device */
> +    if ( p->flags & ACPI_HEST_GLOBAL ?
> +         hest_match_type(hest_hdr, info->pdev) :
> +         hest_match_pci(p, info->pdev) )
> +    {
> +        info->firmware_first = ff;
> +        return 1;
> +    }
> +
> +    return 0;
> +}
> +
> +bool_t pcie_aer_get_firmware_first(const struct pci_dev *pdev)
> +{
> +    struct aer_hest_parse_info info = { .pdev = pdev };
> +
> +    return pci_find_cap_offset(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> +                               PCI_FUNC(pdev->devfn), PCI_CAP_ID_EXP) &&
> +           apei_hest_parse(aer_hest_parse, &info) >= 0 &&
> +           info.firmware_first;
> +}
> +#endif
> +
>  static int _dump_pci_devices(struct pci_seg *pseg, void *arg)
>  {
>      struct pci_dev *pdev;
> --- a/xen/drivers/passthrough/vtd/quirks.c
> +++ b/xen/drivers/passthrough/vtd/quirks.c
> @@ -386,9 +386,11 @@ void pci_vtd_quirk(const struct pci_dev 
>      int dev = PCI_SLOT(pdev->devfn);
>      int func = PCI_FUNC(pdev->devfn);
>      int pos;
> -    u32 val;
> +    bool_t ff;
> +    u32 val, val2;
>      u64 bar;
>      paddr_t pa;
> +    const char *action;
>  
>      if ( pci_conf_read16(seg, bus, dev, func, PCI_VENDOR_ID) !=
>           PCI_VENDOR_ID_INTEL )
> @@ -438,7 +440,10 @@ void pci_vtd_quirk(const struct pci_dev 
>                  pos = pci_find_next_ext_capability(seg, bus, pdev->devfn, pos,
>                                                     PCI_EXT_CAP_ID_VNDR);
>              }
> +            ff = 0;
>          }
> +        else
> +            ff = pcie_aer_get_firmware_first(pdev);
>          if ( !pos )
>          {
>              printk(XENLOG_WARNING "%04x:%02x:%02x.%u without AER capability?\n",
> @@ -447,18 +452,26 @@ void pci_vtd_quirk(const struct pci_dev 
>          }
>  
>          val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK);
> -        pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK,
> -                         val | PCI_ERR_UNC_UNSUP);
> -        val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK);
> -        pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK,
> -                         val | PCI_ERR_COR_ADV_NFAT);
> +        val2 = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK);
> +        if ( (val & PCI_ERR_UNC_UNSUP) && (val2 & PCI_ERR_COR_ADV_NFAT) )
> +            action = "Found masked";
> +        else if ( !ff )
> +        {
> +            pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK,
> +                             val | PCI_ERR_UNC_UNSUP);
> +            pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK,
> +                             val2 | PCI_ERR_COR_ADV_NFAT);
> +            action = "Masked";
> +        }
> +        else
> +            action = "Must not mask";
>  
>          /* XPUNCERRMSK Send Completion with Unsupported Request */
>          val = pci_conf_read32(seg, bus, dev, func, 0x20c);
>          pci_conf_write32(seg, bus, dev, func, 0x20c, val | (1 << 4));
>  
> -        printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n",
> -               seg, bus, dev, func);
> +        printk(XENLOG_INFO "%s UR signaling on %04x:%02x:%02x.%u\n",
> +               action, seg, bus, dev, func);
>          break;
>  
>      case 0x100: case 0x104: case 0x108: /* Sandybridge */
> --- a/xen/include/acpi/actbl1.h
> +++ b/xen/include/acpi/actbl1.h
> @@ -445,6 +445,14 @@ struct acpi_hest_aer_common {
>  #define ACPI_HEST_FIRMWARE_FIRST        (1)
>  #define ACPI_HEST_GLOBAL                (1<<1)
>  
> +/*
> + * Macros to access the bus/segment numbers in Bus field above:
> + *  Bus number is encoded in bits 7:0
> + *  Segment number is encoded in bits 23:8
> + */
> +#define ACPI_HEST_BUS(bus)              ((bus) & 0xFF)
> +#define ACPI_HEST_SEGMENT(bus)          (((bus) >> 8) & 0xFFFF)
> +
>  /* Hardware Error Notification */
>  
>  struct acpi_hest_notify {
> --- a/xen/include/acpi/apei.h
> +++ b/xen/include/acpi/apei.h
> @@ -12,6 +12,9 @@
>  
>  #define FIX_APEI_RANGE_MAX 64
>  
> +typedef int (*apei_hest_func_t)(const struct acpi_hest_header *, void *);
> +int apei_hest_parse(apei_hest_func_t, void *);
> +
>  int erst_write(const struct cper_record_header *record);
>  ssize_t erst_get_record_count(void);
>  int erst_get_next_record_id(u64 *record_id);
> --- a/xen/include/xen/acpi.h
> +++ b/xen/include/xen/acpi.h
> @@ -61,6 +61,7 @@ int acpi_boot_init (void);
>  int acpi_boot_table_init (void);
>  int acpi_numa_init (void);
>  int erst_init(void);
> +void acpi_hest_init(void);
>  
>  int acpi_table_init (void);
>  int acpi_table_parse(char *id, acpi_table_handler handler);
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -144,6 +144,8 @@ int pci_find_next_ext_capability(int seg
>  const char *parse_pci(const char *, unsigned int *seg, unsigned int *bus,
>                        unsigned int *dev, unsigned int *func);
>  
> +bool_t pcie_aer_get_firmware_first(const struct pci_dev *);
> +
>  struct pirq;
>  int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable);
>  void msixtbl_pt_unregister(struct domain *, struct pirq *);
>
>


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.