[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] x86/intel: workaround several MONITOR/MWAIT errata



On Thu, Apr 17, 2025 at 5:19 PM Roger Pau Monne <roger.pau@xxxxxxxxxx> wrote:
>
> There are several errata on Intel regarding the usage of the MONITOR/MWAIT
> instructions, all having in common that stores to the monitored region
> might not wake up the CPU.
>
> Fix them by forcing the sending of an IPI for the affected models.
>
> The Ice Lake issue has been reproduced internally on XenServer hardware,
> and the fix does seem to prevent it.  The symptom was APs getting stuck in
> the idle loop immediately after bring up, which in turn prevented the BSP
> from making progress.  This would happen before the watchdog was
> initialized, and hence the whole system would get stuck.
>
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> ---
> Apollo and Lunar Lake fixes have not been tested, due to lack of hardware.
> ---
>  xen/arch/x86/acpi/cpu_idle.c     |  6 +++++
>  xen/arch/x86/cpu/intel.c         | 41 +++++++++++++++++++++++++++++++-
>  xen/arch/x86/include/asm/mwait.h |  3 +++
>  3 files changed, 49 insertions(+), 1 deletion(-)
>
> diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
> index 420198406def..f8f11f3c31e4 100644
> --- a/xen/arch/x86/acpi/cpu_idle.c
> +++ b/xen/arch/x86/acpi/cpu_idle.c
> @@ -441,8 +441,14 @@ void cpuidle_wakeup_mwait(cpumask_t *mask)
>      cpumask_andnot(mask, mask, &target);
>  }
>
> +/* Force sending of a wakeup IPI regardless of mwait usage. */
> +bool force_mwait_ipi_wakeup __read_mostly;
> +
>  bool arch_skip_send_event_check(unsigned int cpu)
>  {
> +    if ( force_mwait_ipi_wakeup )
> +        return false;
> +
>      /*
>       * This relies on softirq_pending() and mwait_wakeup() to access data
>       * on the same cache line.
> diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
> index 6a680ba38dc9..9d7c6ea297a9 100644
> --- a/xen/arch/x86/cpu/intel.c
> +++ b/xen/arch/x86/cpu/intel.c
> @@ -8,6 +8,7 @@
>  #include <asm/intel-family.h>
>  #include <asm/processor.h>
>  #include <asm/msr.h>
> +#include <asm/mwait.h>
>  #include <asm/uaccess.h>
>  #include <asm/mpspec.h>
>  #include <asm/apic.h>
> @@ -368,7 +369,6 @@ static void probe_c3_errata(const struct cpuinfo_x86 *c)
>          INTEL_FAM6_MODEL(0x25),
>          { }
>      };
> -#undef INTEL_FAM6_MODEL
>
>      /* Serialized by the AP bringup code. */
>      if ( max_cstate > 1 && (c->apicid & (c->x86_num_siblings - 1)) &&
> @@ -380,6 +380,43 @@ static void probe_c3_errata(const struct cpuinfo_x86 *c)
>      }
>  }
>
> +/*
> + * APL30: One use of the MONITOR/MWAIT instruction pair is to allow a logical
> + * processor to wait in a sleep state until a store to the armed address 
> range
> + * occurs. Due to this erratum, stores to the armed address range may not
> + * trigger MWAIT to resume execution.
> + *
> + * ICX143: Under complex microarchitectural conditions, a monitor that is 
> armed
> + * with the MWAIT instruction may not be triggered, leading to a processor
> + * hang.
> + *
> + * LNL030: Problem P-cores may not exit power state Core C6 on monitor hit.
> + *
> + * Force the sending of an IPI in those cases.
> + */
> +static void probe_mwait_errata(void)
> +{
> +    static const struct x86_cpu_id models[] = {
> +        /* Apollo Lake */
> +        INTEL_FAM6_MODEL(0x5C),
> +        /* Ice Lake */
> +        INTEL_FAM6_MODEL(0x6A),
> +        INTEL_FAM6_MODEL(0x6C),

Intel patch for Linux only adds model 0x6a, not 0x6c. Did we manage to
reproduce on 0x6c? Which patch is more correct? Surely we are on the
safer side.

> +        /* Lunar Lake */
> +        INTEL_FAM6_MODEL(0xBD),
> +        { }
> +    };
> +#undef INTEL_FAM6_MODEL
> +
> +    if ( boot_cpu_has(X86_FEATURE_MONITOR) && !force_mwait_ipi_wakeup &&
> +         x86_match_cpu(models) )
> +    {
> +        printk(XENLOG_WARNING
> +               "Forcing IPI MWAIT wakeup due to CPU erratum\n");
> +        force_mwait_ipi_wakeup = true;
> +    }
> +}
> +
>  /*
>   * P4 Xeon errata 037 workaround.
>   * Hardware prefetcher may cause stale data to be loaded into the cache.
> @@ -406,6 +443,8 @@ static void Intel_errata_workarounds(struct cpuinfo_x86 
> *c)
>                 __set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);
>
>         probe_c3_errata(c);
> +       if (c == &boot_cpu_data)
> +               probe_mwait_errata();
>  }
>
>
> diff --git a/xen/arch/x86/include/asm/mwait.h 
> b/xen/arch/x86/include/asm/mwait.h
> index 000a692f6d19..c52cd3f51011 100644
> --- a/xen/arch/x86/include/asm/mwait.h
> +++ b/xen/arch/x86/include/asm/mwait.h
> @@ -13,6 +13,9 @@
>
>  #define MWAIT_ECX_INTERRUPT_BREAK      0x1
>
> +/* Force sending of a wakeup IPI regardless of mwait usage. */
> +extern bool force_mwait_ipi_wakeup;
> +
>  void mwait_idle_with_hints(unsigned int eax, unsigned int ecx);
>  #ifdef CONFIG_INTEL
>  bool mwait_pc10_supported(void);

Frediano



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.