[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH 5/5] x86/mwait-idle: adjust the SKX C6 parameters if PC6 is disabled
From: Chen Yu <yu.c.chen@xxxxxxxxx> Because cpuidle assumes worst-case C-state parameters, PC6 parameters are used for describing C6, which is worst-case for requesting CC6. When PC6 is enabled, this is appropriate. But if PC6 is disabled in the BIOS, the exit latency and target residency should be adjusted accordingly. Exit latency: Previously the C6 exit latency was measured as the PC6 exit latency. With PC6 disabled, the C6 exit latency should be the one of CC6. Target residency: With PC6 disabled, the idle duration within [CC6, PC6) would make the idle governor choose C1E over C6. This would cause low energy-efficiency. We should lower the bar to request C6 when PC6 is disabled. To fill this gap, check if PC6 is disabled in the BIOS in the MSR_PKG_CST_CONFIG_CONTROL(0xe2) register. If so, use the CC6 exit latency for C6 and set target_residency to 3 times of the new exit latency. [This is consistent with how intel_idle driver uses _CST to calculate the target_residency.] As a result, the OS would be more likely to choose C6 over C1E when PC6 is disabled, which is reasonable, because if C6 is enabled, it implies that the user cares about energy, so choosing C6 more frequently makes sense. The new CC6 exit latency of 92us was measured with wult[1] on SKX via NIC wakeup as the 99.99th percentile. Also CLX and CPX both have the same CPU model number as SkX, but their CC6 exit latencies are similar to the SKX one, 96us and 89us respectively, so reuse the SKX value for them. There is a concern that it might be better to use a more generic approach instead of optimizing every platform. However, if the required code complexity and different PC6 bit interpretation on different platforms are taken into account, tuning the code per platform seems to be an acceptable tradeoff. Link: https://intel.github.io/wult/ # [1] Suggested-by: Len Brown <len.brown@xxxxxxxxx> Signed-off-by: Chen Yu <yu.c.chen@xxxxxxxxx> Reviewed-by: Artem Bityutskiy <artem.bityutskiy@xxxxxxxxxxxxxxx> [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx> [Linux commit: 64233338499126c5c31e07165735ab5441c7e45a] Pull in Linux'es MSR_PKG_CST_CONFIG_CONTROL. Alongside the dropping of "const" from skx_cstates[] add __read_mostly, and extend that to other similar non-const tables. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- a/xen/arch/x86/cpu/mwait-idle.c +++ b/xen/arch/x86/cpu/mwait-idle.c @@ -484,7 +484,7 @@ static const struct cpuidle_state bdw_cs {} }; -static struct cpuidle_state skl_cstates[] = { +static struct cpuidle_state __read_mostly skl_cstates[] = { { .name = "C1-SKL", .flags = MWAIT2flg(0x00), @@ -536,7 +536,7 @@ static struct cpuidle_state skl_cstates[ {} }; -static const struct cpuidle_state skx_cstates[] = { +static struct cpuidle_state __read_mostly skx_cstates[] = { { .name = "C1-SKX", .flags = MWAIT2flg(0x00), @@ -674,7 +674,7 @@ static const struct cpuidle_state knl_cs {} }; -static struct cpuidle_state bxt_cstates[] = { +static struct cpuidle_state __read_mostly bxt_cstates[] = { { .name = "C1-BXT", .flags = MWAIT2flg(0x00), @@ -870,9 +870,9 @@ static void auto_demotion_disable(void * { u64 msr_bits; - rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); msr_bits &= ~(icpu->auto_demotion_disable_flags); - wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); + wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); } static void byt_auto_demotion_disable(void *dummy) @@ -1141,7 +1141,7 @@ static void __init sklh_idle_state_table if ((mwait_substates & (MWAIT_CSTATE_MASK << 28)) == 0) return; - rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr); + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); /* PC10 is not enabled in PKG C-state limit */ if ((msr & 0xF) != 8) @@ -1161,6 +1161,36 @@ static void __init sklh_idle_state_table } /* + * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake + * idle states table. + */ +static void __init skx_idle_state_table_update(void) +{ + unsigned long long msr; + + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); + + /* + * 000b: C0/C1 (no package C-state support) + * 001b: C2 + * 010b: C6 (non-retention) + * 011b: C6 (retention) + * 111b: No Package C state limits. + */ + if ((msr & 0x7) < 2) { + /* + * Uses the CC6 + PC0 latency and 3 times of + * latency for target_residency if the PC6 + * is disabled in BIOS. This is consistent + * with how intel_idle driver uses _CST + * to set the target_residency. + */ + skx_cstates[2].exit_latency = 92; + skx_cstates[2].target_residency = 276; + } +} + +/* * mwait_idle_state_table_update() * * Update the default state_table for this CPU-id @@ -1178,6 +1208,9 @@ static void __init mwait_idle_state_tabl case 0x5e: /* SKL-H */ sklh_idle_state_table_update(); break; + case 0x55: /* SKL-X */ + skx_idle_state_table_update(); + break; } } --- a/xen/include/asm-x86/msr-index.h +++ b/xen/include/asm-x86/msr-index.h @@ -45,6 +45,13 @@ #define MSR_CORE_CAPABILITIES 0x000000cf #define CORE_CAPS_SPLITLOCK_DETECT (_AC(1, ULL) << 5) +#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 +#define NHM_C3_AUTO_DEMOTE (_AC(1, ULL) << 25) +#define NHM_C1_AUTO_DEMOTE (_AC(1, ULL) << 26) +#define ATM_LNC_C6_AUTO_DEMOTE (_AC(1, ULL) << 25) +#define SNB_C3_AUTO_UNDEMOTE (_AC(1, ULL) << 27) +#define SNB_C1_AUTO_UNDEMOTE (_AC(1, ULL) << 28) + #define MSR_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAPS_RDCL_NO (_AC(1, ULL) << 0) #define ARCH_CAPS_IBRS_ALL (_AC(1, ULL) << 1) @@ -175,11 +182,6 @@ #define MSR_IA32_A_PERFCTR0 0x000004c1 #define MSR_FSB_FREQ 0x000000cd -#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 -#define NHM_C3_AUTO_DEMOTE (1UL << 25) -#define NHM_C1_AUTO_DEMOTE (1UL << 26) -#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) - #define MSR_MTRRcap 0x000000fe #define MTRRcap_VCNT 0x000000ff
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |