[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v1 05/11] xen/x86: introduce a new amd pstate driver for cpufreq scaling
amd-pstate is the AMD CPU performance scaling driver that introduces a
new CPU frequency control mechanism on AMD Zen based CPU series.
The new mechanism is based on Collaborative Processor Performance
Control (CPPC) which is a finer grain frequency management
than legacy ACPI hardware P-States.
Current AMD CPU platforms are using the ACPI P-states driver to
manage CPU frequency and clocks with switching only in 3 P-states.
The new amd-pstate allows a more flexible, low-latency interface for Xen
to directly communicate the performance hints to hardware.

The first version "amd-pstate" could leverage common governors such as
*ondemand*, *performance*, etc, to manage the performance hints. In the
future, we will introduce an advanced active mode to enable autonomous
performance level selection.

Signed-off-by: Penny Zheng <Penny.Zheng@xxxxxxx>
---
 xen/arch/x86/acpi/cpufreq/amd-pstate.c | 413 +++++++++++++++++++++++++
 xen/arch/x86/include/asm/msr-index.h   |   5 +
 2 files changed, 418 insertions(+)

diff --git a/xen/arch/x86/acpi/cpufreq/amd-pstate.c b/xen/arch/x86/acpi/cpufreq/amd-pstate.c
index bfad96ae3d..5dfa35581a 100644
--- a/xen/arch/x86/acpi/cpufreq/amd-pstate.c
+++ b/xen/arch/x86/acpi/cpufreq/amd-pstate.c
@@ -15,6 +15,59 @@
 #include <xen/init.h>
 #include <xen/param.h>
 #include <acpi/cpufreq/cpufreq.h>
+#include <asm/msr.h>
+
+#define amd_pstate_err(cpu, fmt, args...)                       \
+    printk(XENLOG_ERR "AMD_PSTATE: CPU%u error: " fmt, cpu, ## args)
+#define amd_pstate_verbose(fmt, args...)                        \
+({                                                              \
+    if ( cpufreq_verbose )                                      \
+        printk(XENLOG_DEBUG "AMD_PSTATE: " fmt, ## args);       \
+})
+/* The CPU is an explicit argument, exactly as for amd_pstate_err(). */
+#define amd_pstate_warn(cpu, fmt, args...)                      \
+    printk(XENLOG_WARNING "AMD_PSTATE: CPU%u warning: " fmt, cpu, ## args)
+
+struct amd_pstate_drv_data
+{
+    struct xen_processor_cppc *cppc_data;
+    /* Capabilities, as read from MSR_AMD_CPPC_CAP1. */
+    union
+    {
+        uint64_t amd_caps;
+        struct
+        {
+            unsigned int lowest_perf:8;
+            unsigned int lowest_nonlinear_perf:8;
+            unsigned int nominal_perf:8;
+            unsigned int highest_perf:8;
+            unsigned int :32;
+        } hw;
+    };
+    /* Cached MSR_AMD_CPPC_REQ value; rolled back when a write fails. */
+    union
+    {
+        uint64_t amd_req;
+        struct
+        {
+            unsigned int max_perf:8;
+            unsigned int min_perf:8;
+            unsigned int des_perf:8;
+            unsigned int epp:8;
+            unsigned int :32;
+        } req;
+    };
+    /* Outcome of the most recent MSR access done on the target CPU. */
+    int err;
+
+    /* Frequency limits in kHz, filled in by amd_pstate_init_msrs(). */
+    uint32_t max_freq;
+    uint32_t min_freq;
+    uint32_t nominal_freq;
+};
+
+static DEFINE_PER_CPU_READ_MOSTLY(struct amd_pstate_drv_data *,
+                                  amd_pstate_drv_data);
 
 uint16_t __read_mostly dmi_max_speed_mhz;
 
@@ -52,9 +105,369 @@ int __init amd_pstate_cmdline_parse(const char *s, const char *e)
     return 0;
 }
 
+/*
+ * If CPPC lowest_freq and nominal_freq registers are exposed then we can
+ * use them to convert perf to freq and vice versa. The conversion is
+ * extrapolated as an affine function passing by the 2 points:
+ *  - (Low perf, Low freq)
+ *  - (Nominal perf, Nominal freq)
+ *
+ * The result is clamped to [lowest_perf, highest_perf], so it always fits
+ * the 8-bit fields of the CPPC request.
+ */
+static unsigned int amd_pstate_khz_to_perf(
+    const struct amd_pstate_drv_data *data, unsigned int freq)
+{
+    const struct xen_processor_cppc *cppc_data = data->cppc_data;
+    uint64_t mul, div;
+    int64_t offset = 0, perf;
+
+    if ( freq == (cppc_data->nominal_freq * 1000) )
+        return data->hw.nominal_perf;
+
+    if ( freq == (cppc_data->lowest_freq * 1000) )
+        return data->hw.lowest_perf;
+
+    if ( cppc_data->lowest_freq && cppc_data->nominal_freq )
+    {
+        mul = data->hw.nominal_perf - data->hw.lowest_perf;
+        div = cppc_data->nominal_freq - cppc_data->lowest_freq;
+        /*
+         * We don't need to convert to kHz for computing offset and can
+         * directly use nominal_freq and lowest_freq as the division
+         * will remove the frequency unit.
+         */
+        div = div ?: 1;
+        /* Signed, as the intercept of the affine function may be negative. */
+        offset = data->hw.nominal_perf -
+                 (int64_t)((mul * cppc_data->nominal_freq) / div);
+    }
+    else
+    {
+        /* Without a DMI anchor point there is nothing to scale against. */
+        if ( !dmi_max_speed_mhz )
+            return data->hw.nominal_perf;
+
+        /* Read Processor Max Speed(mhz) from DMI table as anchor point */
+        mul = data->hw.highest_perf;
+        div = dmi_max_speed_mhz;
+    }
+
+    perf = offset + (int64_t)((mul * freq) / (div * 1000));
+
+    if ( perf < data->hw.lowest_perf )
+        return data->hw.lowest_perf;
+    if ( perf > data->hw.highest_perf )
+        return data->hw.highest_perf;
+
+    return perf;
+}
+
+/*
+ * Lowest frequency in kHz: the CPPC lowest_freq when exposed, else the DMI
+ * maximum speed scaled by lowest_perf / highest_perf.
+ *
+ * highest_perf must be non-zero.
+ */
+static unsigned int amd_get_min_freq(const struct amd_pstate_drv_data *data)
+{
+    const struct xen_processor_cppc *cppc_data = data->cppc_data;
+    uint64_t mul, div;
+
+    if ( cppc_data->lowest_freq )
+        /* Switch to khz */
+        return cppc_data->lowest_freq * 1000;
+
+    /* Read Processor Max Speed(mhz) from DMI table as anchor point */
+    mul = dmi_max_speed_mhz;
+    div = data->hw.highest_perf;
+
+    /* Multiply before dividing, so the perf ratio isn't truncated. */
+    return mul * data->hw.lowest_perf * 1000 / div;
+}
+
+/*
+ * Nominal frequency in kHz: the CPPC nominal_freq when exposed, else the DMI
+ * maximum speed scaled by nominal_perf / highest_perf.
+ *
+ * highest_perf must be non-zero.
+ */
+static unsigned int amd_get_nominal_freq(
+    const struct amd_pstate_drv_data *data)
+{
+    const struct xen_processor_cppc *cppc_data = data->cppc_data;
+    uint64_t mul, div;
+
+    if ( cppc_data->nominal_freq )
+        /* Switch to khz */
+        return cppc_data->nominal_freq * 1000;
+
+    /* Read Processor Max Speed(mhz) from DMI table as anchor point */
+    mul = dmi_max_speed_mhz;
+    div = data->hw.highest_perf;
+
+    /* Multiply before dividing, so the perf ratio isn't truncated. */
+    return mul * data->hw.nominal_perf * 1000 / div;
+}
+
+/*
+ * Highest (boost) frequency in kHz: the nominal frequency scaled by
+ * highest_perf / nominal_perf.
+ *
+ * highest_perf and nominal_perf must be non-zero.
+ */
+static unsigned int amd_get_max_freq(const struct amd_pstate_drv_data *data)
+{
+    uint64_t nominal_freq = amd_get_nominal_freq(data);
+
+    /*
+     * Multiply before dividing: an integer highest_perf / nominal_perf ratio
+     * is 1 whenever nominal_perf <= highest_perf < 2 * nominal_perf, which
+     * would make the boost range disappear.
+     */
+    return nominal_freq * data->hw.highest_perf / data->hw.nominal_perf;
+}
+
+static int cf_check amd_pstate_cpufreq_verify(struct cpufreq_policy *policy)
+{
+    const struct amd_pstate_drv_data *data =
+        per_cpu(amd_pstate_drv_data, policy->cpu);
+
+    cpufreq_verify_within_limits(policy, data->min_freq, data->max_freq);
+
+    return 0;
+}
+
+/* Runs on the target CPU: write the cached request to MSR_AMD_CPPC_REQ. */
+static void cf_check amd_pstate_write_request_msrs(void *info)
+{
+    struct amd_pstate_drv_data *data = info;
+
+    if ( wrmsr_safe(MSR_AMD_CPPC_REQ, data->amd_req) )
+    {
+        amd_pstate_verbose("Failed to wrmsr_safe(MSR_AMD_CPPC_REQ, %lx)\n",
+                           data->amd_req);
+        data->err = -EINVAL;
+        return;
+    }
+    data->err = 0;
+}
+
+/*
+ * Update the min/desired/max fields of the cached request and, if anything
+ * changed, write it out on @cpu. Returns 0 on success or when there is
+ * nothing to do, -EINVAL if the MSR write failed.
+ */
+static int amd_pstate_write_request(unsigned int cpu, uint8_t min_perf,
+                                    uint8_t des_perf, uint8_t max_perf)
+{
+    struct amd_pstate_drv_data *data = per_cpu(amd_pstate_drv_data, cpu);
+    uint64_t prev = data->amd_req;
+
+    data->req.min_perf = min_perf;
+    data->req.max_perf = max_perf;
+    data->req.des_perf = des_perf;
+
+    if ( prev == data->amd_req )
+        return 0;
+
+    on_selected_cpus(cpumask_of(cpu), amd_pstate_write_request_msrs, data, 1);
+
+    /*
+     * Keep the cache in sync with the hardware: otherwise a failed write
+     * would turn an identical retry into a no-op.
+     */
+    if ( data->err )
+        data->amd_req = prev;
+
+    return data->err;
+}
+
+/*
+ * Translate @target_freq (kHz) into a desired performance level and request
+ * it, with lowest_nonlinear_perf and highest_perf as the min/max bounds.
+ * @relation is not taken into account.
+ */
+static int cf_check amd_pstate_cpufreq_target(struct cpufreq_policy *policy,
+                                              unsigned int target_freq,
+                                              unsigned int relation)
+{
+    unsigned int cpu = policy->cpu;
+    const struct amd_pstate_drv_data *data = per_cpu(amd_pstate_drv_data, cpu);
+    unsigned int max_perf, min_perf, des_perf;
+
+    if ( unlikely(!target_freq) )
+    {
+        amd_pstate_warn(cpu, "Not setting target frequency to zero\n");
+        return 0;
+    }
+    max_perf = data->hw.highest_perf;
+    min_perf = data->hw.lowest_nonlinear_perf;
+    des_perf = amd_pstate_khz_to_perf(data, target_freq);
+    /* Keep the desired level inside the requested [min, max] window. */
+    if ( des_perf < min_perf )
+        des_perf = min_perf;
+
+    return amd_pstate_write_request(cpu, min_perf, des_perf, max_perf);
+}
+
+/*
+ * Runs on the CPU being initialised: enable CPPC, read the capabilities and
+ * derive the frequency limits for the policy passed in @info. The outcome
+ * is reported through data->err.
+ */
+static void cf_check amd_pstate_init_msrs(void *info)
+{
+    struct cpufreq_policy *policy = info;
+    struct amd_pstate_drv_data *data = this_cpu(amd_pstate_drv_data);
+    uint64_t val;
+    unsigned int min_freq, nominal_freq, max_freq;
+
+    /* Package level MSR */
+    if ( rdmsr_safe(MSR_AMD_CPPC_ENABLE, val) )
+    {
+        amd_pstate_err(policy->cpu, "rdmsr_safe(MSR_AMD_CPPC_ENABLE)\n");
+        data->err = -EINVAL;
+        return;
+    }
+
+    if ( !(val & AMD_CPPC_ENABLE) )
+    {
+        val |= AMD_CPPC_ENABLE;
+        if ( wrmsr_safe(MSR_AMD_CPPC_ENABLE, val) )
+        {
+            amd_pstate_err(policy->cpu,
+                           "wrmsr_safe(MSR_AMD_CPPC_ENABLE, %lx)\n", val);
+            data->err = -EINVAL;
+            return;
+        }
+    }
+
+    if ( rdmsr_safe(MSR_AMD_CPPC_CAP1, data->amd_caps) )
+    {
+        amd_pstate_err(policy->cpu, "rdmsr_safe(MSR_AMD_CPPC_CAP1)\n");
+        goto error;
+    }
+
+    /* The frequency helpers below divide by highest_perf and nominal_perf. */
+    if ( data->hw.highest_perf == 0 || data->hw.lowest_perf == 0 ||
+         data->hw.nominal_perf == 0 || data->hw.lowest_nonlinear_perf == 0 )
+    {
+        amd_pstate_err(policy->cpu, "Platform malfunction, read CPPC highest_perf: %u, lowest_perf: %u, nominal_perf: %u, lowest_nonlinear_perf: %u zero value\n",
+                       data->hw.highest_perf, data->hw.lowest_perf,
+                       data->hw.nominal_perf, data->hw.lowest_nonlinear_perf);
+        goto error;
+    }
+
+    min_freq = amd_get_min_freq(data);
+    nominal_freq = amd_get_nominal_freq(data);
+    max_freq = amd_get_max_freq(data);
+    /* max_freq is zero if neither CPPC nor DMI supplied a frequency. */
+    if ( !max_freq || min_freq > max_freq )
+    {
+        amd_pstate_err(policy->cpu, "min_freq(%u) or max_freq(%u) value is incorrect\n",
+                       min_freq, max_freq);
+        goto error;
+    }
+
+    policy->min = min_freq;
+    policy->max = max_freq;
+
+    policy->cpuinfo.min_freq = min_freq;
+    policy->cpuinfo.max_freq = max_freq;
+    policy->cpuinfo.perf_freq = nominal_freq;
+    policy->cur = nominal_freq;
+
+    /* Initial processor data capability frequencies */
+    data->min_freq = min_freq;
+    data->nominal_freq = nominal_freq;
+    data->max_freq = max_freq;
+
+    data->err = 0;
+    return;
+
+ error:
+    data->err = -EINVAL;
+    val &= ~AMD_CPPC_ENABLE;
+    if ( wrmsr_safe(MSR_AMD_CPPC_ENABLE, val) )
+        amd_pstate_err(policy->cpu,
+                       "wrmsr_safe(MSR_AMD_CPPC_ENABLE, %lx)\n", val);
+}
+
+/*
+ * The new AMD P-States driver is different than legacy ACPI hardware P-State,
+ * which has a finer grain frequency range between the highest and lowest
+ * frequency. And boost frequency is actually the frequency which is mapped on
+ * highest performance ratio. The legacy P0 frequency is actually mapped on
+ * nominal performance ratio.
+ */
+static void amd_pstate_boost_init(struct cpufreq_policy *policy,
+                                  const struct amd_pstate_drv_data *data)
+{
+    uint32_t highest_perf, nominal_perf;
+
+    highest_perf = data->hw.highest_perf;
+    nominal_perf = data->hw.nominal_perf;
+
+    if ( highest_perf <= nominal_perf )
+        return;
+
+    policy->turbo = CPUFREQ_TURBO_ENABLED;
+}
+
+/*
+ * Allocate the per-CPU driver data and initialise CPPC on the policy's CPU.
+ * Returns -ENOMEM if the allocation fails, -ENODEV if MSR setup fails.
+ */
+static int cf_check amd_pstate_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+    unsigned int cpu = policy->cpu;
+    struct amd_pstate_drv_data *data;
+
+    data = xzalloc(struct amd_pstate_drv_data);
+    if ( !data )
+        return -ENOMEM;
+
+    data->cppc_data = &processor_pminfo[cpu]->cppc_data;
+
+    policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
+
+    /* amd_pstate_init_msrs() finds its data through the per-CPU pointer. */
+    per_cpu(amd_pstate_drv_data, cpu) = data;
+
+    on_selected_cpus(cpumask_of(cpu), amd_pstate_init_msrs, policy, 1);
+
+    if ( data->err )
+    {
+        amd_pstate_err(cpu, "Could not initialize AMD CPPC MSR properly\n");
+        per_cpu(amd_pstate_drv_data, cpu) = NULL;
+        xfree(data);
+        return -ENODEV;
+    }
+
+    amd_pstate_boost_init(policy, data);
+    return 0;
+}
+
+/* Release the per-CPU driver data set up by amd_pstate_cpufreq_cpu_init(). */
+static int cf_check amd_pstate_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+    struct amd_pstate_drv_data *data =
+        per_cpu(amd_pstate_drv_data, policy->cpu);
+
+    per_cpu(amd_pstate_drv_data, policy->cpu) = NULL;
+    xfree(data);
+
+    return 0;
+}
+
 static const struct cpufreq_driver __initconstrel amd_pstate_cpufreq_driver =
 {
     .name   = XEN_AMD_PSTATE_DRIVER_NAME,
+    .verify = amd_pstate_cpufreq_verify,
+    .target = amd_pstate_cpufreq_target,
+    .init   = amd_pstate_cpufreq_cpu_init,
+    .exit   = amd_pstate_cpufreq_cpu_exit,
 };
 
 int __init amd_pstate_register_driver(void)
diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
index 9cdb5b2625..14eaddcaad 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -455,6 +455,11 @@
 #define MSR_AMD_PPIN_CTL                    0xc00102f0U
 #define MSR_AMD_PPIN                        0xc00102f1U
 
+#define MSR_AMD_CPPC_CAP1                   0xc00102b0U
+#define MSR_AMD_CPPC_ENABLE                 0xc00102b1U
+#define MSR_AMD_CPPC_REQ                    0xc00102b3U
+#define AMD_CPPC_ENABLE                     BIT(0, ULL)
+
 /* VIA Cyrix defined MSRs*/
 #define MSR_VIA_FCR                         0x00001107
 #define MSR_VIA_RNG                         0x0000110b
-- 
2.34.1
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |