[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v4 09/15] xen/x86: introduce a new amd cppc driver for cpufreq scaling
On 14.04.2025 09:40, Penny Zheng wrote: > --- a/xen/arch/x86/acpi/cpufreq/amd-cppc.c > +++ b/xen/arch/x86/acpi/cpufreq/amd-cppc.c > @@ -14,7 +14,56 @@ > #include <xen/domain.h> > #include <xen/init.h> > #include <xen/param.h> > +#include <xen/percpu.h> > +#include <xen/xvmalloc.h> > #include <acpi/cpufreq/cpufreq.h> > +#include <asm/amd.h> > +#include <asm/msr-index.h> > + > +#define amd_cppc_err(cpu, fmt, args...) \ > + printk(XENLOG_ERR "AMD_CPPC: CPU%u error: " fmt, cpu, ## args) > +#define amd_cppc_warn(cpu, fmt, args...) \ > + printk(XENLOG_WARNING "AMD_CPPC: CPU%u warning: " fmt, cpu, ## args) > +#define amd_cppc_verbose(cpu, fmt, args...) \ > +({ \ > + if ( cpufreq_verbose ) \ > + printk(XENLOG_DEBUG "AMD_CPPC: CPU%u " fmt, cpu, ## args); \ > +}) > + > +struct amd_cppc_drv_data > +{ > + const struct xen_processor_cppc *cppc_data; > + union { > + uint64_t raw; > + struct { > + unsigned int lowest_perf:8; > + unsigned int lowest_nonlinear_perf:8; > + unsigned int nominal_perf:8; > + unsigned int highest_perf:8; > + unsigned int :32; > + }; > + } caps; > + union { > + uint64_t raw; > + struct { > + unsigned int max_perf:8; > + unsigned int min_perf:8; > + unsigned int des_perf:8; > + unsigned int epp:8; > + unsigned int :32; > + }; > + } req; > + > + int err; > +}; > + > +static DEFINE_PER_CPU_READ_MOSTLY(struct amd_cppc_drv_data *, > + amd_cppc_drv_data); > +/* > + * Core max frequency read from PstateDef as anchor point > + * for freq-to-perf transition > + */ > +static DEFINE_PER_CPU_READ_MOSTLY(uint64_t, amd_max_pxfreq_mhz); Throughout here (and maybe also further down) - is the amd_ prefix really needed everywhere? Even the cppc part of the identifiers seems excessive in at least some of the cases. For this last one, also once again see the type related comment on patch 07. > @@ -51,10 +100,354 @@ int __init amd_cppc_cmdline_parse(const char *s, const > char *e) > return 0; > } > > +/* > + * If CPPC lowest_freq and nominal_freq registers are exposed then we can > + * use them to convert perf to freq and vice versa. The conversion is > + * extrapolated as an linear function passing by the 2 points: > + * - (Low perf, Low freq) > + * - (Nominal perf, Nominal freq) > + */ > +static int amd_cppc_khz_to_perf(const struct amd_cppc_drv_data *data, > + unsigned int freq, uint8_t *perf) > +{ > + const struct xen_processor_cppc *cppc_data = data->cppc_data; > + uint64_t mul, div; > + int offset = 0, res; > + > + if ( freq == (cppc_data->cpc.nominal_mhz * 1000) ) I'm pretty sure I commented on this before: The expression here _suggests_ that "freq" is in kHz, but that's not being made explicit anywhere. > + { > + *perf = data->caps.nominal_perf; > + return 0; > + } > + > + if ( freq == (cppc_data->cpc.lowest_mhz * 1000) ) > + { > + *perf = data->caps.lowest_perf; > + return 0; > + } How likely is it that these two early return paths are taken, when the incoming "freq" is 25 or 5 MHz granular? IOW - is it relevant to shortcut these two cases? > + if ( cppc_data->cpc.lowest_mhz && cppc_data->cpc.nominal_mhz && > + cppc_data->cpc.lowest_mhz < cppc_data->cpc.nominal_mhz ) Along the lines of a comment on an earlier patch, the middle part of the condition here is redundant with the 3rd one. Also, don't you check this relation already during init? IOW isn't it the 3rd part which can be dropped? > + { > + mul = data->caps.nominal_perf - data->caps.lowest_perf; > + div = cppc_data->cpc.nominal_mhz - cppc_data->cpc.lowest_mhz; > + > + /* > + * We don't need to convert to kHz for computing offset and can > + * directly use nominal_mhz and lowest_mhz as the division > + * will remove the frequency unit. > + */ > + offset = data->caps.nominal_perf - > + (mul * cppc_data->cpc.nominal_mhz) / div; > + } > + else > + { > + /* Read Processor Max Speed(MHz) as anchor point */ > + mul = data->caps.highest_perf; > + div = this_cpu(amd_max_pxfreq_mhz); > + if ( !div || div == INVAL_FREQ_MHZ ) Seeing this - do we need INVAL_FREQ_MHZ at all? Isn't 0 good enough an indicator of "data not available"? > + return -EINVAL; > + } > + > + res = offset + (mul * freq) / (div * 1000); > + if ( res > UINT8_MAX ) > + { > + printk_once(XENLOG_WARNING > + "Perf value exceeds maximum value 255: %d\n", res); > + *perf = 0xff; > + return 0; > + } else if ( res < 0 ) Nit: Style. And no real need for "else" here anyway (or alternatively for the "return 0", with an "else" added below). > + { > + printk_once(XENLOG_WARNING > + "Perf value smaller than minimum value 0: %d\n", res); > + *perf = 0; > + return 0; > + } > + *perf = (uint8_t)res; We don't normally spell out such conversions (i.e. please drop the cast). > + return 0; > +} > + > +static int amd_get_lowest_or_nominal_freq(const struct amd_cppc_drv_data > *data, Nothing in the function looks to limit it to "nominal" or "lowest", so them being in the identifier feels misleading. > + uint32_t cpc_mhz, uint8_t perf, > + unsigned int *freq) > +{ > + uint64_t mul, div, res; > + > + if ( !freq ) > + return -EINVAL; Is this needed? It's an internal function. > + if ( cpc_mhz ) > + { > + /* Switch to khz */ > + *freq = cpc_mhz * 1000; > + return 0; > + } > + > + /* Read Processor Max Speed(MHz) as anchor point */ > + mul = this_cpu(amd_max_pxfreq_mhz); > + if ( mul == INVAL_FREQ_MHZ || !mul ) > + { > + printk(XENLOG_ERR > + "Failed to read valid processor max frequency as anchor > point: %lu\n", > + mul); > + return -EINVAL; > + } > + div = data->caps.highest_perf; > + res = (mul * perf * 1000) / div; While there is a comment further up, clarifying function-wide that the result is to be in kHz would perhaps be better. > + if ( res > UINT_MAX || !res ) > + { > + printk(XENLOG_ERR > + "Frequeny exceeds maximum value UINT_MAX or being zero value: > %lu\n", Just "out of range"? > + res); > + return -EINVAL; And then -ERANGE here? > + } > + *freq = (unsigned int)res; See above. > +static int amd_get_max_freq(const struct amd_cppc_drv_data *data, > + unsigned int *max_freq) > +{ > + unsigned int nom_freq = 0, boost_ratio; > + int res; > + > + res = amd_get_lowest_or_nominal_freq(data, > + data->cppc_data->cpc.nominal_mhz, > + data->caps.nominal_perf, > + &nom_freq); > + if ( res ) > + return res; > + > + boost_ratio = (unsigned int)(data->caps.highest_perf / > + data->caps.nominal_perf); I may have seen logic ensuring nominal_perf isn't 0, so that part may be fine. What guarantees this division to yield a positive value, though? If it yields zero (say 0xff / 0x80), ... > + *max_freq = nom_freq * boost_ratio; ... zero will be reported back here. I think you want to scale the calculations here to avoid such. > +static void cf_check amd_cppc_init_msrs(void *info) > +{ > + struct cpufreq_policy *policy = info; > + struct amd_cppc_drv_data *data = this_cpu(amd_cppc_drv_data); > + uint64_t val; > + unsigned int min_freq = 0, nominal_freq = 0, max_freq; > + > + /* Package level MSR */ > + rdmsrl(MSR_AMD_CPPC_ENABLE, val); > + /* > + * Only when Enable bit is on, the hardware will calculate the > processor’s > + * performance capabilities and initialize the performance level fields > in > + * the CPPC capability registers. > + */ > + if ( !(val & AMD_CPPC_ENABLE) ) > + { > + val |= AMD_CPPC_ENABLE; > + wrmsrl(MSR_AMD_CPPC_ENABLE, val); > + } > + > + rdmsrl(MSR_AMD_CPPC_CAP1, data->caps.raw); > + > + if ( data->caps.highest_perf == 0 || data->caps.lowest_perf == 0 || > + data->caps.nominal_perf == 0 || data->caps.lowest_nonlinear_perf == > 0 || > + data->caps.lowest_perf > data->caps.lowest_nonlinear_perf || Same question as asked elsewhere - where is this relation specified? > + data->caps.lowest_nonlinear_perf > data->caps.nominal_perf || > + data->caps.nominal_perf > data->caps.highest_perf ) > + { > + amd_cppc_err(policy->cpu, > + "Platform malfunction, read CPPC capability > highest(%u), lowest(%u), nominal(%u), lowest_nonlinear(%u) zero value\n", The message wants shortening if at all possible, and it wants to not be misleading (saying "zero" when the issue may be something else). > + data->caps.highest_perf, data->caps.lowest_perf, > + data->caps.nominal_perf, > data->caps.lowest_nonlinear_perf); > + goto err; > + } > + > + amd_process_freq(cpu_data + policy->cpu, &cpu_data[policy->cpu] is generally preferred in such cases. > +static int cf_check amd_cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) > +{ > + unsigned int cpu = policy->cpu; > + struct amd_cppc_drv_data *data; > + const struct cpuinfo_x86 *c = cpu_data + cpu; > + > + data = xvzalloc(struct amd_cppc_drv_data); > + if ( !data ) > + return -ENOMEM; > + > + data->cppc_data = &processor_pminfo[cpu]->cppc_data; > + > + per_cpu(amd_cppc_drv_data, cpu) = data; > + > + /* Feature CPPC is firstly introduced on Zen2 */ > + if ( c->x86 < 0x17 ) > + { > + printk_once("Unsupported cpu family: %x\n", c->x86); > + return -EOPNOTSUPP; > + } With the cpu_has_cppc check in amd_cppc_register_driver(), is this check really needed? Jan
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |