Re: [PATCH v1 05/11] xen/x86: introduce a new amd pstate driver for cpufreq scaling
On 03.12.2024 09:11, Penny Zheng wrote:
> --- a/xen/arch/x86/acpi/cpufreq/amd-pstate.c
> +++ b/xen/arch/x86/acpi/cpufreq/amd-pstate.c
> @@ -15,6 +15,53 @@
>  #include <xen/init.h>
>  #include <xen/param.h>
>  #include <acpi/cpufreq/cpufreq.h>
> +#include <asm/msr.h>
> +
> +#define amd_pstate_err(cpu, fmt, args...)                           \
> +    printk(XENLOG_ERR "AMD_PSTATE: CPU%u error: " fmt, cpu, ## args)
> +#define amd_pstate_verbose(fmt, args...)                            \
> +({                                                                  \
> +    if ( cpufreq_verbose )                                          \
> +        printk(XENLOG_DEBUG "AMD_PSTATE: " fmt, ## args);           \
> +})
> +#define amd_pstate_warn(fmt, args...)                               \
> +    printk(XENLOG_WARNING "AMD_PSTATE: CPU%u warning: " fmt, cpu, ## args)
> +
> +struct amd_pstate_drv_data
> +{
> +    struct xen_processor_cppc *cppc_data;
> +    union
> +    {
> +        uint64_t amd_caps;
> +        struct
> +        {
> +            unsigned int lowest_perf:8;
> +            unsigned int lowest_nonlinear_perf:8;
> +            unsigned int nominal_perf:8;
> +            unsigned int highest_perf:8;
> +            unsigned int :32;
> +        } hw;

Please can this be

    union {
        uint64_t raw;
        struct {
            unsigned int lowest_perf:8;
            unsigned int lowest_nonlinear_perf:8;
            unsigned int nominal_perf:8;
            unsigned int highest_perf:8;
            unsigned int :32;
        };
    } caps;

such that at use sites (e.g. amd_pstate_write_request()) it is possible to
actually spot that the same thing is being accessed through the two parts of
the union?

> +    };
> +    union
> +    {
> +        uint64_t amd_req;
> +        struct
> +        {
> +            unsigned int max_perf:8;
> +            unsigned int min_perf:8;
> +            unsigned int des_perf:8;
> +            unsigned int epp:8;
> +            unsigned int :32;
> +        } req;
> +    };

Along the same lines here then.

> +    int err;
> +
> +    uint32_t max_freq;
> +    uint32_t min_freq;
> +    uint32_t nominal_freq;
> +};
> +
> +static DEFINE_PER_CPU_READ_MOSTLY(struct amd_pstate_drv_data *, amd_pstate_drv_data);
>
>  uint16_t __read_mostly dmi_max_speed_mhz;
>
> @@ -52,9 +99,298 @@ int __init amd_pstate_cmdline_parse(const char *s, const char *e)
>      return 0;
>  }
>
> +/*
> + * If CPPC lowest_freq and nominal_freq registers are exposed then we can
> + * use them to convert perf to freq and vice versa. The conversion is
> + * extrapolated as an affine function passing by the 2 points:
> + *  - (Low perf, Low freq)
> + *  - (Nominal perf, Nominal freq)
> + */
> +static unsigned int amd_pstate_khz_to_perf(struct amd_pstate_drv_data *data, unsigned int freq)

Const?

> +{
> +    struct xen_processor_cppc* cppc_data = data->cppc_data;

Nit: Misplaced *. Const?

> +    uint64_t mul, div, offset = 0;
> +
> +    if ( freq == (cppc_data->nominal_freq * 1000) )
> +        return data->hw.nominal_perf;
> +
> +    if ( freq == (cppc_data->lowest_freq * 1000) )
> +        return data->hw.lowest_perf;
> +
> +    if ( (cppc_data->lowest_freq) && (cppc_data->nominal_freq) )
> +    {
> +        mul = data->hw.nominal_perf - data->hw.lowest_perf;
> +        div = cppc_data->nominal_freq - cppc_data->lowest_freq;
> +        /*
> +         * We don't need to convert to kHz for computing offset and can
> +         * directly use nominal_freq and lowest_freq as the division
> +         * will remove the frequency unit.
> +         */
> +        div = div ?: 1;
> +        offset = data->hw.nominal_perf - (mul * cppc_data->nominal_freq) / div;
> +    }
> +    else
> +    {
> +        /* Read Processor Max Speed(mhz) from DMI table as anchor point */
> +        mul = data->hw.highest_perf;
> +        div = dmi_max_speed_mhz;

What if dmi_max_speed_mhz is still 0?

> +    }
> +
> +    return (unsigned int)(offset + (mul * freq ) / (div * 1000));

Nit: Excess blank before a closing parenthesis.

> +}
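For illustration, the fallback branch could guard against an unset
dmi_max_speed_mhz along these lines. This is only a sketch: the early return
value of 0 is an assumption (a "cannot convert" marker the caller would have
to recognise), not something the patch currently defines.

    else
    {
        /* Read Processor Max Speed(mhz) from DMI table as anchor point */
        mul = data->hw.highest_perf;
        div = dmi_max_speed_mhz;

        /*
         * dmi_max_speed_mhz may still be 0 if no usable SMBIOS record was
         * found; avoid dividing by it.  The 0 return is a placeholder only.
         */
        if ( !div )
            return 0;
    }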
> +
> +static unsigned int amd_get_min_freq(struct amd_pstate_drv_data *data)
> +{
> +    struct xen_processor_cppc *cppc_data = data->cppc_data;
> +    uint64_t mul, div;
> +
> +    if ( cppc_data->lowest_freq )
> +        /* Switch to khz */
> +        return cppc_data->lowest_freq * 1000;
> +    else

Please avoid "else" when the earlier if() ends in an unconditional control
flow statement.

> +    {
> +        /* Read Processor Max Speed(mhz) from DMI table as anchor point */
> +        mul = dmi_max_speed_mhz;
> +        div = data->hw.highest_perf;
> +
> +        return (unsigned int)(mul / div) * data->hw.lowest_perf * 1000;

No risk of the cast chopping off set bits?
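For illustration, a shape addressing both remarks might look like the sketch
below. The const qualifiers follow the const-correctness request elsewhere in
this mail; the arithmetic is done in a 64-bit type with the division last,
which also changes the rounding compared to the original - whether that is
wanted is a separate question. Field names are as in the patch (i.e. before
any caps/raw rework), and highest_perf is assumed to have been verified
non-zero by the caller.

    static unsigned int amd_get_min_freq(const struct amd_pstate_drv_data *data)
    {
        const struct xen_processor_cppc *cppc_data = data->cppc_data;

        if ( cppc_data->lowest_freq )
            /* Switch to khz */
            return cppc_data->lowest_freq * 1000;

        /* Read Processor Max Speed(mhz) from DMI table as anchor point */
        return (uint64_t)dmi_max_speed_mhz * data->hw.lowest_perf * 1000 /
               data->hw.highest_perf;
    }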
> +static unsigned int amd_get_nominal_freq(struct amd_pstate_drv_data *data)
> +{
> +    struct xen_processor_cppc *cppc_data = data->cppc_data;
> +    uint64_t mul, div;
> +
> +    if ( cppc_data->nominal_freq )
> +        /* Switch to khz */
> +        return cppc_data->nominal_freq * 1000;
> +    else
> +    {
> +        /* Read Processor Max Speed(mhz) from DMI table as anchor point */
> +        mul = dmi_max_speed_mhz;
> +        div = data->hw.highest_perf;
> +
> +        return (unsigned int)(mul / div) * data->hw.nominal_perf * 1000;
> +    }
> +}
> +
> +static unsigned int amd_get_max_freq(struct amd_pstate_drv_data *data)
> +{
> +    uint64_t max_perf, max_freq, nominal_freq, nominal_perf;
> +    uint64_t boost_ratio;
> +
> +    nominal_freq = amd_get_nominal_freq(data);
> +    nominal_perf = data->hw.nominal_perf;
> +    max_perf = data->hw.highest_perf;
> +
> +    boost_ratio = (uint64_t)(max_perf / nominal_perf);

What's the point of the cast here?

> +    max_freq = nominal_freq * boost_ratio;
> +
> +    return max_freq;
> +}
> +
> +static int cf_check amd_pstate_cpufreq_verify(struct cpufreq_policy *policy)
> +{
> +    struct amd_pstate_drv_data *data = per_cpu(amd_pstate_drv_data, policy->cpu);

Const? (I won't further repeat this. Please make pointers pointer-to-const
wherever possible. We aim at having fully const-correct code.)

> +    cpufreq_verify_within_limits(policy, data->min_freq, data->max_freq);
> +
> +    return 0;
> +}
> +
> +static void amd_pstate_write_request_msrs(void *info)
> +{
> +    struct amd_pstate_drv_data *data =(struct amd_pstate_drv_data *)info;

Nit: Blank after = and no need for a cast.

> +    if ( wrmsr_safe(MSR_AMD_CPPC_REQ, data->amd_req) )
> +    {
> +        amd_pstate_verbose("Failed to wrmsr_safe(MSR_AMD_CPPC_REQ, %lx)\n",
> +                           data->amd_req);

If this was ever to trigger, it would likely get very noisy in the log.

> +        data->err = -EINVAL;
> +        return;
> +    }
> +    data->err = 0;
> +}
> +
> +static int cf_check amd_pstate_write_request(int cpu, uint8_t min_perf,
> +                                             uint8_t des_perf, uint8_t max_perf)
> +{
> +    struct amd_pstate_drv_data *data = per_cpu(amd_pstate_drv_data, cpu);
> +    uint64_t prev = data->amd_req;
> +
> +    data->req.min_perf = min_perf;
> +    data->req.max_perf = max_perf;
> +    data->req.des_perf = des_perf;
> +
> +    if ( prev == data->amd_req )
> +        return 0;
> +
> +    on_selected_cpus(cpumask_of(cpu), amd_pstate_write_request_msrs, data, 1);
> +
> +    return data->err;
> +}
> +
> +static int cf_check amd_pstate_cpufreq_target(struct cpufreq_policy *policy,
> +                                              unsigned int target_freq,
> +                                              unsigned int relation)
> +{
> +    unsigned int cpu = policy->cpu;
> +    struct amd_pstate_drv_data *data = per_cpu(amd_pstate_drv_data, cpu);
> +    uint64_t max_perf, min_perf, des_perf;
> +
> +    if ( unlikely(!target_freq) )
> +    {
> +        amd_pstate_warn("Not setting target frequency to zero\n");

Potentially overly noisy again.

> +        return 0;
> +    }
> +    max_perf = data->hw.highest_perf;
> +    min_perf = data->hw.lowest_nonlinear_perf;
> +    des_perf = amd_pstate_khz_to_perf(data, target_freq);
> +
> +    return amd_pstate_write_request(policy->cpu, min_perf, des_perf, max_perf);

The use of intermediate variables doesn't look very helpful here. Even more
so as in the course of the call 64-bit quantities will be truncated to 8-bit
ones.

> +}
> +
> +static void cf_check amd_pstate_init_msrs(void *info)
> +{
> +    struct cpufreq_policy *policy = info;
> +    struct amd_pstate_drv_data *data = this_cpu(amd_pstate_drv_data);
> +    uint64_t val;
> +    unsigned int min_freq, nominal_freq, max_freq;
> +
> +    /* Package level MSR */
> +    if ( rdmsr_safe(MSR_AMD_CPPC_ENABLE, val) )

Before trying this, wouldn't you better exclude certain very old families?
Even looking at a random Fam17 PPR I can't spot documentation of this MSR
(nor the other two), despite you merely saying Zen elsewhere (without any
version number).

> +    {
> +        amd_pstate_err(policy->cpu, "rdmsr_safe(MSR_AMD_CPPC_ENABLE)\n");
> +        data->err = -EINVAL;
> +        return;
> +    }
> +
> +    if ( !(val & AMD_CPPC_ENABLE) )
> +    {
> +        val |= AMD_CPPC_ENABLE;
> +        if ( wrmsr_safe(MSR_AMD_CPPC_ENABLE, val) )
> +        {
> +            amd_pstate_err(policy->cpu, "wrmsr_safe(MSR_AMD_CPPC_ENABLE, %lx)\n", val);
> +            data->err = -EINVAL;
> +            return;
> +        }
> +    }

Do you really need to enable before reading ...

> +    if ( rdmsr_safe(MSR_AMD_CPPC_CAP1, data->amd_caps) )

... capabilities and ...

> +    {
> +        amd_pstate_err(policy->cpu, "rdmsr_safe(MSR_AMD_CPPC_CAP1)\n");
> +        goto error;
> +    }
> +
> +    if ( data->hw.highest_perf == 0 || data->hw.lowest_perf == 0 ||
> +         data->hw.nominal_perf == 0 || data->hw.lowest_nonlinear_perf == 0 )
> +    {
> +        amd_pstate_err(policy->cpu, "Platform malfunction, read CPPC highest_perf: %u, lowest_perf: %u, nominal_perf: %u, lowest_nonlinear_perf: %u zero value\n",
> +                       data->hw.highest_perf, data->hw.lowest_perf,
> +                       data->hw.nominal_perf, data->hw.lowest_nonlinear_perf);
> +        goto error;
> +    }
> +
> +    min_freq = amd_get_min_freq(data);
> +    nominal_freq = amd_get_nominal_freq(data);
> +    max_freq = amd_get_max_freq(data);
> +    if ( min_freq > max_freq )
> +    {
> +        amd_pstate_err(policy->cpu, "min_freq(%u) or max_freq(%u) value is incorrect\n",
> +                       min_freq, max_freq);
> +        goto error;
> +    }

... checking them? Error handling would be easier if you enabled only
afterwards.
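For illustration, the reordering being suggested might look roughly like the
sketch below (messages and the setting of data->err trimmed for brevity; the
existing error label is assumed to remain, but would then only need to record
the failure rather than clear AMD_CPPC_ENABLE again):

    /* Read and sanity-check the CPPC capabilities first. */
    if ( rdmsr_safe(MSR_AMD_CPPC_CAP1, data->amd_caps) )
        goto error;

    if ( !data->hw.highest_perf || !data->hw.lowest_perf ||
         !data->hw.nominal_perf || !data->hw.lowest_nonlinear_perf )
        goto error;

    min_freq = amd_get_min_freq(data);
    nominal_freq = amd_get_nominal_freq(data);
    max_freq = amd_get_max_freq(data);
    if ( min_freq > max_freq )
        goto error;

    /* Only now flip the package level enable bit. */
    if ( rdmsr_safe(MSR_AMD_CPPC_ENABLE, val) )
        goto error;
    if ( !(val & AMD_CPPC_ENABLE) )
    {
        val |= AMD_CPPC_ENABLE;
        if ( wrmsr_safe(MSR_AMD_CPPC_ENABLE, val) )
            goto error;
    }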
> +    policy->min = min_freq;
> +    policy->max = max_freq;
> +
> +    policy->cpuinfo.min_freq = min_freq;
> +    policy->cpuinfo.max_freq = max_freq;
> +    policy->cpuinfo.perf_freq = nominal_freq;
> +    policy->cur = nominal_freq;
> +
> +    /* Initial processor data capability frequencies */
> +    data->min_freq = min_freq;
> +    data->nominal_freq = nominal_freq;
> +    data->max_freq = max_freq;
> +
> +    data->err = 0;
> +    return;
> +
> + error:
> +    data->err = -EINVAL;
> +    val &= ~AMD_CPPC_ENABLE;
> +    if ( wrmsr_safe(MSR_AMD_CPPC_ENABLE, val) )
> +        amd_pstate_err(policy->cpu, "wrmsr_safe(MSR_AMD_CPPC_ENABLE, %lx)\n", val);
> +}
> +
> +/*
> + * The new AMD P-States driver is different than legacy ACPI hardware P-State,
> + * which has a finer grain frequency range between the highest and lowest
> + * frequency. And boost frequency is actually the frequency which is mapped on
> + * highest performance ratio. The legacy P0 frequency is actually mapped on
> + * nominal performance ratio.
> + */
> +static void amd_pstate_boost_init(struct cpufreq_policy *policy, struct amd_pstate_drv_data *data)
> +{
> +    uint32_t highest_perf, nominal_perf;

Why uint32_t when the fields read are both 8-bit only?

> +    highest_perf = data->hw.highest_perf;
> +    nominal_perf = data->hw.nominal_perf;
> +
> +    if ( highest_perf <= nominal_perf )
> +        return;
> +
> +    policy->turbo = CPUFREQ_TURBO_ENABLED;
> +}

And why the helper variables in the first place?

> +static int cf_check amd_pstate_cpufreq_cpu_init(struct cpufreq_policy *policy)
> +{
> +    unsigned int cpu = policy->cpu;
> +    struct amd_pstate_drv_data *data;
> +
> +    data = xzalloc(struct amd_pstate_drv_data);
> +    if ( !data )
> +        return -ENOMEM;
> +
> +    data->cppc_data = &processor_pminfo[cpu]->cppc_data;
> +
> +    policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
> +
> +    per_cpu(amd_pstate_drv_data, cpu) = data;
> +
> +    on_selected_cpus(cpumask_of(cpu), amd_pstate_init_msrs, policy, 1);
> +
> +    if ( data->err )
> +    {
> +        amd_pstate_err(cpu, "Could not initialize AMD CPPC MSR properly\n");
> +        per_cpu(amd_pstate_drv_data, cpu) = NULL;
> +        xfree(data);

Please use XFREE() (really XVFREE() after respective conversion) in such
cases.

> +        return -ENODEV;

You're not undoing the policy->governor adjustment - maybe best to delay that
until after all errors were handled?

> --- a/xen/arch/x86/include/asm/msr-index.h
> +++ b/xen/arch/x86/include/asm/msr-index.h
> @@ -455,6 +455,11 @@
>  #define MSR_AMD_PPIN_CTL                    0xc00102f0U
>  #define MSR_AMD_PPIN                        0xc00102f1U
>
> +#define MSR_AMD_CPPC_CAP1                   0xc00102b0
> +#define MSR_AMD_CPPC_ENABLE                 0xc00102b1
> +#define MSR_AMD_CPPC_REQ                    0xc00102b3
> +#define AMD_CPPC_ENABLE                     BIT(0, ULL)

Bits of individual MSRs want to follow their MSR #define directly, with
suitably different indentation. Please further pay attention to this comment
in the file (as well as the bigger one at the top):

/*
 * Legacy MSR constants in need of cleanup. No new MSRs below this comment.
 */

Jan
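For reference, the arrangement being described would presumably look
something like the sketch below: the enable bit follows its MSR, indented by
one extra blank after #define in the way nearby non-legacy entries are, with
column alignment only approximate here. The definitions would also need to
sit above the "Legacy MSR constants" comment rather than below it.

#define MSR_AMD_CPPC_CAP1                   0xc00102b0
#define MSR_AMD_CPPC_ENABLE                 0xc00102b1
#define  AMD_CPPC_ENABLE                    BIT(0, ULL)
#define MSR_AMD_CPPC_REQ                    0xc00102b3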