|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v6 06/15] cpufreq: Add Hardware P-State (HWP) driver
On Mon, Jul 24, 2023 at 12:15 PM Jan Beulich <jbeulich@xxxxxxxx> wrote:
>
> On 24.07.2023 14:58, Jason Andryuk wrote:
> > --- /dev/null
> > +++ b/xen/arch/x86/acpi/cpufreq/hwp.c
> > @@ -0,0 +1,521 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * hwp.c cpufreq driver to run Intel Hardware P-States (HWP)
> > + *
> > + * Copyright (C) 2021 Jason Andryuk <jandryuk@xxxxxxxxx>
> > + */
> > +
> > +#include <xen/cpumask.h>
> > +#include <xen/init.h>
> > +#include <xen/param.h>
> > +#include <xen/xmalloc.h>
> > +#include <asm/msr.h>
> > +#include <acpi/cpufreq/cpufreq.h>
> > +
> > +static bool __ro_after_init feature_hwp_notification;
> > +static bool __ro_after_init feature_hwp_activity_window;
> > +
> > +static bool __ro_after_init feature_hdc;
> > +
> > +static bool __ro_after_init opt_cpufreq_hdc = true;
> > +
> > +union hwp_request
> > +{
> > + struct
> > + {
> > + unsigned int min_perf:8;
> > + unsigned int max_perf:8;
> > + unsigned int desired:8;
> > + unsigned int energy_perf:8;
> > + unsigned int activity_window:10;
> > + bool package_control:1;
> > + unsigned int :16;
> > + bool activity_window_valid:1;
> > + bool energy_perf_valid:1;
> > + bool desired_valid:1;
> > + bool max_perf_valid:1;
> > + bool min_perf_valid:1;
> > + };
> > + uint64_t raw;
> > +};
> > +
> > +struct hwp_drv_data
> > +{
> > + union
> > + {
> > + uint64_t hwp_caps;
> > + struct
> > + {
> > + unsigned int highest:8;
> > + unsigned int guaranteed:8;
> > + unsigned int most_efficient:8;
> > + unsigned int lowest:8;
> > + unsigned int :32;
> > + } hw;
> > + };
> > + union hwp_request curr_req;
> > + int ret;
> > + uint16_t activity_window;
> > + uint8_t minimum;
> > + uint8_t maximum;
> > + uint8_t desired;
> > + uint8_t energy_perf;
> > +};
> > +static DEFINE_PER_CPU_READ_MOSTLY(struct hwp_drv_data *, hwp_drv_data);
> > +
> > +#define hwp_err(cpu, fmt, ...) \
> > + printk(XENLOG_ERR "HWP: CPU%u error: " fmt, cpu, ##__VA_ARGS__)
> > +#define hwp_info(fmt, ...) printk(XENLOG_INFO "HWP: " fmt,
> > ##__VA_ARGS__)
>
> I'm not convinced ", ##__VA_ARGS__" is a good construct to use. I notice
> we have a few instances (mainly in code inherited from elsewhere), but I
> think it ought to either be plain C99 style (without the ##) or purely
> the gcc extension form (not using __VA_ARGS__).
Using plain __VA_ARGS__ doesn't work for the cases without arguments:
arch/x86/acpi/cpufreq/hwp.c:78:53: error: expected expression before ‘)’ token
78 | printk(XENLOG_DEBUG "HWP: " fmt, __VA_ARGS__); \
| ^
arch/x86/acpi/cpufreq/hwp.c:201:9: note: in expansion of macro ‘hwp_verbose’
201 | hwp_verbose("disabled: No energy/performance
preference available");
| ^~~~~~~~~~~
I can use "__VA_OPT__(,) __VA_ARGS__" though.
> > +#define hwp_verbose(fmt, ...) \
> > +({ \
> > + if ( cpufreq_verbose ) \
> > + printk(XENLOG_DEBUG "HWP: " fmt, ##__VA_ARGS__); \
>
> (One more here then.)
>
> > +static bool hwp_handle_option(const char *s, const char *end)
>
> __init?
Yes, thanks.
> > +static void hwp_get_cpu_speeds(struct cpufreq_policy *policy)
> > +{
> > + uint32_t base_khz, max_khz, bus_khz, edx;
> > +
> > + cpuid(0x16, &base_khz, &max_khz, &bus_khz, &edx);
> > +
> > + policy->cpuinfo.perf_freq = base_khz * 1000;
> > + policy->cpuinfo.min_freq = base_khz * 1000;
> > + policy->cpuinfo.max_freq = max_khz * 1000;
> > + policy->min = base_khz * 1000;
> > + policy->max = max_khz * 1000;
>
> Earlier on I asked what about cases where the CPUID output yields
> some zero values (as I know can happen). Iirc you said this doesn't
> affect functionality, but wouldn't it make sense to have a comment
> to this effect here (proving the cases were thought through).
Sure.
> > +static void cf_check hwp_init_msrs(void *info)
> > +{
> > + struct cpufreq_policy *policy = info;
> > + struct hwp_drv_data *data = this_cpu(hwp_drv_data);
> > + uint64_t val;
> > +
> > + /*
> > + * Package level MSR, but we don't have a good idea of packages here,
> > so
> > + * just do it everytime.
> > + */
> > + if ( rdmsr_safe(MSR_PM_ENABLE, val) )
> > + {
> > + hwp_err(policy->cpu, "rdmsr_safe(MSR_PM_ENABLE)\n");
> > + data->curr_req.raw = -1;
> > + return;
> > + }
> > +
> > + /* Ensure we don't generate interrupts */
> > + if ( feature_hwp_notification )
> > + wrmsr_safe(MSR_HWP_INTERRUPT, 0);
> > +
> > + if ( !(val & PM_ENABLE_HWP_ENABLE) )
> > + {
> > + val |= PM_ENABLE_HWP_ENABLE;
> > + if ( wrmsr_safe(MSR_PM_ENABLE, val) )
> > + {
> > + hwp_err(policy->cpu, "wrmsr_safe(MSR_PM_ENABLE, %lx)\n", val);
> > + data->curr_req.raw = -1;
> > + return;
> > + }
> > + }
> > +
> > + if ( rdmsr_safe(MSR_HWP_CAPABILITIES, data->hwp_caps) )
> > + {
> > + hwp_err(policy->cpu, "rdmsr_safe(MSR_HWP_CAPABILITIES)\n");
> > + goto error;
> > + }
> > +
> > + if ( rdmsr_safe(MSR_HWP_REQUEST, data->curr_req.raw) )
> > + {
> > + hwp_err(policy->cpu, "rdmsr_safe(MSR_HWP_REQUEST)\n");
> > + goto error;
> > + }
> > +
> > + /*
> > + * Check for APERF/MPERF support in hardware
> > + * also check for boost/turbo support
> > + */
> > + intel_feature_detect(policy);
>
> Nit: The comment could do with adding at least a comma or semicolon.
Will change to "Check for turbo support." APERF/MPERF was removed
from intel_feature_detect() in commit 4dd16c44152f ("x86/cpufreq:
Rework APERF/MPERF handling").
> > + if ( feature_hdc &&
> > + (!hdc_set_pkg_hdc_ctl(policy->cpu, opt_cpufreq_hdc) ||
> > + !hdc_set_pm_ctl1(policy->cpu, opt_cpufreq_hdc)) )
> > + {
> > + hwp_err(policy->cpu, "Disabling HDC support\n");
> > + feature_hdc = false;
>
> Nit: Too deep indentation.
Yes, thanks.
> > +static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy)
> > +{
> > + static union hwp_request initial_req;
> > + unsigned int cpu = policy->cpu;
> > + struct hwp_drv_data *data;
> > + bool first_run = false;
> > +
> > + data = xzalloc(struct hwp_drv_data);
> > + if ( !data )
> > + return -ENOMEM;
> > +
> > + policy->governor = &cpufreq_gov_hwp;
> > +
> > + per_cpu(hwp_drv_data, cpu) = data;
> > +
> > + on_selected_cpus(cpumask_of(cpu), hwp_init_msrs, policy, 1);
> > +
> > + if ( data->curr_req.raw == -1 )
> > + {
> > + hwp_err(cpu, "Could not initialize HWP properly\n");
> > + per_cpu(hwp_drv_data, cpu) = NULL;
> > + xfree(data);
> > + return -ENODEV;
> > + }
> > +
> > + data->minimum = data->curr_req.min_perf;
> > + data->maximum = data->curr_req.max_perf;
> > + data->desired = data->curr_req.desired;
> > + data->energy_perf = data->curr_req.energy_perf;
> > + data->activity_window = data->curr_req.activity_window;
> > +
> > + if ( initial_req.raw == 0 )
> > + {
> > + hwp_verbose("CPU%u: HWP_CAPABILITIES: %016lx\n", cpu,
> > data->hwp_caps);
> > + initial_req = data->curr_req;
> > + first_run = true;
> > + }
>
> What part of data->curr_req is guaranteed to be non-0 (for the condition
> around ...
>
> > + if ( first_run || data->curr_req.raw != initial_req.raw )
> > + hwp_verbose("CPU%u: rdmsr HWP_REQUEST %016lx\n", cpu,
> > + data->curr_req.raw);
>
> ... this logging to be effective)?
Hmmm. I think you are correct that there is no guarantee that
data->curr_req will be non-zero. i.e. a BIOS could zero the whole
register. In practice, I see 0x8000ff01 - energy_perf = 0x80, maximum
= 0xff and minimum = 0x01.
Would you prefer that I make first_run a static variable to track if
initial_req has been populated?
> > +static void cf_check hwp_set_misc_turbo(void *info)
> > +{
> > + const struct cpufreq_policy *policy = info;
> > + struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu);
> > + uint64_t msr;
> > +
> > + data->ret = 0;
> > +
> > + if ( rdmsr_safe(MSR_IA32_MISC_ENABLE, msr) )
> > + {
> > + hwp_verbose("CPU%u: error rdmsr_safe(MSR_IA32_MISC_ENABLE)\n",
> > + policy->cpu);
> > + data->ret = -EINVAL;
> > +
> > + return;
> > + }
> > +
> > + if ( policy->turbo == CPUFREQ_TURBO_ENABLED )
> > + msr &= ~MSR_IA32_MISC_ENABLE_TURBO_DISENGAGE;
> > + else
> > + msr |= MSR_IA32_MISC_ENABLE_TURBO_DISENGAGE;
> > +
> > + if ( wrmsr_safe(MSR_IA32_MISC_ENABLE, msr) )
> > + {
> > + hwp_verbose("CPU%u: error wrmsr_safe(MSR_IA32_MISC_ENABLE):
> > %016lx\n",
> > + policy->cpu, msr);
> > + data->ret = -EINVAL;
> > + }
> > +}
>
> Neither of the two -EINVAL really indicate an invalid argument that was
> passed. Maybe EACCES (or less desirably EFAULT)?
Ok, I'll use EACCES.
> > @@ -89,15 +96,45 @@ static int __init cf_check setup_cpufreq_option(const
> > char *str)
> > return 0;
> > }
> >
> > - if ( choice > 0 || !cmdline_strcmp(str, "xen") )
> > + do
> > {
> > - xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
> > - cpufreq_controller = FREQCTL_xen;
> > - if ( *arg && *(arg + 1) )
> > - return cpufreq_cmdline_parse(arg + 1);
> > - }
> > + const char *end = strchr(str, ';');
> > +
> > + if ( end == NULL )
> > + end = strchr(str, '\0');
> > +
> > + arg = strpbrk(str, ",:");
> > + if ( !arg || arg > end )
> > + arg = strchr(str, '\0');
> > +
> > + if ( cpufreq_xen_cnt == ARRAY_SIZE(cpufreq_xen_opts) )
> > + return -E2BIG;
> > +
> > + if ( choice > 0 || !cmdline_strcmp(str, "xen") )
> > + {
> > + xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
> > + cpufreq_controller = FREQCTL_xen;
> > + cpufreq_xen_opts[cpufreq_xen_cnt++] = CPUFREQ_xen;
> > + ret = 0;
> > + if ( arg[0] && arg[1] )
> > + ret = cpufreq_cmdline_parse(arg + 1, end);
> > + }
> > + else if ( choice < 0 && !cmdline_strcmp(str, "hwp") )
> > + {
> > + xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
> > + cpufreq_controller = FREQCTL_xen;
> > + cpufreq_xen_opts[cpufreq_xen_cnt++] = CPUFREQ_hwp;
> > + ret = 0;
> > + if ( arg[0] && arg[1] )
> > + ret = hwp_cmdline_parse(arg + 1, end);
> > + }
> > + else
> > + ret = -EINVAL;
> > +
> > + str = end ? ++end : end;
> > + } while ( choice < 0 && ret == 0 && *str );
>
> According to the earlier of the two lines, str may be NULL and hence
> cannot be dereferenced without first checking to be non-NULL. Earlier
> in the loop though you ensure end cannot be NULL. In that case,
> however, you point end at the nul character, so the increment above
> would point end at the next following one. So my guess is that you
> meant
>
> str = *end ? ++end : end;
Yes, you are correct. Thanks for catching it.
Thanks,
Jason
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |