[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH v4 10/11] cpufreq: add hwdom-cpufreq driver



On Tue, 4 Nov 2014, Oleksandr Dmytryshyn wrote:
> This driver uses hwdom to change frequencies on CPUs
> Workflow:
>  * cpufreq governor driver in Xen wants to change the
>    frequency of the physical CPU
>  * hwdom-cpufreq driver sets parameters in the shared
>    memory
>  * hwdom-cpufreq driver sends an VIRQ_CPUFREQ interrupt
>    to notify the hardware domain
>  * cpufreq driver in the hardware domain reads parameters
>    from the shared memory, changes frequency and copies
>    the result of the operation to the shared memory
> 
> Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
> ---
>  xen/Rules.mk                        |   1 +
>  xen/drivers/cpufreq/Makefile        |   1 +
>  xen/drivers/cpufreq/hwdom-cpufreq.c | 252 
> ++++++++++++++++++++++++++++++++++++
>  xen/include/public/xen.h            |   1 +
>  4 files changed, 255 insertions(+)
>  create mode 100644 xen/drivers/cpufreq/hwdom-cpufreq.c
> 
> diff --git a/xen/Rules.mk b/xen/Rules.mk
> index 3b0b89b..cccbc72 100644
> --- a/xen/Rules.mk
> +++ b/xen/Rules.mk
> @@ -56,6 +56,7 @@ CFLAGS-$(perfc_arrays)  += -DPERF_ARRAYS
>  CFLAGS-$(lock_profile)  += -DLOCK_PROFILE
>  CFLAGS-$(HAS_ACPI)      += -DHAS_ACPI
>  CFLAGS-$(HAS_CPUFREQ)   += -DHAS_CPUFREQ
> +CFLAGS-$(HAS_HWDOM_CPUFREQ) += -DHAS_HWDOM_CPUFREQ
>  CFLAGS-$(HAS_PM)        += -DHAS_PM
>  CFLAGS-$(HAS_CPU_TURBO) += -DHAS_CPU_TURBO
>  CFLAGS-$(HAS_GDBSX)     += -DHAS_GDBSX
> diff --git a/xen/drivers/cpufreq/Makefile b/xen/drivers/cpufreq/Makefile
> index b87d127..891997c 100644
> --- a/xen/drivers/cpufreq/Makefile
> +++ b/xen/drivers/cpufreq/Makefile
> @@ -2,3 +2,4 @@ obj-y += cpufreq.o
>  obj-y += cpufreq_ondemand.o
>  obj-y += cpufreq_misc_governors.o
>  obj-y += utility.o
> +obj-$(HAS_HWDOM_CPUFREQ) += hwdom-cpufreq.o
> diff --git a/xen/drivers/cpufreq/hwdom-cpufreq.c 
> b/xen/drivers/cpufreq/hwdom-cpufreq.c
> new file mode 100644
> index 0000000..a39fc6c
> --- /dev/null
> +++ b/xen/drivers/cpufreq/hwdom-cpufreq.c
> @@ -0,0 +1,252 @@
> +/*
> + *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@xxxxxxxxx>
> + *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
> + *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@xxxxxxxx>
> + *  Copyright (C) 2006        Denis Sadykov <denis.m.sadykov@xxxxxxxxx>
> + *
> + *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
> + *      porting acpi-cpufreq.c from Linux 2.6.23 to Xen hypervisor
> + *
> + *  Copyright (C) 2014 GlobalLogic Inc.
> + *
> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License, or (at
> + *  your option) any later version.
> + *
> + *  This program is distributed in the hope that it will be useful, but
> + *  WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + *  General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License along
> + *  with this program; if not, write to the Free Software Foundation, Inc.,
> + *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
> + *
> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> + */
> +#include <xen/types.h>
> +#include <xen/errno.h>
> +#include <xen/sched.h>
> +#include <xen/event.h>
> +#include <xen/irq.h>
> +#include <xen/spinlock.h>
> +#include <xen/cpufreq.h>
> +#include <xen/err.h>
> +#include <asm/shared.h>
> +#include <asm/current.h>
> +#include <asm/system.h>
> +
> +struct hwdom_cpufreq_data {
> +    struct processor_performance *perf_data;
> +    struct cpufreq_frequency_table *freq_table;
> +};
> +
> +static struct hwdom_cpufreq_data *hwdom_cpufreq_drv_data[NR_CPUS];
> +
> +int cpufreq_cpu_init(unsigned int cpuid)
> +{
> +    return cpufreq_add_cpu(cpuid);
> +}
> +
> +static void notify_cpufreq_domain(void)
> +{
> +    send_global_virq(VIRQ_CPUFREQ);
> +}
> +
> +static int hwdom_cpufreq_verify(struct cpufreq_policy *policy)
> +{
> +    struct hwdom_cpufreq_data *data;
> +    struct processor_performance *perf;
> +
> +    if ( !policy || !(data = hwdom_cpufreq_drv_data[policy->cpu]) ||
> +         !processor_pminfo[policy->cpu] )
> +        return -EINVAL;
> +
> +    perf = &processor_pminfo[policy->cpu]->perf;
> +
> +    cpufreq_verify_within_limits(policy, 0,
> +        perf->states[perf->platform_limit].core_frequency * 1000);
> +
> +    return cpufreq_frequency_table_verify(policy, data->freq_table);
> +}
> +
> +static int hwdom_cpufreq_target(struct cpufreq_policy *policy,
> +                               unsigned int target_freq, unsigned int 
> relation)
> +{
> +    struct hwdom_cpufreq_data *data = hwdom_cpufreq_drv_data[policy->cpu];
> +    struct processor_performance *perf;
> +    struct cpufreq_freqs freqs;
> +    struct cpufreq_sh_info *cpufreq_info;
> +    cpumask_t online_policy_cpus;
> +    unsigned int next_state = 0; /* Index into freq_table */
> +    unsigned int next_perf_state = 0; /* Index into perf table */
> +    unsigned int j;
> +    int ret = 0;
> +
> +    if ( unlikely(data == NULL ||
> +         data->perf_data == NULL || data->freq_table == NULL) )
> +        return -ENODEV;
> +
> +    perf = data->perf_data;
> +    ret = cpufreq_frequency_table_target(policy,
> +                                         data->freq_table,
> +                                         target_freq,
> +                                         relation, &next_state);
> +    if ( unlikely(ret) )
> +        return -ENODEV;
> +
> +    cpumask_and(&online_policy_cpus, &cpu_online_map, policy->cpus);
> +
> +    next_perf_state = data->freq_table[next_state].index;
> +    if ( perf->state == next_perf_state )
> +    {
> +        if ( unlikely(policy->resume) )
> +            policy->resume = 0;
> +        else
> +            return 0;
> +    }
> +
> +    freqs.old = perf->states[perf->state].core_frequency * 1000;
> +    freqs.new = data->freq_table[next_state].frequency;
> +
> +     /* Do send cmd for Hardware domain */
> +    cpufreq_info = arch_get_cpufreq_addr(dom0);
> +
> +    /* Set previous result in the Hardware domain then read it */
> +    smp_rmb();
> +
> +     /* return previous result */
> +    ret = cpufreq_info->result;

What if the previous command hasn't completed yet?


> +    cpufreq_info->cpu = policy->cpu;
> +    cpufreq_info->freq = freqs.new;
> +    cpufreq_info->relation = (uint32_t)relation;
> +
> +    smp_wmb(); /* above must be visible before notify_cpufreq_domain() */
> +
> +    notify_cpufreq_domain();
> +
> +    for_each_cpu( j, &online_policy_cpus )
> +        cpufreq_statistic_update(j, perf->state, next_perf_state);
> +
> +    perf->state = next_perf_state;
> +    policy->cur = freqs.new;
> +
> +    return ret;
> +}
> +
> +static int
> +hwdom_cpufreq_cpu_init(struct cpufreq_policy *policy)
> +{
> +    struct processor_performance *perf;
> +    struct hwdom_cpufreq_data *data;
> +    unsigned int cpu = policy->cpu;
> +    unsigned int valid_states = 0;
> +    int i;
> +    int ret = 0;
> +
> +    data = xzalloc(struct hwdom_cpufreq_data);
> +    if ( !data )
> +        return -ENOMEM;
> +
> +    hwdom_cpufreq_drv_data[cpu] = data;
> +
> +    data->perf_data = &processor_pminfo[cpu]->perf;
> +
> +    perf = data->perf_data;
> +    policy->shared_type = perf->shared_type;
> +
> +    data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
> +                                     (perf->state_count+1));
> +    if ( !data->freq_table )
> +    {
> +        ret = -ENOMEM;
> +        goto err_unreg;
> +    }
> +
> +    /* detect transition latency */
> +    policy->cpuinfo.transition_latency = 0;
> +    for ( i = 0; i < perf->state_count; i++ )
> +    {
> +        if ( (perf->states[i].transition_latency * 1000) >
> +             policy->cpuinfo.transition_latency )
> +            policy->cpuinfo.transition_latency =
> +                perf->states[i].transition_latency * 1000;
> +    }
> +
> +    policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
> +
> +    /* table init */
> +    for ( i = 0; i < perf->state_count; i++ )
> +    {
> +        if ( i > 0 && perf->states[i].core_frequency >=
> +            data->freq_table[valid_states-1].frequency / 1000 )
> +            continue;
> +
> +        data->freq_table[valid_states].index = i;
> +        data->freq_table[valid_states].frequency =
> +            perf->states[i].core_frequency * 1000;
> +        valid_states++;
> +    }
> +    data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
> +    perf->state = 0;
> +
> +    ret = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
> +    if ( ret )
> +        goto err_freqfree;
> +
> +
> +    /*
> +     * the first call to ->target() should result in us actually
> +     * send command to the Hardware domain to set frequency.
> +     */
> +    policy->resume = 1;
> +
> +    /* Set the minimal frequency */
> +    return hwdom_cpufreq_target(policy, policy->min, CPUFREQ_RELATION_L);
> +
> + err_freqfree:
> +    xfree(data->freq_table);
> + err_unreg:
> +    xfree(data);
> +    hwdom_cpufreq_drv_data[cpu] = NULL;
> +
> +    return ret;
> +}
> +
> +static int hwdom_cpufreq_cpu_exit(struct cpufreq_policy *policy)
> +{
> +    struct hwdom_cpufreq_data *data = hwdom_cpufreq_drv_data[policy->cpu];
> +
> +    if ( data )
> +    {
> +        hwdom_cpufreq_drv_data[policy->cpu] = NULL;
> +        xfree(data->freq_table);
> +        xfree(data);
> +    }
> +
> +    return 0;
> +}
> +
> +static struct cpufreq_driver hwdom_cpufreq_driver = {
> +    .name   = "hwdom-cpufreq",
> +    .verify = hwdom_cpufreq_verify,
> +    .target = hwdom_cpufreq_target,
> +    .init   = hwdom_cpufreq_cpu_init,
> +    .exit   = hwdom_cpufreq_cpu_exit,
> +};
> +
> +static int __init hwdom_cpufreq_driver_init(void)
> +{
> +    int ret = 0;
> +
> +    if ( cpufreq_controller == FREQCTL_xen )
> +        ret = cpufreq_register_driver(&hwdom_cpufreq_driver);
> +
> +    return ret;
> +}
> +
> +__initcall(hwdom_cpufreq_driver_init);
> diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
> index 8c5697e..abd8438 100644
> --- a/xen/include/public/xen.h
> +++ b/xen/include/public/xen.h
> @@ -160,6 +160,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
>  #define VIRQ_MEM_EVENT  10 /* G. (DOM0) A memory event has occured           
> */
>  #define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient                     
> */
>  #define VIRQ_ENOMEM     12 /* G. (DOM0) Low on heap memory       */
> +#define VIRQ_CPUFREQ    14 /* G. (DOM0) Notify cpufreq driver                
> */
>  
>  /* Architecture-specific VIRQ definitions. */
>  #define VIRQ_ARCH_0    16
> -- 
> 1.9.1
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.