[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH v4 9/9] xen/arm: cpufreq: add xen-cpufreq driver



On Tue, Nov 4, 2014 at 6:33 PM, Stefano Stabellini
<stefano.stabellini@xxxxxxxxxxxxx> wrote:
> On Tue, 4 Nov 2014, Oleksandr Dmytryshyn wrote:
>> Xen changes frequencies on CPUs using this high-level
>> cpufreq driver.
>>
>> Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
>
> You CC the wrong email address for Rafael in the entire series.
Could anybody give me the right email address for Rafael?

>>  drivers/cpufreq/Kconfig           |  20 +
>>  drivers/cpufreq/Makefile          |   1 +
>>  drivers/cpufreq/cpufreq_drv_ops.c |  13 +-
>>  drivers/cpufreq/cpufreq_drv_ops.h |   4 +
>>  drivers/cpufreq/xen-cpufreq.c     | 869 
>> ++++++++++++++++++++++++++++++++++++++
>>  include/xen/interface/platform.h  |   1 +
>>  include/xen/interface/xen.h       |   1 +
>>  7 files changed, 907 insertions(+), 2 deletions(-)
>>  create mode 100644 drivers/cpufreq/xen-cpufreq.c
>>
>> diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
>> index f5a8f84..4847d8a 100644
>> --- a/drivers/cpufreq/Kconfig
>> +++ b/drivers/cpufreq/Kconfig
>> @@ -19,6 +19,26 @@ config CPU_FREQ
>>
>>         If in doubt, say N.
>>
>> +config XEN_CPUFREQ
>> +     bool "Xen Cpufreq driver"
>> +     depends on XEN_DOM0
>> +     depends on !CPUMASK_OFFSTACK
>> +     default n
>> +     select CPUFREQ_DRV_OPS
>> +     help
>> +       This driver uploads Power Management information to the Xen
>> +       hypervisor and changes CPUs frequency using CPU Frequency scaling
>> +       drivers.
>> +
>> +       To do that the driver uses CPU Frequency scaling drivers to parse
>> +       the Power Management data and uploads said information to the Xen
>> +       hypervisor. Then the Xen hypervisor can select the proper Pxx states.
>> +
>> +       Then the Xen hypervisor can change CPUs frequency by giving commands
>> +       via this driver to the CPU Frequency scaling driver.
>> +
>> +       If in doubt, say N.
>> +
>>  if CPUFREQ_DRV_OPS
>>
>>  config CPU_FREQ_TABLE
>> diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
>> index f12a0d3..c8d5037 100644
>> --- a/drivers/cpufreq/Makefile
>> +++ b/drivers/cpufreq/Makefile
>> @@ -1,5 +1,6 @@
>>  # CPUfreq core
>>  obj-$(CONFIG_CPU_FREQ)                       += cpufreq.o
>> +obj-$(CONFIG_XEN_CPUFREQ)            += xen-cpufreq.o
>>  obj-$(CONFIG_CPUFREQ_DRV_OPS)                += cpufreq_drv_ops.o
>>  # CPUfreq stats
>>  obj-$(CONFIG_CPU_FREQ_STAT)             += cpufreq_stats.o
>> diff --git a/drivers/cpufreq/cpufreq_drv_ops.c 
>> b/drivers/cpufreq/cpufreq_drv_ops.c
>> index c971442..71c3357 100644
>> --- a/drivers/cpufreq/cpufreq_drv_ops.c
>> +++ b/drivers/cpufreq/cpufreq_drv_ops.c
>> @@ -18,6 +18,8 @@
>>  #include <linux/init.h>
>>  #include <linux/export.h>
>>
>> +#include <xen/xen.h>
>> +
>>  static struct cpufreq_drv_ops *ops;
>>
>>  struct kobject *get_cpufreq_global_kobject(void)
>> @@ -177,10 +179,17 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
>>
>>  static int __init cpufreq_drv_ops_init(void)
>>  {
>> +     if (xen_initial_domain()) {
>> +#ifdef CONFIG_XEN_CPUFREQ
>> +             ops = &xen_cpufreq_drv_ops;
>> +             pr_debug("using xen_cpufreq_drv_ops\n");
>> +#endif
>> +     } else {
>>  #ifdef CONFIG_CPU_FREQ
>> -     ops = &kern_cpufreq_drv_ops;
>> -     pr_debug("using kern_cpufreq_drv_ops\n");
>> +             ops = &kern_cpufreq_drv_ops;
>> +             pr_debug("using kern_cpufreq_drv_ops\n");
>>  #endif
>> +     }
>>
>>       return 0;
>>  }
>> diff --git a/drivers/cpufreq/cpufreq_drv_ops.h 
>> b/drivers/cpufreq/cpufreq_drv_ops.h
>> index 5cc8e05..d02d509 100644
>> --- a/drivers/cpufreq/cpufreq_drv_ops.h
>> +++ b/drivers/cpufreq/cpufreq_drv_ops.h
>> @@ -47,4 +47,8 @@ struct cpufreq_drv_ops {
>>  extern struct cpufreq_drv_ops kern_cpufreq_drv_ops;
>>  #endif
>>
>> +#ifdef CONFIG_XEN_CPUFREQ
>> +extern struct cpufreq_drv_ops xen_cpufreq_drv_ops;
>> +#endif
>> +
>>  #endif /* _CPUFREQ_DRV_OPS_H */
>> diff --git a/drivers/cpufreq/xen-cpufreq.c b/drivers/cpufreq/xen-cpufreq.c
>> new file mode 100644
>> index 0000000..21062c7
>> --- /dev/null
>> +++ b/drivers/cpufreq/xen-cpufreq.c
>> @@ -0,0 +1,869 @@
>> +/*
>> + *  Copyright (C) 2001 Russell King
>> + *            (C) 2002 - 2003 Dominik Brodowski <linux@xxxxxxxx>
>> + *
>> + *  Oct 2005 - Ashok Raj <ashok.raj@xxxxxxxxx>
>> + *   Added handling for CPU hotplug
>> + *  Feb 2006 - Jacob Shin <jacob.shin@xxxxxxx>
>> + *   Fix handling for CPU hotplug -- affected CPUs
>> + *
>> + *           (C) 2014 GlobalLogic Inc.
>> + *
>> + * Based on drivers/cpufreq/cpufreq.c
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + */
>> +
>> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/module.h>
>> +#include <linux/init.h>
>> +#include <linux/notifier.h>
>> +#include <linux/types.h>
>> +#include <linux/slab.h>
>> +#include <linux/mutex.h>
>> +#include <linux/irq.h>
>> +#include <linux/workqueue.h>
>> +#include <linux/cpufreq.h>
>> +
>> +#include <trace/events/power.h>
>> +
>> +#include <xen/xen.h>
>> +#include <xen/events.h>
>> +#include <xen/interface/xen.h>
>> +#include <xen/interface/platform.h>
>> +#include <xen/interface/sysctl.h>
>> +#include <asm/xen/hypercall.h>
>> +#include <asm/xen/hypervisor.h>
>> +
>> +#include "cpufreq_drv_ops.h"
>> +
>> +static int xen_nr_cpus;
>> +static int xen_irq;
>> +
>> +#define for_each_xen_cpu(cpu, mask)                  \
>> +     for ((cpu) = -1;                                \
>> +             (cpu) = cpumask_next((cpu), (mask)),    \
>> +             (cpu) < xen_nr_cpus;)
>> +
>> +static struct cpufreq_driver *cpufreq_driver;
>> +static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
>> +
>> +static DEFINE_SPINLOCK(cpufreq_driver_lock);
>> +
>> +/*
>> + * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
>> + * all cpufreq/hotplug/workqueue/etc related lock issues.
>> + *
>> + * The rules for this semaphore:
>> + * - Any routine that wants to read from the policy structure will
>> + *   do a down_read on this semaphore.
>> + * - Any routine that will write to the policy structure and/or may take 
>> away
>> + *   the policy altogether (eg. CPU hotplug), will hold this lock in write
>> + *   mode before doing so.
>> + *
>> + * Additional rules:
>> + * - Governor routines that can be called in cpufreq hotplug path should not
>> + *   take this sem as top level hotplug notifier handler takes this.
>> + * - Lock should not be held across
>> + *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
>> + */
>> +static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
>> +static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
>> +
>> +#define lock_policy_rwsem(mode, cpu)                         \
>> +static int lock_policy_rwsem_##mode                          \
>> +(int cpu)                                                    \
>> +{                                                            \
>> +     int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);      \
>> +     BUG_ON(policy_cpu == -1);                               \
>> +     down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));    \
>> +                                                             \
>> +     return 0;                                               \
>> +}
>> +
>> +lock_policy_rwsem(write, cpu);
>> +
>> +static void unlock_policy_rwsem_write(int cpu)
>> +{
>> +     int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
>> +     BUG_ON(policy_cpu == -1);
>> +     up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
>> +}
>> +
>> +/**
>> + * The "transition" notifier list for kernel code that needs to handle
>> + * changes to devices when the CPU clock speed changes.
>> + * The mutex locks this list.
>> + */
>> +static struct srcu_notifier_head xen_cpufreq_transition_notifier_list;
>> +
>> +static bool init_cpufreq_transition_notifier_list_called;
>> +static int __init init_cpufreq_transition_notifier_list(void)
>> +{
>> +     srcu_init_notifier_head(&xen_cpufreq_transition_notifier_list);
>> +     init_cpufreq_transition_notifier_list_called = true;
>> +     return 0;
>> +}
>> +pure_initcall(init_cpufreq_transition_notifier_list);
>> +
>> +static struct cpufreq_policy *xen_cpufreq_cpu_get(unsigned int cpu)
>> +{
>> +     struct cpufreq_policy *data = NULL;
>> +     unsigned long flags;
>> +
>> +     if (cpu >= xen_nr_cpus)
>> +             goto err_out;
>> +
>> +     /* get the cpufreq driver */
>> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
>> +
>> +     if (!cpufreq_driver)
>> +             goto err_out_unlock;
>> +
>> +     /* get the CPU */
>> +     data = per_cpu(cpufreq_cpu_data, cpu);
>> +
>> +err_out_unlock:
>> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +err_out:
>> +     return data;
>> +}
>> +
>> +static void xen_cpufreq_cpu_put(struct cpufreq_policy *data)
>> +{
>> +     module_put(cpufreq_driver->owner);
>> +}
>> +
>> +static int push_data_to_hypervisor(struct cpufreq_policy *policy,
>> +                                struct cpufreq_frequency_table *table)
>> +{
>> +     int ret = 0;
>> +     unsigned int i;
>> +     unsigned int cpu;
>> +     uint32_t platform_limit = 0;
>> +     unsigned int max_freq = 0;
>> +     unsigned int state_count = 0;
>> +     unsigned int prev_freq = 0;
>> +     struct xen_processor_px *dst_states;
>> +     struct xen_processor_performance *dst_perf;
>> +     struct xen_platform_op op = {
>> +             .cmd                    = XENPF_set_processor_pminfo,
>> +             .interface_version      = XENPF_INTERFACE_VERSION,
>> +             .u.set_pminfo.type      = XEN_PM_PX,
>> +     };
>> +
>> +     dst_perf = &op.u.set_pminfo.perf;
>> +
>> +     /* Check freq table and find max frequency */
>> +     for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
>> +             unsigned int freq = table[i].frequency;
>> +             if (freq == CPUFREQ_ENTRY_INVALID)
>> +                     continue;
>> +
>> +             if (table[i].index != state_count || freq <= prev_freq) {
>> +                     pr_err("Frequency table format error\n");
>> +                     return -EINVAL;
>> +             }
>> +
>> +             prev_freq = freq;
>> +             state_count++;
>> +             if (freq > max_freq)
>> +                     max_freq = freq;
>> +     }
>> +
>> +     if (!state_count)
>> +             return -EINVAL;
>> +
>> +     dst_perf->state_count = state_count;
>> +
>> +     dst_states = kcalloc(state_count,
>> +                          sizeof(struct xen_processor_px), GFP_KERNEL);
>> +
>> +     if (!dst_states)
>> +             return -ENOMEM;
>> +
>> +     set_xen_guest_handle(dst_perf->states, dst_states);
>> +
>> +     /*
>> +      * Freq table should start from lower values
>> +      * dst_states should start from higer values
>> +      */
>> +     for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
>> +             unsigned int freq = table[i].frequency;
>> +             unsigned int tbl_index = state_count - 1 - table[i].index;
>> +             if (freq == CPUFREQ_ENTRY_INVALID)
>> +                     continue;
>> +
>> +             if (freq == max_freq)
>> +                     platform_limit = tbl_index;
>> +
>> +             dst_states[tbl_index].core_frequency = freq / 1000;
>> +             dst_states[tbl_index].transition_latency =
>> +                             policy->cpuinfo.transition_latency / 1000;
>> +     }
>> +
>> +     dst_perf->shared_type = policy->shared_type;
>> +     dst_perf->platform_limit = platform_limit;
>> +     dst_perf->domain_info.domain = policy->cpu;
>> +     dst_perf->domain_info.num_processors = xen_nr_cpus;
>> +     dst_perf->flags = XEN_PX_DATA;
>> +
>> +     for_each_xen_cpu(cpu, policy->cpus) {
>> +             op.u.set_pminfo.id = cpu;
>> +             ret = HYPERVISOR_dom0_op(&op);
>> +             if (ret) {
>> +                     pr_debug("Hypervisor error(%d) for CPU%u\n", ret, cpu);
>> +                     goto err_free_states;
>> +             }
>> +             pr_debug("CPU%u - P-states uploaded\n", cpu);
>> +
>> +             for (i = 0; i < dst_perf->state_count; i++) {
>> +                     pr_debug("    state %d: %d MHz, %d uS\n",
>> +                              i, (u32) dst_states[i].core_frequency,
>> +                              (u32) dst_states[i].transition_latency);
>> +             }
>> +     }
>> +
>> +err_free_states:
>> +     kfree(dst_states);
>> +     return ret;
>> +}
>> +
>> +/*
>> + * Returns:
>> + *   Negative: Failure
>> + *   0:        Success
>> + *   Positive: When we have a managed CPU and the sysfs got symlinked
>> + */
>> +static int xen_cpufreq_add_dev_policy(unsigned int cpu,
>> +                               struct cpufreq_policy *policy)
>> +{
>> +     int ret = 0;
>> +#ifdef CONFIG_SMP
>> +     unsigned long flags;
>> +     unsigned int j;
>> +
>> +     for_each_cpu(j, policy->cpus) {
>> +             struct cpufreq_policy *managed_policy;
>> +
>> +             if (cpu == j)
>> +                     continue;
>> +
>> +             /* Check for existing affected CPUs.
>> +              * They may not be aware of it due to CPU Hotplug.
>> +              * cpufreq_cpu_put is called when the device is removed
>> +              * in __cpufreq_remove_dev()
>> +              */
>> +             managed_policy = xen_cpufreq_cpu_get(j);
>> +             if (unlikely(managed_policy)) {
>> +                     /* Set proper policy_cpu */
>> +                     unlock_policy_rwsem_write(cpu);
>> +                     per_cpu(cpufreq_policy_cpu, cpu) =
>> +                                             managed_policy->cpu;
>> +
>> +                     if (lock_policy_rwsem_write(cpu) < 0) {
>> +                             /* Should not go through policy unlock path */
>> +                             if (cpufreq_driver->exit)
>> +                                     cpufreq_driver->exit(policy);
>> +                             xen_cpufreq_cpu_put(managed_policy);
>> +                             return -EBUSY;
>> +                     }
>> +
>> +                     spin_lock_irqsave(&cpufreq_driver_lock, flags);
>> +                     cpumask_copy(managed_policy->cpus, policy->cpus);
>> +                     per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
>> +                     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +
>> +                     pr_debug("CPU already managed, adding link\n");
>> +
>> +                     /*
>> +                      * Success. We only needed to be added to the mask.
>> +                      * Call driver->exit() because only the cpu parent of
>> +                      * the kobj needed to call init().
>> +                      */
>> +                     if (cpufreq_driver->exit)
>> +                             cpufreq_driver->exit(policy);
>> +
>> +                     return 1;
>> +             }
>> +     }
>> +#endif
>> +     return ret;
>> +}
>> +
>> +/**
>> + * xen_cpufreq_add_dev - add a CPU device
>> + *
>> + * Adds the cpufreq interface for a CPU device.
>> + */
>> +static int xen_cpufreq_add_dev(unsigned int cpu)
>> +{
>> +     int ret = 0;
>> +     struct cpufreq_policy *policy;
>> +     unsigned long flags;
>> +     unsigned int j;
>> +
>> +     pr_debug("adding CPU %u\n", cpu);
>> +
>> +#ifdef CONFIG_SMP
>> +     /* check whether a different CPU already registered this
>> +      * CPU because it is in the same boat. */
>> +     policy = xen_cpufreq_cpu_get(cpu);
>> +     if (unlikely(policy)) {
>> +             xen_cpufreq_cpu_put(policy);
>> +             return 0;
>> +     }
>> +#endif
>> +
>> +     if (!try_module_get(cpufreq_driver->owner)) {
>> +             ret = -EINVAL;
>> +             goto module_out;
>> +     }
>> +
>> +     ret = -ENOMEM;
>> +     policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
>> +     if (!policy)
>> +             goto nomem_out;
>> +
>> +     if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
>> +             goto err_free_policy;
>> +
>> +     if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
>> +             goto err_free_cpumask;
>> +
>> +     policy->cpu = cpu;
>> +     cpumask_copy(policy->cpus, cpumask_of(cpu));
>> +
>> +     /* Initially set CPU itself as the policy_cpu */
>> +     per_cpu(cpufreq_policy_cpu, cpu) = cpu;
>> +     ret = (lock_policy_rwsem_write(cpu) < 0);
>> +     WARN_ON(ret);
>> +
>> +     /* call driver. From then on the cpufreq must be able
>> +      * to accept all calls to ->verify and ->setpolicy for this CPU
>> +      */
>> +     ret = cpufreq_driver->init(policy);
>> +     if (ret) {
>> +             pr_debug("initialization failed\n");
>> +             goto err_unlock_policy;
>> +     }
>> +     ret = xen_cpufreq_add_dev_policy(cpu, policy);
>> +     if (ret) {
>> +             if (ret > 0)
>> +                     /* This is a managed cpu, symlink created,
>> +                        exit with 0 */
>> +                     ret = 0;
>> +             goto err_unlock_policy;
>> +     }
>> +
>> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
>> +     for_each_cpu(j, policy->cpus) {
>> +             per_cpu(cpufreq_cpu_data, j) = policy;
>> +             per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
>> +     }
>> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +
>> +     unlock_policy_rwsem_write(cpu);
>> +
>> +     module_put(cpufreq_driver->owner);
>> +     pr_debug("initialization complete\n");
>> +
>> +     return 0;
>> +
>> +err_unlock_policy:
>> +     unlock_policy_rwsem_write(cpu);
>> +     free_cpumask_var(policy->related_cpus);
>> +err_free_cpumask:
>> +     free_cpumask_var(policy->cpus);
>> +err_free_policy:
>> +     kfree(policy);
>> +nomem_out:
>> +     module_put(cpufreq_driver->owner);
>> +module_out:
>> +     return ret;
>> +}
>> +
>> +/**
>> + * __cpufreq_remove_dev - remove a CPU device
>> + *
>> + * Removes the cpufreq interface for a CPU device.
>> + * Caller should already have policy_rwsem in write mode for this CPU.
>> + * This routine frees the rwsem before returning.
>> + */
>> +static int __cpufreq_remove_dev(unsigned int cpu)
>> +{
>> +     unsigned long flags;
>> +     struct cpufreq_policy *data;
>> +#ifdef CONFIG_SMP
>> +     unsigned int j;
>> +#endif
>> +
>> +     pr_debug("unregistering CPU %u\n", cpu);
>> +
>> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
>> +     data = per_cpu(cpufreq_cpu_data, cpu);
>> +
>> +     if (!data) {
>> +             spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +             unlock_policy_rwsem_write(cpu);
>> +             return -EINVAL;
>> +     }
>> +     per_cpu(cpufreq_cpu_data, cpu) = NULL;
>> +
>> +
>> +#ifdef CONFIG_SMP
>> +     /* if this isn't the CPU which is the parent of the kobj, we
>> +      * only need to unlink, put and exit
>> +      */
>> +     if (unlikely(cpu != data->cpu)) {
>> +             pr_debug("removing link\n");
>> +             cpumask_clear_cpu(cpu, data->cpus);
>> +             spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +             xen_cpufreq_cpu_put(data);
>> +             unlock_policy_rwsem_write(cpu);
>> +             return 0;
>> +     }
>> +#endif
>> +
>> +#ifdef CONFIG_SMP
>> +
>> +     /* if we have other CPUs still registered, we need to unlink them,
>> +      * or else wait_for_completion below will lock up. Clean the
>> +      * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
>> +      * the sysfs links afterwards.
>> +      */
>> +     if (unlikely(cpumask_weight(data->cpus) > 1)) {
>> +             for_each_cpu(j, data->cpus) {
>> +                     if (j == cpu)
>> +                             continue;
>> +                     per_cpu(cpufreq_cpu_data, j) = NULL;
>> +             }
>> +     }
>> +
>> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +
>> +     if (unlikely(cpumask_weight(data->cpus) > 1)) {
>> +             for_each_cpu(j, data->cpus) {
>> +                     if (j == cpu)
>> +                             continue;
>> +                     pr_debug("removing link for cpu %u\n", j);
>> +                     unlock_policy_rwsem_write(cpu);
>> +                     lock_policy_rwsem_write(cpu);
>> +                     xen_cpufreq_cpu_put(data);
>> +             }
>> +     }
>> +#else
>> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +#endif
>> +
>> +     unlock_policy_rwsem_write(cpu);
>> +
>> +     lock_policy_rwsem_write(cpu);
>> +     if (cpufreq_driver->exit)
>> +             cpufreq_driver->exit(data);
>> +     unlock_policy_rwsem_write(cpu);
>> +
>> +     free_cpumask_var(data->related_cpus);
>> +     free_cpumask_var(data->cpus);
>> +     kfree(data);
>> +
>> +     return 0;
>> +}
>> +
>> +static int cpufreq_remove_dev(unsigned int cpu)
>> +{
>> +     int retval;
>> +
>> +     if (unlikely(lock_policy_rwsem_write(cpu)))
>> +             BUG();
>> +
>> +     retval = __cpufreq_remove_dev(cpu);
>> +     return retval;
>> +}
>> +
>> +/*********************************************************************
>> + *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
>> + *********************************************************************/
>> +
>> +/**
>> + * adjust_jiffies - adjust the system "loops_per_jiffy"
>> + *
>> + * This function alters the system "loops_per_jiffy" for the clock
>> + * speed change. Note that loops_per_jiffy cannot be updated on SMP
>> + * systems as each CPU might be scaled differently. So, use the arch
>> + * per-CPU loops_per_jiffy value wherever possible.
>> + */
>> +#ifndef CONFIG_SMP
>> +static unsigned long l_p_j_ref;
>> +static unsigned int  l_p_j_ref_freq;
>> +
>> +static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
>> +{
>> +     if (ci->flags & CPUFREQ_CONST_LOOPS)
>> +             return;
>> +
>> +     if (!l_p_j_ref_freq) {
>> +             l_p_j_ref = loops_per_jiffy;
>> +             l_p_j_ref_freq = ci->old;
>> +             pr_debug("saving %lu as reference value for loops_per_jiffy; "
>> +                     "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
>> +     }
>> +     if ((val == CPUFREQ_POSTCHANGE  && ci->old != ci->new) ||
>> +         (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
>> +             loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
>> +                                                             ci->new);
>> +             pr_debug("scaling loops_per_jiffy to %lu "
>> +                     "for frequency %u kHz\n", loops_per_jiffy, ci->new);
>> +     }
>> +}
>> +#else
>> +static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs 
>> *ci)
>> +{
>> +     return;
>> +}
>> +#endif
>
> There is quite a lot of code duplication with cpufreq.c, I don't think
> that is going to be acceptable for the upstream maintainers.
It is very complicated to share a common code path here. Some functions were
copied and modified. I'll try to reduce the duplication in a future revision.

>> +/**
>> + * xen_cpufreq_notify_transition - call notifier chain and adjust_jiffies
>> + * on frequency transition.
>> + *
>> + * This function calls the transition notifiers and the "adjust_jiffies"
>> + * function. It is called twice on all CPU frequency changes that have
>> + * external effects.
>> + */
>> +void xen_cpufreq_notify_transition(struct cpufreq_freqs *freqs,
>> +                                unsigned int state)
>> +{
>> +     struct cpufreq_policy *policy;
>> +
>> +     BUG_ON(irqs_disabled());
>> +
>> +     freqs->flags = cpufreq_driver->flags;
>> +     pr_debug("notification %u of frequency transition to %u kHz\n",
>> +              state, freqs->new);
>> +
>> +     policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
>> +     switch (state) {
>> +     case CPUFREQ_PRECHANGE:
>> +             /* detect if the driver reported a value as "old frequency"
>> +              * which is not equal to what the cpufreq core thinks is
>> +              * "old frequency".
>> +              */
>> +             if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
>> +                     if ((policy) && (policy->cpu == freqs->cpu) &&
>> +                         (policy->cur) && (policy->cur != freqs->old)) {
>> +                             pr_debug("Warning: CPU frequency is"
>> +                                      " %u, cpufreq assumed %u kHz.\n",
>> +                                      freqs->old, policy->cur);
>> +                             freqs->old = policy->cur;
>> +                     }
>> +             }
>> +             srcu_notifier_call_chain(&xen_cpufreq_transition_notifier_list,
>> +                                      CPUFREQ_PRECHANGE, freqs);
>> +             adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
>> +             break;
>> +
>> +     case CPUFREQ_POSTCHANGE:
>> +             adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
>> +             pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new,
>> +                      (unsigned long)freqs->cpu);
>> +             trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
>> +             trace_cpu_frequency(freqs->new, freqs->cpu);
>> +             srcu_notifier_call_chain(&xen_cpufreq_transition_notifier_list,
>> +                                      CPUFREQ_POSTCHANGE, freqs);
>> +             if (likely(policy) && likely(policy->cpu == freqs->cpu))
>> +                     policy->cur = freqs->new;
>> +             break;
>> +     }
>> +}
>> +
>> +/*********************************************************************
>> + *                              GOVERNORS                            *
>> + *********************************************************************/
>> +
>> +int __xen_cpufreq_driver_target(struct cpufreq_policy *policy,
>> +                             unsigned int target_freq,
>> +                             unsigned int relation)
>> +{
>> +     int retval = -EINVAL;
>> +     unsigned int old_target_freq = target_freq;
>> +
>> +     /* Make sure that target_freq is within supported range */
>> +     if (target_freq > policy->max)
>> +             target_freq = policy->max;
>> +     if (target_freq < policy->min)
>> +             target_freq = policy->min;
>> +
>> +     pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
>> +              policy->cpu, target_freq, relation, old_target_freq);
>> +
>> +     if (target_freq == policy->cur)
>> +             return 0;
>> +
>> +     if (cpufreq_driver->target)
>> +             retval = cpufreq_driver->target(policy, target_freq,
>> +                                                 relation);
>> +
>> +     return retval;
>> +}
>> +
>> +int xen_cpufreq_driver_target(struct cpufreq_policy *policy,
>> +                           unsigned int target_freq,
>> +                           unsigned int relation)
>> +{
>> +     int ret = -EINVAL;
>> +
>> +     if (!policy)
>> +             goto no_policy;
>> +
>> +     if (unlikely(lock_policy_rwsem_write(policy->cpu)))
>> +             goto fail;
>> +
>> +     ret = __xen_cpufreq_driver_target(policy, target_freq, relation);
>> +
>> +     unlock_policy_rwsem_write(policy->cpu);
>> +
>> +fail:
>> +     xen_cpufreq_cpu_put(policy);
>> +no_policy:
>> +     return ret;
>> +}
>> +
>> +/*********************************************************************
>> + *                    HANDLE COMMANDS FROM XEN                       *
>> + *********************************************************************/
>> +static void cpufreq_work_hnd(struct work_struct *w);
>> +
>> +static struct workqueue_struct *cpufreq_wq;
>> +static DECLARE_WORK(cpufreq_work, cpufreq_work_hnd);
>> +
>> +static void cpufreq_work_hnd(struct work_struct *w)
>> +{
>> +     int ret;
>> +     struct cpufreq_policy *policy;
>> +     struct cpufreq_sh_info *cpufreq_info;
>> +
>> +     cpufreq_info = &HYPERVISOR_shared_info->arch.cpufreq;
>> +
>> +     policy = xen_cpufreq_cpu_get(cpufreq_info->cpu);
>> +     ret = xen_cpufreq_driver_target(policy,
>> +                                     cpufreq_info->freq,
>> +                                     cpufreq_info->relation);
>> +
>> +     cpufreq_info->result = ret;
>> +}
>
> No barriers? No locking?
I'll add barriers in the next patch-set.

>> +static irqreturn_t cpufreq_interrupt(int irq, void *data)
>> +{
>> +     queue_work(cpufreq_wq, &cpufreq_work);
>> +     return IRQ_HANDLED;
>> +}
>> +
>> +/*********************************************************************
>> + *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
>> + *********************************************************************/
>> +
>> +/**
>> + * xen_cpufreq_register_driver - register a CPU Frequency driver
>> + * @driver_data: A struct cpufreq_driver containing the values#
>> + * submitted by the CPU Frequency driver.
>> + *
>> + *   Registers a CPU Frequency driver to this core code. This code
>> + * returns zero on success, -EBUSY when another driver got here first
>> + * (and isn't unregistered in the meantime).
>> + *
>> + */
>> +int xen_cpufreq_register_driver(struct cpufreq_driver *driver_data)
>> +{
>> +     unsigned long flags;
>> +     int ret;
>> +     unsigned int cpu;
>> +     struct cpufreq_frequency_table *table;
>> +     struct cpufreq_policy *policy;
>> +     cpumask_var_t pushed_cpus;
>> +     int irq;
>> +
>> +     if (!xen_nr_cpus)
>> +             return -EPROBE_DEFER;
>> +
>> +     if (!driver_data || !driver_data->verify || !driver_data->init ||
>> +         (!driver_data->target))
>> +             return -EINVAL;
>> +
>> +     pr_debug("trying to register driver %s\n", driver_data->name);
>> +
>> +     if (driver_data->setpolicy)
>> +             driver_data->flags |= CPUFREQ_CONST_LOOPS;
>> +
>> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
>> +
>> +     if (cpufreq_driver) {
>> +             spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +             return -EBUSY;
>> +     }
>> +     cpufreq_driver = driver_data;
>> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +
>> +     irq = bind_virq_to_irq(VIRQ_CPUFREQ, 0);
>> +     if (irq < 0) {
>> +             pr_err("Bind virq (%d) error (%d)\n", VIRQ_CPUFREQ, irq);
>> +             ret = irq;
>> +             goto err_remove_drv;
>> +     }
>> +
>> +     irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN|IRQ_NOPROBE);
>> +
>> +     ret = request_irq(irq, cpufreq_interrupt, 0,
>> +                        "xen_cpufreq", NULL);
>> +
>> +     if (ret < 0) {
>> +             pr_err("Request irq (%d) error (%d)\n", irq, ret);
>> +             goto err_unbind_from_irqhnd;
>> +     }
>> +
>> +     xen_irq = irq;
>> +
>> +     for (cpu = 0; cpu < xen_nr_cpus; cpu++) {
>> +             ret = xen_cpufreq_add_dev(cpu);
>> +             if (ret)
>> +                     goto err_remove_cpu;
>> +     }
>> +
>> +     if (!zalloc_cpumask_var(&pushed_cpus, GFP_KERNEL))
>> +             goto err_remove_cpu;
>> +
>> +     for (cpu = 0; cpu < xen_nr_cpus; cpu++) {
>> +             if (cpumask_test_cpu(cpu, pushed_cpus))
>> +                     continue;
>> +
>> +             policy = xen_cpufreq_cpu_get(cpu);
>> +             if (!policy) {
>> +                     ret = -EINVAL;
>> +                     goto err_free_cpumask;
>> +             }
>> +
>> +             cpumask_or(pushed_cpus, pushed_cpus, policy->cpus);
>> +             table = cpufreq_frequency_get_table(policy->cpu);
>> +             if (!table) {
>> +                     ret = -EINVAL;
>> +                     goto err_free_cpumask;
>> +             }
>> +
>> +             ret = push_data_to_hypervisor(policy, table);
>> +             if (ret)
>> +                     goto err_free_cpumask;
>> +     }
>> +
>> +     free_cpumask_var(pushed_cpus);
>> +
>> +     pr_debug("driver %s up and running\n", driver_data->name);
>> +
>> +     return 0;
>> +
>> +err_free_cpumask:
>> +     free_cpumask_var(pushed_cpus);
>> +err_remove_cpu:
>> +     for (cpu = 0; cpu < xen_nr_cpus; cpu++)
>> +             cpufreq_remove_dev(cpu);
>> +err_unbind_from_irqhnd:
>> +     unbind_from_irqhandler(irq, NULL);
>> +err_remove_drv:
>> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
>> +     cpufreq_driver = NULL;
>> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +     return ret;
>> +}
>> +
>> +/**
>> + * xen_cpufreq_unregister_driver - unregister the current CPUFreq driver
>> + *
>> + *    Unregister the current CPUFreq driver. Only call this if you have
>> + * the right to do so, i.e. if you have succeeded in initialising before!
>> + * Returns zero if successful, and -EINVAL if the cpufreq_driver is
>> + * currently not initialised.
>> + */
>> +int xen_cpufreq_unregister_driver(struct cpufreq_driver *driver)
>> +{
>> +     unsigned long flags;
>> +     unsigned int cpu;
>> +
>> +     if (!cpufreq_driver || (driver != cpufreq_driver))
>> +             return -EINVAL;
>> +
>> +     pr_debug("unregistering driver %s\n", driver->name);
>> +
>> +     unbind_from_irqhandler(xen_irq, NULL);
>> +
>> +     for (cpu = 0; cpu < xen_nr_cpus; cpu++)
>> +             cpufreq_remove_dev(cpu);
>> +
>> +     spin_lock_irqsave(&cpufreq_driver_lock, flags);
>> +     cpufreq_driver = NULL;
>> +     spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +
>> +     return 0;
>> +}
>> +
>> +struct cpufreq_drv_ops xen_cpufreq_drv_ops = {
>> +     .notify_transition = xen_cpufreq_notify_transition,
>> +     .register_driver = xen_cpufreq_register_driver,
>> +     .unregister_driver = xen_cpufreq_unregister_driver,
>> +};
>> +
>> +static int __init xen_cpufreq_init(void)
>> +{
>> +     int ret;
>> +     int i;
>> +
>> +     struct xen_sysctl op = {
>> +             .cmd                    = XEN_SYSCTL_physinfo,
>> +             .interface_version      = XEN_SYSCTL_INTERFACE_VERSION,
>> +     };
>> +
>> +     ret = HYPERVISOR_sysctl(&op);
>> +     if (ret) {
>> +             pr_err("Hypervisor get physinfo error (%d)\n", ret);
>> +             return ret;
>> +     }
>> +
>> +     xen_nr_cpus = op.u.physinfo.nr_cpus;
>> +     if (xen_nr_cpus == 0 || xen_nr_cpus > NR_CPUS) {
>> +             xen_nr_cpus = 0;
>> +             pr_err("Wrong CPUs amount (%d)\n", xen_nr_cpus);
>> +             return -EINVAL;
>> +     }
>> +
>> +     for (i = 0; i < xen_nr_cpus; i++) {
>> +             per_cpu(cpufreq_policy_cpu, i) = -1;
>> +             init_rwsem(&per_cpu(cpu_policy_rwsem, i));
>> +     }
>> +
>> +     cpufreq_wq = alloc_workqueue("xen_cpufreq", 0, 1);
>> +     if (!cpufreq_wq) {
>> +             pr_err("Create workqueue error\n");
>> +             ret = -ENOMEM;
>> +             goto err_create_wq;
>> +     }
>> +
>> +     return 0;
>> +
>> +err_create_wq:
>> +     xen_nr_cpus = 0;
>> +     return ret;
>> +}
>> +
>> +MODULE_AUTHOR("Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>");
>> +MODULE_DESCRIPTION("Xen cpufreq driver which uploads PM data to Xen hypervisor");
>> +MODULE_LICENSE("GPL");
>> +
>> +core_initcall(xen_cpufreq_init);
>> diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
>> index c57d5f6..ee3b154 100644
>> --- a/include/xen/interface/platform.h
>> +++ b/include/xen/interface/platform.h
>> @@ -209,6 +209,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
>>  #define XEN_PX_PSS   2
>>  #define XEN_PX_PPC   4
>>  #define XEN_PX_PSD   8
>> +#define XEN_PX_DATA  16
>>
>>  struct xen_power_register {
>>       uint32_t     space_id;
>> diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
>> index cf64566..9133110 100644
>> --- a/include/xen/interface/xen.h
>> +++ b/include/xen/interface/xen.h
>> @@ -81,6 +81,7 @@
>>  #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
>>  #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
>>  #define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                   */
>> +#define VIRQ_CPUFREQ    14 /* (DOM0) Notify cpufreq driver                */
>>
>>  /* Architecture-specific VIRQ definitions. */
>>  #define VIRQ_ARCH_0    16

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.