[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [RFC PATCH v4 9/9] xen/arm: cpufreq: add xen-cpufreq driver
On Tue, 4 Nov 2014, Oleksandr Dmytryshyn wrote: > Xen changes frequencies on CPUs using this high-level > cpufreq driver. > > Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx> You CC'd the wrong email address for Rafael in the entire series. > drivers/cpufreq/Kconfig | 20 + > drivers/cpufreq/Makefile | 1 + > drivers/cpufreq/cpufreq_drv_ops.c | 13 +- > drivers/cpufreq/cpufreq_drv_ops.h | 4 + > drivers/cpufreq/xen-cpufreq.c | 869 > ++++++++++++++++++++++++++++++++++++++ > include/xen/interface/platform.h | 1 + > include/xen/interface/xen.h | 1 + > 7 files changed, 907 insertions(+), 2 deletions(-) > create mode 100644 drivers/cpufreq/xen-cpufreq.c > > diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig > index f5a8f84..4847d8a 100644 > --- a/drivers/cpufreq/Kconfig > +++ b/drivers/cpufreq/Kconfig > @@ -19,6 +19,26 @@ config CPU_FREQ > > If in doubt, say N. > > +config XEN_CPUFREQ > + bool "Xen Cpufreq driver" > + depends on XEN_DOM0 > + depends on !CPUMASK_OFFSTACK > + default n > + select CPUFREQ_DRV_OPS > + help > + This driver uploads Power Management information to the Xen > + hypervisor and changes CPUs frequency using CPU Frequency scaling > + drivers. > + > + To do that the driver uses CPU Frequency scaling drivers to parse > + the Power Management data and uploads said information to the Xen > + hypervisor. Then the Xen hypervisor can select the proper Pxx states. > + > + Then the Xen hypervisor can change CPUs frequency by giving commands > + via this driver to the CPU Frequency scaling driver. > + > + If in doubt, say N.
> + > if CPUFREQ_DRV_OPS > > config CPU_FREQ_TABLE > diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile > index f12a0d3..c8d5037 100644 > --- a/drivers/cpufreq/Makefile > +++ b/drivers/cpufreq/Makefile > @@ -1,5 +1,6 @@ > # CPUfreq core > obj-$(CONFIG_CPU_FREQ) += cpufreq.o > +obj-$(CONFIG_XEN_CPUFREQ) += xen-cpufreq.o > obj-$(CONFIG_CPUFREQ_DRV_OPS) += cpufreq_drv_ops.o > # CPUfreq stats > obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o > diff --git a/drivers/cpufreq/cpufreq_drv_ops.c > b/drivers/cpufreq/cpufreq_drv_ops.c > index c971442..71c3357 100644 > --- a/drivers/cpufreq/cpufreq_drv_ops.c > +++ b/drivers/cpufreq/cpufreq_drv_ops.c > @@ -18,6 +18,8 @@ > #include <linux/init.h> > #include <linux/export.h> > > +#include <xen/xen.h> > + > static struct cpufreq_drv_ops *ops; > > struct kobject *get_cpufreq_global_kobject(void) > @@ -177,10 +179,17 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); > > static int __init cpufreq_drv_ops_init(void) > { > + if (xen_initial_domain()) { > +#ifdef CONFIG_XEN_CPUFREQ > + ops = &xen_cpufreq_drv_ops; > + pr_debug("using xen_cpufreq_drv_ops\n"); > +#endif > + } else { > #ifdef CONFIG_CPU_FREQ > - ops = &kern_cpufreq_drv_ops; > - pr_debug("using kern_cpufreq_drv_ops\n"); > + ops = &kern_cpufreq_drv_ops; > + pr_debug("using kern_cpufreq_drv_ops\n"); > #endif > + } > > return 0; > } > diff --git a/drivers/cpufreq/cpufreq_drv_ops.h > b/drivers/cpufreq/cpufreq_drv_ops.h > index 5cc8e05..d02d509 100644 > --- a/drivers/cpufreq/cpufreq_drv_ops.h > +++ b/drivers/cpufreq/cpufreq_drv_ops.h > @@ -47,4 +47,8 @@ struct cpufreq_drv_ops { > extern struct cpufreq_drv_ops kern_cpufreq_drv_ops; > #endif > > +#ifdef CONFIG_XEN_CPUFREQ > +extern struct cpufreq_drv_ops xen_cpufreq_drv_ops; > +#endif > + > #endif /* _CPUFREQ_DRV_OPS_H */ > diff --git a/drivers/cpufreq/xen-cpufreq.c b/drivers/cpufreq/xen-cpufreq.c > new file mode 100644 > index 0000000..21062c7 > --- /dev/null > +++ b/drivers/cpufreq/xen-cpufreq.c > @@ -0,0 +1,869 @@ > 
+/* > + * Copyright (C) 2001 Russell King > + * (C) 2002 - 2003 Dominik Brodowski <linux@xxxxxxxx> > + * > + * Oct 2005 - Ashok Raj <ashok.raj@xxxxxxxxx> > + * Added handling for CPU hotplug > + * Feb 2006 - Jacob Shin <jacob.shin@xxxxxxx> > + * Fix handling for CPU hotplug -- affected CPUs > + * > + * (C) 2014 GlobalLogic Inc. > + * > + * Based on drivers/cpufreq/cpufreq.c > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. > + */ > + > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > + > +#include <linux/kernel.h> > +#include <linux/module.h> > +#include <linux/init.h> > +#include <linux/notifier.h> > +#include <linux/types.h> > +#include <linux/slab.h> > +#include <linux/mutex.h> > +#include <linux/irq.h> > +#include <linux/workqueue.h> > +#include <linux/cpufreq.h> > + > +#include <trace/events/power.h> > + > +#include <xen/xen.h> > +#include <xen/events.h> > +#include <xen/interface/xen.h> > +#include <xen/interface/platform.h> > +#include <xen/interface/sysctl.h> > +#include <asm/xen/hypercall.h> > +#include <asm/xen/hypervisor.h> > + > +#include "cpufreq_drv_ops.h" > + > +static int xen_nr_cpus; > +static int xen_irq; > + > +#define for_each_xen_cpu(cpu, mask) \ > + for ((cpu) = -1; \ > + (cpu) = cpumask_next((cpu), (mask)), \ > + (cpu) < xen_nr_cpus;) > + > +static struct cpufreq_driver *cpufreq_driver; > +static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); > + > +static DEFINE_SPINLOCK(cpufreq_driver_lock); > + > +/* > + * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure > + * all 
cpufreq/hotplug/workqueue/etc related lock issues. > + * > + * The rules for this semaphore: > + * - Any routine that wants to read from the policy structure will > + * do a down_read on this semaphore. > + * - Any routine that will write to the policy structure and/or may take away > + * the policy altogether (eg. CPU hotplug), will hold this lock in write > + * mode before doing so. > + * > + * Additional rules: > + * - Governor routines that can be called in cpufreq hotplug path should not > + * take this sem as top level hotplug notifier handler takes this. > + * - Lock should not be held across > + * __cpufreq_governor(data, CPUFREQ_GOV_STOP); > + */ > +static DEFINE_PER_CPU(int, cpufreq_policy_cpu); > +static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); > + > +#define lock_policy_rwsem(mode, cpu) \ > +static int lock_policy_rwsem_##mode \ > +(int cpu) \ > +{ \ > + int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \ > + BUG_ON(policy_cpu == -1); \ > + down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ > + \ > + return 0; \ > +} > + > +lock_policy_rwsem(write, cpu); > + > +static void unlock_policy_rwsem_write(int cpu) > +{ > + int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); > + BUG_ON(policy_cpu == -1); > + up_write(&per_cpu(cpu_policy_rwsem, policy_cpu)); > +} > + > +/** > + * The "transition" notifier list for kernel code that needs to handle > + * changes to devices when the CPU clock speed changes. > + * The mutex locks this list. 
> + */ > +static struct srcu_notifier_head xen_cpufreq_transition_notifier_list; > + > +static bool init_cpufreq_transition_notifier_list_called; > +static int __init init_cpufreq_transition_notifier_list(void) > +{ > + srcu_init_notifier_head(&xen_cpufreq_transition_notifier_list); > + init_cpufreq_transition_notifier_list_called = true; > + return 0; > +} > +pure_initcall(init_cpufreq_transition_notifier_list); > + > +static struct cpufreq_policy *xen_cpufreq_cpu_get(unsigned int cpu) > +{ > + struct cpufreq_policy *data = NULL; > + unsigned long flags; > + > + if (cpu >= xen_nr_cpus) > + goto err_out; > + > + /* get the cpufreq driver */ > + spin_lock_irqsave(&cpufreq_driver_lock, flags); > + > + if (!cpufreq_driver) > + goto err_out_unlock; > + > + /* get the CPU */ > + data = per_cpu(cpufreq_cpu_data, cpu); > + > +err_out_unlock: > + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); > +err_out: > + return data; > +} > + > +static void xen_cpufreq_cpu_put(struct cpufreq_policy *data) > +{ > + module_put(cpufreq_driver->owner); > +} > + > +static int push_data_to_hypervisor(struct cpufreq_policy *policy, > + struct cpufreq_frequency_table *table) > +{ > + int ret = 0; > + unsigned int i; > + unsigned int cpu; > + uint32_t platform_limit = 0; > + unsigned int max_freq = 0; > + unsigned int state_count = 0; > + unsigned int prev_freq = 0; > + struct xen_processor_px *dst_states; > + struct xen_processor_performance *dst_perf; > + struct xen_platform_op op = { > + .cmd = XENPF_set_processor_pminfo, > + .interface_version = XENPF_INTERFACE_VERSION, > + .u.set_pminfo.type = XEN_PM_PX, > + }; > + > + dst_perf = &op.u.set_pminfo.perf; > + > + /* Check freq table and find max frequency */ > + for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { > + unsigned int freq = table[i].frequency; > + if (freq == CPUFREQ_ENTRY_INVALID) > + continue; > + > + if (table[i].index != state_count || freq <= prev_freq) { > + pr_err("Frequency table format error\n"); > + 
return -EINVAL; > + } > + > + prev_freq = freq; > + state_count++; > + if (freq > max_freq) > + max_freq = freq; > + } > + > + if (!state_count) > + return -EINVAL; > + > + dst_perf->state_count = state_count; > + > + dst_states = kcalloc(state_count, > + sizeof(struct xen_processor_px), GFP_KERNEL); > + > + if (!dst_states) > + return -ENOMEM; > + > + set_xen_guest_handle(dst_perf->states, dst_states); > + > + /* > + * Freq table should start from lower values > + * dst_states should start from higer values > + */ > + for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { > + unsigned int freq = table[i].frequency; > + unsigned int tbl_index = state_count - 1 - table[i].index; > + if (freq == CPUFREQ_ENTRY_INVALID) > + continue; > + > + if (freq == max_freq) > + platform_limit = tbl_index; > + > + dst_states[tbl_index].core_frequency = freq / 1000; > + dst_states[tbl_index].transition_latency = > + policy->cpuinfo.transition_latency / 1000; > + } > + > + dst_perf->shared_type = policy->shared_type; > + dst_perf->platform_limit = platform_limit; > + dst_perf->domain_info.domain = policy->cpu; > + dst_perf->domain_info.num_processors = xen_nr_cpus; > + dst_perf->flags = XEN_PX_DATA; > + > + for_each_xen_cpu(cpu, policy->cpus) { > + op.u.set_pminfo.id = cpu; > + ret = HYPERVISOR_dom0_op(&op); > + if (ret) { > + pr_debug("Hypervisor error(%d) for CPU%u\n", ret, cpu); > + goto err_free_states; > + } > + pr_debug("CPU%u - P-states uploaded\n", cpu); > + > + for (i = 0; i < dst_perf->state_count; i++) { > + pr_debug(" state %d: %d MHz, %d uS\n", > + i, (u32) dst_states[i].core_frequency, > + (u32) dst_states[i].transition_latency); > + } > + } > + > +err_free_states: > + kfree(dst_states); > + return ret; > +} > + > +/* > + * Returns: > + * Negative: Failure > + * 0: Success > + * Positive: When we have a managed CPU and the sysfs got symlinked > + */ > +static int xen_cpufreq_add_dev_policy(unsigned int cpu, > + struct cpufreq_policy *policy) > +{ > + int ret = 
0; > +#ifdef CONFIG_SMP > + unsigned long flags; > + unsigned int j; > + > + for_each_cpu(j, policy->cpus) { > + struct cpufreq_policy *managed_policy; > + > + if (cpu == j) > + continue; > + > + /* Check for existing affected CPUs. > + * They may not be aware of it due to CPU Hotplug. > + * cpufreq_cpu_put is called when the device is removed > + * in __cpufreq_remove_dev() > + */ > + managed_policy = xen_cpufreq_cpu_get(j); > + if (unlikely(managed_policy)) { > + /* Set proper policy_cpu */ > + unlock_policy_rwsem_write(cpu); > + per_cpu(cpufreq_policy_cpu, cpu) = > + managed_policy->cpu; > + > + if (lock_policy_rwsem_write(cpu) < 0) { > + /* Should not go through policy unlock path */ > + if (cpufreq_driver->exit) > + cpufreq_driver->exit(policy); > + xen_cpufreq_cpu_put(managed_policy); > + return -EBUSY; > + } > + > + spin_lock_irqsave(&cpufreq_driver_lock, flags); > + cpumask_copy(managed_policy->cpus, policy->cpus); > + per_cpu(cpufreq_cpu_data, cpu) = managed_policy; > + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); > + > + pr_debug("CPU already managed, adding link\n"); > + > + /* > + * Success. We only needed to be added to the mask. > + * Call driver->exit() because only the cpu parent of > + * the kobj needed to call init(). > + */ > + if (cpufreq_driver->exit) > + cpufreq_driver->exit(policy); > + > + return 1; > + } > + } > +#endif > + return ret; > +} > + > +/** > + * xen_cpufreq_add_dev - add a CPU device > + * > + * Adds the cpufreq interface for a CPU device. > + */ > +static int xen_cpufreq_add_dev(unsigned int cpu) > +{ > + int ret = 0; > + struct cpufreq_policy *policy; > + unsigned long flags; > + unsigned int j; > + > + pr_debug("adding CPU %u\n", cpu); > + > +#ifdef CONFIG_SMP > + /* check whether a different CPU already registered this > + * CPU because it is in the same boat. 
*/ > + policy = xen_cpufreq_cpu_get(cpu); > + if (unlikely(policy)) { > + xen_cpufreq_cpu_put(policy); > + return 0; > + } > +#endif > + > + if (!try_module_get(cpufreq_driver->owner)) { > + ret = -EINVAL; > + goto module_out; > + } > + > + ret = -ENOMEM; > + policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); > + if (!policy) > + goto nomem_out; > + > + if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) > + goto err_free_policy; > + > + if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) > + goto err_free_cpumask; > + > + policy->cpu = cpu; > + cpumask_copy(policy->cpus, cpumask_of(cpu)); > + > + /* Initially set CPU itself as the policy_cpu */ > + per_cpu(cpufreq_policy_cpu, cpu) = cpu; > + ret = (lock_policy_rwsem_write(cpu) < 0); > + WARN_ON(ret); > + > + /* call driver. From then on the cpufreq must be able > + * to accept all calls to ->verify and ->setpolicy for this CPU > + */ > + ret = cpufreq_driver->init(policy); > + if (ret) { > + pr_debug("initialization failed\n"); > + goto err_unlock_policy; > + } > + ret = xen_cpufreq_add_dev_policy(cpu, policy); > + if (ret) { > + if (ret > 0) > + /* This is a managed cpu, symlink created, > + exit with 0 */ > + ret = 0; > + goto err_unlock_policy; > + } > + > + spin_lock_irqsave(&cpufreq_driver_lock, flags); > + for_each_cpu(j, policy->cpus) { > + per_cpu(cpufreq_cpu_data, j) = policy; > + per_cpu(cpufreq_policy_cpu, j) = policy->cpu; > + } > + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); > + > + unlock_policy_rwsem_write(cpu); > + > + module_put(cpufreq_driver->owner); > + pr_debug("initialization complete\n"); > + > + return 0; > + > +err_unlock_policy: > + unlock_policy_rwsem_write(cpu); > + free_cpumask_var(policy->related_cpus); > +err_free_cpumask: > + free_cpumask_var(policy->cpus); > +err_free_policy: > + kfree(policy); > +nomem_out: > + module_put(cpufreq_driver->owner); > +module_out: > + return ret; > +} > + > +/** > + * __cpufreq_remove_dev - remove a CPU device > + * > + * 
Removes the cpufreq interface for a CPU device. > + * Caller should already have policy_rwsem in write mode for this CPU. > + * This routine frees the rwsem before returning. > + */ > +static int __cpufreq_remove_dev(unsigned int cpu) > +{ > + unsigned long flags; > + struct cpufreq_policy *data; > +#ifdef CONFIG_SMP > + unsigned int j; > +#endif > + > + pr_debug("unregistering CPU %u\n", cpu); > + > + spin_lock_irqsave(&cpufreq_driver_lock, flags); > + data = per_cpu(cpufreq_cpu_data, cpu); > + > + if (!data) { > + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); > + unlock_policy_rwsem_write(cpu); > + return -EINVAL; > + } > + per_cpu(cpufreq_cpu_data, cpu) = NULL; > + > + > +#ifdef CONFIG_SMP > + /* if this isn't the CPU which is the parent of the kobj, we > + * only need to unlink, put and exit > + */ > + if (unlikely(cpu != data->cpu)) { > + pr_debug("removing link\n"); > + cpumask_clear_cpu(cpu, data->cpus); > + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); > + xen_cpufreq_cpu_put(data); > + unlock_policy_rwsem_write(cpu); > + return 0; > + } > +#endif > + > +#ifdef CONFIG_SMP > + > + /* if we have other CPUs still registered, we need to unlink them, > + * or else wait_for_completion below will lock up. Clean the > + * per_cpu(cpufreq_cpu_data) while holding the lock, and remove > + * the sysfs links afterwards. 
> + */ > + if (unlikely(cpumask_weight(data->cpus) > 1)) { > + for_each_cpu(j, data->cpus) { > + if (j == cpu) > + continue; > + per_cpu(cpufreq_cpu_data, j) = NULL; > + } > + } > + > + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); > + > + if (unlikely(cpumask_weight(data->cpus) > 1)) { > + for_each_cpu(j, data->cpus) { > + if (j == cpu) > + continue; > + pr_debug("removing link for cpu %u\n", j); > + unlock_policy_rwsem_write(cpu); > + lock_policy_rwsem_write(cpu); > + xen_cpufreq_cpu_put(data); > + } > + } > +#else > + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); > +#endif > + > + unlock_policy_rwsem_write(cpu); > + > + lock_policy_rwsem_write(cpu); > + if (cpufreq_driver->exit) > + cpufreq_driver->exit(data); > + unlock_policy_rwsem_write(cpu); > + > + free_cpumask_var(data->related_cpus); > + free_cpumask_var(data->cpus); > + kfree(data); > + > + return 0; > +} > + > +static int cpufreq_remove_dev(unsigned int cpu) > +{ > + int retval; > + > + if (unlikely(lock_policy_rwsem_write(cpu))) > + BUG(); > + > + retval = __cpufreq_remove_dev(cpu); > + return retval; > +} > + > +/********************************************************************* > + * EXTERNALLY AFFECTING FREQUENCY CHANGES * > + *********************************************************************/ > + > +/** > + * adjust_jiffies - adjust the system "loops_per_jiffy" > + * > + * This function alters the system "loops_per_jiffy" for the clock > + * speed change. Note that loops_per_jiffy cannot be updated on SMP > + * systems as each CPU might be scaled differently. So, use the arch > + * per-CPU loops_per_jiffy value wherever possible. 
> + */ > +#ifndef CONFIG_SMP > +static unsigned long l_p_j_ref; > +static unsigned int l_p_j_ref_freq; > + > +static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) > +{ > + if (ci->flags & CPUFREQ_CONST_LOOPS) > + return; > + > + if (!l_p_j_ref_freq) { > + l_p_j_ref = loops_per_jiffy; > + l_p_j_ref_freq = ci->old; > + pr_debug("saving %lu as reference value for loops_per_jiffy; " > + "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq); > + } > + if ((val == CPUFREQ_POSTCHANGE && ci->old != ci->new) || > + (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { > + loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, > + ci->new); > + pr_debug("scaling loops_per_jiffy to %lu " > + "for frequency %u kHz\n", loops_per_jiffy, ci->new); > + } > +} > +#else > +static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs > *ci) > +{ > + return; > +} > +#endif There is quite a lot of code duplication with cpufreq.c; I don't think that is going to be acceptable for the upstream maintainers. > +/** > + * xen_cpufreq_notify_transition - call notifier chain and adjust_jiffies > + * on frequency transition. > + * > + * This function calls the transition notifiers and the "adjust_jiffies" > + * function. It is called twice on all CPU frequency changes that have > + * external effects. > + */ > +void xen_cpufreq_notify_transition(struct cpufreq_freqs *freqs, > + unsigned int state) > +{ > + struct cpufreq_policy *policy; > + > + BUG_ON(irqs_disabled()); > + > + freqs->flags = cpufreq_driver->flags; > + pr_debug("notification %u of frequency transition to %u kHz\n", > + state, freqs->new); > + > + policy = per_cpu(cpufreq_cpu_data, freqs->cpu); > + switch (state) { > + case CPUFREQ_PRECHANGE: > + /* detect if the driver reported a value as "old frequency" > + * which is not equal to what the cpufreq core thinks is > + * "old frequency".
> + */ > + if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { > + if ((policy) && (policy->cpu == freqs->cpu) && > + (policy->cur) && (policy->cur != freqs->old)) { > + pr_debug("Warning: CPU frequency is" > + " %u, cpufreq assumed %u kHz.\n", > + freqs->old, policy->cur); > + freqs->old = policy->cur; > + } > + } > + srcu_notifier_call_chain(&xen_cpufreq_transition_notifier_list, > + CPUFREQ_PRECHANGE, freqs); > + adjust_jiffies(CPUFREQ_PRECHANGE, freqs); > + break; > + > + case CPUFREQ_POSTCHANGE: > + adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); > + pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new, > + (unsigned long)freqs->cpu); > + trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu); > + trace_cpu_frequency(freqs->new, freqs->cpu); > + srcu_notifier_call_chain(&xen_cpufreq_transition_notifier_list, > + CPUFREQ_POSTCHANGE, freqs); > + if (likely(policy) && likely(policy->cpu == freqs->cpu)) > + policy->cur = freqs->new; > + break; > + } > +} > + > +/********************************************************************* > + * GOVERNORS * > + *********************************************************************/ > + > +int __xen_cpufreq_driver_target(struct cpufreq_policy *policy, > + unsigned int target_freq, > + unsigned int relation) > +{ > + int retval = -EINVAL; > + unsigned int old_target_freq = target_freq; > + > + /* Make sure that target_freq is within supported range */ > + if (target_freq > policy->max) > + target_freq = policy->max; > + if (target_freq < policy->min) > + target_freq = policy->min; > + > + pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", > + policy->cpu, target_freq, relation, old_target_freq); > + > + if (target_freq == policy->cur) > + return 0; > + > + if (cpufreq_driver->target) > + retval = cpufreq_driver->target(policy, target_freq, > + relation); > + > + return retval; > +} > + > +int xen_cpufreq_driver_target(struct cpufreq_policy *policy, > + unsigned int target_freq, > + unsigned int 
relation) > +{ > + int ret = -EINVAL; > + > + if (!policy) > + goto no_policy; > + > + if (unlikely(lock_policy_rwsem_write(policy->cpu))) > + goto fail; > + > + ret = __xen_cpufreq_driver_target(policy, target_freq, relation); > + > + unlock_policy_rwsem_write(policy->cpu); > + > +fail: > + xen_cpufreq_cpu_put(policy); > +no_policy: > + return ret; > +} > + > +/********************************************************************* > + * HANDLE COMMANDS FROM XEN * > + *********************************************************************/ > +static void cpufreq_work_hnd(struct work_struct *w); > + > +static struct workqueue_struct *cpufreq_wq; > +static DECLARE_WORK(cpufreq_work, cpufreq_work_hnd); > + > +static void cpufreq_work_hnd(struct work_struct *w) > +{ > + int ret; > + struct cpufreq_policy *policy; > + struct cpufreq_sh_info *cpufreq_info; > + > + cpufreq_info = &HYPERVISOR_shared_info->arch.cpufreq; > + > + policy = xen_cpufreq_cpu_get(cpufreq_info->cpu); > + ret = xen_cpufreq_driver_target(policy, > + cpufreq_info->freq, > + cpufreq_info->relation); > + > + cpufreq_info->result = ret; > +} No barriers? No locking? > +static irqreturn_t cpufreq_interrupt(int irq, void *data) > +{ > + queue_work(cpufreq_wq, &cpufreq_work); > + return IRQ_HANDLED; > +} > + > +/********************************************************************* > + * REGISTER / UNREGISTER CPUFREQ DRIVER * > + *********************************************************************/ > + > +/** > + * xen_cpufreq_register_driver - register a CPU Frequency driver > + * @driver_data: A struct cpufreq_driver containing the values# > + * submitted by the CPU Frequency driver. > + * > + * Registers a CPU Frequency driver to this core code. This code > + * returns zero on success, -EBUSY when another driver got here first > + * (and isn't unregistered in the meantime). 
> + * > + */ > +int xen_cpufreq_register_driver(struct cpufreq_driver *driver_data) > +{ > + unsigned long flags; > + int ret; > + unsigned int cpu; > + struct cpufreq_frequency_table *table; > + struct cpufreq_policy *policy; > + cpumask_var_t pushed_cpus; > + int irq; > + > + if (!xen_nr_cpus) > + return -EPROBE_DEFER; > + > + if (!driver_data || !driver_data->verify || !driver_data->init || > + (!driver_data->target)) > + return -EINVAL; > + > + pr_debug("trying to register driver %s\n", driver_data->name); > + > + if (driver_data->setpolicy) > + driver_data->flags |= CPUFREQ_CONST_LOOPS; > + > + spin_lock_irqsave(&cpufreq_driver_lock, flags); > + > + if (cpufreq_driver) { > + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); > + return -EBUSY; > + } > + cpufreq_driver = driver_data; > + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); > + > + irq = bind_virq_to_irq(VIRQ_CPUFREQ, 0); > + if (irq < 0) { > + pr_err("Bind virq (%d) error (%d)\n", VIRQ_CPUFREQ, irq); > + ret = irq; > + goto err_remove_drv; > + } > + > + irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN|IRQ_NOPROBE); > + > + ret = request_irq(irq, cpufreq_interrupt, 0, > + "xen_cpufreq", NULL); > + > + if (ret < 0) { > + pr_err("Request irq (%d) error (%d)\n", irq, ret); > + goto err_unbind_from_irqhnd; > + } > + > + xen_irq = irq; > + > + for (cpu = 0; cpu < xen_nr_cpus; cpu++) { > + ret = xen_cpufreq_add_dev(cpu); > + if (ret) > + goto err_remove_cpu; > + } > + > + if (!zalloc_cpumask_var(&pushed_cpus, GFP_KERNEL)) > + goto err_remove_cpu; > + > + for (cpu = 0; cpu < xen_nr_cpus; cpu++) { > + if (cpumask_test_cpu(cpu, pushed_cpus)) > + continue; > + > + policy = xen_cpufreq_cpu_get(cpu); > + if (!policy) { > + ret = -EINVAL; > + goto err_free_cpumask; > + } > + > + cpumask_or(pushed_cpus, pushed_cpus, policy->cpus); > + table = cpufreq_frequency_get_table(policy->cpu); > + if (!table) { > + ret = -EINVAL; > + goto err_free_cpumask; > + } > + > + ret = push_data_to_hypervisor(policy, 
table); > + if (ret) > + goto err_free_cpumask; > + } > + > + free_cpumask_var(pushed_cpus); > + > + pr_debug("driver %s up and running\n", driver_data->name); > + > + return 0; > + > +err_free_cpumask: > + free_cpumask_var(pushed_cpus); > +err_remove_cpu: > + for (cpu = 0; cpu < xen_nr_cpus; cpu++) > + cpufreq_remove_dev(cpu); > +err_unbind_from_irqhnd: > + unbind_from_irqhandler(irq, NULL); > +err_remove_drv: > + spin_lock_irqsave(&cpufreq_driver_lock, flags); > + cpufreq_driver = NULL; > + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); > + return ret; > +} > + > +/** > + * xen_cpufreq_unregister_driver - unregister the current CPUFreq driver > + * > + * Unregister the current CPUFreq driver. Only call this if you have > + * the right to do so, i.e. if you have succeeded in initialising before! > + * Returns zero if successful, and -EINVAL if the cpufreq_driver is > + * currently not initialised. > + */ > +int xen_cpufreq_unregister_driver(struct cpufreq_driver *driver) > +{ > + unsigned long flags; > + unsigned int cpu; > + > + if (!cpufreq_driver || (driver != cpufreq_driver)) > + return -EINVAL; > + > + pr_debug("unregistering driver %s\n", driver->name); > + > + unbind_from_irqhandler(xen_irq, NULL); > + > + for (cpu = 0; cpu < xen_nr_cpus; cpu++) > + cpufreq_remove_dev(cpu); > + > + spin_lock_irqsave(&cpufreq_driver_lock, flags); > + cpufreq_driver = NULL; > + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); > + > + return 0; > +} > + > +struct cpufreq_drv_ops xen_cpufreq_drv_ops = { > + .notify_transition = xen_cpufreq_notify_transition, > + .register_driver = xen_cpufreq_register_driver, > + .unregister_driver = xen_cpufreq_unregister_driver, > +}; > + > +static int __init xen_cpufreq_init(void) > +{ > + int ret; > + int i; > + > + struct xen_sysctl op = { > + .cmd = XEN_SYSCTL_physinfo, > + .interface_version = XEN_SYSCTL_INTERFACE_VERSION, > + }; > + > + ret = HYPERVISOR_sysctl(&op); > + if (ret) { > + pr_err("Hypervisor get physinfo error 
(%d)\n", ret); > + return ret; > + } > + > + xen_nr_cpus = op.u.physinfo.nr_cpus; > + if (xen_nr_cpus == 0 || xen_nr_cpus > NR_CPUS) { > + xen_nr_cpus = 0; > + pr_err("Wrong CPUs amount (%d)\n", xen_nr_cpus); > + return -EINVAL; > + } > + > + for (i = 0; i < xen_nr_cpus; i++) { > + per_cpu(cpufreq_policy_cpu, i) = -1; > + init_rwsem(&per_cpu(cpu_policy_rwsem, i)); > + } > + > + cpufreq_wq = alloc_workqueue("xen_cpufreq", 0, 1); > + if (!cpufreq_wq) { > + pr_err("Create workqueue error\n"); > + ret = -ENOMEM; > + goto err_create_wq; > + } > + > + return 0; > + > +err_create_wq: > + xen_nr_cpus = 0; > + return ret; > +} > + > +MODULE_AUTHOR("Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>"); > +MODULE_DESCRIPTION("Xen cpufreq driver which uploads PM data to Xen > hypervisor"); > +MODULE_LICENSE("GPL"); > + > +core_initcall(xen_cpufreq_init); > diff --git a/include/xen/interface/platform.h > b/include/xen/interface/platform.h > index c57d5f6..ee3b154 100644 > --- a/include/xen/interface/platform.h > +++ b/include/xen/interface/platform.h > @@ -209,6 +209,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t); > #define XEN_PX_PSS 2 > #define XEN_PX_PPC 4 > #define XEN_PX_PSD 8 > +#define XEN_PX_DATA 16 > > struct xen_power_register { > uint32_t space_id; > diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h > index cf64566..9133110 100644 > --- a/include/xen/interface/xen.h > +++ b/include/xen/interface/xen.h > @@ -81,6 +81,7 @@ > #define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ > #define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ > #define VIRQ_PCPU_STATE 9 /* (DOM0) PCPU state changed */ > +#define VIRQ_CPUFREQ 14 /* (DOM0) Notify cpufreq driver */ > > /* Architecture-specific VIRQ definitions. */ > #define VIRQ_ARCH_0 16 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |