[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH][Retry 1] 4/4: cpufreq/PowerNow! in Xen: PowerNow! changes
Enable cpufreq support in Xen for AMD Operton processors by: 1) Allowing the PowerNow! driver in dom0 to write to the PowerNow! MSRs. 2) Adding the cpufreq notifier chain to time-xen.c in dom0. On a frequency change, a platform hypercall is performed to scale the frequency multiplier in the hypervisor. 3) Adding a platform hypercall to the hypervisor the scale the frequency multiplier and reset the time stamps so that next calibration remains reasonably correct. 4) Adding the cpufreq Xen option which pins the VCPUs to the physical CPU cores. Patch 1 covers the frequency scaling platform call in Xen. Patch 2 allows MSR accesses from the PowerNow! driver. Patch 3 covers the frequency scaling platform call in Linux. Patch 4 covers the changes necessary to the PowerNow! driver to make it correctly associate shared cores under Xen. This code can be readily expanded to cover Intel or other non-AMD processors by modifying xen/arch/x8/traps.c to allow the appropriate MSR accesses. Caveat: currently, this code does not support the in-kernel ondemand cpufreq governor. Dom0 must run a userspace daemon to monitor the utilization of the physical cpus with the getcpuinfo sysctl hypercall. Caveat 2: Even though the clock multipliers are being scaled and recorded correctly in both dom0 and the hypervisor, time errors appear immediately after a frequency change. They are not more likely when the frequency is constant. Signed-off-by: Mark Langsdorf <mark.langsdorf@xxxxxxx> diff -r 05c22f282023 arch/i386/kernel/cpu/cpufreq/powernow-k8.c --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c Tue Aug 14 16:20:55 2007 +0100 +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c Thu Aug 30 12:21:35 2007 -0500 @@ -738,6 +738,7 @@ static int find_psb_table(struct powerno data->numps = psb->numps; dprintk("numpstates: 0x%x\n", data->numps); + data->starting_core_affinity = cpumask_of_cpu(0); return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); } /* @@ -758,15 +759,43 @@ static int find_psb_table(struct powerno #ifdef CONFIG_X86_POWERNOW_K8_ACPI static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { - if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) + if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE)) return; - data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; - data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; - data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; - data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; - data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); - data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; + data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK; + data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK; + data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; + data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; + data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK); + data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK; +} + +static struct acpi_processor_performance *acpi_perf_data[NR_CPUS]; +static int preregister_valid = 0; + +static int powernow_k8_cpu_preinit_acpi() +{ + int i; + struct acpi_processor_performance *data; + for_each_possible_cpu(i) { + data = kzalloc(sizeof(struct acpi_processor_performance), + GFP_KERNEL); + if (!data) { + int j; + for_each_possible_cpu(j) { + kfree(acpi_perf_data[j]); + acpi_perf_data[j] = NULL; + } + return -ENODEV; + } + acpi_perf_data[i] = data; + } + + if (acpi_processor_preregister_performance(acpi_perf_data)) + return -ENODEV; + else + preregister_valid = 1; + return 0; } static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) @@ -774,28 +803,29 @@ static int powernow_k8_cpu_init_acpi(str struct cpufreq_frequency_table *powernow_table; int ret_val; - if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { + data->acpi_data = acpi_perf_data[data->cpu]; + if (acpi_processor_register_performance(data->acpi_data, data->cpu)) { dprintk("register performance failed: bad ACPI data\n"); return -EIO; } /* verify the data contained in the ACPI structures */ - if (data->acpi_data.state_count <= 1) { + if (data->acpi_data->state_count <= 1) { dprintk("No ACPI P-States\n"); goto err_out; } - if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { + if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || + (data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { dprintk("Invalid control/status registers (%x - %x)\n", - data->acpi_data.control_register.space_id, - data->acpi_data.status_register.space_id); + data->acpi_data->control_register.space_id, + data->acpi_data->status_register.space_id); goto err_out; } /* fill in data->powernow_table */ powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) - * (data->acpi_data.state_count + 1)), GFP_KERNEL); + * (data->acpi_data->state_count + 1)), GFP_KERNEL); if (!powernow_table) { dprintk("powernow_table memory alloc failure\n"); goto err_out; @@ -808,28 +838,43 @@ static int powernow_k8_cpu_init_acpi(str if (ret_val) goto err_out_mem; - powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; - powernow_table[data->acpi_data.state_count].index = 0; + powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END; + powernow_table[data->acpi_data->state_count].index = 0; data->powernow_table = powernow_table; /* fill in data */ - data->numps = data->acpi_data.state_count; + data->numps = data->acpi_data->state_count; print_basics(data); powernow_k8_acpi_pst_values(data, 0); /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); + /* determine affinity, from ACPI if available */ + if (preregister_valid) { + if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) || + (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY)) + data->starting_core_affinity = data->acpi_data->shared_cpu_map; + else + data->starting_core_affinity = cpumask_of_cpu(data->cpu); + } else { + /* best guess from family if not */ + if (cpu_family == CPU_HW_PSTATE) + data->starting_core_affinity = cpumask_of_cpu(data->cpu); + else + data->starting_core_affinity = cpu_core_map[data->cpu]; + } + return 0; err_out_mem: kfree(powernow_table); err_out: - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); - - /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ - data->acpi_data.state_count = 0; + acpi_processor_unregister_performance(data->acpi_data, data->cpu); + + /* data->acpi_data->state_count informs us at ->exit() whether ACPI was used */ + data->acpi_data->state_count = 0; return -ENODEV; } @@ -838,13 +883,13 @@ static int fill_powernow_table_pstate(st { int i; - for (i = 0; i < data->acpi_data.state_count; i++) { + for (i = 0; i < data->acpi_data->state_count; i++) { u32 index; u32 hi = 0, lo = 0; u32 fid; u32 did; - index = data->acpi_data.states[i].control & HW_PSTATE_MASK; + index = data->acpi_data->states[i].control & HW_PSTATE_MASK; if (index > MAX_HW_PSTATE) { printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); @@ -865,10 +910,10 @@ static int fill_powernow_table_pstate(st powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did); - if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { + if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) { printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", powernow_table[i].frequency, - (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); + (unsigned int) (data->acpi_data->states[i].core_frequency * 1000)); powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; continue; } @@ -880,16 +925,16 @@ static int fill_powernow_table_fidvid(st { int i; int cntlofreq = 0; - for (i = 0; i < data->acpi_data.state_count; i++) { + for (i = 0; i < data->acpi_data->state_count; i++) { u32 fid; u32 vid; if (data->exttype) { - fid = data->acpi_data.states[i].status & EXT_FID_MASK; - vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; + fid = data->acpi_data->states[i].status & EXT_FID_MASK; + vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK; } else { - fid = data->acpi_data.states[i].control & FID_MASK; - vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; + fid = data->acpi_data->states[i].control & FID_MASK; + vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK; } dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); @@ -930,10 +975,10 @@ static int fill_powernow_table_fidvid(st cntlofreq = i; } - if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { + if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) { printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", powernow_table[i].frequency, - (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); + (unsigned int) (data->acpi_data->states[i].core_frequency * 1000)); powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; continue; } @@ -943,14 +988,15 @@ static int fill_powernow_table_fidvid(st static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { - if (data->acpi_data.state_count) - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + if (data->acpi_data->state_count) + acpi_processor_unregister_performance(data->acpi_data, data->cpu); } #else static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } +static int powernow_k8_cpu_preinit_acpi() { return -ENODEV; } #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ /* Take a frequency, and issue the fid/vid transition command */ @@ -1164,7 +1210,7 @@ static int __cpuinit powernowk8_cpu_init * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { - printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); + printk(KERN_ERR PFX "Your BIOS does not provide _PSS objects. PowerNow! does not work on SMP systems without _PSS objects. Complain to your BIOS vendor.\n"); kfree(data); return -ENODEV; } @@ -1204,10 +1250,7 @@ static int __cpuinit powernowk8_cpu_init set_cpus_allowed(current, oldmask); pol->governor = CPUFREQ_DEFAULT_GOVERNOR; - if (cpu_family == CPU_HW_PSTATE) - pol->cpus = cpumask_of_cpu(pol->cpu); - else - pol->cpus = cpu_core_map[pol->cpu]; + pol->cpus = data->starting_core_affinity; data->available_cores = &(pol->cpus); /* Take a crude guess here. @@ -1323,6 +1366,7 @@ static int __cpuinit powernowk8_init(voi } if (supported_cpus == num_online_cpus()) { + powernow_k8_cpu_preinit_acpi(); printk(KERN_INFO PFX "Found %d %s " "processors (" VERSION ")\n", supported_cpus, boot_cpu_data.x86_model_id); diff -r 05c22f282023 arch/i386/kernel/cpu/cpufreq/powernow-k8.h --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h Tue Aug 14 16:20:55 2007 +0100 +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h Thu Aug 30 12:21:35 2007 -0500 @@ -32,12 +32,13 @@ struct powernow_k8_data { #ifdef CONFIG_X86_POWERNOW_K8_ACPI /* the acpi table needs to be kept. it's only available if ACPI was * used to determine valid frequency/vid/fid states */ - struct acpi_processor_performance acpi_data; + struct acpi_processor_performance *acpi_data; #endif /* we need to keep track of associated cores, but let cpufreq * handle hotplug events - so just point at cpufreq pol->cpus * structure */ cpumask_t *available_cores; + cpumask_t starting_core_affinity; }; _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |