cpufreq: allocate CPU masks dynamically struct cpufreq_policy, including a cpumask_t member, gets copied in cpufreq_limit_change(), cpufreq_add_cpu(), set_cpufreq_gov(), and set_cpufreq_para(). Make the member a cpumask_var_t, thus reducing the amount of data needing copying (particularly with large NR_CPUS). Signed-off-by: Jan Beulich --- 2011-09-20.orig/xen/arch/x86/acpi/cpufreq/cpufreq.c 2011-10-12 08:35:12.000000000 +0200 +++ 2011-09-20/xen/arch/x86/acpi/cpufreq/cpufreq.c 2011-10-14 14:55:07.000000000 +0200 @@ -446,7 +446,7 @@ static int acpi_cpufreq_target(struct cp if (unlikely(result)) return -ENODEV; - cpumask_and(&online_policy_cpus, &cpu_online_map, &policy->cpus); + cpumask_and(&online_policy_cpus, &cpu_online_map, policy->cpus); next_perf_state = data->freq_table[next_state].index; if (perf->state == next_perf_state) { --- 2011-09-20.orig/xen/arch/x86/acpi/cpufreq/powernow.c 2011-10-14 09:48:25.000000000 +0200 +++ 2011-09-20/xen/arch/x86/acpi/cpufreq/powernow.c 2011-10-14 14:55:41.000000000 +0200 @@ -107,7 +107,7 @@ static int powernow_cpufreq_target(struc if (unlikely(result)) return -ENODEV; - cpumask_and(&online_policy_cpus, &policy->cpus, &cpu_online_map); + cpumask_and(&online_policy_cpus, policy->cpus, &cpu_online_map); next_perf_state = data->freq_table[next_state].index; if (perf->state == next_perf_state) { @@ -206,7 +206,7 @@ static int powernow_cpufreq_cpu_init(str result = -ENODEV; goto err_unreg; } - cpumask_copy(&policy->cpus, cpumask_of(cpu)); + cpumask_copy(policy->cpus, cpumask_of(cpu)); /* capability check */ if (perf->state_count <= 1) { --- 2011-09-20.orig/xen/drivers/acpi/pmstat.c 2011-09-21 16:37:46.000000000 +0200 +++ 2011-09-20/xen/drivers/acpi/pmstat.c 2011-10-14 14:53:51.000000000 +0200 @@ -211,11 +211,11 @@ static int get_cpufreq_para(struct xen_s list_for_each(pos, &cpufreq_governor_list) gov_num++; - if ( (op->u.get_para.cpu_num != cpus_weight(policy->cpus)) || + if ( (op->u.get_para.cpu_num != cpumask_weight(policy->cpus)) || (op->u.get_para.freq_num != pmpt->perf.state_count) || (op->u.get_para.gov_num != gov_num) ) { - op->u.get_para.cpu_num = cpus_weight(policy->cpus); + op->u.get_para.cpu_num = cpumask_weight(policy->cpus); op->u.get_para.freq_num = pmpt->perf.state_count; op->u.get_para.gov_num = gov_num; return -EAGAIN; @@ -223,7 +223,7 @@ static int get_cpufreq_para(struct xen_s if ( !(affected_cpus = xzalloc_array(uint32_t, op->u.get_para.cpu_num)) ) return -ENOMEM; - for_each_cpu_mask(cpu, policy->cpus) + for_each_cpu_mask(cpu, *policy->cpus) affected_cpus[j++] = cpu; ret = copy_to_guest(op->u.get_para.affected_cpus, affected_cpus, op->u.get_para.cpu_num); --- 2011-09-20.orig/xen/drivers/cpufreq/cpufreq.c 2011-10-13 17:34:59.000000000 +0200 +++ 2011-09-20/xen/drivers/cpufreq/cpufreq.c 2011-10-14 14:58:01.000000000 +0200 @@ -53,7 +53,7 @@ static void cpufreq_cmdline_common_para( struct cpufreq_dom { unsigned int dom; - cpumask_t map; + cpumask_var_t map; struct list_head node; }; static LIST_HEAD_READ_MOSTLY(cpufreq_dom_list_head); @@ -152,11 +152,16 @@ int cpufreq_add_cpu(unsigned int cpu) if (!cpufreq_dom) return -ENOMEM; + if (!zalloc_cpumask_var(&cpufreq_dom->map)) { + xfree(cpufreq_dom); + return -ENOMEM; + } + cpufreq_dom->dom = dom; list_add(&cpufreq_dom->node, &cpufreq_dom_list_head); } else { /* domain sanity check under whatever coordination type */ - firstcpu = first_cpu(cpufreq_dom->map); + firstcpu = cpumask_first(cpufreq_dom->map); if ((perf->domain_info.coord_type != processor_pminfo[firstcpu]->perf.domain_info.coord_type) || (perf->domain_info.num_processors != @@ -181,11 +186,18 @@ int cpufreq_add_cpu(unsigned int cpu) goto err0; } + if (!zalloc_cpumask_var(&policy->cpus)) { + xfree(policy); + ret = -ENOMEM; + goto err0; + } + policy->cpu = cpu; per_cpu(cpufreq_cpu_policy, cpu) = policy; ret = cpufreq_driver->init(policy); if (ret) { + free_cpumask_var(policy->cpus); xfree(policy); per_cpu(cpufreq_cpu_policy, cpu) = NULL; goto err0; @@ -193,7 +205,7 @@ int cpufreq_add_cpu(unsigned int cpu) if (cpufreq_verbose) printk("CPU %u initialization completed\n", cpu); } else { - firstcpu = first_cpu(cpufreq_dom->map); + firstcpu = cpumask_first(cpufreq_dom->map); policy = per_cpu(cpufreq_cpu_policy, firstcpu); per_cpu(cpufreq_cpu_policy, cpu) = policy; @@ -201,15 +213,15 @@ int cpufreq_add_cpu(unsigned int cpu) printk("adding CPU %u\n", cpu); } - cpu_set(cpu, policy->cpus); - cpu_set(cpu, cpufreq_dom->map); + cpumask_set_cpu(cpu, policy->cpus); + cpumask_set_cpu(cpu, cpufreq_dom->map); ret = cpufreq_statistic_init(cpu); if (ret) goto err1; - if (hw_all || - (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors)) { + if (hw_all || (cpumask_weight(cpufreq_dom->map) == + perf->domain_info.num_processors)) { memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); policy->governor = NULL; @@ -238,16 +250,18 @@ err2: cpufreq_statistic_exit(cpu); err1: per_cpu(cpufreq_cpu_policy, cpu) = NULL; - cpu_clear(cpu, policy->cpus); - cpu_clear(cpu, cpufreq_dom->map); + cpumask_clear_cpu(cpu, policy->cpus); + cpumask_clear_cpu(cpu, cpufreq_dom->map); - if (cpus_empty(policy->cpus)) { + if (cpumask_empty(policy->cpus)) { cpufreq_driver->exit(policy); + free_cpumask_var(policy->cpus); xfree(policy); } err0: - if (cpus_empty(cpufreq_dom->map)) { + if (cpumask_empty(cpufreq_dom->map)) { list_del(&cpufreq_dom->node); + free_cpumask_var(cpufreq_dom->map); xfree(cpufreq_dom); } @@ -291,24 +305,26 @@ int cpufreq_del_cpu(unsigned int cpu) /* for HW_ALL, stop gov for each core of the _PSD domain */ /* for SW_ALL & SW_ANY, stop gov for the 1st core of the _PSD domain */ - if (hw_all || - (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors)) + if (hw_all || (cpumask_weight(cpufreq_dom->map) == + perf->domain_info.num_processors)) __cpufreq_governor(policy, CPUFREQ_GOV_STOP); cpufreq_statistic_exit(cpu); per_cpu(cpufreq_cpu_policy, cpu) = NULL; - cpu_clear(cpu, policy->cpus); - cpu_clear(cpu, cpufreq_dom->map); + cpumask_clear_cpu(cpu, policy->cpus); + cpumask_clear_cpu(cpu, cpufreq_dom->map); - if (cpus_empty(policy->cpus)) { + if (cpumask_empty(policy->cpus)) { cpufreq_driver->exit(policy); + free_cpumask_var(policy->cpus); xfree(policy); } /* for the last cpu of the domain, clean room */ /* It's safe here to free freq_table, drv_data and policy */ - if (cpus_empty(cpufreq_dom->map)) { + if (cpumask_empty(cpufreq_dom->map)) { list_del(&cpufreq_dom->node); + free_cpumask_var(cpufreq_dom->map); xfree(cpufreq_dom); } --- 2011-09-20.orig/xen/drivers/cpufreq/cpufreq_ondemand.c 2011-05-11 09:58:42.000000000 +0200 +++ 2011-09-20/xen/drivers/cpufreq/cpufreq_ondemand.c 2011-10-14 14:58:41.000000000 +0200 @@ -122,7 +122,7 @@ static void dbs_check_cpu(struct cpu_dbs return; /* Get Idle Time */ - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask(j, *policy->cpus) { uint64_t idle_ns, total_idle_ns; uint64_t load, load_freq, freq_avg; struct cpu_dbs_info_s *j_dbs_info; @@ -233,7 +233,7 @@ int cpufreq_governor_dbs(struct cpufreq_ dbs_enable++; - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask(j, *policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; --- 2011-09-20.orig/xen/include/acpi/cpufreq/cpufreq.h 2011-06-20 08:41:50.000000000 +0200 +++ 2011-09-20/xen/include/acpi/cpufreq/cpufreq.h 2011-10-14 14:45:28.000000000 +0200 @@ -42,7 +42,7 @@ struct cpufreq_cpuinfo { }; struct cpufreq_policy { - cpumask_t cpus; /* affected CPUs */ + cpumask_var_t cpus; /* affected CPUs */ unsigned int shared_type; /* ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of registered CPU */