From 2f1d407adab026b34a105ed27b1d4d7e910c4448 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 24 Oct 2016 23:20:25 +0200 Subject: [PATCH] cpufreq: intel_pstate: Always set max P-state in performance mode The only times at which intel_pstate checks the policy set for a given CPU is the initialization of that CPU and updates of its policy settings from cpufreq when intel_pstate_set_policy() is invoked. That is insufficient, however, because intel_pstate uses the same P-state selection function for all CPUs regardless of the policy setting for each of them and the P-state limits are shared between them. Thus if the policy is set to "performance" for a particular CPU, it may not behave as expected if the cpufreq settings are changed subsequently for another CPU. That can be easily demonstrated by writing "performance" to scaling_governor for all CPUs and then switching it to "powersave" for one of them in which case all of the CPUs will behave as though their scaling_governor were all "powersave" (even though the policy still appears to be "performance" for the remaining CPUs). Fix this problem by modifying intel_pstate_adjust_busy_pstate() to always set the P-state to the maximum allowed by the current limits for all CPUs whose policy is set to "performance". Note that it still is recommended to always change the policy setting in the same way for all CPUs even with this fix applied to avoid confusion. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ac7c58d20b58..4737520ec823 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -179,6 +179,7 @@ struct _pid { /** * struct cpudata - Per CPU instance data storage * @cpu: CPU number for this instance data + * @policy: CPUFreq policy value * @update_util: CPUFreq utility callback information * @update_util_set: CPUFreq utility callback is set * @iowait_boost: iowait-related boost fraction @@ -201,6 +202,7 @@ struct _pid { struct cpudata { int cpu; + unsigned int policy; struct update_util_data update_util; bool update_util_set; @@ -1337,7 +1339,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) from = cpu->pstate.current_pstate; - target_pstate = pstate_funcs.get_target_pstate(cpu); + target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ? + cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu); intel_pstate_update_pstate(cpu, target_pstate); @@ -1504,6 +1507,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) policy->cpuinfo.max_freq, policy->max); cpu = all_cpu_data[policy->cpu]; + cpu->policy = policy->policy; + if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && policy->max < policy->cpuinfo.max_freq && policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) { @@ -1511,7 +1516,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) policy->max = policy->cpuinfo.max_freq; } - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { limits = &performance_limits; if (policy->max >= policy->cpuinfo.max_freq) { pr_debug("set performance\n"); @@ -1547,7 +1552,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits->max_perf = round_up(limits->max_perf, FRAC_BITS); out: - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { /* * NOHZ_FULL CPUs need this as the governor callback may not * be invoked on them.