From 7de32556dfc62b9e1203730cc26b71292da8a244 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 18 Mar 2017 00:57:39 +0100 Subject: [PATCH 1/5] cpufreq: intel_pstate: One set of global limits in active mode In the active mode intel_pstate currently uses two sets of global limits, each associated with one of the possible scaling_governor settings in that mode: "powersave" or "performance". The driver switches over from one of those sets to the other depending on the scaling_governor setting for the last CPU whose per-policy cpufreq interface in sysfs was last used to change parameters exposed in there. That obviously leads to no end of issues when the scaling_governor settings differ between CPUs. The most recent issue was introduced by commit a240c4aa5d0f (cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy) that eliminated the reinitialization of "performance" limits in intel_pstate_set_policy() preventing the max limit from being set to anything below 100, among other things. Namely, an undesirable side effect of commit a240c4aa5d0f is that now, after setting scaling_governor to "performance" in the active mode, the per-policy limits for the CPU in question go to the highest level and stay there even when it is switched back to "powersave" later. As it turns out, some distributions set scaling_governor to "performance" temporarily for all CPUs to speed-up system initialization, so that change causes them to misbehave later. To fix that, get rid of the performance/powersave global limits split and use just one set of global limits for everything. From the user's persepctive, after this modification, when scaling_governor is switched from "performance" to "powersave" or the other way around on one CPU, the limits settings (ie. the global max/min_perf_pct and per-policy scaling_max/min_freq for any CPUs) will not change. Still, switching from "performance" to "powersave" or the other way around changes the way in which P-states are selected and in particular "performance" causes the driver to always request the highest P-state it is allowed to ask for for the given CPU. Fixes: a240c4aa5d0f (cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 142 +++++++++++---------------------- 1 file changed, 46 insertions(+), 96 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 08e134ffba68..7b07803e7042 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -364,9 +364,7 @@ static bool driver_registered __read_mostly; static bool acpi_ppc; #endif -static struct perf_limits performance_limits; -static struct perf_limits powersave_limits; -static struct perf_limits *limits; +static struct perf_limits global; static void intel_pstate_init_limits(struct perf_limits *limits) { @@ -377,14 +375,6 @@ static void intel_pstate_init_limits(struct perf_limits *limits) limits->max_sysfs_pct = 100; } -static void intel_pstate_set_performance_limits(struct perf_limits *limits) -{ - intel_pstate_init_limits(limits); - limits->min_perf_pct = 100; - limits->min_perf = int_ext_tofp(1); - limits->min_sysfs_pct = 100; -} - static DEFINE_MUTEX(intel_pstate_driver_lock); static DEFINE_MUTEX(intel_pstate_limits_lock); @@ -507,7 +497,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) * correct max turbo frequency based on the turbo state. * Also need to convert to MHz as _PSS freq is in MHz. */ - if (!limits->turbo_disabled) + if (!global.turbo_disabled) cpu->acpi_perf_data.states[0].core_frequency = policy->cpuinfo.max_freq / 1000; cpu->valid_pss_table = true; @@ -626,7 +616,7 @@ static inline void update_turbo_state(void) cpu = all_cpu_data[0]; rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - limits->turbo_disabled = + global.turbo_disabled = (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } @@ -851,7 +841,7 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { static void intel_pstate_hwp_set(struct cpufreq_policy *policy) { int min, hw_min, max, hw_max, cpu; - struct perf_limits *perf_limits = limits; + struct perf_limits *perf_limits = &global; u64 value, cap; for_each_cpu(cpu, policy->cpus) { @@ -863,19 +853,22 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); hw_min = HWP_LOWEST_PERF(cap); - if (limits->no_turbo) + if (global.no_turbo) hw_max = HWP_GUARANTEED_PERF(cap); else hw_max = HWP_HIGHEST_PERF(cap); - min = fp_ext_toint(hw_max * perf_limits->min_perf); + max = fp_ext_toint(hw_max * perf_limits->max_perf); + if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) + min = max; + else + min = fp_ext_toint(hw_max * perf_limits->min_perf); rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - max = fp_ext_toint(hw_max * perf_limits->max_perf); value &= ~HWP_MAX_PERF(~0L); value |= HWP_MAX_PERF(max); @@ -968,20 +961,11 @@ static int intel_pstate_resume(struct cpufreq_policy *policy) } static void intel_pstate_update_policies(void) - __releases(&intel_pstate_limits_lock) - __acquires(&intel_pstate_limits_lock) { - struct perf_limits *saved_limits = limits; int cpu; - mutex_unlock(&intel_pstate_limits_lock); - for_each_possible_cpu(cpu) cpufreq_update_policy(cpu); - - mutex_lock(&intel_pstate_limits_lock); - - limits = saved_limits; } /************************** debugfs begin ************************/ @@ -1060,7 +1044,7 @@ static void intel_pstate_debug_hide_params(void) static ssize_t show_##file_name \ (struct kobject *kobj, struct attribute *attr, char *buf) \ { \ - return sprintf(buf, "%u\n", limits->object); \ + return sprintf(buf, "%u\n", global.object); \ } static ssize_t intel_pstate_show_status(char *buf); @@ -1151,10 +1135,10 @@ static ssize_t show_no_turbo(struct kobject *kobj, } update_turbo_state(); - if (limits->turbo_disabled) - ret = sprintf(buf, "%u\n", limits->turbo_disabled); + if (global.turbo_disabled) + ret = sprintf(buf, "%u\n", global.turbo_disabled); else - ret = sprintf(buf, "%u\n", limits->no_turbo); + ret = sprintf(buf, "%u\n", global.no_turbo); mutex_unlock(&intel_pstate_driver_lock); @@ -1181,19 +1165,19 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); update_turbo_state(); - if (limits->turbo_disabled) { + if (global.turbo_disabled) { pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); mutex_unlock(&intel_pstate_limits_lock); mutex_unlock(&intel_pstate_driver_lock); return -EPERM; } - limits->no_turbo = clamp_t(int, input, 0, 1); - - intel_pstate_update_policies(); + global.no_turbo = clamp_t(int, input, 0, 1); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1218,19 +1202,16 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); - limits->max_sysfs_pct = clamp_t(int, input, 0 , 100); - limits->max_perf_pct = min(limits->max_policy_pct, - limits->max_sysfs_pct); - limits->max_perf_pct = max(limits->min_policy_pct, - limits->max_perf_pct); - limits->max_perf_pct = max(limits->min_perf_pct, - limits->max_perf_pct); - limits->max_perf = percent_ext_fp(limits->max_perf_pct); - - intel_pstate_update_policies(); + global.max_sysfs_pct = clamp_t(int, input, 0 , 100); + global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct); + global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct); + global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct); + global.max_perf = percent_ext_fp(global.max_perf_pct); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1255,19 +1236,16 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); - limits->min_sysfs_pct = clamp_t(int, input, 0 , 100); - limits->min_perf_pct = max(limits->min_policy_pct, - limits->min_sysfs_pct); - limits->min_perf_pct = min(limits->max_policy_pct, - limits->min_perf_pct); - limits->min_perf_pct = min(limits->max_perf_pct, - limits->min_perf_pct); - limits->min_perf = percent_ext_fp(limits->min_perf_pct); - - intel_pstate_update_policies(); + global.min_sysfs_pct = clamp_t(int, input, 0 , 100); + global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct); + global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct); + global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct); + global.min_perf = percent_ext_fp(global.min_perf_pct); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1387,7 +1365,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (limits->no_turbo && !limits->turbo_disabled) + if (global.no_turbo && !global.turbo_disabled) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -1557,7 +1535,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (limits->no_turbo && !limits->turbo_disabled) + if (global.no_turbo && !global.turbo_disabled) val |= (u64)1 << 32; return val; @@ -1683,9 +1661,9 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) int max_perf = cpu->pstate.turbo_pstate; int max_perf_adj; int min_perf; - struct perf_limits *perf_limits = limits; + struct perf_limits *perf_limits = &global; - if (limits->no_turbo || limits->turbo_disabled) + if (global.no_turbo || global.turbo_disabled) max_perf = cpu->pstate.max_pstate; if (per_cpu_limits) @@ -1820,7 +1798,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) sample->busy_scaled = busy_frac * 100; - target = limits->no_turbo || limits->turbo_disabled ? + target = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; target += target >> 2; target = mul_fp(target, busy_frac); @@ -2116,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, static int intel_pstate_set_policy(struct cpufreq_policy *policy) { struct cpudata *cpu; - struct perf_limits *perf_limits = NULL; + struct perf_limits *perf_limits = &global; if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -2139,21 +2117,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) mutex_lock(&intel_pstate_limits_lock); - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { - pr_debug("set performance\n"); - if (!perf_limits) { - limits = &performance_limits; - perf_limits = limits; - } - } else { - pr_debug("set powersave\n"); - if (!perf_limits) { - limits = &powersave_limits; - perf_limits = limits; - } - - } - intel_pstate_update_perf_limits(policy, perf_limits); if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { @@ -2177,16 +2140,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) static int intel_pstate_verify_policy(struct cpufreq_policy *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; - struct perf_limits *perf_limits; - - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) - perf_limits = &performance_limits; - else - perf_limits = &powersave_limits; update_turbo_state(); - policy->cpuinfo.max_freq = perf_limits->turbo_disabled || - perf_limits->no_turbo ? + policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; @@ -2201,9 +2157,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) unsigned int max_freq, min_freq; max_freq = policy->cpuinfo.max_freq * - perf_limits->max_sysfs_pct / 100; + global.max_sysfs_pct / 100; min_freq = policy->cpuinfo.max_freq * - perf_limits->min_sysfs_pct / 100; + global.min_sysfs_pct / 100; cpufreq_verify_within_limits(policy, min_freq, max_freq); } @@ -2255,7 +2211,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; update_turbo_state(); - policy->cpuinfo.max_freq = limits->turbo_disabled ? + policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; policy->cpuinfo.max_freq *= cpu->pstate.scaling; @@ -2275,7 +2231,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) return ret; policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100) + if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE)) policy->policy = CPUFREQ_POLICY_PERFORMANCE; else policy->policy = CPUFREQ_POLICY_POWERSAVE; @@ -2301,7 +2257,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) struct cpudata *cpu = all_cpu_data[policy->cpu]; update_turbo_state(); - policy->cpuinfo.max_freq = limits->turbo_disabled ? + policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; cpufreq_verify_within_cpu_limits(policy); @@ -2317,7 +2273,7 @@ static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu, update_turbo_state(); - max_freq = limits->no_turbo || limits->turbo_disabled ? + max_freq = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; policy->cpuinfo.max_freq = max_freq; if (policy->max > max_freq) @@ -2425,13 +2381,7 @@ static int intel_pstate_register_driver(void) { int ret; - intel_pstate_init_limits(&powersave_limits); - intel_pstate_set_performance_limits(&performance_limits); - if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) && - intel_pstate_driver == &intel_pstate) - limits = &performance_limits; - else - limits = &powersave_limits; + intel_pstate_init_limits(&global); ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { From 4296f23ed49a15d36949458adcc66ff993dee2a8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 19 Mar 2017 14:30:02 +0100 Subject: [PATCH 2/5] cpufreq: schedutil: Fix per-CPU structure initialization in sugov_start() sugov_start() only initializes struct sugov_cpu per-CPU structures for shared policies, but it should do that for single-CPU policies too. That in particular makes the IO-wait boost mechanism work in the cases when cpufreq policies correspond to individual CPUs. Fixes: 21ca6d2c52f8 (cpufreq: schedutil: Add iowait boosting) Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Cc: 4.9+ # 4.9+ --- kernel/sched/cpufreq_schedutil.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index cd7cd489f739..54c577578da6 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -584,20 +584,14 @@ static int sugov_start(struct cpufreq_policy *policy) for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); + memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->sg_policy = sg_policy; - if (policy_is_shared(policy)) { - sg_cpu->util = 0; - sg_cpu->max = 0; - sg_cpu->flags = SCHED_CPUFREQ_RT; - sg_cpu->last_update = 0; - sg_cpu->iowait_boost = 0; - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_shared); - } else { - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_single); - } + sg_cpu->flags = SCHED_CPUFREQ_RT; + sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, + policy_is_shared(policy) ? + sugov_update_shared : + sugov_update_single); } return 0; } From 64897b20eed29cee2b998fb5ba362e65523dba3c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 21 Mar 2017 22:19:07 +0100 Subject: [PATCH 3/5] cpufreq: intel_pstate: Fix policy data management in passive mode The policy->cpuinfo.max_freq and policy->max updates in intel_cpufreq_turbo_update() are excessive as they are done for no good reason and may lead to problems in principle, so they should be dropped. However, after dropping them intel_cpufreq_turbo_update() becomes almost entirely pointless, because the check made by it is made again down the road in intel_pstate_prepare_request(). The only thing in it that still needs to be done is the call to update_turbo_state(), so drop intel_cpufreq_turbo_update() altogether and make its callers invoke update_turbo_state() directly instead of it. In addition to that, fix intel_cpufreq_verify_policy() so that it checks global.no_turbo in addition to global.turbo_disabled when updating policy->cpuinfo.max_freq to make it consistent with intel_pstate_verify_policy(). Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7b07803e7042..283491f742d3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2257,7 +2257,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) struct cpudata *cpu = all_cpu_data[policy->cpu]; update_turbo_state(); - policy->cpuinfo.max_freq = global.turbo_disabled ? + policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; cpufreq_verify_within_cpu_limits(policy); @@ -2265,26 +2265,6 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) return 0; } -static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu, - struct cpufreq_policy *policy, - unsigned int target_freq) -{ - unsigned int max_freq; - - update_turbo_state(); - - max_freq = global.no_turbo || global.turbo_disabled ? - cpu->pstate.max_freq : cpu->pstate.turbo_freq; - policy->cpuinfo.max_freq = max_freq; - if (policy->max > max_freq) - policy->max = max_freq; - - if (target_freq > max_freq) - target_freq = max_freq; - - return target_freq; -} - static int intel_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -2293,8 +2273,10 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int target_pstate; + update_turbo_state(); + freqs.old = policy->cur; - freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq); + freqs.new = target_freq; cpufreq_freq_transition_begin(policy, &freqs); switch (relation) { @@ -2326,7 +2308,8 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, struct cpudata *cpu = all_cpu_data[policy->cpu]; int target_pstate; - target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq); + update_turbo_state(); + target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); target_pstate = intel_pstate_prepare_request(cpu, target_pstate); intel_pstate_update_pstate(cpu, target_pstate); From ad0a45fd9c14feebd000b6e84189d0edff265170 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Sun, 19 Mar 2017 00:51:59 +0530 Subject: [PATCH 4/5] cpuidle: Validate cpu_dev in cpuidle_add_sysfs() If a given cpu is not in cpu_present and cpu hotplug is disabled, arch can skip setting up the cpu_dev. Arch cpuidle driver should pass correct cpu mask for registration, but failing to do so by the driver causes error to propagate and crash like this: [ 30.076045] Unable to handle kernel paging request for data at address 0x00000048 [ 30.076100] Faulting instruction address: 0xc0000000007b2f30 cpu 0x4d: Vector: 300 (Data Access) at [c000003feb18b670] pc: c0000000007b2f30: kobject_get+0x20/0x70 lr: c0000000007b3c94: kobject_add_internal+0x54/0x3f0 sp: c000003feb18b8f0 msr: 9000000000009033 dar: 48 dsisr: 40000000 current = 0xc000003fd2ed8300 paca = 0xc00000000fbab500 softe: 0 irq_happened: 0x01 pid = 1, comm = swapper/0 Linux version 4.11.0-rc2-svaidy+ (sv@sagarika) (gcc version 6.2.0 20161005 (Ubuntu 6.2.0-5ubuntu12) ) #10 SMP Sun Mar 19 00:08:09 IST 2017 enter ? for help [c000003feb18b960] c0000000007b3c94 kobject_add_internal+0x54/0x3f0 [c000003feb18b9f0] c0000000007b43a4 kobject_init_and_add+0x64/0xa0 [c000003feb18ba70] c000000000e284f4 cpuidle_add_sysfs+0xb4/0x130 [c000003feb18baf0] c000000000e26038 cpuidle_register_device+0x118/0x1c0 [c000003feb18bb30] c000000000e26c48 cpuidle_register+0x78/0x120 [c000003feb18bbc0] c00000000168fd9c powernv_processor_idle_init+0x110/0x1c4 [c000003feb18bc40] c00000000000cff8 do_one_initcall+0x68/0x1d0 [c000003feb18bd00] c0000000016242f4 kernel_init_freeable+0x280/0x360 [c000003feb18bdc0] c00000000000d864 kernel_init+0x24/0x160 [c000003feb18be30] c00000000000b4e8 ret_from_kernel_thread+0x5c/0x74 Validating cpu_dev fixes the crash and reports correct error message like: [ 30.163506] Failed to register cpuidle device for cpu136 [ 30.173329] Registration of powernv driver failed. Signed-off-by: Vaidyanathan Srinivasan [ rjw: Comment massage ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/sysfs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index c5adc8c9ac43..ae948b1da93a 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -615,6 +615,18 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; + /* + * Return if cpu_device is not setup for this CPU. + * + * This could happen if the arch did not set up cpu_device + * since this CPU is not in cpu_present mask and the + * driver did not send a correct CPU mask during registration. + * Without this check we would end up passing bogus + * value for &cpu_dev->kobj in kobject_init_and_add() + */ + if (!cpu_dev) + return -ENODEV; + kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); if (!kdev) return -ENOMEM; From ff010472fb75670cb5c08671e820eeea3af59c87 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 21 Mar 2017 11:36:06 +0530 Subject: [PATCH 5/5] cpufreq: Restore policy min/max limits on CPU online On CPU online the cpufreq core restores the previous governor (or the previous "policy" setting for ->setpolicy drivers), but it does not restore the min/max limits at the same time, which is confusing, inconsistent and real pain for users who set the limits and then suspend/resume the system (using full suspend), in which case the limits are reset on all CPUs except for the boot one. Fix this by making cpufreq_online() restore the limits when an inactive policy is brought online. The commit log and patch are inspired from Rafael's earlier work. Reported-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar Cc: 4.3+ # 4.3+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b8ff617d449d..5dbdd261aa73 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1184,6 +1184,9 @@ static int cpufreq_online(unsigned int cpu) for_each_cpu(j, policy->related_cpus) per_cpu(cpufreq_cpu_data, j) = policy; write_unlock_irqrestore(&cpufreq_driver_lock, flags); + } else { + policy->min = policy->user_policy.min; + policy->max = policy->user_policy.max; } if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {