diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb95fad81c79..8deb5a89328a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -703,6 +703,11 @@ cpufreq.off=1 [CPU_FREQ] disable the cpufreq sub-system + cpufreq.default_governor= + [CPU_FREQ] Name of the default cpufreq governor or + policy to use. This governor must be registered in the + kernel before the cpufreq driver probes. + cpu_init_udelay=N [X86] Delay for N microsec between assert and de-assert of APIC INIT to start processors. This delay occurs diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 0c74a7784964..368e612145d2 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -147,9 +147,9 @@ CPUs in it. The next major initialization step for a new policy object is to attach a scaling governor to it (to begin with, that is the default scaling governor -determined by the kernel configuration, but it may be changed later -via ``sysfs``). First, a pointer to the new policy object is passed to the -governor's ``->init()`` callback which is expected to initialize all of the +determined by the kernel command line or configuration, but it may be changed +later via ``sysfs``). First, a pointer to the new policy object is passed to +the governor's ``->init()`` callback which is expected to initialize all of the data structures necessary to handle the given policy and, possibly, to add a governor ``sysfs`` interface to it. Next, the governor is started by invoking its ``->start()`` callback. diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 39d80bc29ccd..40d481cca368 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -431,6 +431,17 @@ argument is passed to the kernel in the command line. supported in the current configuration, writes to this attribute will fail with an appropriate error. +``energy_efficiency`` + This attribute is only present on platforms, which have CPUs matching + Kaby Lake or Coffee Lake desktop CPU model. By default + energy efficiency optimizations are disabled on these CPU models in HWP + mode by this driver. Enabling energy efficiency may limit maximum + operating frequency in both HWP and non HWP mode. In non HWP mode, + optimizations are done only in the turbo frequency range. In HWP mode, + optimizations are done in the entire frequency range. Setting this + attribute to "1" enables energy efficiency optimizations and setting + to "0" disables energy efficiency optimizations. + Interpretation of Policy Attributes ----------------------------------- @@ -554,7 +565,11 @@ somewhere between the two extremes: Strings written to the ``energy_performance_preference`` attribute are internally translated to integer values written to the processor's Energy-Performance Preference (EPP) knob (if supported) or its -Energy-Performance Bias (EPB) knob. +Energy-Performance Bias (EPB) knob. It is also possible to write a positive +integer value between 0 to 255, if the EPP feature is present. If the EPP +feature is not present, writing integer value to this attribute is not +supported. In this case, user can use + "/sys/devices/system/cpu/cpu*/power/energy_perf_bias" interface. [Note that tasks may by migrated from one CPU to another by the scheduler's load-balancing algorithm and if different energy vs performance hints are diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index 55b31eadb3c8..ca7849e113d7 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -126,30 +126,8 @@ static struct cpufreq_governor spu_governor = { .stop = spu_gov_stop, .owner = THIS_MODULE, }; - -/* - * module init and destoy - */ - -static int __init spu_gov_init(void) -{ - int ret; - - ret = cpufreq_register_governor(&spu_governor); - if (ret) - printk(KERN_ERR "registration of governor failed\n"); - return ret; -} - -static void __exit spu_gov_exit(void) -{ - cpufreq_unregister_governor(&spu_governor); -} - - -module_init(spu_gov_init); -module_exit(spu_gov_exit); +cpufreq_governor_init(spu_governor); +cpufreq_governor_exit(spu_governor); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Krafft "); - diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e8370e64a155..21b409195b46 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -149,6 +149,10 @@ #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 + +#define MSR_IA32_POWER_CTL 0x000001fc +#define MSR_IA32_POWER_CTL_BIT_EE 19 + #define MSR_LBR_NHM_FROM 0x00000680 #define MSR_LBR_NHM_TO 0x000006c0 #define MSR_LBR_CORE_FROM 0x00000040 @@ -253,8 +257,6 @@ #define MSR_PEBS_FRONTEND 0x000003f7 -#define MSR_IA32_POWER_CTL 0x000001fc - #define MSR_IA32_MC0_CTL 0x00000400 #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 429e5a36c08a..e4ff681faaaa 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -244,7 +244,7 @@ static unsigned extract_freq(struct cpufreq_policy *policy, u32 val) static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used) { - u32 val, dummy; + u32 val, dummy __always_unused; rdmsr(MSR_IA32_PERF_CTL, val, dummy); return val; @@ -261,7 +261,7 @@ static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val) static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used) { - u32 val, dummy; + u32 val, dummy __always_unused; rdmsr(MSR_AMD_PERF_CTL, val, dummy); return val; @@ -612,7 +612,7 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) { /* Intel Xeon Processor 7100 Series Specification Update - * http://www.intel.com/Assets/PDF/specupdate/314554.pdf + * https://www.intel.com/Assets/PDF/specupdate/314554.pdf * AL30: A Machine Check Exception (MCE) Occurring during an * Enhanced Intel SpeedStep Technology Ratio Change May Cause * Both Processor Cores to Lock Up. */ @@ -993,14 +993,14 @@ MODULE_PARM_DESC(acpi_pstate_strict, late_initcall(acpi_cpufreq_init); module_exit(acpi_cpufreq_exit); -static const struct x86_cpu_id acpi_cpufreq_ids[] = { +static const struct x86_cpu_id __maybe_unused acpi_cpufreq_ids[] = { X86_MATCH_FEATURE(X86_FEATURE_ACPI, NULL), X86_MATCH_FEATURE(X86_FEATURE_HW_PSTATE, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids); -static const struct acpi_device_id processor_device_ids[] = { +static const struct acpi_device_id __maybe_unused processor_device_ids[] = { {ACPI_PROCESSOR_OBJECT_HID, }, {ACPI_PROCESSOR_DEVICE_HID, }, {}, diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index f7c4206d4c90..d0b10baf039a 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -144,7 +144,7 @@ static void __exit amd_freq_sensitivity_exit(void) } module_exit(amd_freq_sensitivity_exit); -static const struct x86_cpu_id amd_freq_sensitivity_ids[] = { +static const struct x86_cpu_id __maybe_unused amd_freq_sensitivity_ids[] = { X86_MATCH_FEATURE(X86_FEATURE_PROC_FEEDBACK, NULL), {} }; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0128de3603df..17c1c3becd92 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -50,7 +50,9 @@ static LIST_HEAD(cpufreq_governor_list); #define for_each_governor(__governor) \ list_for_each_entry(__governor, &cpufreq_governor_list, governor_list) -/** +static char default_governor[CPUFREQ_NAME_LEN]; + +/* * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. This lock * also protects the cpufreq_cpu_data array. @@ -78,7 +80,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_governor *new_gov, unsigned int new_pol); -/** +/* * Two notifier lists: the "policy" list is involved in the * validation process for a new CPU frequency policy; the * "transition" list for kernel code that needs to handle @@ -298,7 +300,7 @@ struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) * EXTERNALLY AFFECTING FREQUENCY CHANGES * *********************************************************************/ -/** +/* * adjust_jiffies - adjust the system "loops_per_jiffy" * * This function alters the system "loops_per_jiffy" for the clock @@ -524,6 +526,7 @@ EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); /** * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported * one. + * @policy: associated policy to interrogate * @target_freq: target frequency to resolve. * * The target to driver frequency mapping is cached in the policy. @@ -621,6 +624,24 @@ static struct cpufreq_governor *find_governor(const char *str_governor) return NULL; } +static struct cpufreq_governor *get_governor(const char *str_governor) +{ + struct cpufreq_governor *t; + + mutex_lock(&cpufreq_governor_mutex); + t = find_governor(str_governor); + if (!t) + goto unlock; + + if (!try_module_get(t->owner)) + t = NULL; + +unlock: + mutex_unlock(&cpufreq_governor_mutex); + + return t; +} + static unsigned int cpufreq_parse_policy(char *str_governor) { if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) @@ -640,31 +661,17 @@ static struct cpufreq_governor *cpufreq_parse_governor(char *str_governor) { struct cpufreq_governor *t; - mutex_lock(&cpufreq_governor_mutex); + t = get_governor(str_governor); + if (t) + return t; - t = find_governor(str_governor); - if (!t) { - int ret; + if (request_module("cpufreq_%s", str_governor)) + return NULL; - mutex_unlock(&cpufreq_governor_mutex); - - ret = request_module("cpufreq_%s", str_governor); - if (ret) - return NULL; - - mutex_lock(&cpufreq_governor_mutex); - - t = find_governor(str_governor); - } - if (t && !try_module_get(t->owner)) - t = NULL; - - mutex_unlock(&cpufreq_governor_mutex); - - return t; + return get_governor(str_governor); } -/** +/* * cpufreq_per_cpu_attr_read() / show_##file_name() - * print out cpufreq information * @@ -706,7 +713,7 @@ static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) return ret; } -/** +/* * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access */ #define store_one(file_name, object) \ @@ -727,7 +734,7 @@ static ssize_t store_##file_name \ store_one(scaling_min_freq, min); store_one(scaling_max_freq, max); -/** +/* * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware */ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, @@ -741,7 +748,7 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, return sprintf(buf, "\n"); } -/** +/* * show_scaling_governor - show the current policy for the specified CPU */ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) @@ -756,7 +763,7 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) return -EINVAL; } -/** +/* * store_scaling_governor - store policy for the specified CPU */ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, @@ -793,7 +800,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, return ret ? ret : count; } -/** +/* * show_scaling_driver - show the cpufreq driver currently loaded */ static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) @@ -801,7 +808,7 @@ static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", cpufreq_driver->name); } -/** +/* * show_scaling_available_governors - show the available CPUfreq governors */ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, @@ -815,12 +822,14 @@ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, goto out; } + mutex_lock(&cpufreq_governor_mutex); for_each_governor(t) { if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2))) - goto out; + break; i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name); } + mutex_unlock(&cpufreq_governor_mutex); out: i += sprintf(&buf[i], "\n"); return i; @@ -843,7 +852,7 @@ ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf) } EXPORT_SYMBOL_GPL(cpufreq_show_cpus); -/** +/* * show_related_cpus - show the CPUs affected by each transition even if * hw coordination is in use */ @@ -852,7 +861,7 @@ static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) return cpufreq_show_cpus(policy->related_cpus, buf); } -/** +/* * show_affected_cpus - show the CPUs affected by each transition */ static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) @@ -886,7 +895,7 @@ static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) return policy->governor->show_setspeed(policy, buf); } -/** +/* * show_bios_limit - show the current cpufreq HW/BIOS limitation */ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) @@ -1048,36 +1057,36 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return 0; } -__weak struct cpufreq_governor *cpufreq_default_governor(void) -{ - return NULL; -} - static int cpufreq_init_policy(struct cpufreq_policy *policy) { - struct cpufreq_governor *def_gov = cpufreq_default_governor(); struct cpufreq_governor *gov = NULL; unsigned int pol = CPUFREQ_POLICY_UNKNOWN; + int ret; if (has_target()) { /* Update policy governor to the one used before hotplug. */ - gov = find_governor(policy->last_governor); + gov = get_governor(policy->last_governor); if (gov) { pr_debug("Restoring governor %s for cpu %d\n", - policy->governor->name, policy->cpu); - } else if (def_gov) { - gov = def_gov; + gov->name, policy->cpu); } else { - return -ENODATA; + gov = get_governor(default_governor); } + + if (!gov) { + gov = cpufreq_default_governor(); + __module_get(gov->owner); + } + } else { + /* Use the default policy if there is no last_policy. */ if (policy->last_policy) { pol = policy->last_policy; - } else if (def_gov) { - pol = cpufreq_parse_policy(def_gov->name); + } else { + pol = cpufreq_parse_policy(default_governor); /* - * In case the default governor is neiter "performance" + * In case the default governor is neither "performance" * nor "powersave", fall back to the initial policy * value set by the driver. */ @@ -1089,7 +1098,11 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) return -ENODATA; } - return cpufreq_set_policy(policy, gov, pol); + ret = cpufreq_set_policy(policy, gov, pol); + if (gov) + module_put(gov->owner); + + return ret; } static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) @@ -1604,7 +1617,7 @@ unlock: return 0; } -/** +/* * cpufreq_remove_dev - remove a CPU device * * Removes the cpufreq interface for a CPU device. @@ -2361,6 +2374,7 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); * cpufreq_get_policy - get the current cpufreq_policy * @policy: struct cpufreq_policy into which the current cpufreq_policy * is written + * @cpu: CPU to find the policy for * * Reads the current cpufreq policy. */ @@ -2747,7 +2761,7 @@ out: } EXPORT_SYMBOL_GPL(cpufreq_register_driver); -/** +/* * cpufreq_unregister_driver - unregister the current CPUFreq driver * * Unregister the current CPUFreq driver. Only call this if you have @@ -2783,13 +2797,19 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); static int __init cpufreq_core_init(void) { + struct cpufreq_governor *gov = cpufreq_default_governor(); + if (cpufreq_disabled()) return -ENODEV; cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj); BUG_ON(!cpufreq_global_kobject); + if (!strlen(default_governor)) + strncpy(default_governor, gov->name, CPUFREQ_NAME_LEN); + return 0; } module_param(off, int, 0444); +module_param_string(default_governor, default_governor, CPUFREQ_NAME_LEN, 0444); core_initcall(cpufreq_core_init); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 737ff3b9c2c0..aa39ff31ec9f 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -322,17 +322,7 @@ static struct dbs_governor cs_governor = { .start = cs_start, }; -#define CPU_FREQ_GOV_CONSERVATIVE (&cs_governor.gov) - -static int __init cpufreq_gov_dbs_init(void) -{ - return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE); -} - -static void __exit cpufreq_gov_dbs_exit(void) -{ - cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE); -} +#define CPU_FREQ_GOV_CONSERVATIVE (cs_governor.gov) MODULE_AUTHOR("Alexander Clouter "); MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " @@ -343,11 +333,9 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE struct cpufreq_governor *cpufreq_default_governor(void) { - return CPU_FREQ_GOV_CONSERVATIVE; + return &CPU_FREQ_GOV_CONSERVATIVE; } - -core_initcall(cpufreq_gov_dbs_init); -#else -module_init(cpufreq_gov_dbs_init); #endif -module_exit(cpufreq_gov_dbs_exit); + +cpufreq_governor_init(CPU_FREQ_GOV_CONSERVATIVE); +cpufreq_governor_exit(CPU_FREQ_GOV_CONSERVATIVE); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index f99ae45efaea..63f7c219062b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -26,7 +26,7 @@ static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); static DEFINE_MUTEX(gov_dbs_data_mutex); /* Common sysfs tunables */ -/** +/* * store_sampling_rate - update sampling rate effective immediately if needed. * * If new rate is smaller than the old, simply updating diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 82a4d37ddecb..ac361a8b1d3b 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -408,7 +408,7 @@ static struct dbs_governor od_dbs_gov = { .start = od_start, }; -#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) +#define CPU_FREQ_GOV_ONDEMAND (od_dbs_gov.gov) static void od_set_powersave_bias(unsigned int powersave_bias) { @@ -429,7 +429,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias) continue; policy = cpufreq_cpu_get_raw(cpu); - if (!policy || policy->governor != CPU_FREQ_GOV_ONDEMAND) + if (!policy || policy->governor != &CPU_FREQ_GOV_ONDEMAND) continue; policy_dbs = policy->governor_data; @@ -461,16 +461,6 @@ void od_unregister_powersave_bias_handler(void) } EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); -static int __init cpufreq_gov_dbs_init(void) -{ - return cpufreq_register_governor(CPU_FREQ_GOV_ONDEMAND); -} - -static void __exit cpufreq_gov_dbs_exit(void) -{ - cpufreq_unregister_governor(CPU_FREQ_GOV_ONDEMAND); -} - MODULE_AUTHOR("Venkatesh Pallipadi "); MODULE_AUTHOR("Alexey Starikovskiy "); MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " @@ -480,11 +470,9 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND struct cpufreq_governor *cpufreq_default_governor(void) { - return CPU_FREQ_GOV_ONDEMAND; + return &CPU_FREQ_GOV_ONDEMAND; } - -core_initcall(cpufreq_gov_dbs_init); -#else -module_init(cpufreq_gov_dbs_init); #endif -module_exit(cpufreq_gov_dbs_exit); + +cpufreq_governor_init(CPU_FREQ_GOV_ONDEMAND); +cpufreq_governor_exit(CPU_FREQ_GOV_ONDEMAND); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index def9afe0f5b8..71c1d9aba772 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -23,16 +23,6 @@ static struct cpufreq_governor cpufreq_gov_performance = { .limits = cpufreq_gov_performance_limits, }; -static int __init cpufreq_gov_performance_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_performance); -} - -static void __exit cpufreq_gov_performance_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_performance); -} - #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE struct cpufreq_governor *cpufreq_default_governor(void) { @@ -50,5 +40,5 @@ MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); MODULE_LICENSE("GPL"); -core_initcall(cpufreq_gov_performance_init); -module_exit(cpufreq_gov_performance_exit); +cpufreq_governor_init(cpufreq_gov_performance); +cpufreq_governor_exit(cpufreq_gov_performance); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 1ae66019eb83..7749522355b5 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -23,16 +23,6 @@ static struct cpufreq_governor cpufreq_gov_powersave = { .owner = THIS_MODULE, }; -static int __init cpufreq_gov_powersave_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_powersave); -} - -static void __exit cpufreq_gov_powersave_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_powersave); -} - MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); MODULE_LICENSE("GPL"); @@ -42,9 +32,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) { return &cpufreq_gov_powersave; } - -core_initcall(cpufreq_gov_powersave_init); -#else -module_init(cpufreq_gov_powersave_init); #endif -module_exit(cpufreq_gov_powersave_exit); + +cpufreq_governor_init(cpufreq_gov_powersave); +cpufreq_governor_exit(cpufreq_gov_powersave); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index b43e7cd502c5..50a4d7846580 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -126,16 +126,6 @@ static struct cpufreq_governor cpufreq_gov_userspace = { .owner = THIS_MODULE, }; -static int __init cpufreq_gov_userspace_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_userspace); -} - -static void __exit cpufreq_gov_userspace_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_userspace); -} - MODULE_AUTHOR("Dominik Brodowski , " "Russell King "); MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); @@ -146,9 +136,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) { return &cpufreq_gov_userspace; } - -core_initcall(cpufreq_gov_userspace_init); -#else -module_init(cpufreq_gov_userspace_init); #endif -module_exit(cpufreq_gov_userspace_exit); + +cpufreq_governor_init(cpufreq_gov_userspace); +cpufreq_governor_exit(cpufreq_gov_userspace); diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c index 297d23cad8b5..91f477a6cbc4 100644 --- a/drivers/cpufreq/davinci-cpufreq.c +++ b/drivers/cpufreq/davinci-cpufreq.c @@ -2,7 +2,7 @@ /* * CPU frequency scaling for DaVinci * - * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2009 Texas Instruments Incorporated - https://www.ti.com/ * * Based on linux/arch/arm/plat-omap/cpu-omap.c. Original Copyright follows: * diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index e117b0059123..f839dc9852c0 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -221,7 +221,7 @@ int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_index); -/** +/* * show_available_freqs - show available frequencies for the specified CPU */ static ssize_t show_available_freqs(struct cpufreq_policy *policy, char *buf, @@ -260,7 +260,7 @@ static ssize_t show_available_freqs(struct cpufreq_policy *policy, char *buf, struct freq_attr cpufreq_freq_attr_##_name##_freqs = \ __ATTR_RO(_name##_frequencies) -/** +/* * show_scaling_available_frequencies - show available normal frequencies for * the specified CPU */ @@ -272,7 +272,7 @@ static ssize_t scaling_available_frequencies_show(struct cpufreq_policy *policy, cpufreq_attr_available_freq(scaling_available); EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs); -/** +/* * show_available_boost_freqs - show available boost frequencies for * the specified CPU */ diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7e0f7880b21a..7f5d81931483 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -201,9 +201,7 @@ struct global_params { * @pstate: Stores P state limits for this CPU * @vid: Stores VID limits for this CPU * @last_sample_time: Last Sample time - * @aperf_mperf_shift: Number of clock cycles after aperf, merf is incremented - * This shift is a multiplier to mperf delta to - * calculate CPU busy. + * @aperf_mperf_shift: APERF vs MPERF counting frequency difference * @prev_aperf: Last APERF value read from APERF MSR * @prev_mperf: Last MPERF value read from MPERF MSR * @prev_tsc: Last timestamp counter (TSC) value @@ -275,6 +273,7 @@ static struct cpudata **all_cpu_data; * @get_min: Callback to get minimum P state * @get_turbo: Callback to get turbo P state * @get_scaling: Callback to get frequency scaling factor + * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference * @get_val: Callback to convert P state to actual MSR write value * @get_vid: Callback to get VID data for Atom platforms * @@ -602,11 +601,12 @@ static const unsigned int epp_values[] = { HWP_EPP_POWERSAVE }; -static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) +static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw_epp) { s16 epp; int index = -EINVAL; + *raw_epp = 0; epp = intel_pstate_get_epp(cpu_data, 0); if (epp < 0) return epp; @@ -614,12 +614,14 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { if (epp == HWP_EPP_PERFORMANCE) return 1; - if (epp <= HWP_EPP_BALANCE_PERFORMANCE) + if (epp == HWP_EPP_BALANCE_PERFORMANCE) return 2; - if (epp <= HWP_EPP_BALANCE_POWERSAVE) + if (epp == HWP_EPP_BALANCE_POWERSAVE) return 3; - else + if (epp == HWP_EPP_POWERSAVE) return 4; + *raw_epp = epp; + return 0; } else if (boot_cpu_has(X86_FEATURE_EPB)) { /* * Range: @@ -638,7 +640,8 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) } static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, - int pref_index) + int pref_index, bool use_raw, + u32 raw_epp) { int epp = -EINVAL; int ret; @@ -646,29 +649,34 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, if (!pref_index) epp = cpu_data->epp_default; - mutex_lock(&intel_pstate_limits_lock); - if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { - u64 value; - - ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value); - if (ret) - goto return_pref; + /* + * Use the cached HWP Request MSR value, because the register + * itself may be updated by intel_pstate_hwp_boost_up() or + * intel_pstate_hwp_boost_down() at any time. + */ + u64 value = READ_ONCE(cpu_data->hwp_req_cached); value &= ~GENMASK_ULL(31, 24); - if (epp == -EINVAL) + if (use_raw) + epp = raw_epp; + else if (epp == -EINVAL) epp = epp_values[pref_index - 1]; value |= (u64)epp << 24; + /* + * The only other updater of hwp_req_cached in the active mode, + * intel_pstate_hwp_set(), is called under the same lock as this + * function, so it cannot run in parallel with the update below. + */ + WRITE_ONCE(cpu_data->hwp_req_cached, value); ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value); } else { if (epp == -EINVAL) epp = (pref_index - 1) << 2; ret = intel_pstate_set_epb(cpu_data->cpu, epp); } -return_pref: - mutex_unlock(&intel_pstate_limits_lock); return ret; } @@ -694,31 +702,54 @@ static ssize_t store_energy_performance_preference( { struct cpudata *cpu_data = all_cpu_data[policy->cpu]; char str_preference[21]; - int ret; + bool raw = false; + ssize_t ret; + u32 epp = 0; ret = sscanf(buf, "%20s", str_preference); if (ret != 1) return -EINVAL; ret = match_string(energy_perf_strings, -1, str_preference); - if (ret < 0) - return ret; + if (ret < 0) { + if (!boot_cpu_has(X86_FEATURE_HWP_EPP)) + return ret; - intel_pstate_set_energy_pref_index(cpu_data, ret); - return count; + ret = kstrtouint(buf, 10, &epp); + if (ret) + return ret; + + if (epp > 255) + return -EINVAL; + + raw = true; + } + + mutex_lock(&intel_pstate_limits_lock); + + ret = intel_pstate_set_energy_pref_index(cpu_data, ret, raw, epp); + if (!ret) + ret = count; + + mutex_unlock(&intel_pstate_limits_lock); + + return ret; } static ssize_t show_energy_performance_preference( struct cpufreq_policy *policy, char *buf) { struct cpudata *cpu_data = all_cpu_data[policy->cpu]; - int preference; + int preference, raw_epp; - preference = intel_pstate_get_energy_pref_index(cpu_data); + preference = intel_pstate_get_energy_pref_index(cpu_data, &raw_epp); if (preference < 0) return preference; - return sprintf(buf, "%s\n", energy_perf_strings[preference]); + if (raw_epp) + return sprintf(buf, "%d\n", raw_epp); + else + return sprintf(buf, "%s\n", energy_perf_strings[preference]); } cpufreq_freq_attr_rw(energy_performance_preference); @@ -866,10 +897,39 @@ static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy) return 0; } +#define POWER_CTL_EE_ENABLE 1 +#define POWER_CTL_EE_DISABLE 2 + +static int power_ctl_ee_state; + +static void set_power_ctl_ee_state(bool input) +{ + u64 power_ctl; + + mutex_lock(&intel_pstate_driver_lock); + rdmsrl(MSR_IA32_POWER_CTL, power_ctl); + if (input) { + power_ctl &= ~BIT(MSR_IA32_POWER_CTL_BIT_EE); + power_ctl_ee_state = POWER_CTL_EE_ENABLE; + } else { + power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE); + power_ctl_ee_state = POWER_CTL_EE_DISABLE; + } + wrmsrl(MSR_IA32_POWER_CTL, power_ctl); + mutex_unlock(&intel_pstate_driver_lock); +} + static void intel_pstate_hwp_enable(struct cpudata *cpudata); static int intel_pstate_resume(struct cpufreq_policy *policy) { + + /* Only restore if the system default is changed */ + if (power_ctl_ee_state == POWER_CTL_EE_ENABLE) + set_power_ctl_ee_state(true); + else if (power_ctl_ee_state == POWER_CTL_EE_DISABLE) + set_power_ctl_ee_state(false); + if (!hwp_active) return 0; @@ -1218,6 +1278,32 @@ static ssize_t store_hwp_dynamic_boost(struct kobject *a, return count; } +static ssize_t show_energy_efficiency(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + u64 power_ctl; + int enable; + + rdmsrl(MSR_IA32_POWER_CTL, power_ctl); + enable = !!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE)); + return sprintf(buf, "%d\n", !enable); +} + +static ssize_t store_energy_efficiency(struct kobject *a, struct kobj_attribute *b, + const char *buf, size_t count) +{ + bool input; + int ret; + + ret = kstrtobool(buf, &input); + if (ret) + return ret; + + set_power_ctl_ee_state(input); + + return count; +} + show_one(max_perf_pct, max_perf_pct); show_one(min_perf_pct, min_perf_pct); @@ -1228,6 +1314,7 @@ define_one_global_rw(min_perf_pct); define_one_global_ro(turbo_pct); define_one_global_ro(num_pstates); define_one_global_rw(hwp_dynamic_boost); +define_one_global_rw(energy_efficiency); static struct attribute *intel_pstate_attributes[] = { &status.attr, @@ -1241,6 +1328,8 @@ static const struct attribute_group intel_pstate_attr_group = { .attrs = intel_pstate_attributes, }; +static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[]; + static void __init intel_pstate_sysfs_expose_params(void) { struct kobject *intel_pstate_kobject; @@ -1273,6 +1362,11 @@ static void __init intel_pstate_sysfs_expose_params(void) &hwp_dynamic_boost.attr); WARN_ON(rc); } + + if (x86_match_cpu(intel_pstate_cpu_ee_disable_ids)) { + rc = sysfs_create_file(intel_pstate_kobject, &energy_efficiency.attr); + WARN_ON(rc); + } } /************************** sysfs end ************************/ @@ -1288,25 +1382,6 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); } -#define MSR_IA32_POWER_CTL_BIT_EE 19 - -/* Disable energy efficiency optimization */ -static void intel_pstate_disable_ee(int cpu) -{ - u64 power_ctl; - int ret; - - ret = rdmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, &power_ctl); - if (ret) - return; - - if (!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE))) { - pr_info("Disabling energy efficiency optimization\n"); - power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE); - wrmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, power_ctl); - } -} - static int atom_get_min_pstate(void) { u64 value; @@ -1982,10 +2057,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum) if (hwp_active) { const struct x86_cpu_id *id; - id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids); - if (id) - intel_pstate_disable_ee(cpunum); - intel_pstate_hwp_enable(cpu); id = x86_match_cpu(intel_pstate_hwp_boost_ids); @@ -2754,7 +2825,12 @@ static int __init intel_pstate_init(void) id = x86_match_cpu(hwp_support_ids); if (id) { copy_cpu_funcs(&core_funcs); - if (!no_hwp) { + /* + * Avoid enabling HWP for processors without EPP support, + * because that means incomplete HWP implementation which is a + * corner case and supporting it is generally problematic. + */ + if (!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) { hwp_active++; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; @@ -2808,8 +2884,17 @@ hwp_cpu_matched: if (rc) return rc; - if (hwp_active) + if (hwp_active) { + const struct x86_cpu_id *id; + + id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids); + if (id) { + set_power_ctl_ee_state(false); + pr_info("Disabling energy efficiency optimization\n"); + } + pr_info("HWP enabled\n"); + } return 0; } diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index c66f566a854c..815645170c4d 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -22,6 +22,8 @@ #include #include +#include + #define SDCASR_REG 0x0100 #define SDCASR_REG_STRIDE 0x1000 #define SDCPWR_CFGA0_REG 0x0100 diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 5789fe7a94bd..9f3fc7a073d0 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -616,7 +616,7 @@ static void __exit pcc_cpufreq_exit(void) free_percpu(pcc_cpu_info); } -static const struct acpi_device_id processor_device_ids[] = { +static const struct acpi_device_id __maybe_unused processor_device_ids[] = { {ACPI_PROCESSOR_OBJECT_HID, }, {ACPI_PROCESSOR_DEVICE_HID, }, {}, diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 3984959eed1d..0acc9e241cd7 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -86,7 +86,7 @@ static u32 convert_fid_to_vco_fid(u32 fid) */ static int pending_bit_stuck(void) { - u32 lo, hi; + u32 lo, hi __always_unused; rdmsr(MSR_FIDVID_STATUS, lo, hi); return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; @@ -282,7 +282,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, { u32 rvosteps = data->rvo; u32 savefid = data->currfid; - u32 maxvid, lo, rvomult = 1; + u32 maxvid, lo __always_unused, rvomult = 1; pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 8646eb197cd9..a9af15e994cc 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -64,13 +64,14 @@ * highest_lpstate_idx * @last_sampled_time: Time from boot in ms when global pstates were * last set - * @last_lpstate_idx, Last set value of local pstate and global - * last_gpstate_idx pstate in terms of cpufreq table index + * @last_lpstate_idx: Last set value of local pstate and global + * @last_gpstate_idx: pstate in terms of cpufreq table index * @timer: Is used for ramping down if cpu goes idle for * a long time with global pstate held high * @gpstate_lock: A spinlock to maintain synchronization between * routines called by the timer handler and * governer's target_index calls + * @policy: Associated CPUFreq policy */ struct global_pstate_info { int highest_lpstate_idx; @@ -85,7 +86,7 @@ struct global_pstate_info { static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; -DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER); +static DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER); /** * struct pstate_idx_revmap_data: Entry in the hashmap pstate_revmap * indexed by a function of pstate id. @@ -170,7 +171,7 @@ static inline u8 extract_pstate(u64 pmsr_val, unsigned int shift) /* Use following functions for conversions between pstate_id and index */ -/** +/* * idx_to_pstate : Returns the pstate id corresponding to the * frequency in the cpufreq frequency table * powernv_freqs indexed by @i. @@ -188,7 +189,7 @@ static inline u8 idx_to_pstate(unsigned int i) return powernv_freqs[i].driver_data; } -/** +/* * pstate_to_idx : Returns the index in the cpufreq frequencytable * powernv_freqs for the frequency whose corresponding * pstate id is @pstate. @@ -380,7 +381,7 @@ static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy, powernv_freqs[powernv_pstate_info.nominal].frequency); } -struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = +static struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = __ATTR_RO(cpuinfo_nominal_freq); #define SCALING_BOOST_FREQS_ATTR_INDEX 2 @@ -660,13 +661,13 @@ static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) /** * gpstate_timer_handler * - * @data: pointer to cpufreq_policy on which timer was queued + * @t: Timer context used to fetch global pstate info struct * * This handler brings down the global pstate closer to the local pstate * according quadratic equation. Queues a new timer if it is still not equal * to local pstate */ -void gpstate_timer_handler(struct timer_list *t) +static void gpstate_timer_handler(struct timer_list *t) { struct global_pstate_info *gpstates = from_timer(gpstates, t, timer); struct cpufreq_policy *policy = gpstates->policy; @@ -899,7 +900,7 @@ static struct notifier_block powernv_cpufreq_reboot_nb = { .notifier_call = powernv_cpufreq_reboot_notifier, }; -void powernv_cpufreq_work_fn(struct work_struct *work) +static void powernv_cpufreq_work_fn(struct work_struct *work) { struct chip *chip = container_of(work, struct chip, throttle); struct cpufreq_policy *policy; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 3494f6763597..e62b022cb07e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -577,6 +577,20 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); +#define cpufreq_governor_init(__governor) \ +static int __init __governor##_init(void) \ +{ \ + return cpufreq_register_governor(&__governor); \ +} \ +core_initcall(__governor##_init) + +#define cpufreq_governor_exit(__governor) \ +static void __exit __governor##_exit(void) \ +{ \ + return cpufreq_unregister_governor(&__governor); \ +} \ +module_exit(__governor##_exit) + struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 7fbaee24c824..402a09af9f43 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -909,11 +909,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) } #endif -static int __init sugov_register(void) -{ - return cpufreq_register_governor(&schedutil_gov); -} -core_initcall(sugov_register); +cpufreq_governor_init(schedutil_gov); #ifdef CONFIG_ENERGY_MODEL extern bool sched_energy_update;