Merge branch 'pm-x86'
* pm-x86: x86: tsc: Rework time_cpufreq_notifier() admin-guide: pm: intel_epb: Add SPDX license tag and copyright notice PM / arch: x86: MSR_IA32_ENERGY_PERF_BIAS sysfs interface PM / arch: x86: Rework the MSR_IA32_ENERGY_PERF_BIAS handling
This commit is contained in:
commit
4566e2dd4a
|
@ -518,3 +518,21 @@ Description: Control Symmetric Multi Threading (SMT)
|
|||
|
||||
If control status is "forceoff" or "notsupported" writes
|
||||
are rejected.
|
||||
|
||||
What: /sys/devices/system/cpu/cpu#/power/energy_perf_bias
|
||||
Date: March 2019
|
||||
Contact: linux-pm@vger.kernel.org
|
||||
Description: Intel Energy and Performance Bias Hint (EPB)
|
||||
|
||||
EPB for the given CPU in a sliding scale 0 - 15, where a value
|
||||
of 0 corresponds to a hint preference for highest performance
|
||||
and a value of 15 corresponds to the maximum energy savings.
|
||||
|
||||
In order to change the EPB value for the CPU, write either
|
||||
a number in the 0 - 15 sliding scale above, or one of the
|
||||
strings: "performance", "balance-performance", "normal",
|
||||
"balance-power", "power" (that represent values reflected by
|
||||
their meaning), to this attribute.
|
||||
|
||||
This attribute is present for all online CPUs supporting the
|
||||
Intel EPB feature.
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
======================================
|
||||
Intel Performance and Energy Bias Hint
|
||||
======================================
|
||||
|
||||
:Copyright: |copy| 2019 Intel Corporation
|
||||
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
|
||||
.. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c
|
||||
:doc: overview
|
||||
|
||||
Intel Performance and Energy Bias Attribute in ``sysfs``
|
||||
========================================================
|
||||
|
||||
The Intel Performance and Energy Bias Hint (EPB) value for a given (logical) CPU
|
||||
can be checked or updated through a ``sysfs`` attribute (file) under
|
||||
:file:`/sys/devices/system/cpu/cpu<N>/power/`, where the CPU number ``<N>``
|
||||
is allocated at the system initialization time:
|
||||
|
||||
``energy_perf_bias``
|
||||
Shows the current EPB value for the CPU in a sliding scale 0 - 15, where
|
||||
a value of 0 corresponds to a hint preference for highest performance
|
||||
and a value of 15 corresponds to the maximum energy savings.
|
||||
|
||||
In order to update the EPB value for the CPU, this attribute can be
|
||||
written to, either with a number in the 0 - 15 sliding scale above, or
|
||||
with one of the strings: "performance", "balance-performance", "normal",
|
||||
"balance-power", "power" that represent values reflected by their
|
||||
meaning.
|
||||
|
||||
This attribute is present for all online CPUs supporting the EPB
|
||||
feature.
|
||||
|
||||
Note that while the EPB interface to the processor is defined at the logical CPU
|
||||
level, the physical register backing it may be shared by multiple CPUs (for
|
||||
example, SMT siblings or cores in one package). For this reason, updating the
|
||||
EPB value for one CPU may cause the EPB values for other CPUs to change.
|
|
@ -8,3 +8,4 @@ Working-State Power Management
|
|||
cpuidle
|
||||
cpufreq
|
||||
intel_pstate
|
||||
intel_epb
|
||||
|
|
|
@ -28,7 +28,7 @@ obj-y += cpuid-deps.o
|
|||
obj-$(CONFIG_PROC_FS) += proc.o
|
||||
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
|
||||
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o intel_epb.o
|
||||
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
|
||||
obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o
|
||||
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
|
||||
|
|
|
@ -1864,23 +1864,6 @@ void cpu_init(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
static void bsp_resume(void)
|
||||
{
|
||||
if (this_cpu->c_bsp_resume)
|
||||
this_cpu->c_bsp_resume(&boot_cpu_data);
|
||||
}
|
||||
|
||||
static struct syscore_ops cpu_syscore_ops = {
|
||||
.resume = bsp_resume,
|
||||
};
|
||||
|
||||
/*
 * Register cpu_syscore_ops with the syscore framework at core_initcall time.
 * Always returns 0.
 */
static int __init init_cpu_syscore(void)
{
	register_syscore_ops(&cpu_syscore_ops);

	return 0;
}
core_initcall(init_cpu_syscore);
|
||||
|
||||
/*
|
||||
* The microcode loader calls this upon late microcode load to recheck features,
|
||||
* only when microcode has been updated. Caller holds microcode_mutex and CPU
|
||||
|
|
|
@ -14,7 +14,6 @@ struct cpu_dev {
|
|||
void (*c_init)(struct cpuinfo_x86 *);
|
||||
void (*c_identify)(struct cpuinfo_x86 *);
|
||||
void (*c_detect_tlb)(struct cpuinfo_x86 *);
|
||||
void (*c_bsp_resume)(struct cpuinfo_x86 *);
|
||||
int c_x86_vendor;
|
||||
#ifdef CONFIG_X86_32
|
||||
/* Optional vendor specific routine to obtain the cache size. */
|
||||
|
|
|
@ -596,36 +596,6 @@ detect_keyid_bits:
|
|||
c->x86_phys_bits -= keyid_bits;
|
||||
}
|
||||
|
||||
static void init_intel_energy_perf(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 epb;
|
||||
|
||||
/*
|
||||
* Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized.
|
||||
* (x86_energy_perf_policy(8) is available to change it at run-time.)
|
||||
*/
|
||||
if (!cpu_has(c, X86_FEATURE_EPB))
|
||||
return;
|
||||
|
||||
rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
||||
if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
|
||||
return;
|
||||
|
||||
pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
|
||||
pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
|
||||
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
|
||||
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
||||
}
|
||||
|
||||
/*
 * Boot-CPU resume hook for Intel CPUs.
 *
 * MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume, so run the same
 * initialization that is done during bootup.
 */
static void intel_bsp_resume(struct cpuinfo_x86 *c)
{
	init_intel_energy_perf(c);
}
|
||||
|
||||
static void init_cpuid_fault(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 msr;
|
||||
|
@ -763,8 +733,6 @@ static void init_intel(struct cpuinfo_x86 *c)
|
|||
if (cpu_has(c, X86_FEATURE_TME))
|
||||
detect_tme(c);
|
||||
|
||||
init_intel_energy_perf(c);
|
||||
|
||||
init_intel_misc_features(c);
|
||||
}
|
||||
|
||||
|
@ -1023,9 +991,7 @@ static const struct cpu_dev intel_cpu_dev = {
|
|||
.c_detect_tlb = intel_detect_tlb,
|
||||
.c_early_init = early_init_intel,
|
||||
.c_init = init_intel,
|
||||
.c_bsp_resume = intel_bsp_resume,
|
||||
.c_x86_vendor = X86_VENDOR_INTEL,
|
||||
};
|
||||
|
||||
cpu_dev_register(intel_cpu_dev);
|
||||
|
||||
|
|
|
@ -0,0 +1,216 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Intel Performance and Energy Bias Hint support.
|
||||
*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* Author:
|
||||
* Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
*/
|
||||
|
||||
#include <linux/cpuhotplug.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/pm.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
/**
|
||||
* DOC: overview
|
||||
*
|
||||
* The Performance and Energy Bias Hint (EPB) allows software to specify its
|
||||
* preference with respect to the power-performance tradeoffs present in the
|
||||
* processor. Generally, the EPB is expected to be set by user space (directly
|
||||
* via sysfs or with the help of the x86_energy_perf_policy tool), but there are
|
||||
* two reasons for the kernel to update it.
|
||||
*
|
||||
* First, there are systems where the platform firmware resets the EPB during
|
||||
* system-wide transitions from sleep states back into the working state
|
||||
* effectively causing the previous EPB updates by user space to be lost.
|
||||
* Thus the kernel needs to save the current EPB values for all CPUs during
|
||||
* system-wide transitions to sleep states and restore them on the way back to
|
||||
* the working state. That can be achieved by saving EPB for secondary CPUs
|
||||
* when they are taken offline during transitions into system sleep states and
|
||||
* for the boot CPU in a syscore suspend operation, so that it can be restored
|
||||
* for the boot CPU in a syscore resume operation and for the other CPUs when
|
||||
* they are brought back online. However, CPUs that are already offline when
|
||||
* a system-wide PM transition is started are not taken offline again, but their
|
||||
* EPB values may still be reset by the platform firmware during the transition,
|
||||
* so in fact it is necessary to save the EPB of any CPU taken offline and to
|
||||
* restore it when the given CPU goes back online at all times.
|
||||
*
|
||||
* Second, on many systems the initial EPB value coming from the platform
|
||||
* firmware is 0 ('performance') and at least on some of them that is because
|
||||
* the platform firmware does not initialize EPB at all with the assumption that
|
||||
* the OS will do that anyway. That sometimes is problematic, as it may cause
|
||||
* the system battery to drain too fast, for example, so it is better to adjust
|
||||
* it on CPU bring-up and if the initial EPB value for a given CPU is 0, the
|
||||
* kernel changes it to 6 ('normal').
|
||||
*/
|
||||
|
||||
/*
 * Per-CPU copy of the EPB value taken by intel_epb_save(). A value of 0 means
 * "never saved"; a saved value always carries the EPB_SAVED bit.
 */
static DEFINE_PER_CPU(u8, saved_epb);

#define EPB_MASK	0x0fULL		/* EPB field: low 4 bits of the MSR */
#define EPB_SAVED	0x10ULL		/* marker bit kept only in saved_epb */
#define MAX_EPB		EPB_MASK	/* largest value accepted from sysfs */
|
||||
|
||||
static int intel_epb_save(void)
|
||||
{
|
||||
u64 epb;
|
||||
|
||||
rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
||||
/*
|
||||
* Ensure that saved_epb will always be nonzero after this write even if
|
||||
* the EPB value read from the MSR is 0.
|
||||
*/
|
||||
this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void intel_epb_restore(void)
|
||||
{
|
||||
u64 val = this_cpu_read(saved_epb);
|
||||
u64 epb;
|
||||
|
||||
rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
||||
if (val) {
|
||||
val &= EPB_MASK;
|
||||
} else {
|
||||
/*
|
||||
* Because intel_epb_save() has not run for the current CPU yet,
|
||||
* it is going online for the first time, so if its EPB value is
|
||||
* 0 ('performance') at this point, assume that it has not been
|
||||
* initialized by the platform firmware and set it to 6
|
||||
* ('normal').
|
||||
*/
|
||||
val = epb & EPB_MASK;
|
||||
if (val == ENERGY_PERF_BIAS_PERFORMANCE) {
|
||||
val = ENERGY_PERF_BIAS_NORMAL;
|
||||
pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
|
||||
}
|
||||
}
|
||||
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
|
||||
}
|
||||
|
||||
static struct syscore_ops intel_epb_syscore_ops = {
|
||||
.suspend = intel_epb_save,
|
||||
.resume = intel_epb_restore,
|
||||
};
|
||||
|
||||
/*
 * Symbolic names accepted by the energy_perf_bias sysfs attribute.
 * Entry i corresponds to energ_perf_values[i].
 */
static const char * const energy_perf_strings[] = {
	[0] = "performance",
	[1] = "balance-performance",
	[2] = "normal",
	[3] = "balance-power",
	[4] = "power",
};
|
||||
static const u8 energ_perf_values[] = {
|
||||
ENERGY_PERF_BIAS_PERFORMANCE,
|
||||
ENERGY_PERF_BIAS_BALANCE_PERFORMANCE,
|
||||
ENERGY_PERF_BIAS_NORMAL,
|
||||
ENERGY_PERF_BIAS_BALANCE_POWERSAVE,
|
||||
ENERGY_PERF_BIAS_POWERSAVE
|
||||
};
|
||||
|
||||
static ssize_t energy_perf_bias_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
unsigned int cpu = dev->id;
|
||||
u64 epb;
|
||||
int ret;
|
||||
|
||||
ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return sprintf(buf, "%llu\n", epb);
|
||||
}
|
||||
|
||||
/*
 * sysfs "store" handler for the energy_perf_bias attribute.
 *
 * @buf is first matched against energy_perf_strings[]; on a match the
 * corresponding entry of energ_perf_values[] is used. Otherwise @buf is parsed
 * as an unsigned number (base auto-detected) that must not exceed MAX_EPB.
 * The EPB field of MSR_IA32_ENERGY_PERF_BIAS on the CPU identified by dev->id
 * is then replaced, leaving the other MSR bits as read.
 *
 * Returns @count on success, -EINVAL for unparsable or out-of-range input, or
 * the negative error code from the MSR read/write helpers.
 */
static ssize_t energy_perf_bias_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf, size_t count)
{
	unsigned int cpu = dev->id;
	u64 msr_val, new_epb;
	int idx, err;

	idx = __sysfs_match_string(energy_perf_strings,
				   ARRAY_SIZE(energy_perf_strings), buf);
	if (idx < 0) {
		/* Not a known name: fall back to a plain number. */
		if (kstrtou64(buf, 0, &new_epb) || new_epb > MAX_EPB)
			return -EINVAL;
	} else {
		new_epb = energ_perf_values[idx];
	}

	err = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr_val);
	if (err < 0)
		return err;

	msr_val &= ~EPB_MASK;
	err = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, msr_val | new_epb);
	if (err < 0)
		return err;

	return count;
}
|
||||
|
||||
static DEVICE_ATTR_RW(energy_perf_bias);
|
||||
|
||||
static struct attribute *intel_epb_attrs[] = {
|
||||
&dev_attr_energy_perf_bias.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static const struct attribute_group intel_epb_attr_group = {
|
||||
.name = power_group_name,
|
||||
.attrs = intel_epb_attrs
|
||||
};
|
||||
|
||||
static int intel_epb_online(unsigned int cpu)
|
||||
{
|
||||
struct device *cpu_dev = get_cpu_device(cpu);
|
||||
|
||||
intel_epb_restore();
|
||||
if (!cpuhp_tasks_frozen)
|
||||
sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int intel_epb_offline(unsigned int cpu)
|
||||
{
|
||||
struct device *cpu_dev = get_cpu_device(cpu);
|
||||
|
||||
if (!cpuhp_tasks_frozen)
|
||||
sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group);
|
||||
|
||||
intel_epb_save();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init int intel_epb_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_EPB))
|
||||
return -ENODEV;
|
||||
|
||||
ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE,
|
||||
"x86/intel/epb:online", intel_epb_online,
|
||||
intel_epb_offline);
|
||||
if (ret < 0)
|
||||
goto err_out_online;
|
||||
|
||||
register_syscore_ops(&intel_epb_syscore_ops);
|
||||
return 0;
|
||||
|
||||
err_out_online:
|
||||
cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE);
|
||||
return ret;
|
||||
}
|
||||
subsys_initcall(intel_epb_init);
|
|
@ -185,8 +185,7 @@ static void __init cyc2ns_init_boot_cpu(void)
|
|||
/*
|
||||
* Secondary CPUs do not run through tsc_init(), so set up
|
||||
* all the scale factors for all CPUs, assuming the same
|
||||
* speed as the bootup CPU. (cpufreq notifiers will fix this
|
||||
* up if their speed diverges)
|
||||
* speed as the bootup CPU.
|
||||
*/
|
||||
static void __init cyc2ns_init_secondary_cpus(void)
|
||||
{
|
||||
|
@ -937,12 +936,12 @@ void tsc_restore_sched_clock_state(void)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_CPU_FREQ
|
||||
/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
|
||||
/*
|
||||
* Frequency scaling support. Adjust the TSC based timer when the CPU frequency
|
||||
* changes.
|
||||
*
|
||||
* RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
|
||||
* not that important because current Opteron setups do not support
|
||||
* scaling on SMP anyroads.
|
||||
* NOTE: On SMP the situation is not fixable in general, so simply mark the TSC
|
||||
* as unstable and give up in those cases.
|
||||
*
|
||||
* Should fix up last_tsc too. Currently gettimeofday in the
|
||||
* first tick after the change will be slightly wrong.
|
||||
|
@ -956,22 +955,22 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
|
|||
void *data)
|
||||
{
|
||||
struct cpufreq_freqs *freq = data;
|
||||
unsigned long *lpj;
|
||||
|
||||
lpj = &boot_cpu_data.loops_per_jiffy;
|
||||
#ifdef CONFIG_SMP
|
||||
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
|
||||
lpj = &cpu_data(freq->cpu).loops_per_jiffy;
|
||||
#endif
|
||||
if (num_online_cpus() > 1) {
|
||||
mark_tsc_unstable("cpufreq changes on SMP");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!ref_freq) {
|
||||
ref_freq = freq->old;
|
||||
loops_per_jiffy_ref = *lpj;
|
||||
loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy;
|
||||
tsc_khz_ref = tsc_khz;
|
||||
}
|
||||
|
||||
if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
|
||||
(val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
|
||||
*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
|
||||
(val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
|
||||
boot_cpu_data.loops_per_jiffy =
|
||||
cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
|
||||
|
||||
tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
|
||||
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
|
||||
|
|
|
@ -147,6 +147,7 @@ enum cpuhp_state {
|
|||
CPUHP_AP_X86_VDSO_VMA_ONLINE,
|
||||
CPUHP_AP_IRQ_AFFINITY_ONLINE,
|
||||
CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS,
|
||||
CPUHP_AP_X86_INTEL_EPB_ONLINE,
|
||||
CPUHP_AP_PERF_ONLINE,
|
||||
CPUHP_AP_PERF_X86_ONLINE,
|
||||
CPUHP_AP_PERF_X86_UNCORE_ONLINE,
|
||||
|
|
Loading…
Reference in New Issue