Merge master.kernel.org:/pub/scm/linux/kernel/git/davej/cpufreq

* master.kernel.org:/pub/scm/linux/kernel/git/davej/cpufreq:
  [CPUFREQ] Fix up whitespace in conservative governor.
  [CPUFREQ] Make cpufreq_conservative handle out-of-sync events properly
  [CPUFREQ] architectural pstate driver for powernow-k8
This commit is contained in:
Linus Torvalds 2007-11-16 18:30:26 -08:00
commit 4c5cdb1e1f
3 changed files with 119 additions and 144 deletions

View File

@ -46,7 +46,7 @@
#define PFX "powernow-k8: "
#define BFX PFX "BIOS error: "
#define VERSION "version 2.00.00"
#define VERSION "version 2.20.00"
#include "powernow-k8.h"
/* serialize freq changes */
@ -73,33 +73,11 @@ static u32 find_khz_freq_from_fid(u32 fid)
return 1000 * find_freq_from_fid(fid);
}
/* Return a frequency in MHz, given an input fid and did */
static u32 find_freq_from_fiddid(u32 fid, u32 did)
static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate)
{
if (current_cpu_data.x86 == 0x10)
return 100 * (fid + 0x10) >> did;
else
return 100 * (fid + 0x8) >> did;
return data[pstate].frequency;
}
static u32 find_khz_freq_from_fiddid(u32 fid, u32 did)
{
return 1000 * find_freq_from_fiddid(fid, did);
}
static u32 find_fid_from_pstate(u32 pstate)
{
u32 hi, lo;
rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
return lo & HW_PSTATE_FID_MASK;
}
static u32 find_did_from_pstate(u32 pstate)
{
u32 hi, lo;
rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
}
/* Return the vco fid for an input fid
*
@ -142,9 +120,7 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
if (cpu_family == CPU_HW_PSTATE) {
rdmsr(MSR_PSTATE_STATUS, lo, hi);
i = lo & HW_PSTATE_MASK;
rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi);
data->currfid = lo & HW_PSTATE_FID_MASK;
data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
data->currpstate = i;
return 0;
}
do {
@ -295,7 +271,7 @@ static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid,
static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
{
wrmsr(MSR_PSTATE_CTRL, pstate, 0);
data->currfid = find_fid_from_pstate(pstate);
data->currpstate = pstate;
return 0;
}
@ -845,17 +821,20 @@ err_out:
static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
{
int i;
u32 hi = 0, lo = 0;
rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
for (i = 0; i < data->acpi_data.state_count; i++) {
u32 index;
u32 hi = 0, lo = 0;
u32 fid;
u32 did;
index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
if (index > MAX_HW_PSTATE) {
if (index > data->max_hw_pstate) {
printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
continue;
}
rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
if (!(hi & HW_PSTATE_VALID_MASK)) {
@ -864,22 +843,9 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
continue;
}
fid = lo & HW_PSTATE_FID_MASK;
did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
powernow_table[i].index = index;
dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did);
powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT);
powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did);
if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
powernow_table[i].frequency,
(unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
continue;
}
powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
}
return 0;
}
@ -1020,22 +986,18 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
/* Take a frequency, and issue the hardware pstate transition command */
static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index)
{
u32 fid = 0;
u32 did = 0;
u32 pstate = 0;
int res, i;
struct cpufreq_freqs freqs;
dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
/* get fid did for hardware pstate transition */
/* get MSR index for hardware pstate transition */
pstate = index & HW_PSTATE_MASK;
if (pstate > MAX_HW_PSTATE)
if (pstate > data->max_hw_pstate)
return 0;
fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT;
did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT;
freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid);
freqs.new = find_khz_freq_from_fiddid(fid, did);
freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
for_each_cpu_mask(i, *(data->available_cores)) {
freqs.cpu = i;
@ -1043,9 +1005,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
}
res = transition_pstate(data, pstate);
data->currfid = find_fid_from_pstate(pstate);
data->currdid = find_did_from_pstate(pstate);
freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid);
freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
for_each_cpu_mask(i, *(data->available_cores)) {
freqs.cpu = i;
@ -1090,10 +1050,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
if (query_current_values_with_pending_wait(data))
goto err_out;
if (cpu_family == CPU_HW_PSTATE)
dprintk("targ: curr fid 0x%x, did 0x%x\n",
data->currfid, data->currdid);
else {
if (cpu_family != CPU_HW_PSTATE) {
dprintk("targ: curr fid 0x%x, vid 0x%x\n",
data->currfid, data->currvid);
@ -1124,7 +1081,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
mutex_unlock(&fidvid_mutex);
if (cpu_family == CPU_HW_PSTATE)
pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid);
pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate);
else
pol->cur = find_khz_freq_from_fid(data->currfid);
ret = 0;
@ -1223,7 +1180,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
+ (3 * (1 << data->irt) * 10)) * 1000;
if (cpu_family == CPU_HW_PSTATE)
pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid);
pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
else
pol->cur = find_khz_freq_from_fid(data->currfid);
dprintk("policy current frequency %d kHz\n", pol->cur);
@ -1240,8 +1197,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
if (cpu_family == CPU_HW_PSTATE)
dprintk("cpu_init done, current fid 0x%x, did 0x%x\n",
data->currfid, data->currdid);
dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate);
else
dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
data->currfid, data->currvid);
@ -1297,7 +1253,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
goto out;
if (cpu_family == CPU_HW_PSTATE)
khz = find_khz_freq_from_fiddid(data->currfid, data->currdid);
khz = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
else
khz = find_khz_freq_from_fid(data->currfid);

View File

@ -10,6 +10,7 @@ struct powernow_k8_data {
u32 numps; /* number of p-states */
u32 batps; /* number of p-states supported on battery */
u32 max_hw_pstate; /* maximum legal hardware pstate */
/* these values are constant when the PSB is used to determine
* vid/fid pairings, but are modified during the ->target() call
@ -21,8 +22,8 @@ struct powernow_k8_data {
u32 plllock; /* pll lock time, units 1 us */
u32 exttype; /* extended interface = 1 */
/* keep track of the current fid / vid or did */
u32 currvid, currfid, currdid;
/* keep track of the current fid / vid or pstate */
u32 currvid, currfid, currpstate;
/* the powernow_table includes all frequency and vid/fid pairings:
* fid are the lower 8 bits of the index, vid are the upper 8 bits.
@ -87,23 +88,14 @@ struct powernow_k8_data {
/* Hardware Pstate _PSS and MSR definitions */
#define USE_HW_PSTATE 0x00000080
#define HW_PSTATE_FID_MASK 0x0000003f
#define HW_PSTATE_DID_MASK 0x000001c0
#define HW_PSTATE_DID_SHIFT 6
#define HW_PSTATE_MASK 0x00000007
#define HW_PSTATE_VALID_MASK 0x80000000
#define HW_FID_INDEX_SHIFT 8
#define HW_FID_INDEX_MASK 0x0000ff00
#define HW_DID_INDEX_SHIFT 16
#define HW_DID_INDEX_MASK 0x00ff0000
#define HW_WATTS_MASK 0xff
#define HW_PWR_DVR_MASK 0x300
#define HW_PWR_DVR_SHIFT 8
#define HW_PWR_MAX_MULT 3
#define MAX_HW_PSTATE 8 /* hw pstate supports up to 8 */
#define HW_PSTATE_MAX_MASK 0x000000f0
#define HW_PSTATE_MAX_SHIFT 4
#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */
#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */
#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */
#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */
/* define the two driver architectures */
#define CPU_OPTERON 0

View File

@ -37,17 +37,17 @@
#define DEF_FREQUENCY_UP_THRESHOLD (80)
#define DEF_FREQUENCY_DOWN_THRESHOLD (20)
/*
* The polling frequency of this governor depends on the capability of
/*
* The polling frequency of this governor depends on the capability of
* the processor. Default polling frequency is 1000 times the transition
* latency of the processor. The governor will work on any processor with
* transition latency <= 10mS, using appropriate sampling
* latency of the processor. The governor will work on any processor with
* transition latency <= 10mS, using appropriate sampling
* rate.
* For CPUs with transition latency > 10mS (mostly drivers
* with CPUFREQ_ETERNAL), this governor will not work.
* All times here are in uS.
*/
static unsigned int def_sampling_rate;
static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE_RATIO (2)
/* for correct statistics, we need at least 10 ticks between each measure */
#define MIN_STAT_SAMPLING_RATE \
@ -63,12 +63,12 @@ static unsigned int def_sampling_rate;
static void do_dbs_timer(struct work_struct *work);
struct cpu_dbs_info_s {
struct cpufreq_policy *cur_policy;
unsigned int prev_cpu_idle_up;
unsigned int prev_cpu_idle_down;
unsigned int enable;
unsigned int down_skip;
unsigned int requested_freq;
struct cpufreq_policy *cur_policy;
unsigned int prev_cpu_idle_up;
unsigned int prev_cpu_idle_down;
unsigned int enable;
unsigned int down_skip;
unsigned int requested_freq;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
@ -82,24 +82,24 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */
* cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
* is recursive for the same process. -Venki
*/
static DEFINE_MUTEX (dbs_mutex);
static DEFINE_MUTEX (dbs_mutex);
static DECLARE_DELAYED_WORK(dbs_work, do_dbs_timer);
struct dbs_tuners {
unsigned int sampling_rate;
unsigned int sampling_down_factor;
unsigned int up_threshold;
unsigned int down_threshold;
unsigned int ignore_nice;
unsigned int freq_step;
unsigned int sampling_rate;
unsigned int sampling_down_factor;
unsigned int up_threshold;
unsigned int down_threshold;
unsigned int ignore_nice;
unsigned int freq_step;
};
static struct dbs_tuners dbs_tuners_ins = {
.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
.down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD,
.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
.ignore_nice = 0,
.freq_step = 5,
.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
.down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD,
.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
.ignore_nice = 0,
.freq_step = 5,
};
static inline unsigned int get_cpu_idle_time(unsigned int cpu)
@ -109,13 +109,34 @@ static inline unsigned int get_cpu_idle_time(unsigned int cpu)
if (dbs_tuners_ins.ignore_nice)
add_nice = kstat_cpu(cpu).cpustat.nice;
ret = kstat_cpu(cpu).cpustat.idle +
ret = kstat_cpu(cpu).cpustat.idle +
kstat_cpu(cpu).cpustat.iowait +
add_nice;
return ret;
}
/* keep track of frequency transitions */
static int
dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct cpufreq_freqs *freq = data;
struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info,
freq->cpu);
if (!this_dbs_info->enable)
return 0;
this_dbs_info->requested_freq = freq->new;
return 0;
}
static struct notifier_block dbs_cpufreq_notifier_block = {
.notifier_call = dbs_cpufreq_notifier
};
/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
@ -127,8 +148,8 @@ static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
return sprintf (buf, "%u\n", MIN_SAMPLING_RATE);
}
#define define_one_ro(_name) \
static struct freq_attr _name = \
#define define_one_ro(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0444, show_##_name, NULL)
define_one_ro(sampling_rate_max);
@ -148,7 +169,7 @@ show_one(down_threshold, down_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(freq_step, freq_step);
static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
const char *buf, size_t count)
{
unsigned int input;
@ -164,7 +185,7 @@ static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
return count;
}
static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
const char *buf, size_t count)
{
unsigned int input;
@ -183,7 +204,7 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
return count;
}
static ssize_t store_up_threshold(struct cpufreq_policy *unused,
static ssize_t store_up_threshold(struct cpufreq_policy *unused,
const char *buf, size_t count)
{
unsigned int input;
@ -202,7 +223,7 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused,
return count;
}
static ssize_t store_down_threshold(struct cpufreq_policy *unused,
static ssize_t store_down_threshold(struct cpufreq_policy *unused,
const char *buf, size_t count)
{
unsigned int input;
@ -228,16 +249,16 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
int ret;
unsigned int j;
ret = sscanf (buf, "%u", &input);
if ( ret != 1 )
ret = sscanf(buf, "%u", &input);
if (ret != 1)
return -EINVAL;
if ( input > 1 )
if (input > 1)
input = 1;
mutex_lock(&dbs_mutex);
if ( input == dbs_tuners_ins.ignore_nice ) { /* nothing to do */
if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
mutex_unlock(&dbs_mutex);
return count;
}
@ -261,14 +282,14 @@ static ssize_t store_freq_step(struct cpufreq_policy *policy,
unsigned int input;
int ret;
ret = sscanf (buf, "%u", &input);
ret = sscanf(buf, "%u", &input);
if ( ret != 1 )
if (ret != 1)
return -EINVAL;
if ( input > 100 )
if (input > 100)
input = 100;
/* no need to test here if freq_step is zero as the user might actually
* want this, they would be crazy though :) */
mutex_lock(&dbs_mutex);
@ -322,18 +343,18 @@ static void dbs_check_cpu(int cpu)
policy = this_dbs_info->cur_policy;
/*
* The default safe range is 20% to 80%
/*
* The default safe range is 20% to 80%
* Every sampling_rate, we check
* - If current idle time is less than 20%, then we try to
* increase frequency
* - If current idle time is less than 20%, then we try to
* increase frequency
* Every sampling_rate*sampling_down_factor, we check
* - If current idle time is more than 80%, then we try to
* decrease frequency
* - If current idle time is more than 80%, then we try to
* decrease frequency
*
* Any frequency increase takes it to the maximum frequency.
* Frequency reduction happens at minimum steps of
* 5% (default) of max_frequency
* Any frequency increase takes it to the maximum frequency.
* Frequency reduction happens at minimum steps of
* 5% (default) of max_frequency
*/
/* Check for frequency increase */
@ -361,13 +382,13 @@ static void dbs_check_cpu(int cpu)
/* if we are already at full speed then break out early */
if (this_dbs_info->requested_freq == policy->max)
return;
freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100;
/* max freq cannot be less than 100. But who knows.... */
if (unlikely(freq_step == 0))
freq_step = 5;
this_dbs_info->requested_freq += freq_step;
if (this_dbs_info->requested_freq > policy->max)
this_dbs_info->requested_freq = policy->max;
@ -427,15 +448,15 @@ static void dbs_check_cpu(int cpu)
}
static void do_dbs_timer(struct work_struct *work)
{
{
int i;
mutex_lock(&dbs_mutex);
for_each_online_cpu(i)
dbs_check_cpu(i);
schedule_delayed_work(&dbs_work,
schedule_delayed_work(&dbs_work,
usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
mutex_unlock(&dbs_mutex);
}
}
static inline void dbs_timer_init(void)
{
@ -462,13 +483,12 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
switch (event) {
case CPUFREQ_GOV_START:
if ((!cpu_online(cpu)) ||
(!policy->cur))
if ((!cpu_online(cpu)) || (!policy->cur))
return -EINVAL;
if (this_dbs_info->enable) /* Already enabled */
break;
mutex_lock(&dbs_mutex);
rc = sysfs_create_group(&policy->kobj, &dbs_attr_group);
@ -481,7 +501,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
struct cpu_dbs_info_s *j_dbs_info;
j_dbs_info = &per_cpu(cpu_dbs_info, j);
j_dbs_info->cur_policy = policy;
j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(cpu);
j_dbs_info->prev_cpu_idle_down
= j_dbs_info->prev_cpu_idle_up;
@ -511,8 +531,11 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
dbs_tuners_ins.sampling_rate = def_sampling_rate;
dbs_timer_init();
cpufreq_register_notifier(
&dbs_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
}
mutex_unlock(&dbs_mutex);
break;
@ -525,9 +548,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
* Stop the timerschedule work, when this governor
* is used for first time
*/
if (dbs_enable == 0)
if (dbs_enable == 0) {
dbs_timer_exit();
cpufreq_unregister_notifier(
&dbs_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
}
mutex_unlock(&dbs_mutex);
break;
@ -537,11 +564,11 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
if (policy->max < this_dbs_info->cur_policy->cur)
__cpufreq_driver_target(
this_dbs_info->cur_policy,
policy->max, CPUFREQ_RELATION_H);
policy->max, CPUFREQ_RELATION_H);
else if (policy->min > this_dbs_info->cur_policy->cur)
__cpufreq_driver_target(
this_dbs_info->cur_policy,
policy->min, CPUFREQ_RELATION_L);
policy->min, CPUFREQ_RELATION_L);
mutex_unlock(&dbs_mutex);
break;
}