From 1ca3abdb6a4b87246b00292f048acd344325fd12 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 23 Jan 2009 09:25:02 -0500 Subject: [PATCH 1/2] [CPUFREQ] Make ignore_nice_load setting of ondemand work as expected. ondemand micro-accounting of idle time changes broke ignore_nice_load sysfs setting due to a thinko in the code. The bug entry: http://bugzilla.kernel.org/show_bug.cgi?id=12310 Reported-by: Jim Bray Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 47 ++++++++++++++++-------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 6a2b036c9389..6f45b1658a67 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -117,11 +117,7 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq); busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq); busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal); - - if (!dbs_tuners_ins.ignore_nice) { - busy_time = cputime64_add(busy_time, - kstat_cpu(cpu).cpustat.nice); - } + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice); idle_time = cputime64_sub(cur_wall_time, busy_time); if (wall) @@ -137,23 +133,6 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) if (idle_time == -1ULL) return get_cpu_idle_time_jiffy(cpu, wall); - if (dbs_tuners_ins.ignore_nice) { - cputime64_t cur_nice; - unsigned long cur_nice_jiffies; - struct cpu_dbs_info_s *dbs_info; - - dbs_info = &per_cpu(cpu_dbs_info, cpu); - cur_nice = cputime64_sub(kstat_cpu(cpu).cpustat.nice, - dbs_info->prev_cpu_nice); - /* - * Assumption: nice time between sampling periods will be - * less than 2^32 jiffies for 32 bit sys - */ - cur_nice_jiffies = (unsigned long) - cputime64_to_jiffies64(cur_nice); - dbs_info->prev_cpu_nice = kstat_cpu(cpu).cpustat.nice; - return idle_time + jiffies_to_usecs(cur_nice_jiffies); - } return idle_time; } @@ -319,6 +298,9 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, dbs_info = &per_cpu(cpu_dbs_info, j); dbs_info->prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + } mutex_unlock(&dbs_mutex); @@ -419,6 +401,23 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) j_dbs_info->prev_cpu_idle); j_dbs_info->prev_cpu_idle = cur_idle_time; + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice, + j_dbs_info->prev_cpu_nice); + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + if (unlikely(!wall_time || wall_time < idle_time)) continue; @@ -575,6 +574,10 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kstat_cpu(j).cpustat.nice; + } } this_dbs_info->cpu = cpu; /* From 732553e567c2700ba5b9bccc6ec885c75779a94b Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 3 Feb 2009 17:46:43 +0100 Subject: [PATCH 2/2] [CPUFREQ] powernow-k8: Get transition latency from ACPI _PSS table At this time, the PowerNow! driver for K8 uses an experimentally derived formula to calculate transition latency. The value it provides is orders of magnitude too large on modern systems. This patch replaces the formula with ACPI _PSS latency values for more accuracy and better performance. I've tested it on two 2nd generation Opteron systems, a 3rd generation Operton system, and a Turion X2 without seeing any stability problems. Signed-off-by: Mark Langsdorf Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 28 ++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 5c28b37dea11..fb039cd345d8 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -939,10 +939,25 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) free_cpumask_var(data->acpi_data.shared_cpu_map); } +static int get_transition_latency(struct powernow_k8_data *data) +{ + int max_latency = 0; + int i; + for (i = 0; i < data->acpi_data.state_count; i++) { + int cur_latency = data->acpi_data.states[i].transition_latency + + data->acpi_data.states[i].bus_master_latency; + if (cur_latency > max_latency) + max_latency = cur_latency; + } + /* value in usecs, needs to be in nanoseconds */ + return 1000 * max_latency; +} + #else static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } +static int get_transition_latency(struct powernow_k8_data *data) { return 0; } #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ /* Take a frequency, and issue the fid/vid transition command */ @@ -1173,7 +1188,13 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (rc) { goto err_out; } - } + /* Take a crude guess here. + * That guess was in microseconds, so multiply with 1000 */ + pol->cpuinfo.transition_latency = ( + ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + + ((1 << data->irt) * 30)) * 1000; + } else /* ACPI _PSS objects available */ + pol->cpuinfo.transition_latency = get_transition_latency(data); /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; @@ -1204,11 +1225,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); data->available_cores = pol->cpus; - /* Take a crude guess here. - * That guess was in microseconds, so multiply with 1000 */ - pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) - + (3 * (1 << data->irt) * 10)) * 1000; - if (cpu_family == CPU_HW_PSTATE) pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else