cpufreq: Add mechanism for registering utilization update callbacks
Introduce a mechanism by which parts of the cpufreq subsystem
("setpolicy" drivers or the core) can register callbacks to be
executed from cpufreq_update_util(), which is invoked by the
scheduler's update_load_avg() on CPU utilization changes.

This allows the "setpolicy" drivers to dispense with their timers and
do all of the computations they need and frequency/voltage adjustments
in the update_load_avg() code path, among other things.

The update_load_avg() changes were suggested by Peter Zijlstra.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
parent de1df26b7c
commit 34e2c555f3
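Before the diff itself, a minimal sketch of the usage the commit message describes: a "setpolicy" driver embeds a struct update_util_data, points its func member at a callback, and registers it per CPU. Everything named my_gov_* below is hypothetical; only struct update_util_data and cpufreq_set_update_util_data() come from this commit.

/* Hypothetical "setpolicy" driver glue; my_gov_* names are illustrative. */
#include <linux/cpufreq.h>
#include <linux/percpu.h>

struct my_gov_cpu {
	struct update_util_data update_util;	/* embedded callback hook */
	unsigned int cpu;
};

static DEFINE_PER_CPU(struct my_gov_cpu, my_gov_cpu_data);

/* Called from cpufreq_update_util() inside an RCU read-side critical
 * section, so it must not sleep. */
static void my_gov_update(struct update_util_data *data, u64 time,
			  unsigned long util, unsigned long max)
{
	struct my_gov_cpu *gcpu =
		container_of(data, struct my_gov_cpu, update_util);

	/* Evaluate and apply a new frequency/voltage for gcpu->cpu here. */
}

static void my_gov_start_cpu(int cpu)
{
	struct my_gov_cpu *gcpu = &per_cpu(my_gov_cpu_data, cpu);

	gcpu->cpu = cpu;
	gcpu->update_util.func = my_gov_update;
	cpufreq_set_update_util_data(cpu, &gcpu->update_util);
}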
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -102,6 +102,51 @@ static LIST_HEAD(cpufreq_governor_list);
 static struct cpufreq_driver *cpufreq_driver;
 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
 static DEFINE_RWLOCK(cpufreq_driver_lock);
+
+static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
+
+/**
+ * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer.
+ * @cpu: The CPU to set the pointer for.
+ * @data: New pointer value.
+ *
+ * Set and publish the update_util_data pointer for the given CPU. That pointer
+ * points to a struct update_util_data object containing a callback function
+ * to call from cpufreq_update_util(). That function will be called from an RCU
+ * read-side critical section, so it must not sleep.
+ *
+ * Callers must use RCU callbacks to free any memory that might be accessed
+ * via the old update_util_data pointer or invoke synchronize_rcu() right after
+ * this function to avoid use-after-free.
+ */
+void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
+{
+	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
+}
+EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);
+
+/**
+ * cpufreq_update_util - Take a note about CPU utilization changes.
+ * @time: Current time.
+ * @util: Current utilization.
+ * @max: Utilization ceiling.
+ *
+ * This function is called by the scheduler on every invocation of
+ * update_load_avg() on the CPU whose utilization is being updated.
+ */
+void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
+{
+	struct update_util_data *data;
+
+	rcu_read_lock();
+
+	data = rcu_dereference(*this_cpu_ptr(&cpufreq_update_util_data));
+	if (data && data->func)
+		data->func(data, time, util, max);
+
+	rcu_read_unlock();
+}
+
 DEFINE_MUTEX(cpufreq_governor_lock);
 
 /* Flag to suspend/resume CPUFreq governors */
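The kernel-doc above spells out the teardown rule. Continuing the hypothetical my_gov_* example, unregistering would look like the sketch below: clear the per-CPU pointer, wait out any in-flight callbacks, and only then free or reuse the storage.

/* Teardown per the kernel-doc above; my_gov_* names remain hypothetical. */
static void my_gov_stop_cpu(int cpu)
{
	cpufreq_set_update_util_data(cpu, NULL);
	synchronize_rcu();	/* no CPU is still inside my_gov_update() */
	/* now it is safe to free memory reachable via the old pointer */
}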
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -151,6 +151,36 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy)
 extern struct kobject *cpufreq_global_kobject;
 
 #ifdef CONFIG_CPU_FREQ
+void cpufreq_update_util(u64 time, unsigned long util, unsigned long max);
+
+/**
+ * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
+ * @time: Current time.
+ *
+ * The way cpufreq is currently arranged requires it to evaluate the CPU
+ * performance state (frequency/voltage) on a regular basis to prevent it from
+ * being stuck in a completely inadequate performance level for too long.
+ * That is not guaranteed to happen if the updates are only triggered from CFS,
+ * though, because they may not be coming in if RT or deadline tasks are active
+ * all the time (or there are RT and DL tasks only).
+ *
+ * As a workaround for that issue, this function is called by the RT and DL
+ * sched classes to trigger extra cpufreq updates to prevent it from stalling,
+ * but that really is a band-aid. Going forward it should be replaced with
+ * solutions targeted more specifically at RT and DL tasks.
+ */
+static inline void cpufreq_trigger_update(u64 time)
+{
+	cpufreq_update_util(time, ULONG_MAX, 0);
+}
+
+struct update_util_data {
+	void (*func)(struct update_util_data *data,
+		     u64 time, unsigned long util, unsigned long max);
+};
+
+void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
+
 unsigned int cpufreq_get(unsigned int cpu);
 unsigned int cpufreq_quick_get(unsigned int cpu);
 unsigned int cpufreq_quick_get_max(unsigned int cpu);
@@ -162,6 +192,10 @@ int cpufreq_update_policy(unsigned int cpu);
 bool have_governor_per_policy(void);
 struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
 #else
+static inline void cpufreq_update_util(u64 time, unsigned long util,
+				       unsigned long max) {}
+static inline void cpufreq_trigger_update(u64 time) {}
+
 static inline unsigned int cpufreq_get(unsigned int cpu)
 {
 	return 0;
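Note the sentinel arguments in cpufreq_trigger_update(): util == ULONG_MAX with max == 0. A callback that wants to tell an RT/DL kick apart from a genuine CFS utilization sample could rely on that, as in the hedged sketch below, a variant of the earlier hypothetical my_gov_update(). The util > max check is an inference from the values above, not a contract this commit documents.

static void my_gov_update(struct update_util_data *data, u64 time,
			  unsigned long util, unsigned long max)
{
	if (util > max) {
		/* RT/DL kick via cpufreq_trigger_update(): the utilization
		 * numbers are sentinels, re-evaluate without trusting them. */
		return;
	}
	/* Genuine CFS sample: 0 <= util <= max. */
}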
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -726,6 +726,10 @@ static void update_curr_dl(struct rq *rq)
 	if (!dl_task(curr) || !on_dl_rq(dl_se))
 		return;
 
+	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
+	if (cpu_of(rq) == smp_processor_id())
+		cpufreq_trigger_update(rq_clock(rq));
+
 	/*
 	 * Consumed budget is computed considering the time as
 	 * observed by schedulable tasks (excluding time spent
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2824,7 +2824,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 now = cfs_rq_clock_task(cfs_rq);
-	int cpu = cpu_of(rq_of(cfs_rq));
+	struct rq *rq = rq_of(cfs_rq);
+	int cpu = cpu_of(rq);
 
 	/*
 	 * Track task load average for carrying it to new CPU after migrated, and
@@ -2836,6 +2837,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
 
 	if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
 		update_tg_load_avg(cfs_rq, 0);
+
+	if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
+		unsigned long max = rq->cpu_capacity_orig;
+
+		/*
+		 * There are a few boundary cases this might miss but it should
+		 * get called often enough that that should (hopefully) not be
+		 * a real problem -- added to that it only calls on the local
+		 * CPU, so if we enqueue remotely we'll miss an update, but
+		 * the next tick/schedule should update.
+		 *
+		 * It will not get called when we go idle, because the idle
+		 * thread is a different class (!fair), nor will the utilization
+		 * number include things like RT tasks.
+		 *
+		 * As is, the util number is not freq-invariant (we'd have to
+		 * implement arch_scale_freq_capacity() for that).
+		 *
+		 * See cpu_util().
+		 */
+		cpufreq_update_util(rq_clock(rq),
+				    min(cfs_rq->avg.util_avg, max), max);
+	}
 }
 
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
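The (util, max) pair handed to cpufreq_update_util() here is the raw CFS utilization clamped to the CPU's original capacity. One simple policy a callback could derive from it is a linear frequency map; the sketch below is purely illustrative (my_gov_next_freq is hypothetical and nothing in this commit implements a frequency-selection policy).

#include <linux/math64.h>

/* Sketch: scale the policy's maximum frequency by util/max, so that a
 * fully utilized CPU (util == max) runs at policy->max. */
static unsigned int my_gov_next_freq(struct cpufreq_policy *policy,
				     unsigned long util, unsigned long max)
{
	if (util >= max || !max)
		return policy->max;	/* saturated, or kicked via trigger */

	return (unsigned int)div_u64((u64)policy->max * util, max);
}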
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -945,6 +945,10 @@ static void update_curr_rt(struct rq *rq)
 	if (curr->sched_class != &rt_sched_class)
 		return;
 
+	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
+	if (cpu_of(rq) == smp_processor_id())
+		cpufreq_trigger_update(rq_clock(rq));
+
 	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
 	if (unlikely((s64)delta_exec <= 0))
 		return;
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -9,6 +9,7 @@
 #include <linux/irq_work.h>
 #include <linux/tick.h>
 #include <linux/slab.h>
+#include <linux/cpufreq.h>
 
 #include "cpupri.h"
 #include "cpudeadline.h"