sched: Scale down cpu_power due to RT tasks
Keep an average on the amount of time spent on RT tasks and use that fraction to scale down the cpu_power for regular tasks. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Tested-by: Andreas Herrmann <andreas.herrmann3@amd.com> Acked-by: Andreas Herrmann <andreas.herrmann3@amd.com> Acked-by: Gautham R Shenoy <ego@in.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> LKML-Reference: <20090901083826.287778431@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
ab29230e67
commit
e9e9250bc7
|
@ -1831,6 +1831,7 @@ extern unsigned int sysctl_sched_child_runs_first;
|
|||
extern unsigned int sysctl_sched_features;
|
||||
extern unsigned int sysctl_sched_migration_cost;
|
||||
extern unsigned int sysctl_sched_nr_migrate;
|
||||
extern unsigned int sysctl_sched_time_avg;
|
||||
extern unsigned int sysctl_timer_migration;
|
||||
|
||||
int sched_nr_latency_handler(struct ctl_table *table, int write,
|
||||
|
|
|
@ -627,6 +627,9 @@ struct rq {
|
|||
|
||||
struct task_struct *migration_thread;
|
||||
struct list_head migration_queue;
|
||||
|
||||
u64 rt_avg;
|
||||
u64 age_stamp;
|
||||
#endif
|
||||
|
||||
/* calc_load related fields */
|
||||
|
@ -862,6 +865,14 @@ unsigned int sysctl_sched_shares_ratelimit = 250000;
|
|||
*/
|
||||
unsigned int sysctl_sched_shares_thresh = 4;
|
||||
|
||||
/*
|
||||
* period over which we average the RT time consumption, measured
|
||||
* in ms.
|
||||
*
|
||||
* default: 1s
|
||||
*/
|
||||
const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
|
||||
|
||||
/*
|
||||
* period over which we measure -rt task cpu usage in us.
|
||||
* default: 1s
|
||||
|
@ -1280,12 +1291,37 @@ void wake_up_idle_cpu(int cpu)
|
|||
}
|
||||
#endif /* CONFIG_NO_HZ */
|
||||
|
||||
static u64 sched_avg_period(void)
|
||||
{
|
||||
return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
|
||||
}
|
||||
|
||||
static void sched_avg_update(struct rq *rq)
|
||||
{
|
||||
s64 period = sched_avg_period();
|
||||
|
||||
while ((s64)(rq->clock - rq->age_stamp) > period) {
|
||||
rq->age_stamp += period;
|
||||
rq->rt_avg /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * sched_rt_avg_update - account RT execution time on a runqueue.
 * @rq:       runqueue to charge.
 * @rt_delta: amount of RT run time to add to the average.
 *
 * The delta is added first and the average is aged afterwards, so the
 * newly added time takes part in any halving that is due.
 */
static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
	rq->rt_avg = rq->rt_avg + rt_delta;
	sched_avg_update(rq);
}
|
||||
|
||||
#else /* !CONFIG_SMP */
|
||||
static void resched_task(struct task_struct *p)
|
||||
{
|
||||
assert_spin_locked(&task_rq(p)->lock);
|
||||
set_tsk_need_resched(p);
|
||||
}
|
||||
|
||||
/*
 * sched_rt_avg_update - !CONFIG_SMP stub.
 *
 * Intentionally empty: in this configuration no RT time average is
 * accumulated, but callers can still invoke it unconditionally.
 */
static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
|
@ -3699,7 +3735,7 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
|
|||
}
|
||||
#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
|
||||
|
||||
unsigned long __weak arch_smt_gain(struct sched_domain *sd, int cpu)
|
||||
unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
unsigned long weight = cpumask_weight(sched_domain_span(sd));
|
||||
unsigned long smt_gain = sd->smt_gain;
|
||||
|
@ -3709,6 +3745,24 @@ unsigned long __weak arch_smt_gain(struct sched_domain *sd, int cpu)
|
|||
return smt_gain;
|
||||
}
|
||||
|
||||
unsigned long scale_rt_power(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
u64 total, available;
|
||||
|
||||
sched_avg_update(rq);
|
||||
|
||||
total = sched_avg_period() + (rq->clock - rq->age_stamp);
|
||||
available = total - rq->rt_avg;
|
||||
|
||||
if (unlikely((s64)total < SCHED_LOAD_SCALE))
|
||||
total = SCHED_LOAD_SCALE;
|
||||
|
||||
total >>= SCHED_LOAD_SHIFT;
|
||||
|
||||
return div_u64(available, total);
|
||||
}
|
||||
|
||||
static void update_cpu_power(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
unsigned long weight = cpumask_weight(sched_domain_span(sd));
|
||||
|
@ -3719,11 +3773,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
|
|||
/* here we could scale based on cpufreq */
|
||||
|
||||
if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
|
||||
power *= arch_smt_gain(sd, cpu);
|
||||
power *= arch_scale_smt_power(sd, cpu);
|
||||
power >>= SCHED_LOAD_SHIFT;
|
||||
}
|
||||
|
||||
/* here we could scale based on RT time */
|
||||
power *= scale_rt_power(cpu);
|
||||
power >>= SCHED_LOAD_SHIFT;
|
||||
|
||||
if (!power)
|
||||
power = 1;
|
||||
|
||||
if (power != old) {
|
||||
sdg->__cpu_power = power;
|
||||
|
|
|
@ -615,6 +615,8 @@ static void update_curr_rt(struct rq *rq)
|
|||
curr->se.exec_start = rq->clock;
|
||||
cpuacct_charge(curr, delta_exec);
|
||||
|
||||
sched_rt_avg_update(rq, delta_exec);
|
||||
|
||||
if (!rt_bandwidth_enabled())
|
||||
return;
|
||||
|
||||
|
@ -887,8 +889,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
|
|||
|
||||
if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
|
||||
enqueue_pushable_task(rq, p);
|
||||
|
||||
inc_cpu_load(rq, p->se.load.weight);
|
||||
}
|
||||
|
||||
static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
|
||||
|
@ -899,8 +899,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
|
|||
dequeue_rt_entity(rt_se);
|
||||
|
||||
dequeue_pushable_task(rq, p);
|
||||
|
||||
dec_cpu_load(rq, p->se.load.weight);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -330,6 +330,14 @@ static struct ctl_table kern_table[] = {
|
|||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_time_avg",
|
||||
.data = &sysctl_sched_time_avg,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "timer_migration",
|
||||
|
|
Loading…
Reference in New Issue