sched/fair: Revert sched-domain iteration breakage
Patches c22402a2f
("sched/fair: Let minimally loaded cpu balance the group") and 0ce90475
("sched/fair: Add some serialization to the sched_domain load-balance walk") are horribly broken, so revert them. The problem is that while it sounds good to have the minimally loaded cpu do the pulling of more load, the way we walk the domains there is absolutely no guarantee this cpu will actually get to the domain. In fact it's very likely it won't. Therefore the higher up the tree we get, the less likely it is we'll balance at all. The first-of-mask cpu always walks up; while sucky in that it accumulates load on the first cpu and needs extra passes to spread it out, this at least guarantees a cpu gets up that far and load-balancing happens at all. Since it's now always the first, and idle cpus should always be able to balance so they get a task as fast as possible, we can also do away with the added serialization. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/n/tip-rpuhs5s56aiv1aw7khv9zkw6@git.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
316ad24830
commit
04f733b4af
|
@ -927,7 +927,6 @@ struct sched_group_power {
|
||||||
struct sched_group {
|
struct sched_group {
|
||||||
struct sched_group *next; /* Must be a circular list */
|
struct sched_group *next; /* Must be a circular list */
|
||||||
atomic_t ref;
|
atomic_t ref;
|
||||||
int balance_cpu;
|
|
||||||
|
|
||||||
unsigned int group_weight;
|
unsigned int group_weight;
|
||||||
struct sched_group_power *sgp;
|
struct sched_group_power *sgp;
|
||||||
|
|
|
@ -5976,7 +5976,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
|
||||||
|
|
||||||
sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
|
sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
|
||||||
atomic_inc(&sg->sgp->ref);
|
atomic_inc(&sg->sgp->ref);
|
||||||
sg->balance_cpu = -1;
|
|
||||||
|
|
||||||
if (cpumask_test_cpu(cpu, sg_span))
|
if (cpumask_test_cpu(cpu, sg_span))
|
||||||
groups = sg;
|
groups = sg;
|
||||||
|
@ -6052,7 +6051,6 @@ build_sched_groups(struct sched_domain *sd, int cpu)
|
||||||
|
|
||||||
cpumask_clear(sched_group_cpus(sg));
|
cpumask_clear(sched_group_cpus(sg));
|
||||||
sg->sgp->power = 0;
|
sg->sgp->power = 0;
|
||||||
sg->balance_cpu = -1;
|
|
||||||
|
|
||||||
for_each_cpu(j, span) {
|
for_each_cpu(j, span) {
|
||||||
if (get_group(j, sdd, NULL) != group)
|
if (get_group(j, sdd, NULL) != group)
|
||||||
|
|
|
@ -3776,8 +3776,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
|
||||||
int *balance, struct sg_lb_stats *sgs)
|
int *balance, struct sg_lb_stats *sgs)
|
||||||
{
|
{
|
||||||
unsigned long load, max_cpu_load, min_cpu_load, max_nr_running;
|
unsigned long load, max_cpu_load, min_cpu_load, max_nr_running;
|
||||||
unsigned int balance_cpu = -1;
|
unsigned int balance_cpu = -1, first_idle_cpu = 0;
|
||||||
unsigned long balance_load = ~0UL;
|
|
||||||
unsigned long avg_load_per_task = 0;
|
unsigned long avg_load_per_task = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@ -3794,11 +3793,12 @@ static inline void update_sg_lb_stats(struct lb_env *env,
|
||||||
|
|
||||||
/* Bias balancing toward cpus of our domain */
|
/* Bias balancing toward cpus of our domain */
|
||||||
if (local_group) {
|
if (local_group) {
|
||||||
load = target_load(i, load_idx);
|
if (idle_cpu(i) && !first_idle_cpu) {
|
||||||
if (load < balance_load || idle_cpu(i)) {
|
first_idle_cpu = 1;
|
||||||
balance_load = load;
|
|
||||||
balance_cpu = i;
|
balance_cpu = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
load = target_load(i, load_idx);
|
||||||
} else {
|
} else {
|
||||||
load = source_load(i, load_idx);
|
load = source_load(i, load_idx);
|
||||||
if (load > max_cpu_load) {
|
if (load > max_cpu_load) {
|
||||||
|
@ -3824,8 +3824,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
|
||||||
*/
|
*/
|
||||||
if (local_group) {
|
if (local_group) {
|
||||||
if (env->idle != CPU_NEWLY_IDLE) {
|
if (env->idle != CPU_NEWLY_IDLE) {
|
||||||
if (balance_cpu != env->dst_cpu ||
|
if (balance_cpu != env->dst_cpu) {
|
||||||
cmpxchg(&group->balance_cpu, -1, balance_cpu) != -1) {
|
|
||||||
*balance = 0;
|
*balance = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -4919,7 +4918,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
|
||||||
int balance = 1;
|
int balance = 1;
|
||||||
struct rq *rq = cpu_rq(cpu);
|
struct rq *rq = cpu_rq(cpu);
|
||||||
unsigned long interval;
|
unsigned long interval;
|
||||||
struct sched_domain *sd, *last = NULL;
|
struct sched_domain *sd;
|
||||||
/* Earliest time when we have to do rebalance again */
|
/* Earliest time when we have to do rebalance again */
|
||||||
unsigned long next_balance = jiffies + 60*HZ;
|
unsigned long next_balance = jiffies + 60*HZ;
|
||||||
int update_next_balance = 0;
|
int update_next_balance = 0;
|
||||||
|
@ -4929,7 +4928,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
for_each_domain(cpu, sd) {
|
for_each_domain(cpu, sd) {
|
||||||
last = sd;
|
|
||||||
if (!(sd->flags & SD_LOAD_BALANCE))
|
if (!(sd->flags & SD_LOAD_BALANCE))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -4974,9 +4972,6 @@ out:
|
||||||
if (!balance)
|
if (!balance)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
for (sd = last; sd; sd = sd->child)
|
|
||||||
(void)cmpxchg(&sd->groups->balance_cpu, cpu, -1);
|
|
||||||
|
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue