sched/fair: Fix and optimize the fork() path
The task_fork_fair() callback already calls __set_task_cpu() and takes rq->lock. If we move the sched_class::task_fork callback in sched_fork() under the existing p->pi_lock, right after its set_task_cpu() call, we can avoid doing two such calls and omit the IRQ disabling on the rq->lock. Change to __set_task_cpu() to skip the migration bits: this is a new task, not a migration. Similarly, make wake_up_new_task() use __set_task_cpu() for the same reason: the task hasn't actually migrated, as it has never run. This cures the problem of calling migrate_task_rq_fair(), which does remove_entity_from_load_avg() on tasks that have never been added to the load avg to begin with. This bug would result in transiently messed-up load_avg values, averaged out after a few dozen milliseconds. This is probably the reason why this bug was not found for such a long time. Reported-by: Vincent Guittot <vincent.guittot@linaro.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
630741fb60
commit
e210bffd39
|
@ -2383,9 +2383,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|||
p->sched_class = &fair_sched_class;
|
||||
}
|
||||
|
||||
if (p->sched_class->task_fork)
|
||||
p->sched_class->task_fork(p);
|
||||
|
||||
/*
|
||||
* The child is not yet in the pid-hash so no cgroup attach races,
|
||||
* and the cgroup is pinned to this child due to cgroup_fork()
|
||||
|
@ -2394,7 +2391,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|||
* Silence PROVE_RCU.
|
||||
*/
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
set_task_cpu(p, cpu);
|
||||
/*
|
||||
* We're setting the cpu for the first time, we don't migrate,
|
||||
* so use __set_task_cpu().
|
||||
*/
|
||||
__set_task_cpu(p, cpu);
|
||||
if (p->sched_class->task_fork)
|
||||
p->sched_class->task_fork(p);
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
#ifdef CONFIG_SCHED_INFO
|
||||
|
@ -2534,8 +2537,11 @@ void wake_up_new_task(struct task_struct *p)
|
|||
* Fork balancing, do it here and not earlier because:
|
||||
* - cpus_allowed can change in the fork path
|
||||
* - any previously selected cpu might disappear through hotplug
|
||||
*
|
||||
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
|
||||
* as we're not fully set-up yet.
|
||||
*/
|
||||
set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
|
||||
__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
|
||||
#endif
|
||||
rq = __task_rq_lock(p, &rf);
|
||||
post_init_entity_util_avg(&p->se);
|
||||
|
|
|
@ -4448,7 +4448,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
|||
*
|
||||
* note: in the case of encountering a throttled cfs_rq we will
|
||||
* post the final h_nr_running increment below.
|
||||
*/
|
||||
*/
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
break;
|
||||
cfs_rq->h_nr_running++;
|
||||
|
@ -8289,31 +8289,17 @@ static void task_fork_fair(struct task_struct *p)
|
|||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
struct sched_entity *se = &p->se, *curr;
|
||||
int this_cpu = smp_processor_id();
|
||||
struct rq *rq = this_rq();
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
|
||||
raw_spin_lock(&rq->lock);
|
||||
update_rq_clock(rq);
|
||||
|
||||
cfs_rq = task_cfs_rq(current);
|
||||
curr = cfs_rq->curr;
|
||||
|
||||
/*
|
||||
* Not only the cpu but also the task_group of the parent might have
|
||||
* been changed after parent->se.parent,cfs_rq were copied to
|
||||
* child->se.parent,cfs_rq. So call __set_task_cpu() to make those
|
||||
* of child point to valid ones.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
__set_task_cpu(p, this_cpu);
|
||||
rcu_read_unlock();
|
||||
|
||||
update_curr(cfs_rq);
|
||||
|
||||
if (curr)
|
||||
if (curr) {
|
||||
update_curr(cfs_rq);
|
||||
se->vruntime = curr->vruntime;
|
||||
}
|
||||
place_entity(cfs_rq, se, 1);
|
||||
|
||||
if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
|
||||
|
@ -8326,8 +8312,7 @@ static void task_fork_fair(struct task_struct *p)
|
|||
}
|
||||
|
||||
se->vruntime -= cfs_rq->min_vruntime;
|
||||
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
raw_spin_unlock(&rq->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in New Issue