sched/fair: Refactor cpu_util_without()
Except for the 'task has no contribution or is new' condition at the
beginning of cpu_util_without(), which it shares with its load and
runnable counterpart functions, the rest of the function can be
replaced by a cpu_util_next(..., dst_cpu = -1) call.
The UTIL_EST-specific handling, i.e. subtracting the task's util_est
from the CPU's when the task is enqueued (or is current, to cater for
the wakeup vs. load-balance race), has to move into cpu_util_next().
It was originally introduced by commit c469933e77
("sched/fair: Fix cpu_util_wake() for 'execl' type workloads").
UnixBench's `execl` throughput test was run on a dual-socket, 40-CPU
Intel E5-2690 v2 system to make sure this doesn't regress again.
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/r/20220318163656.954440-1-dietmar.eggemann@arm.com
commit 4e3c7d338a
parent a658353167
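For quick reference, the refactored cpu_util_without() (hunk 2 of the
diff below) reduces to:

static unsigned long cpu_util_without(int cpu, struct task_struct *p)
{
        /* Task has no contribution or is new */
        if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
                return cpu_util_cfs(cpu);

        return cpu_util_next(cpu, p, -1);
}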
@@ -6543,6 +6543,68 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	return target;
 }
 
+/*
+ * Predicts what cpu_util(@cpu) would return if @p was removed from @cpu
+ * (@dst_cpu = -1) or migrated to @dst_cpu.
+ */
+static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
+{
+	struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
+	unsigned long util = READ_ONCE(cfs_rq->avg.util_avg);
+
+	/*
+	 * If @dst_cpu is -1 or @p migrates from @cpu to @dst_cpu remove its
+	 * contribution. If @p migrates from another CPU to @cpu add its
+	 * contribution. In all the other cases @cpu is not impacted by the
+	 * migration so its util_avg is already correct.
+	 */
+	if (task_cpu(p) == cpu && dst_cpu != cpu)
+		lsub_positive(&util, task_util(p));
+	else if (task_cpu(p) != cpu && dst_cpu == cpu)
+		util += task_util(p);
+
+	if (sched_feat(UTIL_EST)) {
+		unsigned long util_est;
+
+		util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
+
+		/*
+		 * During wake-up @p isn't enqueued yet and doesn't contribute
+		 * to any cpu_rq(cpu)->cfs.avg.util_est.enqueued.
+		 * If @dst_cpu == @cpu add it to "simulate" cpu_util after @p
+		 * has been enqueued.
+		 *
+		 * During exec (@dst_cpu = -1) @p is enqueued and does
+		 * contribute to cpu_rq(cpu)->cfs.util_est.enqueued.
+		 * Remove it to "simulate" cpu_util without @p's contribution.
+		 *
+		 * Despite the task_on_rq_queued(@p) check there is still a
+		 * small window for a possible race when an exec
+		 * select_task_rq_fair() races with LB's detach_task().
+		 *
+		 * detach_task()
+		 *   deactivate_task()
+		 *     p->on_rq = TASK_ON_RQ_MIGRATING;
+		 *   -------------------------------- A
+		 *   dequeue_task()                    \
+		 *     dequeue_task_fair()              + Race Time
+		 *       util_est_dequeue()            /
+		 *   -------------------------------- B
+		 *
+		 * The additional check "current == p" is required to further
+		 * reduce the race window.
+		 */
+		if (dst_cpu == cpu)
+			util_est += _task_util_est(p);
+		else if (unlikely(task_on_rq_queued(p) || current == p))
+			lsub_positive(&util_est, _task_util_est(p));
+
+		util = max(util, util_est);
+	}
+
+	return min(util, capacity_orig_of(cpu));
+}
+
 /*
  * cpu_util_without: compute cpu utilization without any contributions from *p
  * @cpu: the CPU which utilization is requested
@@ -6558,116 +6620,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
  */
 static unsigned long cpu_util_without(int cpu, struct task_struct *p)
 {
-	struct cfs_rq *cfs_rq;
-	unsigned int util;
-
 	/* Task has no contribution or is new */
 	if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
 		return cpu_util_cfs(cpu);
 
-	cfs_rq = &cpu_rq(cpu)->cfs;
-	util = READ_ONCE(cfs_rq->avg.util_avg);
-
-	/* Discount task's util from CPU's util */
-	lsub_positive(&util, task_util(p));
-
-	/*
-	 * Covered cases:
-	 *
-	 * a) if *p is the only task sleeping on this CPU, then:
-	 *      cpu_util (== task_util) > util_est (== 0)
-	 *    and thus we return:
-	 *      cpu_util_without = (cpu_util - task_util) = 0
-	 *
-	 * b) if other tasks are SLEEPING on this CPU, which is now exiting
-	 *    IDLE, then:
-	 *      cpu_util >= task_util
-	 *      cpu_util > util_est (== 0)
-	 *    and thus we discount *p's blocked utilization to return:
-	 *      cpu_util_without = (cpu_util - task_util) >= 0
-	 *
-	 * c) if other tasks are RUNNABLE on that CPU and
-	 *      util_est > cpu_util
-	 *    then we use util_est since it returns a more restrictive
-	 *    estimation of the spare capacity on that CPU, by just
-	 *    considering the expected utilization of tasks already
-	 *    runnable on that CPU.
-	 *
-	 * Cases a) and b) are covered by the above code, while case c) is
-	 * covered by the following code when estimated utilization is
-	 * enabled.
-	 */
-	if (sched_feat(UTIL_EST)) {
-		unsigned int estimated =
-			READ_ONCE(cfs_rq->avg.util_est.enqueued);
-
-		/*
-		 * Despite the following checks we still have a small window
-		 * for a possible race, when an execl's select_task_rq_fair()
-		 * races with LB's detach_task():
-		 *
-		 *   detach_task()
-		 *     p->on_rq = TASK_ON_RQ_MIGRATING;
-		 *     ---------------------------------- A
-		 *     deactivate_task()                   \
-		 *       dequeue_task()                     + RaceTime
-		 *         util_est_dequeue()              /
-		 *     ---------------------------------- B
-		 *
-		 * The additional check on "current == p" it's required to
-		 * properly fix the execl regression and it helps in further
-		 * reducing the chances for the above race.
-		 */
-		if (unlikely(task_on_rq_queued(p) || current == p))
-			lsub_positive(&estimated, _task_util_est(p));
-
-		util = max(util, estimated);
-	}
-
-	/*
-	 * Utilization (estimated) can exceed the CPU capacity, thus let's
-	 * clamp to the maximum CPU capacity to ensure consistency with
-	 * cpu_util.
-	 */
-	return min_t(unsigned long, util, capacity_orig_of(cpu));
-}
-
-/*
- * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued)
- * to @dst_cpu.
- */
-static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
-{
-	struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
-	unsigned long util_est, util = READ_ONCE(cfs_rq->avg.util_avg);
-
-	/*
-	 * If @p migrates from @cpu to another, remove its contribution. Or,
-	 * if @p migrates from another CPU to @cpu, add its contribution. In
-	 * the other cases, @cpu is not impacted by the migration, so the
-	 * util_avg should already be correct.
-	 */
-	if (task_cpu(p) == cpu && dst_cpu != cpu)
-		lsub_positive(&util, task_util(p));
-	else if (task_cpu(p) != cpu && dst_cpu == cpu)
-		util += task_util(p);
-
-	if (sched_feat(UTIL_EST)) {
-		util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
-
-		/*
-		 * During wake-up, the task isn't enqueued yet and doesn't
-		 * appear in the cfs_rq->avg.util_est.enqueued of any rq,
-		 * so just add it (if needed) to "simulate" what will be
-		 * cpu_util after the task has been enqueued.
-		 */
-		if (dst_cpu == cpu)
-			util_est += _task_util_est(p);
-
-		util = max(util, util_est);
-	}
-
-	return min(util, capacity_orig_of(cpu));
+	return cpu_util_next(cpu, p, -1);
 }
 
 /*
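For context on how such a prediction helper is consumed, here is a
minimal, illustrative sketch, not part of this patch: the function name
find_most_spare_cpu() and its selection loop are hypothetical (in this
kernel the comparable real consumer is the energy-aware wake-up path,
e.g. compute_energy()). It picks the candidate CPU with the most spare
capacity once @p's utilization is accounted via cpu_util_next(cpu, p, cpu).

/*
 * Illustrative sketch only: choose the CPU in @cpus with the most spare
 * capacity once @p's utilization is "simulated" as enqueued on it.
 * cpu_util_next() clamps its result to capacity_orig_of(cpu), so the
 * subtraction below cannot underflow.
 */
static int find_most_spare_cpu(struct task_struct *p, struct cpumask *cpus)
{
        unsigned long spare, max_spare = 0;
        int cpu, best_cpu = -1;

        for_each_cpu(cpu, cpus) {
                spare = capacity_orig_of(cpu) - cpu_util_next(cpu, p, cpu);
                if (spare > max_spare) {
                        max_spare = spare;
                        best_cpu = cpu;
                }
        }

        return best_cpu;
}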