sched, cgroup: Restore meaning to hierarchical_quota
In cgroupv2 cfs_b->hierarchical_quota is set to -1 for all task
groups due to the previous fix simply taking the min. It should
reflect a limit imposed at that level or by an ancestor. Even
though cgroupv2 does not require child quota to be less than or
equal to that of its ancestors, the task group will still be
constrained by such a quota, so this should be shown here. Cgroupv1
continues to set this correctly.
In both cases, initialize the field when a new task group is created,
based on the current parent's value (or RUNTIME_INF in the case of
root_task_group). Otherwise, the field is wrong until a quota is
changed after creation and __cfs_schedulable() is called.
Fixes: c53593e5cb ("sched, cgroup: Don't reject lower cpu.max on ancestors")
Signed-off-by: Phil Auld <pauld@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ben Segall <bsegall@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20230714125746.812891-1-pauld@redhat.com
This commit is contained in:
parent
113d0a6b39
commit
c98c18270b
|
@ -9953,7 +9953,7 @@ void __init sched_init(void)
|
||||||
ptr += nr_cpu_ids * sizeof(void **);
|
ptr += nr_cpu_ids * sizeof(void **);
|
||||||
|
|
||||||
root_task_group.shares = ROOT_TASK_GROUP_LOAD;
|
root_task_group.shares = ROOT_TASK_GROUP_LOAD;
|
||||||
init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
|
init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL);
|
||||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||||
#ifdef CONFIG_RT_GROUP_SCHED
|
#ifdef CONFIG_RT_GROUP_SCHED
|
||||||
root_task_group.rt_se = (struct sched_rt_entity **)ptr;
|
root_task_group.rt_se = (struct sched_rt_entity **)ptr;
|
||||||
|
@ -11087,11 +11087,16 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Ensure max(child_quota) <= parent_quota. On cgroup2,
|
* Ensure max(child_quota) <= parent_quota. On cgroup2,
|
||||||
* always take the min. On cgroup1, only inherit when no
|
* always take the non-RUNTIME_INF min. On cgroup1, only
|
||||||
* limit is set:
|
* inherit when no limit is set. In both cases this is used
|
||||||
|
* by the scheduler to determine if a given CFS task has a
|
||||||
|
* bandwidth constraint at some higher level.
|
||||||
*/
|
*/
|
||||||
if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) {
|
if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) {
|
||||||
quota = min(quota, parent_quota);
|
if (quota == RUNTIME_INF)
|
||||||
|
quota = parent_quota;
|
||||||
|
else if (parent_quota != RUNTIME_INF)
|
||||||
|
quota = min(quota, parent_quota);
|
||||||
} else {
|
} else {
|
||||||
if (quota == RUNTIME_INF)
|
if (quota == RUNTIME_INF)
|
||||||
quota = parent_quota;
|
quota = parent_quota;
|
||||||
|
|
|
@ -6045,13 +6045,14 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
|
||||||
return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
|
return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
|
||||||
}
|
}
|
||||||
|
|
||||||
void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
|
void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent)
|
||||||
{
|
{
|
||||||
raw_spin_lock_init(&cfs_b->lock);
|
raw_spin_lock_init(&cfs_b->lock);
|
||||||
cfs_b->runtime = 0;
|
cfs_b->runtime = 0;
|
||||||
cfs_b->quota = RUNTIME_INF;
|
cfs_b->quota = RUNTIME_INF;
|
||||||
cfs_b->period = ns_to_ktime(default_cfs_period());
|
cfs_b->period = ns_to_ktime(default_cfs_period());
|
||||||
cfs_b->burst = 0;
|
cfs_b->burst = 0;
|
||||||
|
cfs_b->hierarchical_quota = parent ? parent->hierarchical_quota : RUNTIME_INF;
|
||||||
|
|
||||||
INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
|
INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
|
||||||
hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
|
hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
|
||||||
|
@ -6217,7 +6218,7 @@ static inline int throttled_lb_pair(struct task_group *tg,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
|
void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent) {}
|
||||||
|
|
||||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||||
static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
|
static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
|
||||||
|
@ -12599,7 +12600,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
||||||
|
|
||||||
tg->shares = NICE_0_LOAD;
|
tg->shares = NICE_0_LOAD;
|
||||||
|
|
||||||
init_cfs_bandwidth(tg_cfs_bandwidth(tg));
|
init_cfs_bandwidth(tg_cfs_bandwidth(tg), tg_cfs_bandwidth(parent));
|
||||||
|
|
||||||
for_each_possible_cpu(i) {
|
for_each_possible_cpu(i) {
|
||||||
cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
|
cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
|
||||||
|
|
|
@ -454,7 +454,7 @@ extern void unregister_fair_sched_group(struct task_group *tg);
|
||||||
extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
||||||
struct sched_entity *se, int cpu,
|
struct sched_entity *se, int cpu,
|
||||||
struct sched_entity *parent);
|
struct sched_entity *parent);
|
||||||
extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
|
extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent);
|
||||||
|
|
||||||
extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
|
extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
|
||||||
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
|
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
|
||||||
|
|
Loading…
Reference in New Issue