Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:

 - introduce and use task_rcu_dereference()/try_get_task_struct() to fix and generalize task_struct handling (Oleg Nesterov)
 - do various per entity load tracking (PELT) fixes and optimizations (Peter Zijlstra)
 - cputime virt-steal time accounting enhancements/fixes (Wanpeng Li)
 - introduce consolidated cputime output file cpuacct.usage_all and related refactorings (Zhao Lei)
 - ... plus misc fixes and enhancements

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/core: Panic on scheduling while atomic bugs if kernel.panic_on_warn is set
  sched/cpuacct: Introduce cpuacct.usage_all to show all CPU stats together
  sched/cpuacct: Use loop to consolidate code in cpuacct_stats_show()
  sched/cpuacct: Merge cpuacct_usage_index and cpuacct_stat_index enums
  sched/fair: Rework throttle_count sync
  sched/core: Fix sched_getaffinity() return value kerneldoc comment
  sched/fair: Reorder cgroup creation code
  sched/fair: Apply more PELT fixes
  sched/fair: Fix PELT integrity for new tasks
  sched/cgroup: Fix cpu_cgroup_fork() handling
  sched/fair: Fix PELT integrity for new groups
  sched/fair: Fix and optimize the fork() path
  sched/cputime: Add steal time support to full dynticks CPU time accounting
  sched/cputime: Fix prev steal time accouting during CPU hotplug
  KVM: Fix steal clock warp during guest CPU hotplug
  sched/debug: Always show 'nr_migrations'
  sched/fair: Use task_rcu_dereference()
  sched/api: Introduce task_rcu_dereference() and try_get_task_struct()
  sched/idle: Optimize the generic idle loop
  sched/fair: Fix the wrong throttled clock time for cfs_rq_clock_task()
This commit is contained in:
commit
cca08cd66c
|
@ -301,8 +301,6 @@ static void kvm_register_steal_time(void)
|
|||
if (!has_steal_clock)
|
||||
return;
|
||||
|
||||
memset(st, 0, sizeof(*st));
|
||||
|
||||
wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
|
||||
pr_info("kvm-stealtime: cpu %d, msr %llx\n",
|
||||
cpu, (unsigned long long) slow_virt_to_phys(st));
|
||||
|
|
|
@ -219,9 +219,10 @@ extern void proc_sched_set_task(struct task_struct *p);
|
|||
#define TASK_WAKING 256
|
||||
#define TASK_PARKED 512
|
||||
#define TASK_NOLOAD 1024
|
||||
#define TASK_STATE_MAX 2048
|
||||
#define TASK_NEW 2048
|
||||
#define TASK_STATE_MAX 4096
|
||||
|
||||
#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
|
||||
#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"
|
||||
|
||||
extern char ___assert_task_state[1 - 2*!!(
|
||||
sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
|
||||
|
@ -2139,6 +2140,9 @@ static inline void put_task_struct(struct task_struct *t)
|
|||
__put_task_struct(t);
|
||||
}
|
||||
|
||||
struct task_struct *task_rcu_dereference(struct task_struct **ptask);
|
||||
struct task_struct *try_get_task_struct(struct task_struct **ptask);
|
||||
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
|
||||
extern void task_cputime(struct task_struct *t,
|
||||
cputime_t *utime, cputime_t *stime);
|
||||
|
|
|
@ -210,6 +210,82 @@ repeat:
|
|||
goto repeat;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note that if this function returns a valid task_struct pointer (!NULL)
|
||||
* task->usage must remain >0 for the duration of the RCU critical section.
|
||||
*/
|
||||
struct task_struct *task_rcu_dereference(struct task_struct **ptask)
|
||||
{
|
||||
struct sighand_struct *sighand;
|
||||
struct task_struct *task;
|
||||
|
||||
/*
|
||||
* We need to verify that release_task() was not called and thus
|
||||
* delayed_put_task_struct() can't run and drop the last reference
|
||||
* before rcu_read_unlock(). We check task->sighand != NULL,
|
||||
* but we can read the already freed and reused memory.
|
||||
*/
|
||||
retry:
|
||||
task = rcu_dereference(*ptask);
|
||||
if (!task)
|
||||
return NULL;
|
||||
|
||||
probe_kernel_address(&task->sighand, sighand);
|
||||
|
||||
/*
|
||||
* Pairs with atomic_dec_and_test() in put_task_struct(). If this task
|
||||
* was already freed we can not miss the preceding update of this
|
||||
* pointer.
|
||||
*/
|
||||
smp_rmb();
|
||||
if (unlikely(task != READ_ONCE(*ptask)))
|
||||
goto retry;
|
||||
|
||||
/*
|
||||
* We've re-checked that "task == *ptask", now we have two different
|
||||
* cases:
|
||||
*
|
||||
* 1. This is actually the same task/task_struct. In this case
|
||||
* sighand != NULL tells us it is still alive.
|
||||
*
|
||||
* 2. This is another task which got the same memory for task_struct.
|
||||
* We can't know this of course, and we can not trust
|
||||
* sighand != NULL.
|
||||
*
|
||||
* In this case we actually return a random value, but this is
|
||||
* correct.
|
||||
*
|
||||
* If we return NULL - we can pretend that we actually noticed that
|
||||
* *ptask was updated when the previous task has exited. Or pretend
|
||||
* that probe_kernel_address(&task->sighand, sighand) reads NULL.
|
||||
*
|
||||
* If we return the new task (because sighand is not NULL for any
|
||||
* reason) - this is fine too. This (new) task can't go away before
|
||||
* another gp pass.
|
||||
*
|
||||
* And note: We could even eliminate the false positive if we re-read
|
||||
* task->sighand once again to avoid the falsely NULL. But this case
|
||||
* is very unlikely so we don't care.
|
||||
*/
|
||||
if (!sighand)
|
||||
return NULL;
|
||||
|
||||
return task;
|
||||
}
|
||||
|
||||
struct task_struct *try_get_task_struct(struct task_struct **ptask)
|
||||
{
|
||||
struct task_struct *task;
|
||||
|
||||
rcu_read_lock();
|
||||
task = task_rcu_dereference(ptask);
|
||||
if (task)
|
||||
get_task_struct(task);
|
||||
rcu_read_unlock();
|
||||
|
||||
return task;
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine if a process group is "orphaned", according to the POSIX
|
||||
* definition in 2.2.2.52. Orphaned process groups are not to be affected
|
||||
|
|
|
@ -2342,11 +2342,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|||
|
||||
__sched_fork(clone_flags, p);
|
||||
/*
|
||||
* We mark the process as running here. This guarantees that
|
||||
* We mark the process as NEW here. This guarantees that
|
||||
* nobody will actually run it, and a signal or other external
|
||||
* event cannot wake it up and insert it on the runqueue either.
|
||||
*/
|
||||
p->state = TASK_RUNNING;
|
||||
p->state = TASK_NEW;
|
||||
|
||||
/*
|
||||
* Make sure we do not leak PI boosting priority to the child.
|
||||
|
@ -2383,8 +2383,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|||
p->sched_class = &fair_sched_class;
|
||||
}
|
||||
|
||||
if (p->sched_class->task_fork)
|
||||
p->sched_class->task_fork(p);
|
||||
init_entity_runnable_average(&p->se);
|
||||
|
||||
/*
|
||||
* The child is not yet in the pid-hash so no cgroup attach races,
|
||||
|
@ -2394,7 +2393,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|||
* Silence PROVE_RCU.
|
||||
*/
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
set_task_cpu(p, cpu);
|
||||
/*
|
||||
* We're setting the cpu for the first time, we don't migrate,
|
||||
* so use __set_task_cpu().
|
||||
*/
|
||||
__set_task_cpu(p, cpu);
|
||||
if (p->sched_class->task_fork)
|
||||
p->sched_class->task_fork(p);
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
#ifdef CONFIG_SCHED_INFO
|
||||
|
@ -2526,16 +2531,18 @@ void wake_up_new_task(struct task_struct *p)
|
|||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
/* Initialize new task's runnable average */
|
||||
init_entity_runnable_average(&p->se);
|
||||
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
|
||||
p->state = TASK_RUNNING;
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Fork balancing, do it here and not earlier because:
|
||||
* - cpus_allowed can change in the fork path
|
||||
* - any previously selected cpu might disappear through hotplug
|
||||
*
|
||||
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
|
||||
* as we're not fully set-up yet.
|
||||
*/
|
||||
set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
|
||||
__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
|
||||
#endif
|
||||
rq = __task_rq_lock(p, &rf);
|
||||
post_init_entity_util_avg(&p->se);
|
||||
|
@ -3161,6 +3168,9 @@ static noinline void __schedule_bug(struct task_struct *prev)
|
|||
pr_cont("\n");
|
||||
}
|
||||
#endif
|
||||
if (panic_on_warn)
|
||||
panic("scheduling while atomic\n");
|
||||
|
||||
dump_stack();
|
||||
add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
|
||||
}
|
||||
|
@ -4752,7 +4762,8 @@ out_unlock:
|
|||
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
|
||||
* @user_mask_ptr: user-space pointer to hold the current cpu mask
|
||||
*
|
||||
* Return: 0 on success. An error code otherwise.
|
||||
* Return: size of CPU mask copied to user_mask_ptr on success. An
|
||||
* error code otherwise.
|
||||
*/
|
||||
SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
|
||||
unsigned long __user *, user_mask_ptr)
|
||||
|
@ -7233,7 +7244,6 @@ static void sched_rq_cpu_starting(unsigned int cpu)
|
|||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
rq->calc_load_update = calc_load_update;
|
||||
account_reset_rq(rq);
|
||||
update_max_interval();
|
||||
}
|
||||
|
||||
|
@ -7713,6 +7723,8 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
|
|||
INIT_LIST_HEAD(&tg->children);
|
||||
list_add_rcu(&tg->siblings, &parent->children);
|
||||
spin_unlock_irqrestore(&task_group_lock, flags);
|
||||
|
||||
online_fair_sched_group(tg);
|
||||
}
|
||||
|
||||
/* rcu callback to free various structures associated with a task group */
|
||||
|
@ -7741,14 +7753,37 @@ void sched_offline_group(struct task_group *tg)
|
|||
spin_unlock_irqrestore(&task_group_lock, flags);
|
||||
}
|
||||
|
||||
/* change task's runqueue when it moves between groups.
|
||||
* The caller of this function should have put the task in its new group
|
||||
* by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
|
||||
* reflect its new group.
|
||||
static void sched_change_group(struct task_struct *tsk, int type)
|
||||
{
|
||||
struct task_group *tg;
|
||||
|
||||
/*
|
||||
* All callers are synchronized by task_rq_lock(); we do not use RCU
|
||||
* which is pointless here. Thus, we pass "true" to task_css_check()
|
||||
* to prevent lockdep warnings.
|
||||
*/
|
||||
tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
|
||||
struct task_group, css);
|
||||
tg = autogroup_task_group(tsk, tg);
|
||||
tsk->sched_task_group = tg;
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
if (tsk->sched_class->task_change_group)
|
||||
tsk->sched_class->task_change_group(tsk, type);
|
||||
else
|
||||
#endif
|
||||
set_task_rq(tsk, task_cpu(tsk));
|
||||
}
|
||||
|
||||
/*
|
||||
* Change task's runqueue when it moves between groups.
|
||||
*
|
||||
* The caller of this function should have put the task in its new group by
|
||||
* now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect
|
||||
* its new group.
|
||||
*/
|
||||
void sched_move_task(struct task_struct *tsk)
|
||||
{
|
||||
struct task_group *tg;
|
||||
int queued, running;
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
@ -7763,22 +7798,7 @@ void sched_move_task(struct task_struct *tsk)
|
|||
if (unlikely(running))
|
||||
put_prev_task(rq, tsk);
|
||||
|
||||
/*
|
||||
* All callers are synchronized by task_rq_lock(); we do not use RCU
|
||||
* which is pointless here. Thus, we pass "true" to task_css_check()
|
||||
* to prevent lockdep warnings.
|
||||
*/
|
||||
tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
|
||||
struct task_group, css);
|
||||
tg = autogroup_task_group(tsk, tg);
|
||||
tsk->sched_task_group = tg;
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
if (tsk->sched_class->task_move_group)
|
||||
tsk->sched_class->task_move_group(tsk);
|
||||
else
|
||||
#endif
|
||||
set_task_rq(tsk, task_cpu(tsk));
|
||||
sched_change_group(tsk, TASK_MOVE_GROUP);
|
||||
|
||||
if (unlikely(running))
|
||||
tsk->sched_class->set_curr_task(rq);
|
||||
|
@ -8206,15 +8226,27 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
|
|||
sched_free_group(tg);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called before wake_up_new_task(), therefore we really only
|
||||
* have to set its group bits, all the other stuff does not apply.
|
||||
*/
|
||||
static void cpu_cgroup_fork(struct task_struct *task)
|
||||
{
|
||||
sched_move_task(task);
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
rq = task_rq_lock(task, &rf);
|
||||
|
||||
sched_change_group(task, TASK_SET_GROUP);
|
||||
|
||||
task_rq_unlock(rq, task, &rf);
|
||||
}
|
||||
|
||||
static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct cgroup_subsys_state *css;
|
||||
int ret = 0;
|
||||
|
||||
cgroup_taskset_for_each(task, css, tset) {
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
|
@ -8225,8 +8257,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
|
|||
if (task->sched_class != &fair_sched_class)
|
||||
return -EINVAL;
|
||||
#endif
|
||||
/*
|
||||
* Serialize against wake_up_new_task() such that if its
|
||||
* running, we're sure to observe its full state.
|
||||
*/
|
||||
raw_spin_lock_irq(&task->pi_lock);
|
||||
/*
|
||||
* Avoid calling sched_move_task() before wake_up_new_task()
|
||||
* has happened. This would lead to problems with PELT, due to
|
||||
* move wanting to detach+attach while we're not attached yet.
|
||||
*/
|
||||
if (task->state == TASK_NEW)
|
||||
ret = -EINVAL;
|
||||
raw_spin_unlock_irq(&task->pi_lock);
|
||||
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void cpu_cgroup_attach(struct cgroup_taskset *tset)
|
||||
|
|
|
@ -25,15 +25,13 @@ enum cpuacct_stat_index {
|
|||
CPUACCT_STAT_NSTATS,
|
||||
};
|
||||
|
||||
enum cpuacct_usage_index {
|
||||
CPUACCT_USAGE_USER, /* ... user mode */
|
||||
CPUACCT_USAGE_SYSTEM, /* ... kernel mode */
|
||||
|
||||
CPUACCT_USAGE_NRUSAGE,
|
||||
static const char * const cpuacct_stat_desc[] = {
|
||||
[CPUACCT_STAT_USER] = "user",
|
||||
[CPUACCT_STAT_SYSTEM] = "system",
|
||||
};
|
||||
|
||||
struct cpuacct_usage {
|
||||
u64 usages[CPUACCT_USAGE_NRUSAGE];
|
||||
u64 usages[CPUACCT_STAT_NSTATS];
|
||||
};
|
||||
|
||||
/* track cpu usage of a group of tasks and its child groups */
|
||||
|
@ -108,16 +106,16 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css)
|
|||
}
|
||||
|
||||
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
|
||||
enum cpuacct_usage_index index)
|
||||
enum cpuacct_stat_index index)
|
||||
{
|
||||
struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
|
||||
u64 data;
|
||||
|
||||
/*
|
||||
* We allow index == CPUACCT_USAGE_NRUSAGE here to read
|
||||
* We allow index == CPUACCT_STAT_NSTATS here to read
|
||||
* the sum of usages.
|
||||
*/
|
||||
BUG_ON(index > CPUACCT_USAGE_NRUSAGE);
|
||||
BUG_ON(index > CPUACCT_STAT_NSTATS);
|
||||
|
||||
#ifndef CONFIG_64BIT
|
||||
/*
|
||||
|
@ -126,11 +124,11 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
|
|||
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
|
||||
#endif
|
||||
|
||||
if (index == CPUACCT_USAGE_NRUSAGE) {
|
||||
if (index == CPUACCT_STAT_NSTATS) {
|
||||
int i = 0;
|
||||
|
||||
data = 0;
|
||||
for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
|
||||
for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
|
||||
data += cpuusage->usages[i];
|
||||
} else {
|
||||
data = cpuusage->usages[index];
|
||||
|
@ -155,7 +153,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
|
|||
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
|
||||
#endif
|
||||
|
||||
for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
|
||||
for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
|
||||
cpuusage->usages[i] = val;
|
||||
|
||||
#ifndef CONFIG_64BIT
|
||||
|
@ -165,7 +163,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
|
|||
|
||||
/* return total cpu usage (in nanoseconds) of a group */
|
||||
static u64 __cpuusage_read(struct cgroup_subsys_state *css,
|
||||
enum cpuacct_usage_index index)
|
||||
enum cpuacct_stat_index index)
|
||||
{
|
||||
struct cpuacct *ca = css_ca(css);
|
||||
u64 totalcpuusage = 0;
|
||||
|
@ -180,18 +178,18 @@ static u64 __cpuusage_read(struct cgroup_subsys_state *css,
|
|||
static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
|
||||
struct cftype *cft)
|
||||
{
|
||||
return __cpuusage_read(css, CPUACCT_USAGE_USER);
|
||||
return __cpuusage_read(css, CPUACCT_STAT_USER);
|
||||
}
|
||||
|
||||
static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
|
||||
struct cftype *cft)
|
||||
{
|
||||
return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM);
|
||||
return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
|
||||
}
|
||||
|
||||
static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
|
||||
{
|
||||
return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE);
|
||||
return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
|
||||
}
|
||||
|
||||
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
|
||||
|
@ -213,7 +211,7 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
|
|||
}
|
||||
|
||||
static int __cpuacct_percpu_seq_show(struct seq_file *m,
|
||||
enum cpuacct_usage_index index)
|
||||
enum cpuacct_stat_index index)
|
||||
{
|
||||
struct cpuacct *ca = css_ca(seq_css(m));
|
||||
u64 percpu;
|
||||
|
@ -229,48 +227,78 @@ static int __cpuacct_percpu_seq_show(struct seq_file *m,
|
|||
|
||||
static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
|
||||
{
|
||||
return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER);
|
||||
return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
|
||||
}
|
||||
|
||||
static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
|
||||
{
|
||||
return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM);
|
||||
return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
|
||||
}
|
||||
|
||||
static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
|
||||
{
|
||||
return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
|
||||
return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
|
||||
}
|
||||
|
||||
static const char * const cpuacct_stat_desc[] = {
|
||||
[CPUACCT_STAT_USER] = "user",
|
||||
[CPUACCT_STAT_SYSTEM] = "system",
|
||||
};
|
||||
static int cpuacct_all_seq_show(struct seq_file *m, void *V)
|
||||
{
|
||||
struct cpuacct *ca = css_ca(seq_css(m));
|
||||
int index;
|
||||
int cpu;
|
||||
|
||||
seq_puts(m, "cpu");
|
||||
for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
|
||||
seq_printf(m, " %s", cpuacct_stat_desc[index]);
|
||||
seq_puts(m, "\n");
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
|
||||
|
||||
seq_printf(m, "%d", cpu);
|
||||
|
||||
for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
|
||||
#ifndef CONFIG_64BIT
|
||||
/*
|
||||
* Take rq->lock to make 64-bit read safe on 32-bit
|
||||
* platforms.
|
||||
*/
|
||||
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
|
||||
#endif
|
||||
|
||||
seq_printf(m, " %llu", cpuusage->usages[index]);
|
||||
|
||||
#ifndef CONFIG_64BIT
|
||||
raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
|
||||
#endif
|
||||
}
|
||||
seq_puts(m, "\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cpuacct_stats_show(struct seq_file *sf, void *v)
|
||||
{
|
||||
struct cpuacct *ca = css_ca(seq_css(sf));
|
||||
s64 val[CPUACCT_STAT_NSTATS];
|
||||
int cpu;
|
||||
s64 val = 0;
|
||||
int stat;
|
||||
|
||||
memset(val, 0, sizeof(val));
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
|
||||
val += kcpustat->cpustat[CPUTIME_USER];
|
||||
val += kcpustat->cpustat[CPUTIME_NICE];
|
||||
}
|
||||
val = cputime64_to_clock_t(val);
|
||||
seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
|
||||
u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
|
||||
|
||||
val = 0;
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
|
||||
val += kcpustat->cpustat[CPUTIME_SYSTEM];
|
||||
val += kcpustat->cpustat[CPUTIME_IRQ];
|
||||
val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
|
||||
val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER];
|
||||
val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE];
|
||||
val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
|
||||
val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
|
||||
val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
|
||||
}
|
||||
|
||||
val = cputime64_to_clock_t(val);
|
||||
seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
|
||||
for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
|
||||
seq_printf(sf, "%s %lld\n",
|
||||
cpuacct_stat_desc[stat],
|
||||
cputime64_to_clock_t(val[stat]));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -301,6 +329,10 @@ static struct cftype files[] = {
|
|||
.name = "usage_percpu_sys",
|
||||
.seq_show = cpuacct_percpu_sys_seq_show,
|
||||
},
|
||||
{
|
||||
.name = "usage_all",
|
||||
.seq_show = cpuacct_all_seq_show,
|
||||
},
|
||||
{
|
||||
.name = "stat",
|
||||
.seq_show = cpuacct_stats_show,
|
||||
|
@ -316,11 +348,11 @@ static struct cftype files[] = {
|
|||
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
||||
{
|
||||
struct cpuacct *ca;
|
||||
int index = CPUACCT_USAGE_SYSTEM;
|
||||
int index = CPUACCT_STAT_SYSTEM;
|
||||
struct pt_regs *regs = task_pt_regs(tsk);
|
||||
|
||||
if (regs && user_mode(regs))
|
||||
index = CPUACCT_USAGE_USER;
|
||||
index = CPUACCT_STAT_USER;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
|
|
|
@ -257,7 +257,7 @@ void account_idle_time(cputime_t cputime)
|
|||
cpustat[CPUTIME_IDLE] += (__force u64) cputime;
|
||||
}
|
||||
|
||||
static __always_inline bool steal_account_process_tick(void)
|
||||
static __always_inline unsigned long steal_account_process_tick(unsigned long max_jiffies)
|
||||
{
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
if (static_key_false(&paravirt_steal_enabled)) {
|
||||
|
@ -272,14 +272,14 @@ static __always_inline bool steal_account_process_tick(void)
|
|||
* time in jiffies. Lets cast the result to jiffies
|
||||
* granularity and account the rest on the next rounds.
|
||||
*/
|
||||
steal_jiffies = nsecs_to_jiffies(steal);
|
||||
steal_jiffies = min(nsecs_to_jiffies(steal), max_jiffies);
|
||||
this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
|
||||
|
||||
account_steal_time(jiffies_to_cputime(steal_jiffies));
|
||||
return steal_jiffies;
|
||||
}
|
||||
#endif
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -346,7 +346,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
|
|||
u64 cputime = (__force u64) cputime_one_jiffy;
|
||||
u64 *cpustat = kcpustat_this_cpu->cpustat;
|
||||
|
||||
if (steal_account_process_tick())
|
||||
if (steal_account_process_tick(ULONG_MAX))
|
||||
return;
|
||||
|
||||
cputime *= ticks;
|
||||
|
@ -477,7 +477,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
|
|||
return;
|
||||
}
|
||||
|
||||
if (steal_account_process_tick())
|
||||
if (steal_account_process_tick(ULONG_MAX))
|
||||
return;
|
||||
|
||||
if (user_tick)
|
||||
|
@ -681,12 +681,14 @@ static cputime_t vtime_delta(struct task_struct *tsk)
|
|||
static cputime_t get_vtime_delta(struct task_struct *tsk)
|
||||
{
|
||||
unsigned long now = READ_ONCE(jiffies);
|
||||
unsigned long delta = now - tsk->vtime_snap;
|
||||
unsigned long delta_jiffies, steal_jiffies;
|
||||
|
||||
delta_jiffies = now - tsk->vtime_snap;
|
||||
steal_jiffies = steal_account_process_tick(delta_jiffies);
|
||||
WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
|
||||
tsk->vtime_snap = now;
|
||||
|
||||
return jiffies_to_cputime(delta);
|
||||
return jiffies_to_cputime(delta_jiffies - steal_jiffies);
|
||||
}
|
||||
|
||||
static void __vtime_account_system(struct task_struct *tsk)
|
||||
|
|
|
@ -879,9 +879,9 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
|
|||
|
||||
nr_switches = p->nvcsw + p->nivcsw;
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
P(se.nr_migrations);
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
if (schedstat_enabled()) {
|
||||
u64 avg_atom, avg_per_cpu;
|
||||
|
||||
|
|
|
@ -690,6 +690,11 @@ void init_entity_runnable_average(struct sched_entity *se)
|
|||
/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
|
||||
}
|
||||
|
||||
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
|
||||
static int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq);
|
||||
static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force);
|
||||
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se);
|
||||
|
||||
/*
|
||||
* With new tasks being created, their initial util_avgs are extrapolated
|
||||
* based on the cfs_rq's current util_avg:
|
||||
|
@ -720,6 +725,8 @@ void post_init_entity_util_avg(struct sched_entity *se)
|
|||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
struct sched_avg *sa = &se->avg;
|
||||
long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
|
||||
u64 now = cfs_rq_clock_task(cfs_rq);
|
||||
int tg_update;
|
||||
|
||||
if (cap > 0) {
|
||||
if (cfs_rq->avg.util_avg != 0) {
|
||||
|
@ -733,16 +740,42 @@ void post_init_entity_util_avg(struct sched_entity *se)
|
|||
}
|
||||
sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
|
||||
}
|
||||
|
||||
if (entity_is_task(se)) {
|
||||
struct task_struct *p = task_of(se);
|
||||
if (p->sched_class != &fair_sched_class) {
|
||||
/*
|
||||
* For !fair tasks do:
|
||||
*
|
||||
update_cfs_rq_load_avg(now, cfs_rq, false);
|
||||
attach_entity_load_avg(cfs_rq, se);
|
||||
switched_from_fair(rq, p);
|
||||
*
|
||||
* such that the next switched_to_fair() has the
|
||||
* expected state.
|
||||
*/
|
||||
se->avg.last_update_time = now;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
|
||||
attach_entity_load_avg(cfs_rq, se);
|
||||
if (tg_update)
|
||||
update_tg_load_avg(cfs_rq, false);
|
||||
}
|
||||
|
||||
#else
|
||||
#else /* !CONFIG_SMP */
|
||||
void init_entity_runnable_average(struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
void post_init_entity_util_avg(struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* Update the current task's runtime statistics.
|
||||
|
@ -1303,6 +1336,8 @@ static void task_numa_assign(struct task_numa_env *env,
|
|||
{
|
||||
if (env->best_task)
|
||||
put_task_struct(env->best_task);
|
||||
if (p)
|
||||
get_task_struct(p);
|
||||
|
||||
env->best_task = p;
|
||||
env->best_imp = imp;
|
||||
|
@ -1370,31 +1405,11 @@ static void task_numa_compare(struct task_numa_env *env,
|
|||
long imp = env->p->numa_group ? groupimp : taskimp;
|
||||
long moveimp = imp;
|
||||
int dist = env->dist;
|
||||
bool assigned = false;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
raw_spin_lock_irq(&dst_rq->lock);
|
||||
cur = dst_rq->curr;
|
||||
/*
|
||||
* No need to move the exiting task or idle task.
|
||||
*/
|
||||
if ((cur->flags & PF_EXITING) || is_idle_task(cur))
|
||||
cur = task_rcu_dereference(&dst_rq->curr);
|
||||
if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
|
||||
cur = NULL;
|
||||
else {
|
||||
/*
|
||||
* The task_struct must be protected here to protect the
|
||||
* p->numa_faults access in the task_weight since the
|
||||
* numa_faults could already be freed in the following path:
|
||||
* finish_task_switch()
|
||||
* --> put_task_struct()
|
||||
* --> __put_task_struct()
|
||||
* --> task_numa_free()
|
||||
*/
|
||||
get_task_struct(cur);
|
||||
}
|
||||
|
||||
raw_spin_unlock_irq(&dst_rq->lock);
|
||||
|
||||
/*
|
||||
* Because we have preemption enabled we can get migrated around and
|
||||
|
@ -1477,7 +1492,6 @@ balance:
|
|||
*/
|
||||
if (!load_too_imbalanced(src_load, dst_load, env)) {
|
||||
imp = moveimp - 1;
|
||||
put_task_struct(cur);
|
||||
cur = NULL;
|
||||
goto assign;
|
||||
}
|
||||
|
@ -1503,16 +1517,9 @@ balance:
|
|||
env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
|
||||
|
||||
assign:
|
||||
assigned = true;
|
||||
task_numa_assign(env, cur, imp);
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
/*
|
||||
* The dst_rq->curr isn't assigned. The protection for task_struct is
|
||||
* finished.
|
||||
*/
|
||||
if (cur && !assigned)
|
||||
put_task_struct(cur);
|
||||
}
|
||||
|
||||
static void task_numa_find_cpu(struct task_numa_env *env,
|
||||
|
@ -2866,8 +2873,6 @@ void set_task_rq_fair(struct sched_entity *se,
|
|||
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
|
||||
|
||||
static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
struct rq *rq = rq_of(cfs_rq);
|
||||
|
@ -2914,7 +2919,23 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
|
|||
WRITE_ONCE(*ptr, res); \
|
||||
} while (0)
|
||||
|
||||
/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
|
||||
/**
|
||||
* update_cfs_rq_load_avg - update the cfs_rq's load/util averages
|
||||
* @now: current time, as per cfs_rq_clock_task()
|
||||
* @cfs_rq: cfs_rq to update
|
||||
* @update_freq: should we call cfs_rq_util_change() or will the caller do so
|
||||
*
|
||||
* The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
|
||||
* avg. The immediate corollary is that all (fair) tasks must be attached, see
|
||||
* post_init_entity_util_avg().
|
||||
*
|
||||
* cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
|
||||
*
|
||||
* Returns true if the load decayed or we removed utilization. It is expected
|
||||
* that one calls update_tg_load_avg() on this condition, but after you've
|
||||
* modified the cfs_rq avg (attach/detach), such that we propagate the new
|
||||
* avg up.
|
||||
*/
|
||||
static inline int
|
||||
update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
|
||||
{
|
||||
|
@ -2969,6 +2990,14 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
|
|||
update_tg_load_avg(cfs_rq, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* attach_entity_load_avg - attach this entity to its cfs_rq load avg
|
||||
* @cfs_rq: cfs_rq to attach to
|
||||
* @se: sched_entity to attach
|
||||
*
|
||||
* Must call update_cfs_rq_load_avg() before this, since we rely on
|
||||
* cfs_rq->avg.last_update_time being current.
|
||||
*/
|
||||
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
if (!sched_feat(ATTACH_AGE_LOAD))
|
||||
|
@ -2977,6 +3006,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
|
|||
/*
|
||||
* If we got migrated (either between CPUs or between cgroups) we'll
|
||||
* have aged the average right before clearing @last_update_time.
|
||||
*
|
||||
* Or we're fresh through post_init_entity_util_avg().
|
||||
*/
|
||||
if (se->avg.last_update_time) {
|
||||
__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
|
||||
|
@ -2998,6 +3029,14 @@ skip_aging:
|
|||
cfs_rq_util_change(cfs_rq);
|
||||
}
|
||||
|
||||
/**
|
||||
* detach_entity_load_avg - detach this entity from its cfs_rq load avg
|
||||
* @cfs_rq: cfs_rq to detach from
|
||||
* @se: sched_entity to detach
|
||||
*
|
||||
* Must call update_cfs_rq_load_avg() before this, since we rely on
|
||||
* cfs_rq->avg.last_update_time being current.
|
||||
*/
|
||||
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
|
||||
|
@ -3082,11 +3121,14 @@ void remove_entity_load_avg(struct sched_entity *se)
|
|||
u64 last_update_time;
|
||||
|
||||
/*
|
||||
* Newly created task or never used group entity should not be removed
|
||||
* from its (source) cfs_rq
|
||||
* tasks cannot exit without having gone through wake_up_new_task() ->
|
||||
* post_init_entity_util_avg() which will have added things to the
|
||||
* cfs_rq, so we can remove unconditionally.
|
||||
*
|
||||
* Similarly for groups, they will have passed through
|
||||
* post_init_entity_util_avg() before unregister_sched_fair_group()
|
||||
* calls this.
|
||||
*/
|
||||
if (se->avg.last_update_time == 0)
|
||||
return;
|
||||
|
||||
last_update_time = cfs_rq_last_update_time(cfs_rq);
|
||||
|
||||
|
@ -3109,6 +3151,12 @@ static int idle_balance(struct rq *this_rq);
|
|||
|
||||
#else /* CONFIG_SMP */
|
||||
|
||||
static inline int
|
||||
update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void update_load_avg(struct sched_entity *se, int not_used)
|
||||
{
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
|
@ -3698,7 +3746,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
|
|||
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
if (unlikely(cfs_rq->throttle_count))
|
||||
return cfs_rq->throttled_clock_task;
|
||||
return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
|
||||
|
||||
return rq_clock_task(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
|
||||
}
|
||||
|
@ -3836,13 +3884,11 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
|
|||
struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
|
||||
|
||||
cfs_rq->throttle_count--;
|
||||
#ifdef CONFIG_SMP
|
||||
if (!cfs_rq->throttle_count) {
|
||||
/* adjust cfs_rq_clock_task() */
|
||||
cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
|
||||
cfs_rq->throttled_clock_task;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -4195,26 +4241,6 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
|
|||
if (!cfs_bandwidth_used())
|
||||
return;
|
||||
|
||||
/* Synchronize hierarchical throttle counter: */
|
||||
if (unlikely(!cfs_rq->throttle_uptodate)) {
|
||||
struct rq *rq = rq_of(cfs_rq);
|
||||
struct cfs_rq *pcfs_rq;
|
||||
struct task_group *tg;
|
||||
|
||||
cfs_rq->throttle_uptodate = 1;
|
||||
|
||||
/* Get closest up-to-date node, because leaves go first: */
|
||||
for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
|
||||
pcfs_rq = tg->cfs_rq[cpu_of(rq)];
|
||||
if (pcfs_rq->throttle_uptodate)
|
||||
break;
|
||||
}
|
||||
if (tg) {
|
||||
cfs_rq->throttle_count = pcfs_rq->throttle_count;
|
||||
cfs_rq->throttled_clock_task = rq_clock_task(rq);
|
||||
}
|
||||
}
|
||||
|
||||
/* an active group must be handled by the update_curr()->put() path */
|
||||
if (!cfs_rq->runtime_enabled || cfs_rq->curr)
|
||||
return;
|
||||
|
@ -4229,6 +4255,23 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
|
|||
throttle_cfs_rq(cfs_rq);
|
||||
}
|
||||
|
||||
static void sync_throttle(struct task_group *tg, int cpu)
|
||||
{
|
||||
struct cfs_rq *pcfs_rq, *cfs_rq;
|
||||
|
||||
if (!cfs_bandwidth_used())
|
||||
return;
|
||||
|
||||
if (!tg->parent)
|
||||
return;
|
||||
|
||||
cfs_rq = tg->cfs_rq[cpu];
|
||||
pcfs_rq = tg->parent->cfs_rq[cpu];
|
||||
|
||||
cfs_rq->throttle_count = pcfs_rq->throttle_count;
|
||||
pcfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
|
||||
}
|
||||
|
||||
/* conditionally throttle active cfs_rq's from put_prev_entity() */
|
||||
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
|
@ -4368,6 +4411,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
|
|||
static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
|
||||
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
|
||||
static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
|
||||
static inline void sync_throttle(struct task_group *tg, int cpu) {}
|
||||
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
|
||||
|
||||
static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
|
||||
|
@ -4476,7 +4520,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
|||
*
|
||||
* note: in the case of encountering a throttled cfs_rq we will
|
||||
* post the final h_nr_running increment below.
|
||||
*/
|
||||
*/
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
break;
|
||||
cfs_rq->h_nr_running++;
|
||||
|
@ -8317,31 +8361,17 @@ static void task_fork_fair(struct task_struct *p)
|
|||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
struct sched_entity *se = &p->se, *curr;
|
||||
int this_cpu = smp_processor_id();
|
||||
struct rq *rq = this_rq();
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
|
||||
raw_spin_lock(&rq->lock);
|
||||
update_rq_clock(rq);
|
||||
|
||||
cfs_rq = task_cfs_rq(current);
|
||||
curr = cfs_rq->curr;
|
||||
|
||||
/*
|
||||
* Not only the cpu but also the task_group of the parent might have
|
||||
* been changed after parent->se.parent,cfs_rq were copied to
|
||||
* child->se.parent,cfs_rq. So call __set_task_cpu() to make those
|
||||
* of child point to valid ones.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
__set_task_cpu(p, this_cpu);
|
||||
rcu_read_unlock();
|
||||
|
||||
update_curr(cfs_rq);
|
||||
|
||||
if (curr)
|
||||
if (curr) {
|
||||
update_curr(cfs_rq);
|
||||
se->vruntime = curr->vruntime;
|
||||
}
|
||||
place_entity(cfs_rq, se, 1);
|
||||
|
||||
if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
|
||||
|
@ -8354,8 +8384,7 @@ static void task_fork_fair(struct task_struct *p)
|
|||
}
|
||||
|
||||
se->vruntime -= cfs_rq->min_vruntime;
|
||||
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
raw_spin_unlock(&rq->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -8411,6 +8440,8 @@ static void detach_task_cfs_rq(struct task_struct *p)
|
|||
{
|
||||
struct sched_entity *se = &p->se;
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
u64 now = cfs_rq_clock_task(cfs_rq);
|
||||
int tg_update;
|
||||
|
||||
if (!vruntime_normalized(p)) {
|
||||
/*
|
||||
|
@ -8422,13 +8453,18 @@ static void detach_task_cfs_rq(struct task_struct *p)
|
|||
}
|
||||
|
||||
/* Catch up with the cfs_rq and remove our load when we leave */
|
||||
tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
|
||||
detach_entity_load_avg(cfs_rq, se);
|
||||
if (tg_update)
|
||||
update_tg_load_avg(cfs_rq, false);
|
||||
}
|
||||
|
||||
static void attach_task_cfs_rq(struct task_struct *p)
|
||||
{
|
||||
struct sched_entity *se = &p->se;
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
u64 now = cfs_rq_clock_task(cfs_rq);
|
||||
int tg_update;
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
/*
|
||||
|
@ -8439,7 +8475,10 @@ static void attach_task_cfs_rq(struct task_struct *p)
|
|||
#endif
|
||||
|
||||
/* Synchronize task with its cfs_rq */
|
||||
tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
|
||||
attach_entity_load_avg(cfs_rq, se);
|
||||
if (tg_update)
|
||||
update_tg_load_avg(cfs_rq, false);
|
||||
|
||||
if (!vruntime_normalized(p))
|
||||
se->vruntime += cfs_rq->min_vruntime;
|
||||
|
@ -8499,6 +8538,14 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
static void task_set_group_fair(struct task_struct *p)
|
||||
{
|
||||
struct sched_entity *se = &p->se;
|
||||
|
||||
set_task_rq(p, task_cpu(p));
|
||||
se->depth = se->parent ? se->parent->depth + 1 : 0;
|
||||
}
|
||||
|
||||
static void task_move_group_fair(struct task_struct *p)
|
||||
{
|
||||
detach_task_cfs_rq(p);
|
||||
|
@ -8511,6 +8558,19 @@ static void task_move_group_fair(struct task_struct *p)
|
|||
attach_task_cfs_rq(p);
|
||||
}
|
||||
|
||||
static void task_change_group_fair(struct task_struct *p, int type)
|
||||
{
|
||||
switch (type) {
|
||||
case TASK_SET_GROUP:
|
||||
task_set_group_fair(p);
|
||||
break;
|
||||
|
||||
case TASK_MOVE_GROUP:
|
||||
task_move_group_fair(p);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void free_fair_sched_group(struct task_group *tg)
|
||||
{
|
||||
int i;
|
||||
|
@ -8562,10 +8622,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
|||
init_cfs_rq(cfs_rq);
|
||||
init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
|
||||
init_entity_runnable_average(se);
|
||||
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
post_init_entity_util_avg(se);
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
@ -8576,6 +8632,23 @@ err:
|
|||
return 0;
|
||||
}
|
||||
|
||||
void online_fair_sched_group(struct task_group *tg)
|
||||
{
|
||||
struct sched_entity *se;
|
||||
struct rq *rq;
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
rq = cpu_rq(i);
|
||||
se = tg->se[i];
|
||||
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
post_init_entity_util_avg(se);
|
||||
sync_throttle(tg, i);
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
}
|
||||
}
|
||||
|
||||
void unregister_fair_sched_group(struct task_group *tg)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
@ -8680,6 +8753,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Empty implementations of the group online/unregister hooks; presumably
 * the variant built without CONFIG_FAIR_GROUP_SCHED (they sit just before
 * that option's #endif -- confirm against the full file).
 */
void online_fair_sched_group(struct task_group *tg)
{
}

void unregister_fair_sched_group(struct task_group *tg)
{
}
|
||||
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
@ -8739,7 +8814,7 @@ const struct sched_class fair_sched_class = {
|
|||
.update_curr = update_curr_fair,
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
.task_move_group = task_move_group_fair,
|
||||
.task_change_group = task_change_group_fair,
|
||||
#endif
|
||||
};
|
||||
|
||||
|
|
|
@ -201,6 +201,8 @@ exit_idle:
|
|||
*/
|
||||
static void cpu_idle_loop(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
while (1) {
|
||||
/*
|
||||
* If the arch has a polling bit, we maintain an invariant:
|
||||
|
@ -219,7 +221,7 @@ static void cpu_idle_loop(void)
|
|||
check_pgt_cache();
|
||||
rmb();
|
||||
|
||||
if (cpu_is_offline(smp_processor_id())) {
|
||||
if (cpu_is_offline(cpu)) {
|
||||
cpuhp_report_idle_dead();
|
||||
arch_cpu_idle_dead();
|
||||
}
|
||||
|
|
|
@ -321,6 +321,7 @@ extern int tg_nop(struct task_group *tg, void *data);
|
|||
|
||||
extern void free_fair_sched_group(struct task_group *tg);
|
||||
extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
|
||||
extern void online_fair_sched_group(struct task_group *tg);
|
||||
extern void unregister_fair_sched_group(struct task_group *tg);
|
||||
extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
||||
struct sched_entity *se, int cpu,
|
||||
|
@ -437,7 +438,7 @@ struct cfs_rq {
|
|||
|
||||
u64 throttled_clock, throttled_clock_task;
|
||||
u64 throttled_clock_task_time;
|
||||
int throttled, throttle_count, throttle_uptodate;
|
||||
int throttled, throttle_count;
|
||||
struct list_head throttled_list;
|
||||
#endif /* CONFIG_CFS_BANDWIDTH */
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
@ -1246,8 +1247,11 @@ struct sched_class {
|
|||
|
||||
void (*update_curr) (struct rq *rq);
|
||||
|
||||
#define TASK_SET_GROUP 0
|
||||
#define TASK_MOVE_GROUP 1
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
void (*task_move_group) (struct task_struct *p);
|
||||
void (*task_change_group) (struct task_struct *p, int type);
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -1809,16 +1813,3 @@ static inline void cpufreq_trigger_update(u64 time) {}
|
|||
#else /* arch_scale_freq_capacity */
|
||||
#define arch_scale_freq_invariant() (false)
|
||||
#endif
|
||||
|
||||
/*
 * Zero the runqueue's "previous" time-accounting snapshots. Each field is
 * only touched when the config option that guards it is enabled.
 */
static inline void account_reset_rq(struct rq *rq)
{
#if defined(CONFIG_IRQ_TIME_ACCOUNTING)
	rq->prev_irq_time = 0;
#endif

#if defined(CONFIG_PARAVIRT)
	rq->prev_steal_time = 0;
#endif

#if defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
	rq->prev_steal_time_rq = 0;
#endif
}
|
||||
|
|
Loading…
Reference in New Issue