Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar: "This contains misc fixes: preempt_schedule_common() and io_schedule() recursion fixes, sched/dl fixes, a completion_done() revert, two sched/rt fixes and a comment update patch" * 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/rt: Avoid obvious configuration fail sched/autogroup: Fix failure to set cpu.rt_runtime_us sched/dl: Do update_rq_clock() in yield_task_dl() sched: Prevent recursion in io_schedule() sched/completion: Serialize completion_done() with complete() sched: Fix preempt_schedule_common() triggering tracing recursion sched/dl: Prevent enqueue of a sleeping task in dl_task_timer() sched: Make dl_task_time() use task_rq_lock() sched: Clarify ordering between task_rq_lock() and move_queued_task()
This commit is contained in:
commit
e2defd0271
|
@ -363,9 +363,6 @@ extern void show_regs(struct pt_regs *);
|
||||||
*/
|
*/
|
||||||
extern void show_stack(struct task_struct *task, unsigned long *sp);
|
extern void show_stack(struct task_struct *task, unsigned long *sp);
|
||||||
|
|
||||||
void io_schedule(void);
|
|
||||||
long io_schedule_timeout(long timeout);
|
|
||||||
|
|
||||||
extern void cpu_init (void);
|
extern void cpu_init (void);
|
||||||
extern void trap_init(void);
|
extern void trap_init(void);
|
||||||
extern void update_process_times(int user);
|
extern void update_process_times(int user);
|
||||||
|
@ -422,6 +419,13 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout);
|
||||||
asmlinkage void schedule(void);
|
asmlinkage void schedule(void);
|
||||||
extern void schedule_preempt_disabled(void);
|
extern void schedule_preempt_disabled(void);
|
||||||
|
|
||||||
|
extern long io_schedule_timeout(long timeout);
|
||||||
|
|
||||||
|
static inline void io_schedule(void)
|
||||||
|
{
|
||||||
|
io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
|
||||||
|
}
|
||||||
|
|
||||||
struct nsproxy;
|
struct nsproxy;
|
||||||
struct user_namespace;
|
struct user_namespace;
|
||||||
|
|
||||||
|
|
|
@ -87,8 +87,7 @@ static inline struct autogroup *autogroup_create(void)
|
||||||
* so we don't have to move tasks around upon policy change,
|
* so we don't have to move tasks around upon policy change,
|
||||||
* or flail around trying to allocate bandwidth on the fly.
|
* or flail around trying to allocate bandwidth on the fly.
|
||||||
* A bandwidth exception in __sched_setscheduler() allows
|
* A bandwidth exception in __sched_setscheduler() allows
|
||||||
* the policy change to proceed. Thereafter, task_group()
|
* the policy change to proceed.
|
||||||
* returns &root_task_group, so zero bandwidth is required.
|
|
||||||
*/
|
*/
|
||||||
free_rt_sched_group(tg);
|
free_rt_sched_group(tg);
|
||||||
tg->rt_se = root_task_group.rt_se;
|
tg->rt_se = root_task_group.rt_se;
|
||||||
|
@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
|
||||||
if (tg != &root_task_group)
|
if (tg != &root_task_group)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (p->sched_class != &fair_sched_class)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We can only assume the task group can't go away on us if
|
* We can only assume the task group can't go away on us if
|
||||||
* autogroup_move_group() can see us on ->thread_group list.
|
* autogroup_move_group() can see us on ->thread_group list.
|
||||||
|
|
|
@ -274,7 +274,7 @@ bool try_wait_for_completion(struct completion *x)
|
||||||
* first without taking the lock so we can
|
* first without taking the lock so we can
|
||||||
* return early in the blocking case.
|
* return early in the blocking case.
|
||||||
*/
|
*/
|
||||||
if (!ACCESS_ONCE(x->done))
|
if (!READ_ONCE(x->done))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
spin_lock_irqsave(&x->wait.lock, flags);
|
spin_lock_irqsave(&x->wait.lock, flags);
|
||||||
|
@ -297,6 +297,21 @@ EXPORT_SYMBOL(try_wait_for_completion);
|
||||||
*/
|
*/
|
||||||
bool completion_done(struct completion *x)
|
bool completion_done(struct completion *x)
|
||||||
{
|
{
|
||||||
return !!ACCESS_ONCE(x->done);
|
if (!READ_ONCE(x->done))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If ->done, we need to wait for complete() to release ->wait.lock
|
||||||
|
* otherwise we can end up freeing the completion before complete()
|
||||||
|
* is done referencing it.
|
||||||
|
*
|
||||||
|
* The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
|
||||||
|
* the loads of ->done and ->wait.lock such that we cannot observe
|
||||||
|
* the lock before complete() acquires it while observing the ->done
|
||||||
|
* after it's acquired the lock.
|
||||||
|
*/
|
||||||
|
smp_rmb();
|
||||||
|
spin_unlock_wait(&x->wait.lock);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(completion_done);
|
EXPORT_SYMBOL(completion_done);
|
||||||
|
|
|
@ -306,66 +306,6 @@ __read_mostly int scheduler_running;
|
||||||
*/
|
*/
|
||||||
int sysctl_sched_rt_runtime = 950000;
|
int sysctl_sched_rt_runtime = 950000;
|
||||||
|
|
||||||
/*
|
|
||||||
* __task_rq_lock - lock the rq @p resides on.
|
|
||||||
*/
|
|
||||||
static inline struct rq *__task_rq_lock(struct task_struct *p)
|
|
||||||
__acquires(rq->lock)
|
|
||||||
{
|
|
||||||
struct rq *rq;
|
|
||||||
|
|
||||||
lockdep_assert_held(&p->pi_lock);
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
rq = task_rq(p);
|
|
||||||
raw_spin_lock(&rq->lock);
|
|
||||||
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
|
|
||||||
return rq;
|
|
||||||
raw_spin_unlock(&rq->lock);
|
|
||||||
|
|
||||||
while (unlikely(task_on_rq_migrating(p)))
|
|
||||||
cpu_relax();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
|
|
||||||
*/
|
|
||||||
static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
|
|
||||||
__acquires(p->pi_lock)
|
|
||||||
__acquires(rq->lock)
|
|
||||||
{
|
|
||||||
struct rq *rq;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
raw_spin_lock_irqsave(&p->pi_lock, *flags);
|
|
||||||
rq = task_rq(p);
|
|
||||||
raw_spin_lock(&rq->lock);
|
|
||||||
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
|
|
||||||
return rq;
|
|
||||||
raw_spin_unlock(&rq->lock);
|
|
||||||
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
|
|
||||||
|
|
||||||
while (unlikely(task_on_rq_migrating(p)))
|
|
||||||
cpu_relax();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void __task_rq_unlock(struct rq *rq)
|
|
||||||
__releases(rq->lock)
|
|
||||||
{
|
|
||||||
raw_spin_unlock(&rq->lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void
|
|
||||||
task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
|
|
||||||
__releases(rq->lock)
|
|
||||||
__releases(p->pi_lock)
|
|
||||||
{
|
|
||||||
raw_spin_unlock(&rq->lock);
|
|
||||||
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* this_rq_lock - lock this runqueue and disable interrupts.
|
* this_rq_lock - lock this runqueue and disable interrupts.
|
||||||
*/
|
*/
|
||||||
|
@ -2899,7 +2839,7 @@ void __sched schedule_preempt_disabled(void)
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void preempt_schedule_common(void)
|
static void __sched notrace preempt_schedule_common(void)
|
||||||
{
|
{
|
||||||
do {
|
do {
|
||||||
__preempt_count_add(PREEMPT_ACTIVE);
|
__preempt_count_add(PREEMPT_ACTIVE);
|
||||||
|
@ -4418,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to);
|
||||||
* This task is about to go to sleep on IO. Increment rq->nr_iowait so
|
* This task is about to go to sleep on IO. Increment rq->nr_iowait so
|
||||||
* that process accounting knows that this is a task in IO wait state.
|
* that process accounting knows that this is a task in IO wait state.
|
||||||
*/
|
*/
|
||||||
void __sched io_schedule(void)
|
|
||||||
{
|
|
||||||
struct rq *rq = raw_rq();
|
|
||||||
|
|
||||||
delayacct_blkio_start();
|
|
||||||
atomic_inc(&rq->nr_iowait);
|
|
||||||
blk_flush_plug(current);
|
|
||||||
current->in_iowait = 1;
|
|
||||||
schedule();
|
|
||||||
current->in_iowait = 0;
|
|
||||||
atomic_dec(&rq->nr_iowait);
|
|
||||||
delayacct_blkio_end();
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(io_schedule);
|
|
||||||
|
|
||||||
long __sched io_schedule_timeout(long timeout)
|
long __sched io_schedule_timeout(long timeout)
|
||||||
{
|
{
|
||||||
struct rq *rq = raw_rq();
|
int old_iowait = current->in_iowait;
|
||||||
|
struct rq *rq;
|
||||||
long ret;
|
long ret;
|
||||||
|
|
||||||
delayacct_blkio_start();
|
|
||||||
atomic_inc(&rq->nr_iowait);
|
|
||||||
blk_flush_plug(current);
|
|
||||||
current->in_iowait = 1;
|
current->in_iowait = 1;
|
||||||
|
if (old_iowait)
|
||||||
|
blk_schedule_flush_plug(current);
|
||||||
|
else
|
||||||
|
blk_flush_plug(current);
|
||||||
|
|
||||||
|
delayacct_blkio_start();
|
||||||
|
rq = raw_rq();
|
||||||
|
atomic_inc(&rq->nr_iowait);
|
||||||
ret = schedule_timeout(timeout);
|
ret = schedule_timeout(timeout);
|
||||||
current->in_iowait = 0;
|
current->in_iowait = old_iowait;
|
||||||
atomic_dec(&rq->nr_iowait);
|
atomic_dec(&rq->nr_iowait);
|
||||||
delayacct_blkio_end();
|
delayacct_blkio_end();
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL(io_schedule_timeout);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* sys_sched_get_priority_max - return maximum RT priority.
|
* sys_sched_get_priority_max - return maximum RT priority.
|
||||||
|
@ -7642,6 +7575,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
|
||||||
{
|
{
|
||||||
struct task_struct *g, *p;
|
struct task_struct *g, *p;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Autogroups do not have RT tasks; see autogroup_create().
|
||||||
|
*/
|
||||||
|
if (task_group_is_autogroup(tg))
|
||||||
|
return 0;
|
||||||
|
|
||||||
for_each_process_thread(g, p) {
|
for_each_process_thread(g, p) {
|
||||||
if (rt_task(p) && task_group(p) == tg)
|
if (rt_task(p) && task_group(p) == tg)
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -7734,6 +7673,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
|
||||||
{
|
{
|
||||||
int i, err = 0;
|
int i, err = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Disallowing the root group RT runtime is BAD, it would disallow the
|
||||||
|
* kernel creating (and or operating) RT threads.
|
||||||
|
*/
|
||||||
|
if (tg == &root_task_group && rt_runtime == 0)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
/* No period doesn't make any sense. */
|
||||||
|
if (rt_period == 0)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
mutex_lock(&rt_constraints_mutex);
|
mutex_lock(&rt_constraints_mutex);
|
||||||
read_lock(&tasklist_lock);
|
read_lock(&tasklist_lock);
|
||||||
err = __rt_schedulable(tg, rt_period, rt_runtime);
|
err = __rt_schedulable(tg, rt_period, rt_runtime);
|
||||||
|
@ -7790,9 +7740,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
|
||||||
rt_period = (u64)rt_period_us * NSEC_PER_USEC;
|
rt_period = (u64)rt_period_us * NSEC_PER_USEC;
|
||||||
rt_runtime = tg->rt_bandwidth.rt_runtime;
|
rt_runtime = tg->rt_bandwidth.rt_runtime;
|
||||||
|
|
||||||
if (rt_period == 0)
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
|
return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
|
||||||
struct sched_dl_entity,
|
struct sched_dl_entity,
|
||||||
dl_timer);
|
dl_timer);
|
||||||
struct task_struct *p = dl_task_of(dl_se);
|
struct task_struct *p = dl_task_of(dl_se);
|
||||||
|
unsigned long flags;
|
||||||
struct rq *rq;
|
struct rq *rq;
|
||||||
again:
|
|
||||||
rq = task_rq(p);
|
|
||||||
raw_spin_lock(&rq->lock);
|
|
||||||
|
|
||||||
if (rq != task_rq(p)) {
|
rq = task_rq_lock(current, &flags);
|
||||||
/* Task was moved, retrying. */
|
|
||||||
raw_spin_unlock(&rq->lock);
|
|
||||||
goto again;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need to take care of several possible races here:
|
* We need to take care of several possible races here:
|
||||||
|
@ -541,6 +535,26 @@ again:
|
||||||
|
|
||||||
sched_clock_tick();
|
sched_clock_tick();
|
||||||
update_rq_clock(rq);
|
update_rq_clock(rq);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the throttle happened during sched-out; like:
|
||||||
|
*
|
||||||
|
* schedule()
|
||||||
|
* deactivate_task()
|
||||||
|
* dequeue_task_dl()
|
||||||
|
* update_curr_dl()
|
||||||
|
* start_dl_timer()
|
||||||
|
* __dequeue_task_dl()
|
||||||
|
* prev->on_rq = 0;
|
||||||
|
*
|
||||||
|
* We can be both throttled and !queued. Replenish the counter
|
||||||
|
* but do not enqueue -- wait for our wakeup to do that.
|
||||||
|
*/
|
||||||
|
if (!task_on_rq_queued(p)) {
|
||||||
|
replenish_dl_entity(dl_se, dl_se);
|
||||||
|
goto unlock;
|
||||||
|
}
|
||||||
|
|
||||||
enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
|
enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
|
||||||
if (dl_task(rq->curr))
|
if (dl_task(rq->curr))
|
||||||
check_preempt_curr_dl(rq, p, 0);
|
check_preempt_curr_dl(rq, p, 0);
|
||||||
|
@ -555,7 +569,7 @@ again:
|
||||||
push_dl_task(rq);
|
push_dl_task(rq);
|
||||||
#endif
|
#endif
|
||||||
unlock:
|
unlock:
|
||||||
raw_spin_unlock(&rq->lock);
|
task_rq_unlock(rq, current, &flags);
|
||||||
|
|
||||||
return HRTIMER_NORESTART;
|
return HRTIMER_NORESTART;
|
||||||
}
|
}
|
||||||
|
@ -898,6 +912,7 @@ static void yield_task_dl(struct rq *rq)
|
||||||
rq->curr->dl.dl_yielded = 1;
|
rq->curr->dl.dl_yielded = 1;
|
||||||
p->dl.runtime = 0;
|
p->dl.runtime = 0;
|
||||||
}
|
}
|
||||||
|
update_rq_clock(rq);
|
||||||
update_curr_dl(rq);
|
update_curr_dl(rq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1380,6 +1380,82 @@ static inline void sched_avg_update(struct rq *rq) { }
|
||||||
|
|
||||||
extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
|
extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* __task_rq_lock - lock the rq @p resides on.
|
||||||
|
*/
|
||||||
|
static inline struct rq *__task_rq_lock(struct task_struct *p)
|
||||||
|
__acquires(rq->lock)
|
||||||
|
{
|
||||||
|
struct rq *rq;
|
||||||
|
|
||||||
|
lockdep_assert_held(&p->pi_lock);
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
rq = task_rq(p);
|
||||||
|
raw_spin_lock(&rq->lock);
|
||||||
|
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
|
||||||
|
return rq;
|
||||||
|
raw_spin_unlock(&rq->lock);
|
||||||
|
|
||||||
|
while (unlikely(task_on_rq_migrating(p)))
|
||||||
|
cpu_relax();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
|
||||||
|
*/
|
||||||
|
static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
|
||||||
|
__acquires(p->pi_lock)
|
||||||
|
__acquires(rq->lock)
|
||||||
|
{
|
||||||
|
struct rq *rq;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
raw_spin_lock_irqsave(&p->pi_lock, *flags);
|
||||||
|
rq = task_rq(p);
|
||||||
|
raw_spin_lock(&rq->lock);
|
||||||
|
/*
|
||||||
|
* move_queued_task() task_rq_lock()
|
||||||
|
*
|
||||||
|
* ACQUIRE (rq->lock)
|
||||||
|
* [S] ->on_rq = MIGRATING [L] rq = task_rq()
|
||||||
|
* WMB (__set_task_cpu()) ACQUIRE (rq->lock);
|
||||||
|
* [S] ->cpu = new_cpu [L] task_rq()
|
||||||
|
* [L] ->on_rq
|
||||||
|
* RELEASE (rq->lock)
|
||||||
|
*
|
||||||
|
* If we observe the old cpu in task_rq_lock, the acquire of
|
||||||
|
* the old rq->lock will fully serialize against the stores.
|
||||||
|
*
|
||||||
|
* If we observe the new cpu in task_rq_lock, the acquire will
|
||||||
|
* pair with the WMB to ensure we must then also see migrating.
|
||||||
|
*/
|
||||||
|
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
|
||||||
|
return rq;
|
||||||
|
raw_spin_unlock(&rq->lock);
|
||||||
|
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
|
||||||
|
|
||||||
|
while (unlikely(task_on_rq_migrating(p)))
|
||||||
|
cpu_relax();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void __task_rq_unlock(struct rq *rq)
|
||||||
|
__releases(rq->lock)
|
||||||
|
{
|
||||||
|
raw_spin_unlock(&rq->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
|
||||||
|
__releases(rq->lock)
|
||||||
|
__releases(p->pi_lock)
|
||||||
|
{
|
||||||
|
raw_spin_unlock(&rq->lock);
|
||||||
|
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
#ifdef CONFIG_PREEMPT
|
#ifdef CONFIG_PREEMPT
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue