sched: Add NEED_RESCHED to the preempt_count
In order to combine the preemption and need_resched tests we need to fold
the need_resched information into the preempt_count value. Since the
NEED_RESCHED flag is set across CPUs this needs to be an atomic operation;
however, we very much want to avoid making preempt_count atomic. We
therefore keep the existing TIF_NEED_RESCHED infrastructure in place, but
test it and fold its value into preempt_count at three sites:

 - resched_task(), when setting TIF_NEED_RESCHED on the current task;

 - scheduler_ipi(): when resched_task() sets TIF_NEED_RESCHED on a remote
   task it follows up with a reschedule IPI, and we can modify the
   CPU-local preempt_count from there;

 - cpu_idle_loop(), for when resched_task() found tsk_is_polling().

We use an inverted bitmask to indicate need_resched, so that a value of 0
means both need_resched and !atomic.

Also remove the barrier() in preempt_enable() between
preempt_enable_no_resched() and preempt_check_resched(), to avoid having
to reload the preemption value and to allow the compiler to use the flags
of the previous decrement. I couldn't come up with any sane reason for
this barrier() to be there, as preempt_enable_no_resched() already has a
barrier() before doing the decrement.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-7a7m5qqbn5pmwnd4wko9u6da@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit f27dde8dee
parent 4a2b4b2227
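To make the inverted-bit encoding concrete, here is a minimal user-space model
(plain C, compilable on its own; the variable count, the underscore-suffixed
helpers and the main() driver are illustrative stand-ins, not code from this
patch):

#include <stdio.h>

#define PREEMPT_NEED_RESCHED	0x80000000u	/* MSB, stored inverted */

/* Stand-in for the per-thread preempt_count word.  The "all clear" state
 * has the bit set: preemption enabled, no reschedule pending. */
static unsigned int count = PREEMPT_NEED_RESCHED;

static void set_need_resched(void)	{ count &= ~PREEMPT_NEED_RESCHED; }
static void clear_need_resched(void)	{ count |=  PREEMPT_NEED_RESCHED; }
static void preempt_disable_(void)	{ count++; }

/* preempt_enable(): one decrement, one test against zero.  The word is
 * zero exactly when the nesting count dropped to 0 AND the (inverted)
 * need_resched bit has been cleared. */
static void preempt_enable_(void)
{
	if (--count == 0)
		printf("would call preempt_schedule()\n");
}

int main(void)
{
	preempt_disable_();
	set_need_resched();	/* e.g. a higher-priority task woke up */
	preempt_enable_();	/* hits zero -> reschedule */
	clear_need_resched();	/* what __schedule() does after switching */
	return 0;
}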
@@ -10,9 +10,19 @@
 #include <linux/linkage.h>
 #include <linux/list.h>
 
+/*
+ * We use the MSB mostly because its available; see <linux/preempt_mask.h> for
+ * the other bits -- can't include that header due to inclusion hell.
+ */
+#define PREEMPT_NEED_RESCHED	0x80000000
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
 static __always_inline int preempt_count(void)
 {
-	return current_thread_info()->preempt_count;
+	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
 }
 
 static __always_inline int *preempt_count_ptr(void)
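Why the accessor masks the new bit: existing users treat any non-zero
preempt_count() as "atomic context, cannot preempt", and with the inverted
encoding the common case now has the MSB set.  A small sketch of the failure
mode the mask avoids (illustrative user-space C, not kernel code):

#include <stdio.h>

#define PREEMPT_NEED_RESCHED	0x80000000u

int main(void)
{
	/* Preemption enabled, nothing pending: the raw word is just the
	 * inverted bit. */
	unsigned int raw = PREEMPT_NEED_RESCHED;

	/* Unmasked, this looks like a non-zero count, i.e. "atomic". */
	printf("raw != 0     -> %d (wrongly looks atomic)\n", raw != 0);

	/* Masked, the classic meaning is preserved. */
	printf("masked count -> %u (preemptible)\n",
	       raw & ~PREEMPT_NEED_RESCHED);
	return 0;
}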
@@ -20,11 +30,40 @@ static __always_inline int *preempt_count_ptr(void)
 	return &current_thread_info()->preempt_count;
 }
 
+/*
+ * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the
+ * alternative is loosing a reschedule. Better schedule too often -- also this
+ * should be a very rare operation.
+ */
 static __always_inline void preempt_count_set(int pc)
 {
 	*preempt_count_ptr() = pc;
 }
 
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
+}
+
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
 extern void add_preempt_count(int val);
 extern void sub_preempt_count(int val);
@@ -42,7 +81,7 @@ asmlinkage void preempt_schedule(void);
 
 #define preempt_check_resched() \
 do { \
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+	if (unlikely(!*preempt_count_ptr())) \
 		preempt_schedule(); \
 } while (0)
 
@@ -52,7 +91,7 @@ void preempt_schedule_context(void);
 
 #define preempt_check_resched_context() \
 do { \
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+	if (unlikely(!*preempt_count_ptr())) \
 		preempt_schedule_context(); \
 } while (0)
 #else
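Both check macros can now decide with one comparison on the raw word, instead
of loading the TIF flag and leaving the nesting check to preempt_schedule()
itself.  Roughly, the decision table looks like this (a sketch under the
0x80000000 encoding above, not code from the patch):

#include <stdbool.h>

#define PREEMPT_NEED_RESCHED	0x80000000u

/*
 * raw preempt_count word   state                             raw == 0 ?
 * 0x80000000               enabled, nothing pending          no
 * 0x80000001               disabled once, nothing pending    no
 * 0x00000001               disabled once, resched pending    no
 * 0x00000000               enabled, resched pending          yes -> preempt
 */
static inline bool should_call_preempt_schedule(unsigned int raw)
{
	return raw == 0;	/* what !*preempt_count_ptr() tests */
}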
@@ -88,7 +127,6 @@ do { \
 #define preempt_enable() \
 do { \
 	preempt_enable_no_resched(); \
-	barrier(); \
 	preempt_check_resched(); \
 } while (0)
 
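The dropped barrier() sat between the decrement (done inside
preempt_enable_no_resched(), which already starts with its own barrier()) and
the zero test.  As I read the macros after this patch, preempt_enable()
expands to roughly the following; with no barrier() left in between, the
compiler may reuse the freshly decremented value (or its condition flags)
instead of reloading the count:

/*
 * Approximate expansion of preempt_enable() after this patch (helper
 * names are the kernel's, the layout is illustrative only):
 *
 *	do {
 *		barrier();				// preempt_enable_no_resched()
 *		dec_preempt_count();			//   "
 *		if (unlikely(!*preempt_count_ptr()))	// preempt_check_resched()
 *			preempt_schedule();
 *	} while (0);
 */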
@@ -116,7 +154,6 @@ do { \
 #define preempt_enable_notrace() \
 do { \
 	preempt_enable_no_resched_notrace(); \
-	barrier(); \
 	preempt_check_resched_context(); \
 } while (0)
 
@@ -22,6 +22,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
+#include <linux/preempt.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
@@ -434,7 +435,9 @@ struct task_cputime {
  * We include PREEMPT_ACTIVE to avoid cond_resched() from working
  * before the scheduler is active -- see should_resched().
  */
-#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE)
+#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE + PREEMPT_NEED_RESCHED)
+#define PREEMPT_ENABLED	(PREEMPT_NEED_RESCHED)
+#define PREEMPT_DISABLED	(1 + PREEMPT_NEED_RESCHED)
 
 /**
  * struct thread_group_cputimer - thread group interval timer counts
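With the bit inverted, "nothing pending" means the bit is set, so every
canonical counter value now carries PREEMPT_NEED_RESCHED.  A sketch of what
the new constants decode to (local re-definitions for illustration only;
PREEMPT_ACTIVE's exact value is architecture-dependent and the one below is
just a placeholder):

#define PREEMPT_NEED_RESCHED	0x80000000u
#define PREEMPT_ACTIVE		0x10000000u	/* placeholder value */

#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED)
#define PREEMPT_DISABLED	(1 + PREEMPT_NEED_RESCHED)
#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE + PREEMPT_NEED_RESCHED)

/* Masking off the inverted bit recovers the pre-patch values. */
_Static_assert((PREEMPT_ENABLED    & ~PREEMPT_NEED_RESCHED) == 0,
	       "enabled reads back as count 0");
_Static_assert((PREEMPT_DISABLED   & ~PREEMPT_NEED_RESCHED) == 1,
	       "disabled reads back as count 1");
_Static_assert((INIT_PREEMPT_COUNT & ~PREEMPT_NEED_RESCHED) == 1 + PREEMPT_ACTIVE,
	       "fork-time value still includes PREEMPT_ACTIVE");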
@@ -2408,7 +2411,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 
 static inline int need_resched(void)
 {
-	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
+	return unlikely(test_preempt_need_resched());
 }
 
 /*
@@ -105,6 +105,13 @@ static void cpu_idle_loop(void)
 				__current_set_polling();
 			}
 			arch_cpu_idle_exit();
+			/*
+			 * We need to test and propagate the TIF_NEED_RESCHED
+			 * bit here because we might not have send the
+			 * reschedule IPI to idle tasks.
+			 */
+			if (tif_need_resched())
+				set_preempt_need_resched();
 		}
 		tick_nohz_idle_exit();
 		schedule_preempt_disabled();
@@ -525,8 +525,10 @@ void resched_task(struct task_struct *p)
 	set_tsk_need_resched(p);
 
 	cpu = task_cpu(p);
-	if (cpu == smp_processor_id())
+	if (cpu == smp_processor_id()) {
+		set_preempt_need_resched();
 		return;
+	}
 
 	/* NEED_RESCHED must be visible before we test polling */
 	smp_mb();
@@ -1391,6 +1393,14 @@ static void sched_ttwu_pending(void)
 
 void scheduler_ipi(void)
 {
+	/*
+	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
+	 * TIF_NEED_RESCHED remotely (for the first time) will also send
+	 * this IPI.
+	 */
+	if (tif_need_resched())
+		set_preempt_need_resched();
+
 	if (llist_empty(&this_rq()->wake_list)
 			&& !tick_nohz_full_cpu(smp_processor_id())
 			&& !got_nohz_idle_kick())
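The preempt_count word itself is never written from another CPU; only the TIF
flag crosses CPUs.  A condensed view of the three fold sites and their
ordering, as they appear in this patch (comments only, not new code):

/*
 * Remote CPU, resched_task(p) with p running elsewhere:
 *	set_tsk_need_resched(p);	// atomic bitop on p's TIF word
 *	smp_mb();			// flag visible before the polling test
 *	smp_send_reschedule(cpu);	// skipped for a polling idle CPU
 *
 * Owning CPU, picking the flag up:
 *	scheduler_ipi():   if (tif_need_resched()) set_preempt_need_resched();
 *	cpu_idle_loop():   same fold, for the polling case where no IPI was sent
 *	resched_task():    folds directly when the target is the current CPU
 *
 * Because only the owning CPU ever touches its preempt_count word, the
 * word can stay a plain (non-atomic) int while TIF_NEED_RESCHED remains
 * the cross-CPU handshake.
 */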
@@ -1714,7 +1724,7 @@ void sched_fork(struct task_struct *p)
 #endif
 #ifdef CONFIG_PREEMPT_COUNT
 	/* Want to start with kernel preemption disabled. */
-	task_thread_info(p)->preempt_count = 1;
+	task_thread_info(p)->preempt_count = PREEMPT_DISABLED;
 #endif
 #ifdef CONFIG_SMP
 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
@@ -2425,6 +2435,7 @@ need_resched:
 	put_prev_task(rq, prev);
 	next = pick_next_task(rq);
 	clear_tsk_need_resched(prev);
+	clear_preempt_need_resched();
 	rq->skip_clock_update = 0;
 
 	if (likely(prev != next)) {
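__schedule() is where the cycle closes: once the next task is picked, both the
TIF flag and the folded bit are reset.  Because the bit is stored inverted,
"clearing need_resched" means OR-ing the bit back in; a tiny illustration of
that quirk (user-space sketch, values are examples only):

#define PREEMPT_NEED_RESCHED	0x80000000u

/* Restore the "nothing pending" encoding; the nesting part of the
 * count is untouched. */
static inline unsigned int clear_need_resched(unsigned int raw)
{
	return raw | PREEMPT_NEED_RESCHED;
}

/* Example: __schedule() runs with preemption disabled once and a pending
 * resched folded in, i.e. raw == 0x00000001; after the clear the word
 * reads 0x80000001 again. */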
@@ -2536,11 +2547,10 @@ EXPORT_SYMBOL(preempt_schedule);
  */
 asmlinkage void __sched preempt_schedule_irq(void)
 {
-	struct thread_info *ti = current_thread_info();
 	enum ctx_state prev_state;
 
 	/* Catch callers which need to be fixed */
-	BUG_ON(ti->preempt_count || !irqs_disabled());
+	BUG_ON(preempt_count() || !irqs_disabled());
 
 	prev_state = exception_enter();
 
@@ -4207,7 +4217,7 @@ void init_idle(struct task_struct *idle, int cpu)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
-	task_thread_info(idle)->preempt_count = 0;
+	task_thread_info(idle)->preempt_count = PREEMPT_ENABLED;
 
 	/*
 	 * The idle tasks have their own, simple scheduling class: