sched, x86: Provide a per-cpu preempt_count implementation
Convert x86 to use a per-cpu preemption count. The reason for doing so is that accessing per-cpu variables is a lot cheaper than accessing thread_info variables. We still need to save/restore the actual preemption count due to PREEMPT_ACTIVE so we place the per-cpu __preempt_count variable in the same cache-line as the other hot __switch_to() variables such as current_task. NOTE: this save/restore is required even for !PREEMPT kernels as cond_resched() also relies on preempt_count's PREEMPT_ACTIVE to ignore task_struct::state. Also rename thread_info::preempt_count to ensure nobody is 'accidentally' still poking at it. Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/n/tip-gzn5rfsf8trgjoqx8hyayy3q@git.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
a233f1120c
commit
c2daa3bed5
|
@ -5,4 +5,3 @@ genhdr-y += unistd_64.h
|
|||
genhdr-y += unistd_x32.h
|
||||
|
||||
generic-y += clkdev.h
|
||||
generic-y += preempt.h
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
#ifndef __ASM_PREEMPT_H
|
||||
#define __ASM_PREEMPT_H
|
||||
|
||||
#include <asm/rmwcc.h>
|
||||
#include <asm/percpu.h>
|
||||
#include <linux/thread_info.h>
|
||||
|
||||
DECLARE_PER_CPU(int, __preempt_count);
|
||||
|
||||
/*
|
||||
* We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
|
||||
* that think a non-zero value indicates we cannot preempt.
|
||||
*/
|
||||
static __always_inline int preempt_count(void)
|
||||
{
|
||||
return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
|
||||
}
|
||||
|
||||
/*
 * Overwrite this CPU's __preempt_count with @pc.
 *
 * The stored value is replaced as a whole, PREEMPT_NEED_RESCHED bit
 * included; nothing of the previous value is preserved.
 *
 * NOTE(review): preempt_count() returns the count with that bit masked
 * off, and a cleared bit is what test_preempt_need_resched() reports as
 * "resched needed". Writing such a value back therefore presumably flags
 * a reschedule as a side effect -- confirm that callers expect this.
 */
static __always_inline void preempt_count_set(int pc)
|
||||
{
|
||||
__this_cpu_write_4(__preempt_count, pc);
|
||||
}
|
||||
|
||||
/*
|
||||
* must be macros to avoid header recursion hell
|
||||
*/
|
||||
/*
 * task_preempt_count(p) - preemption count saved in @p's thread_info.
 *
 * Reads thread_info::saved_preempt_count (not the live per-cpu counter)
 * and masks off PREEMPT_NEED_RESCHED, mirroring what preempt_count() does
 * for the current CPU.
 */
#define task_preempt_count(p) \
|
||||
(task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED)
|
||||
|
||||
/*
 * init_task_preempt_count(p) - set @p's saved preemption count to
 * PREEMPT_DISABLED.
 *
 * Only the copy in thread_info is written; the per-cpu __preempt_count is
 * left untouched.
 */
#define init_task_preempt_count(p) do { \
|
||||
task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
|
||||
} while (0)
|
||||
|
||||
/*
 * init_idle_preempt_count(p, cpu) - initialise the preemption count for
 * task @p on @cpu.
 *
 * Sets both @p's saved copy in thread_info and the per-cpu
 * __preempt_count of @cpu to PREEMPT_ENABLED, so the two start out in
 * agreement.
 */
#define init_idle_preempt_count(p, cpu) do { \
|
||||
task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \
|
||||
per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* We fold the NEED_RESCHED bit into the preempt count such that
|
||||
* preempt_enable() can decrement and test for needing to reschedule with a
|
||||
* single instruction.
|
||||
*
|
||||
* We invert the actual bit, so that when the decrement hits 0 we know we both
|
||||
* need to resched (the bit is cleared) and can resched (no preempt count).
|
||||
*/
|
||||
|
||||
/*
 * Flag that a reschedule is needed on this CPU.
 *
 * The bit is stored inverted, so "set" is implemented by CLEARING
 * PREEMPT_NEED_RESCHED in __preempt_count; the rest of the count is left
 * as it was.
 */
static __always_inline void set_preempt_need_resched(void)
|
||||
{
|
||||
__this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
|
||||
}
|
||||
|
||||
/*
 * Withdraw the "reschedule needed" flag on this CPU.
 *
 * The bit is stored inverted, so "clear" is implemented by SETTING
 * PREEMPT_NEED_RESCHED in __preempt_count; the rest of the count is left
 * as it was.
 */
static __always_inline void clear_preempt_need_resched(void)
|
||||
{
|
||||
__this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
|
||||
}
|
||||
|
||||
/*
 * Report whether a reschedule has been requested on this CPU.  The flag is
 * kept inverted, so a cleared PREEMPT_NEED_RESCHED bit is the "yes" case.
 */
static __always_inline bool test_preempt_need_resched(void)
{
	int pc = __this_cpu_read_4(__preempt_count);

	return (pc & PREEMPT_NEED_RESCHED) == 0;
}
|
||||
|
||||
/*
|
||||
* The various preempt_count add/sub methods
|
||||
*/
|
||||
|
||||
/*
 * Add @val to this CPU's __preempt_count.
 *
 * Operates on the raw per-cpu value; no masking of PREEMPT_NEED_RESCHED
 * is done here.
 */
static __always_inline void __preempt_count_add(int val)
|
||||
{
|
||||
__this_cpu_add_4(__preempt_count, val);
|
||||
}
|
||||
|
||||
/*
 * Subtract @val from this CPU's __preempt_count.
 *
 * Implemented as a per-cpu add of -@val on the raw value; no masking of
 * PREEMPT_NEED_RESCHED is done here.
 */
static __always_inline void __preempt_count_sub(int val)
|
||||
{
|
||||
__this_cpu_add_4(__preempt_count, -val);
|
||||
}
|
||||
|
||||
/*
 * Decrement this CPU's __preempt_count and report whether the result
 * calls for a reschedule.
 *
 * Because the NEED_RESCHED bit is folded (inverted) into the same value,
 * hitting zero means both "resched needed" and "no preempt count held".
 *
 * NOTE(review): the whole body is the GEN_UNARY_RMWcc() macro, which
 * presumably emits a "decl" on the per-cpu variable and returns true when
 * the "e" (equal/zero) condition holds -- confirm against <asm/rmwcc.h>.
 * The macro supplies the return itself, so no statement may be added
 * after it.
 */
static __always_inline bool __preempt_count_dec_and_test(void)
|
||||
{
|
||||
GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
|
||||
}
|
||||
|
||||
/*
 * Returns true when we need to resched -- even if we cannot.
 *
 * Only the (inverted) PREEMPT_NEED_RESCHED bit is consulted, through
 * test_preempt_need_resched(); the preempt count itself plays no part.
 * The result is wrapped in unlikely() as a branch hint.
 */
|
||||
static __always_inline bool need_resched(void)
|
||||
{
|
||||
return unlikely(test_preempt_need_resched());
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true when we need to resched and can (barring IRQ state).
|
||||
*/
|
||||
static __always_inline bool should_resched(void)
|
||||
{
|
||||
return unlikely(!__this_cpu_read_4(__preempt_count));
|
||||
}
|
||||
|
||||
#endif /* __ASM_PREEMPT_H */
|
|
@ -28,8 +28,7 @@ struct thread_info {
|
|||
__u32 flags; /* low level flags */
|
||||
__u32 status; /* thread synchronous flags */
|
||||
__u32 cpu; /* current CPU */
|
||||
int preempt_count; /* 0 => preemptable,
|
||||
<0 => BUG */
|
||||
int saved_preempt_count;
|
||||
mm_segment_t addr_limit;
|
||||
struct restart_block restart_block;
|
||||
void __user *sysenter_return;
|
||||
|
@ -49,7 +48,7 @@ struct thread_info {
|
|||
.exec_domain = &default_exec_domain, \
|
||||
.flags = 0, \
|
||||
.cpu = 0, \
|
||||
.preempt_count = INIT_PREEMPT_COUNT, \
|
||||
.saved_preempt_count = INIT_PREEMPT_COUNT, \
|
||||
.addr_limit = KERNEL_DS, \
|
||||
.restart_block = { \
|
||||
.fn = do_no_restart_syscall, \
|
||||
|
|
|
@ -32,7 +32,6 @@ void common(void) {
|
|||
OFFSET(TI_flags, thread_info, flags);
|
||||
OFFSET(TI_status, thread_info, status);
|
||||
OFFSET(TI_addr_limit, thread_info, addr_limit);
|
||||
OFFSET(TI_preempt_count, thread_info, preempt_count);
|
||||
|
||||
BLANK();
|
||||
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
|
||||
|
|
|
@ -1095,6 +1095,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
|
|||
|
||||
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
|
||||
|
||||
/*
 * Per-cpu preemption count, starting at INIT_PREEMPT_COUNT and exported
 * for users outside this file.
 * NOTE(review): an identical definition appears in a later hunk of this
 * file; presumably the two sit in mutually exclusive config branches --
 * confirm.
 */
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
|
||||
EXPORT_PER_CPU_SYMBOL(__preempt_count);
|
||||
|
||||
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
|
||||
|
||||
/*
|
||||
|
@ -1169,6 +1172,8 @@ void debug_stack_reset(void)
|
|||
|
||||
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
|
||||
EXPORT_PER_CPU_SYMBOL(current_task);
|
||||
/*
 * Per-cpu preemption count, starting at INIT_PREEMPT_COUNT and exported
 * for users outside this file.  Defined right after current_task; the
 * commit message states the intent of keeping it in the same cache line
 * as the other hot __switch_to() variables.
 * NOTE(review): an identical definition appears in an earlier hunk of
 * this file; presumably the two sit in mutually exclusive config
 * branches -- confirm.
 */
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
|
||||
EXPORT_PER_CPU_SYMBOL(__preempt_count);
|
||||
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
|
|
|
@ -362,12 +362,9 @@ END(ret_from_exception)
|
|||
#ifdef CONFIG_PREEMPT
|
||||
ENTRY(resume_kernel)
|
||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
||||
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
|
||||
jnz restore_all
|
||||
need_resched:
|
||||
movl TI_flags(%ebp), %ecx # need_resched set ?
|
||||
testb $_TIF_NEED_RESCHED, %cl
|
||||
jz restore_all
|
||||
cmpl $0,PER_CPU_VAR(__preempt_count)
|
||||
jnz restore_all
|
||||
testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
|
||||
jz restore_all
|
||||
call preempt_schedule_irq
|
||||
|
|
|
@ -1118,10 +1118,8 @@ retint_signal:
|
|||
/* Returning to kernel space. Check if we need preemption */
|
||||
/* rcx: threadinfo. interrupts off. */
|
||||
ENTRY(retint_kernel)
|
||||
cmpl $0,TI_preempt_count(%rcx)
|
||||
cmpl $0,PER_CPU_VAR(__preempt_count)
|
||||
jnz retint_restore_args
|
||||
bt $TIF_NEED_RESCHED,TI_flags(%rcx)
|
||||
jnc retint_restore_args
|
||||
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
|
||||
jnc retint_restore_args
|
||||
call preempt_schedule_irq
|
||||
|
|
|
@ -100,9 +100,6 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
|
|||
irqctx->tinfo.task = curctx->tinfo.task;
|
||||
irqctx->tinfo.previous_esp = current_stack_pointer;
|
||||
|
||||
/* Copy the preempt_count so that the [soft]irq checks work. */
|
||||
irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
|
||||
|
||||
if (unlikely(overflow))
|
||||
call_on_stack(print_stack_overflow, isp);
|
||||
|
||||
|
@ -131,7 +128,6 @@ void irq_ctx_init(int cpu)
|
|||
THREAD_SIZE_ORDER));
|
||||
memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
|
||||
irqctx->tinfo.cpu = cpu;
|
||||
irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
|
||||
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
|
||||
|
||||
per_cpu(hardirq_ctx, cpu) = irqctx;
|
||||
|
|
|
@ -291,6 +291,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
|
||||
set_iopl_mask(next->iopl);
|
||||
|
||||
/*
|
||||
* If it were not for PREEMPT_ACTIVE we could guarantee that the
|
||||
* preempt_count of all tasks was equal here and this would not be
|
||||
* needed.
|
||||
*/
|
||||
task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
|
||||
this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
|
||||
|
||||
/*
|
||||
* Now maybe handle debug registers and/or IO bitmaps
|
||||
*/
|
||||
|
|
|
@ -363,6 +363,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
this_cpu_write(old_rsp, next->usersp);
|
||||
this_cpu_write(current_task, next_p);
|
||||
|
||||
/*
|
||||
* If it were not for PREEMPT_ACTIVE we could guarantee that the
|
||||
* preempt_count of all tasks was equal here and this would not be
|
||||
* needed.
|
||||
*/
|
||||
task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
|
||||
this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
|
||||
|
||||
this_cpu_write(kernel_stack,
|
||||
(unsigned long)task_stack_page(next_p) +
|
||||
THREAD_SIZE - KERNEL_STACK_OFFSET);
|
||||
|
|
Loading…
Reference in New Issue