sched/x86: Pass kernel thread parameters in 'struct fork_frame'
Instead of setting up a fake pt_regs context, put the kernel thread
function pointer and arg into the unused callee-restored registers
of 'struct fork_frame'.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1471106302-10159-6-git-send-email-brgerst@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
0100301bfd
commit
616d24835e
|
@ -240,35 +240,34 @@ END(__switch_to_asm)
|
||||||
* A newly forked process directly context switches into this address.
|
* A newly forked process directly context switches into this address.
|
||||||
*
|
*
|
||||||
* eax: prev task we switched from
|
* eax: prev task we switched from
|
||||||
|
* ebx: kernel thread func (NULL for user thread)
|
||||||
|
* edi: kernel thread arg
|
||||||
*/
|
*/
|
||||||
ENTRY(ret_from_fork)
|
ENTRY(ret_from_fork)
|
||||||
pushl %eax
|
pushl %eax
|
||||||
call schedule_tail
|
call schedule_tail
|
||||||
popl %eax
|
popl %eax
|
||||||
|
|
||||||
|
testl %ebx, %ebx
|
||||||
|
jnz 1f /* kernel threads are uncommon */
|
||||||
|
|
||||||
|
2:
|
||||||
/* When we fork, we trace the syscall return in the child, too. */
|
/* When we fork, we trace the syscall return in the child, too. */
|
||||||
movl %esp, %eax
|
movl %esp, %eax
|
||||||
call syscall_return_slowpath
|
call syscall_return_slowpath
|
||||||
jmp restore_all
|
jmp restore_all
|
||||||
END(ret_from_fork)
|
|
||||||
|
|
||||||
ENTRY(ret_from_kernel_thread)
|
|
||||||
pushl %eax
|
|
||||||
call schedule_tail
|
|
||||||
popl %eax
|
|
||||||
movl PT_EBP(%esp), %eax
|
|
||||||
call *PT_EBX(%esp)
|
|
||||||
movl $0, PT_EAX(%esp)
|
|
||||||
|
|
||||||
|
/* kernel thread */
|
||||||
|
1: movl %edi, %eax
|
||||||
|
call *%ebx
|
||||||
/*
|
/*
|
||||||
* Kernel threads return to userspace as if returning from a syscall.
|
* A kernel thread is allowed to return here after successfully
|
||||||
* We should check whether anything actually uses this path and, if so,
|
* calling do_execve(). Exit to userspace to complete the execve()
|
||||||
* consider switching it over to ret_from_fork.
|
* syscall.
|
||||||
*/
|
*/
|
||||||
movl %esp, %eax
|
movl $0, PT_EAX(%esp)
|
||||||
call syscall_return_slowpath
|
jmp 2b
|
||||||
jmp restore_all
|
END(ret_from_fork)
|
||||||
ENDPROC(ret_from_kernel_thread)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return to user mode is not as complex as all this looks,
|
* Return to user mode is not as complex as all this looks,
|
||||||
|
|
|
@ -407,37 +407,34 @@ END(__switch_to_asm)
|
||||||
* A newly forked process directly context switches into this address.
|
* A newly forked process directly context switches into this address.
|
||||||
*
|
*
|
||||||
* rax: prev task we switched from
|
* rax: prev task we switched from
|
||||||
|
* rbx: kernel thread func (NULL for user thread)
|
||||||
|
* r12: kernel thread arg
|
||||||
*/
|
*/
|
||||||
ENTRY(ret_from_fork)
|
ENTRY(ret_from_fork)
|
||||||
movq %rax, %rdi
|
movq %rax, %rdi
|
||||||
call schedule_tail /* rdi: 'prev' task parameter */
|
call schedule_tail /* rdi: 'prev' task parameter */
|
||||||
|
|
||||||
testb $3, CS(%rsp) /* from kernel_thread? */
|
testq %rbx, %rbx /* from kernel_thread? */
|
||||||
jnz 1f
|
jnz 1f /* kernel threads are uncommon */
|
||||||
|
|
||||||
/*
|
2:
|
||||||
* We came from kernel_thread. This code path is quite twisted, and
|
|
||||||
* someone should clean it up.
|
|
||||||
*
|
|
||||||
* copy_thread_tls stashes the function pointer in RBX and the
|
|
||||||
* parameter to be passed in RBP. The called function is permitted
|
|
||||||
* to call do_execve and thereby jump to user mode.
|
|
||||||
*/
|
|
||||||
movq RBP(%rsp), %rdi
|
|
||||||
call *RBX(%rsp)
|
|
||||||
movl $0, RAX(%rsp)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Fall through as though we're exiting a syscall. This makes a
|
|
||||||
* twisted sort of sense if we just called do_execve.
|
|
||||||
*/
|
|
||||||
|
|
||||||
1:
|
|
||||||
movq %rsp, %rdi
|
movq %rsp, %rdi
|
||||||
call syscall_return_slowpath /* returns with IRQs disabled */
|
call syscall_return_slowpath /* returns with IRQs disabled */
|
||||||
TRACE_IRQS_ON /* user mode is traced as IRQS on */
|
TRACE_IRQS_ON /* user mode is traced as IRQS on */
|
||||||
SWAPGS
|
SWAPGS
|
||||||
jmp restore_regs_and_iret
|
jmp restore_regs_and_iret
|
||||||
|
|
||||||
|
1:
|
||||||
|
/* kernel thread */
|
||||||
|
movq %r12, %rdi
|
||||||
|
call *%rbx
|
||||||
|
/*
|
||||||
|
* A kernel thread is allowed to return here after successfully
|
||||||
|
* calling do_execve(). Exit to userspace to complete the execve()
|
||||||
|
* syscall.
|
||||||
|
*/
|
||||||
|
movq $0, RAX(%rsp)
|
||||||
|
jmp 2b
|
||||||
END(ret_from_fork)
|
END(ret_from_fork)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -34,6 +34,8 @@ static inline void prepare_switch_to(struct task_struct *prev,
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
asmlinkage void ret_from_fork(void);
|
||||||
|
|
||||||
/* data that is pointed to by thread.sp */
|
/* data that is pointed to by thread.sp */
|
||||||
struct inactive_task_frame {
|
struct inactive_task_frame {
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
|
|
|
@ -55,9 +55,6 @@
|
||||||
#include <asm/switch_to.h>
|
#include <asm/switch_to.h>
|
||||||
#include <asm/vm86.h>
|
#include <asm/vm86.h>
|
||||||
|
|
||||||
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
|
||||||
asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return saved PC of a blocked thread.
|
* Return saved PC of a blocked thread.
|
||||||
*/
|
*/
|
||||||
|
@ -139,6 +136,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
frame->bp = 0;
|
frame->bp = 0;
|
||||||
|
frame->ret_addr = (unsigned long) ret_from_fork;
|
||||||
p->thread.sp = (unsigned long) fork_frame;
|
p->thread.sp = (unsigned long) fork_frame;
|
||||||
p->thread.sp0 = (unsigned long) (childregs+1);
|
p->thread.sp0 = (unsigned long) (childregs+1);
|
||||||
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
|
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
|
||||||
|
@ -146,25 +144,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
|
||||||
if (unlikely(p->flags & PF_KTHREAD)) {
|
if (unlikely(p->flags & PF_KTHREAD)) {
|
||||||
/* kernel thread */
|
/* kernel thread */
|
||||||
memset(childregs, 0, sizeof(struct pt_regs));
|
memset(childregs, 0, sizeof(struct pt_regs));
|
||||||
frame->ret_addr = (unsigned long) ret_from_kernel_thread;
|
frame->bx = sp; /* function */
|
||||||
task_user_gs(p) = __KERNEL_STACK_CANARY;
|
frame->di = arg;
|
||||||
childregs->ds = __USER_DS;
|
|
||||||
childregs->es = __USER_DS;
|
|
||||||
childregs->fs = __KERNEL_PERCPU;
|
|
||||||
childregs->bx = sp; /* function */
|
|
||||||
childregs->bp = arg;
|
|
||||||
childregs->orig_ax = -1;
|
|
||||||
childregs->cs = __KERNEL_CS | get_kernel_rpl();
|
|
||||||
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
|
|
||||||
p->thread.io_bitmap_ptr = NULL;
|
p->thread.io_bitmap_ptr = NULL;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
frame->bx = 0;
|
||||||
*childregs = *current_pt_regs();
|
*childregs = *current_pt_regs();
|
||||||
childregs->ax = 0;
|
childregs->ax = 0;
|
||||||
if (sp)
|
if (sp)
|
||||||
childregs->sp = sp;
|
childregs->sp = sp;
|
||||||
|
|
||||||
frame->ret_addr = (unsigned long) ret_from_fork;
|
|
||||||
task_user_gs(p) = get_user_gs(current_pt_regs());
|
task_user_gs(p) = get_user_gs(current_pt_regs());
|
||||||
|
|
||||||
p->thread.io_bitmap_ptr = NULL;
|
p->thread.io_bitmap_ptr = NULL;
|
||||||
|
|
|
@ -50,8 +50,6 @@
|
||||||
#include <asm/switch_to.h>
|
#include <asm/switch_to.h>
|
||||||
#include <asm/xen/hypervisor.h>
|
#include <asm/xen/hypervisor.h>
|
||||||
|
|
||||||
asmlinkage extern void ret_from_fork(void);
|
|
||||||
|
|
||||||
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
|
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
|
||||||
|
|
||||||
/* Prints also some state that isn't saved in the pt_regs */
|
/* Prints also some state that isn't saved in the pt_regs */
|
||||||
|
@ -165,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
|
||||||
if (unlikely(p->flags & PF_KTHREAD)) {
|
if (unlikely(p->flags & PF_KTHREAD)) {
|
||||||
/* kernel thread */
|
/* kernel thread */
|
||||||
memset(childregs, 0, sizeof(struct pt_regs));
|
memset(childregs, 0, sizeof(struct pt_regs));
|
||||||
childregs->sp = (unsigned long)childregs;
|
frame->bx = sp; /* function */
|
||||||
childregs->ss = __KERNEL_DS;
|
frame->r12 = arg;
|
||||||
childregs->bx = sp; /* function */
|
|
||||||
childregs->bp = arg;
|
|
||||||
childregs->orig_ax = -1;
|
|
||||||
childregs->cs = __KERNEL_CS | get_kernel_rpl();
|
|
||||||
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
frame->bx = 0;
|
||||||
*childregs = *current_pt_regs();
|
*childregs = *current_pt_regs();
|
||||||
|
|
||||||
childregs->ax = 0;
|
childregs->ax = 0;
|
||||||
|
|
Loading…
Reference in New Issue