x86/stackframe/32: Provide consistent pt_regs
Currently pt_regs on x86_32 has an oddity in that kernel regs (!user_mode(regs)) are short two entries (esp/ss). This means that any code trying to use them (typically: regs->sp) needs to jump through some unfortunate hoops. Change the entry code to fix this up and create a full pt_regs frame. This then simplifies various trampolines in ftrace and kprobes, the stack unwinder, ptrace, kdump and kgdb. Much thanks to Josh for help with the cleanups! Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> Acked-by: Masami Hiramatsu <mhiramat@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
ea1ed38dba
commit
3c88c692c2
|
@ -202,9 +202,102 @@
|
||||||
.Lend_\@:
|
.Lend_\@:
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
#define CS_FROM_ENTRY_STACK (1 << 31)
|
||||||
|
#define CS_FROM_USER_CR3 (1 << 30)
|
||||||
|
#define CS_FROM_KERNEL (1 << 29)
|
||||||
|
|
||||||
|
.macro FIXUP_FRAME
|
||||||
|
/*
|
||||||
|
* The high bits of the CS dword (__csh) are used for CS_FROM_*.
|
||||||
|
* Clear them in case hardware didn't do this for us.
|
||||||
|
*/
|
||||||
|
andl $0x0000ffff, 3*4(%esp)
|
||||||
|
|
||||||
|
#ifdef CONFIG_VM86
|
||||||
|
testl $X86_EFLAGS_VM, 4*4(%esp)
|
||||||
|
jnz .Lfrom_usermode_no_fixup_\@
|
||||||
|
#endif
|
||||||
|
testl $SEGMENT_RPL_MASK, 3*4(%esp)
|
||||||
|
jnz .Lfrom_usermode_no_fixup_\@
|
||||||
|
|
||||||
|
orl $CS_FROM_KERNEL, 3*4(%esp)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When we're here from kernel mode; the (exception) stack looks like:
|
||||||
|
*
|
||||||
|
* 5*4(%esp) - <previous context>
|
||||||
|
* 4*4(%esp) - flags
|
||||||
|
* 3*4(%esp) - cs
|
||||||
|
* 2*4(%esp) - ip
|
||||||
|
* 1*4(%esp) - orig_eax
|
||||||
|
* 0*4(%esp) - gs / function
|
||||||
|
*
|
||||||
|
* Let's build a 5-entry IRET frame after that, such that struct pt_regs
|
||||||
|
* is complete and in particular regs->sp is correct. This gives us
|
||||||
|
* the original 5 entries as gap:
|
||||||
|
*
|
||||||
|
* 12*4(%esp) - <previous context>
|
||||||
|
* 11*4(%esp) - gap / flags
|
||||||
|
* 10*4(%esp) - gap / cs
|
||||||
|
* 9*4(%esp) - gap / ip
|
||||||
|
* 8*4(%esp) - gap / orig_eax
|
||||||
|
* 7*4(%esp) - gap / gs / function
|
||||||
|
* 6*4(%esp) - ss
|
||||||
|
* 5*4(%esp) - sp
|
||||||
|
* 4*4(%esp) - flags
|
||||||
|
* 3*4(%esp) - cs
|
||||||
|
* 2*4(%esp) - ip
|
||||||
|
* 1*4(%esp) - orig_eax
|
||||||
|
* 0*4(%esp) - gs / function
|
||||||
|
*/
|
||||||
|
|
||||||
|
pushl %ss # ss
|
||||||
|
pushl %esp # sp (points at ss)
|
||||||
|
addl $6*4, (%esp) # point sp back at the previous context
|
||||||
|
pushl 6*4(%esp) # flags
|
||||||
|
pushl 6*4(%esp) # cs
|
||||||
|
pushl 6*4(%esp) # ip
|
||||||
|
pushl 6*4(%esp) # orig_eax
|
||||||
|
pushl 6*4(%esp) # gs / function
|
||||||
|
.Lfrom_usermode_no_fixup_\@:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro IRET_FRAME
|
||||||
|
testl $CS_FROM_KERNEL, 1*4(%esp)
|
||||||
|
jz .Lfinished_frame_\@
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reconstruct the 3 entry IRET frame right after the (modified)
|
||||||
|
* regs->sp without lowering %esp in between, such that an NMI in the
|
||||||
|
* middle doesn't scribble our stack.
|
||||||
|
*/
|
||||||
|
pushl %eax
|
||||||
|
pushl %ecx
|
||||||
|
movl 5*4(%esp), %eax # (modified) regs->sp
|
||||||
|
|
||||||
|
movl 4*4(%esp), %ecx # flags
|
||||||
|
movl %ecx, -4(%eax)
|
||||||
|
|
||||||
|
movl 3*4(%esp), %ecx # cs
|
||||||
|
andl $0x0000ffff, %ecx
|
||||||
|
movl %ecx, -8(%eax)
|
||||||
|
|
||||||
|
movl 2*4(%esp), %ecx # ip
|
||||||
|
movl %ecx, -12(%eax)
|
||||||
|
|
||||||
|
movl 1*4(%esp), %ecx # eax
|
||||||
|
movl %ecx, -16(%eax)
|
||||||
|
|
||||||
|
popl %ecx
|
||||||
|
lea -16(%eax), %esp
|
||||||
|
popl %eax
|
||||||
|
.Lfinished_frame_\@:
|
||||||
|
.endm
|
||||||
|
|
||||||
.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
|
.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
|
||||||
cld
|
cld
|
||||||
PUSH_GS
|
PUSH_GS
|
||||||
|
FIXUP_FRAME
|
||||||
pushl %fs
|
pushl %fs
|
||||||
pushl %es
|
pushl %es
|
||||||
pushl %ds
|
pushl %ds
|
||||||
|
@ -358,9 +451,6 @@
|
||||||
* switch to it before we do any copying.
|
* switch to it before we do any copying.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define CS_FROM_ENTRY_STACK (1 << 31)
|
|
||||||
#define CS_FROM_USER_CR3 (1 << 30)
|
|
||||||
|
|
||||||
.macro SWITCH_TO_KERNEL_STACK
|
.macro SWITCH_TO_KERNEL_STACK
|
||||||
|
|
||||||
ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
|
ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
|
||||||
|
@ -374,13 +464,6 @@
|
||||||
* that register for the time this macro runs
|
* that register for the time this macro runs
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
|
||||||
* The high bits of the CS dword (__csh) are used for
|
|
||||||
* CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
|
|
||||||
* hardware didn't do this for us.
|
|
||||||
*/
|
|
||||||
andl $(0x0000ffff), PT_CS(%esp)
|
|
||||||
|
|
||||||
/* Are we on the entry stack? Bail out if not! */
|
/* Are we on the entry stack? Bail out if not! */
|
||||||
movl PER_CPU_VAR(cpu_entry_area), %ecx
|
movl PER_CPU_VAR(cpu_entry_area), %ecx
|
||||||
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
|
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
|
||||||
|
@ -990,6 +1073,7 @@ restore_all:
|
||||||
/* Restore user state */
|
/* Restore user state */
|
||||||
RESTORE_REGS pop=4 # skip orig_eax/error_code
|
RESTORE_REGS pop=4 # skip orig_eax/error_code
|
||||||
.Lirq_return:
|
.Lirq_return:
|
||||||
|
IRET_FRAME
|
||||||
/*
|
/*
|
||||||
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
|
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
|
||||||
* when returning from IPI handler and when returning from
|
* when returning from IPI handler and when returning from
|
||||||
|
@ -1340,6 +1424,7 @@ END(page_fault)
|
||||||
|
|
||||||
common_exception:
|
common_exception:
|
||||||
/* the function address is in %gs's slot on the stack */
|
/* the function address is in %gs's slot on the stack */
|
||||||
|
FIXUP_FRAME
|
||||||
pushl %fs
|
pushl %fs
|
||||||
pushl %es
|
pushl %es
|
||||||
pushl %ds
|
pushl %ds
|
||||||
|
|
|
@ -70,22 +70,6 @@ struct kimage;
|
||||||
#define KEXEC_BACKUP_SRC_START (0UL)
|
#define KEXEC_BACKUP_SRC_START (0UL)
|
||||||
#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
|
#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
|
||||||
|
|
||||||
/*
|
|
||||||
* CPU does not save ss and sp on stack if execution is already
|
|
||||||
* running in kernel mode at the time of NMI occurrence. This code
|
|
||||||
* fixes it.
|
|
||||||
*/
|
|
||||||
static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
|
|
||||||
struct pt_regs *oldregs)
|
|
||||||
{
|
|
||||||
#ifdef CONFIG_X86_32
|
|
||||||
newregs->sp = (unsigned long)&(oldregs->sp);
|
|
||||||
asm volatile("xorl %%eax, %%eax\n\t"
|
|
||||||
"movw %%ss, %%ax\n\t"
|
|
||||||
:"=a"(newregs->ss));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function is responsible for capturing register states if coming
|
* This function is responsible for capturing register states if coming
|
||||||
* via panic otherwise just fix up the ss and sp if coming via kernel
|
* via panic otherwise just fix up the ss and sp if coming via kernel
|
||||||
|
@ -96,7 +80,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
|
||||||
{
|
{
|
||||||
if (oldregs) {
|
if (oldregs) {
|
||||||
memcpy(newregs, oldregs, sizeof(*newregs));
|
memcpy(newregs, oldregs, sizeof(*newregs));
|
||||||
crash_fixup_ss_esp(newregs, oldregs);
|
|
||||||
} else {
|
} else {
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
|
asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
|
||||||
|
|
|
@ -166,14 +166,10 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
|
||||||
#define compat_user_stack_pointer() current_pt_regs()->sp
|
#define compat_user_stack_pointer() current_pt_regs()->sp
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
|
||||||
extern unsigned long kernel_stack_pointer(struct pt_regs *regs);
|
|
||||||
#else
|
|
||||||
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
|
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
return regs->sp;
|
return regs->sp;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#define GET_IP(regs) ((regs)->ip)
|
#define GET_IP(regs) ((regs)->ip)
|
||||||
#define GET_FP(regs) ((regs)->bp)
|
#define GET_FP(regs) ((regs)->bp)
|
||||||
|
@ -201,14 +197,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
|
||||||
if (unlikely(offset > MAX_REG_OFFSET))
|
if (unlikely(offset > MAX_REG_OFFSET))
|
||||||
return 0;
|
return 0;
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
/*
|
|
||||||
* Traps from the kernel do not save sp and ss.
|
|
||||||
* Use the helper function to retrieve sp.
|
|
||||||
*/
|
|
||||||
if (offset == offsetof(struct pt_regs, sp) &&
|
|
||||||
regs->cs == __KERNEL_CS)
|
|
||||||
return kernel_stack_pointer(regs);
|
|
||||||
|
|
||||||
/* The selector fields are 16-bit. */
|
/* The selector fields are 16-bit. */
|
||||||
if (offset == offsetof(struct pt_regs, cs) ||
|
if (offset == offsetof(struct pt_regs, cs) ||
|
||||||
offset == offsetof(struct pt_regs, ss) ||
|
offset == offsetof(struct pt_regs, ss) ||
|
||||||
|
@ -234,8 +222,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
|
||||||
static inline int regs_within_kernel_stack(struct pt_regs *regs,
|
static inline int regs_within_kernel_stack(struct pt_regs *regs,
|
||||||
unsigned long addr)
|
unsigned long addr)
|
||||||
{
|
{
|
||||||
return ((addr & ~(THREAD_SIZE - 1)) ==
|
return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1)));
|
||||||
(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -249,7 +236,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
|
||||||
*/
|
*/
|
||||||
static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
|
static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
|
||||||
{
|
{
|
||||||
unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
|
unsigned long *addr = (unsigned long *)regs->sp;
|
||||||
|
|
||||||
addr += n;
|
addr += n;
|
||||||
if (regs_within_kernel_stack(regs, (unsigned long)addr))
|
if (regs_within_kernel_stack(regs, (unsigned long)addr))
|
||||||
|
|
|
@ -78,7 +78,7 @@ static inline unsigned long *
|
||||||
get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
|
get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
if (regs)
|
if (regs)
|
||||||
return (unsigned long *)kernel_stack_pointer(regs);
|
return (unsigned long *)regs->sp;
|
||||||
|
|
||||||
if (task == current)
|
if (task == current)
|
||||||
return __builtin_frame_address(0);
|
return __builtin_frame_address(0);
|
||||||
|
|
|
@ -73,14 +73,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
|
||||||
|
|
||||||
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
|
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_X86_32
|
|
||||||
struct pt_regs fixed_regs;
|
|
||||||
|
|
||||||
if (!user_mode(regs)) {
|
|
||||||
crash_fixup_ss_esp(&fixed_regs, regs);
|
|
||||||
regs = &fixed_regs;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
crash_save_cpu(regs, cpu);
|
crash_save_cpu(regs, cpu);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include <asm/ftrace.h>
|
#include <asm/ftrace.h>
|
||||||
#include <asm/nospec-branch.h>
|
#include <asm/nospec-branch.h>
|
||||||
#include <asm/frame.h>
|
#include <asm/frame.h>
|
||||||
|
#include <asm/asm-offsets.h>
|
||||||
|
|
||||||
# define function_hook __fentry__
|
# define function_hook __fentry__
|
||||||
EXPORT_SYMBOL(__fentry__)
|
EXPORT_SYMBOL(__fentry__)
|
||||||
|
@ -90,26 +91,38 @@ END(ftrace_caller)
|
||||||
|
|
||||||
ENTRY(ftrace_regs_caller)
|
ENTRY(ftrace_regs_caller)
|
||||||
/*
|
/*
|
||||||
* i386 does not save SS and ESP when coming from kernel.
|
* We're here from an mcount/fentry CALL, and the stack frame looks like:
|
||||||
* Instead, to get sp, &regs->sp is used (see ptrace.h).
|
*
|
||||||
* Unfortunately, that means eflags must be at the same location
|
* <previous context>
|
||||||
* as the current return ip is. We move the return ip into the
|
* RET-IP
|
||||||
* regs->ip location, and move flags into the return ip location.
|
*
|
||||||
|
* The purpose of this function is to call out in an emulated INT3
|
||||||
|
* environment with a stack frame like:
|
||||||
|
*
|
||||||
|
* <previous context>
|
||||||
|
* gap / RET-IP
|
||||||
|
* gap
|
||||||
|
* gap
|
||||||
|
* gap
|
||||||
|
* pt_regs
|
||||||
|
*
|
||||||
|
* We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds
|
||||||
*/
|
*/
|
||||||
pushl $__KERNEL_CS
|
subl $3*4, %esp # RET-IP + 3 gaps
|
||||||
pushl 4(%esp) /* Save the return ip */
|
pushl %ss # ss
|
||||||
pushl $0 /* Load 0 into orig_ax */
|
pushl %esp # points at ss
|
||||||
|
addl $5*4, (%esp) # make it point at <previous context>
|
||||||
|
pushfl # flags
|
||||||
|
pushl $__KERNEL_CS # cs
|
||||||
|
pushl 7*4(%esp) # ip <- RET-IP
|
||||||
|
pushl $0 # orig_eax
|
||||||
|
|
||||||
pushl %gs
|
pushl %gs
|
||||||
pushl %fs
|
pushl %fs
|
||||||
pushl %es
|
pushl %es
|
||||||
pushl %ds
|
pushl %ds
|
||||||
|
|
||||||
pushl %eax
|
pushl %eax
|
||||||
|
|
||||||
/* Get flags and place them into the return ip slot */
|
|
||||||
pushf
|
|
||||||
popl %eax
|
|
||||||
movl %eax, 8*4(%esp)
|
|
||||||
|
|
||||||
pushl %ebp
|
pushl %ebp
|
||||||
pushl %edi
|
pushl %edi
|
||||||
pushl %esi
|
pushl %esi
|
||||||
|
@ -119,24 +132,25 @@ ENTRY(ftrace_regs_caller)
|
||||||
|
|
||||||
ENCODE_FRAME_POINTER
|
ENCODE_FRAME_POINTER
|
||||||
|
|
||||||
movl 12*4(%esp), %eax /* Load ip (1st parameter) */
|
movl PT_EIP(%esp), %eax # 1st argument: IP
|
||||||
subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
|
subl $MCOUNT_INSN_SIZE, %eax
|
||||||
movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */
|
movl 21*4(%esp), %edx # 2nd argument: parent ip
|
||||||
movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
|
movl function_trace_op, %ecx # 3rd argument: ftrace_pos
|
||||||
pushl %esp /* Save pt_regs as 4th parameter */
|
pushl %esp # 4th argument: pt_regs
|
||||||
|
|
||||||
GLOBAL(ftrace_regs_call)
|
GLOBAL(ftrace_regs_call)
|
||||||
call ftrace_stub
|
call ftrace_stub
|
||||||
|
|
||||||
addl $4, %esp /* Skip pt_regs */
|
addl $4, %esp # skip 4th argument
|
||||||
|
|
||||||
/* restore flags */
|
/* place IP below the new SP */
|
||||||
push 14*4(%esp)
|
movl PT_OLDESP(%esp), %eax
|
||||||
popf
|
movl PT_EIP(%esp), %ecx
|
||||||
|
movl %ecx, -4(%eax)
|
||||||
|
|
||||||
/* Move return ip back to its original location */
|
/* place EAX below that */
|
||||||
movl 12*4(%esp), %eax
|
movl PT_EAX(%esp), %ecx
|
||||||
movl %eax, 14*4(%esp)
|
movl %ecx, -8(%eax)
|
||||||
|
|
||||||
popl %ebx
|
popl %ebx
|
||||||
popl %ecx
|
popl %ecx
|
||||||
|
@ -144,14 +158,9 @@ GLOBAL(ftrace_regs_call)
|
||||||
popl %esi
|
popl %esi
|
||||||
popl %edi
|
popl %edi
|
||||||
popl %ebp
|
popl %ebp
|
||||||
popl %eax
|
|
||||||
popl %ds
|
|
||||||
popl %es
|
|
||||||
popl %fs
|
|
||||||
popl %gs
|
|
||||||
|
|
||||||
/* use lea to not affect flags */
|
lea -8(%eax), %esp
|
||||||
lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */
|
popl %eax
|
||||||
|
|
||||||
jmp .Lftrace_ret
|
jmp .Lftrace_ret
|
||||||
|
|
||||||
|
|
|
@ -118,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
switch (regno) {
|
switch (regno) {
|
||||||
case GDB_SS:
|
|
||||||
if (!user_mode(regs))
|
|
||||||
*(unsigned long *)mem = __KERNEL_DS;
|
|
||||||
break;
|
|
||||||
case GDB_SP:
|
|
||||||
if (!user_mode(regs))
|
|
||||||
*(unsigned long *)mem = kernel_stack_pointer(regs);
|
|
||||||
break;
|
|
||||||
case GDB_GS:
|
case GDB_GS:
|
||||||
case GDB_FS:
|
case GDB_FS:
|
||||||
*(unsigned long *)mem = 0xFFFF;
|
*(unsigned long *)mem = 0xFFFF;
|
||||||
|
|
|
@ -72,8 +72,8 @@
|
||||||
" popl %edi\n" \
|
" popl %edi\n" \
|
||||||
" popl %ebp\n" \
|
" popl %ebp\n" \
|
||||||
" popl %eax\n" \
|
" popl %eax\n" \
|
||||||
/* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
|
/* Skip ds, es, fs, gs, orig_ax, ip, and cs. */\
|
||||||
" addl $24, %esp\n"
|
" addl $7*4, %esp\n"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Ensure if the instruction can be boostable */
|
/* Ensure if the instruction can be boostable */
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
|
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
|
||||||
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
|
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
|
||||||
|
|
||||||
#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
|
#define stack_addr(regs) ((unsigned long *)regs->sp)
|
||||||
|
|
||||||
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
|
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
|
||||||
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
|
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
|
||||||
|
@ -718,29 +718,27 @@ asm(
|
||||||
".global kretprobe_trampoline\n"
|
".global kretprobe_trampoline\n"
|
||||||
".type kretprobe_trampoline, @function\n"
|
".type kretprobe_trampoline, @function\n"
|
||||||
"kretprobe_trampoline:\n"
|
"kretprobe_trampoline:\n"
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
/* We don't bother saving the ss register */
|
/* We don't bother saving the ss register */
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
" pushq %rsp\n"
|
" pushq %rsp\n"
|
||||||
" pushfq\n"
|
" pushfq\n"
|
||||||
SAVE_REGS_STRING
|
SAVE_REGS_STRING
|
||||||
" movq %rsp, %rdi\n"
|
" movq %rsp, %rdi\n"
|
||||||
" call trampoline_handler\n"
|
" call trampoline_handler\n"
|
||||||
/* Replace saved sp with true return address. */
|
/* Replace saved sp with true return address. */
|
||||||
" movq %rax, 152(%rsp)\n"
|
" movq %rax, 19*8(%rsp)\n"
|
||||||
RESTORE_REGS_STRING
|
RESTORE_REGS_STRING
|
||||||
" popfq\n"
|
" popfq\n"
|
||||||
#else
|
#else
|
||||||
" pushf\n"
|
" pushl %esp\n"
|
||||||
|
" pushfl\n"
|
||||||
SAVE_REGS_STRING
|
SAVE_REGS_STRING
|
||||||
" movl %esp, %eax\n"
|
" movl %esp, %eax\n"
|
||||||
" call trampoline_handler\n"
|
" call trampoline_handler\n"
|
||||||
/* Move flags to cs */
|
/* Replace saved sp with true return address. */
|
||||||
" movl 56(%esp), %edx\n"
|
" movl %eax, 15*4(%esp)\n"
|
||||||
" movl %edx, 52(%esp)\n"
|
|
||||||
/* Replace saved flags with true return address. */
|
|
||||||
" movl %eax, 56(%esp)\n"
|
|
||||||
RESTORE_REGS_STRING
|
RESTORE_REGS_STRING
|
||||||
" popf\n"
|
" popfl\n"
|
||||||
#endif
|
#endif
|
||||||
" ret\n"
|
" ret\n"
|
||||||
".size kretprobe_trampoline, .-kretprobe_trampoline\n"
|
".size kretprobe_trampoline, .-kretprobe_trampoline\n"
|
||||||
|
@ -781,16 +779,13 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
|
||||||
INIT_HLIST_HEAD(&empty_rp);
|
INIT_HLIST_HEAD(&empty_rp);
|
||||||
kretprobe_hash_lock(current, &head, &flags);
|
kretprobe_hash_lock(current, &head, &flags);
|
||||||
/* fixup registers */
|
/* fixup registers */
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
regs->cs = __KERNEL_CS;
|
regs->cs = __KERNEL_CS;
|
||||||
/* On x86-64, we use pt_regs->sp for return address holder. */
|
#ifdef CONFIG_X86_32
|
||||||
frame_pointer = &regs->sp;
|
regs->cs |= get_kernel_rpl();
|
||||||
#else
|
|
||||||
regs->cs = __KERNEL_CS | get_kernel_rpl();
|
|
||||||
regs->gs = 0;
|
regs->gs = 0;
|
||||||
/* On x86-32, we use pt_regs->flags for return address holder. */
|
|
||||||
frame_pointer = &regs->flags;
|
|
||||||
#endif
|
#endif
|
||||||
|
/* We use pt_regs->sp for return address holder. */
|
||||||
|
frame_pointer = &regs->sp;
|
||||||
regs->ip = trampoline_address;
|
regs->ip = trampoline_address;
|
||||||
regs->orig_ax = ~0UL;
|
regs->orig_ax = ~0UL;
|
||||||
|
|
||||||
|
|
|
@ -102,14 +102,15 @@ asm (
|
||||||
"optprobe_template_call:\n"
|
"optprobe_template_call:\n"
|
||||||
ASM_NOP5
|
ASM_NOP5
|
||||||
/* Move flags to rsp */
|
/* Move flags to rsp */
|
||||||
" movq 144(%rsp), %rdx\n"
|
" movq 18*8(%rsp), %rdx\n"
|
||||||
" movq %rdx, 152(%rsp)\n"
|
" movq %rdx, 19*8(%rsp)\n"
|
||||||
RESTORE_REGS_STRING
|
RESTORE_REGS_STRING
|
||||||
/* Skip flags entry */
|
/* Skip flags entry */
|
||||||
" addq $8, %rsp\n"
|
" addq $8, %rsp\n"
|
||||||
" popfq\n"
|
" popfq\n"
|
||||||
#else /* CONFIG_X86_32 */
|
#else /* CONFIG_X86_32 */
|
||||||
" pushf\n"
|
" pushl %esp\n"
|
||||||
|
" pushfl\n"
|
||||||
SAVE_REGS_STRING
|
SAVE_REGS_STRING
|
||||||
" movl %esp, %edx\n"
|
" movl %esp, %edx\n"
|
||||||
".global optprobe_template_val\n"
|
".global optprobe_template_val\n"
|
||||||
|
@ -118,9 +119,13 @@ asm (
|
||||||
".global optprobe_template_call\n"
|
".global optprobe_template_call\n"
|
||||||
"optprobe_template_call:\n"
|
"optprobe_template_call:\n"
|
||||||
ASM_NOP5
|
ASM_NOP5
|
||||||
|
/* Move flags into esp */
|
||||||
|
" movl 14*4(%esp), %edx\n"
|
||||||
|
" movl %edx, 15*4(%esp)\n"
|
||||||
RESTORE_REGS_STRING
|
RESTORE_REGS_STRING
|
||||||
" addl $4, %esp\n" /* skip cs */
|
/* Skip flags entry */
|
||||||
" popf\n"
|
" addl $4, %esp\n"
|
||||||
|
" popfl\n"
|
||||||
#endif
|
#endif
|
||||||
".global optprobe_template_end\n"
|
".global optprobe_template_end\n"
|
||||||
"optprobe_template_end:\n"
|
"optprobe_template_end:\n"
|
||||||
|
@ -152,10 +157,9 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
|
||||||
} else {
|
} else {
|
||||||
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
||||||
/* Save skipped registers */
|
/* Save skipped registers */
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
regs->cs = __KERNEL_CS;
|
regs->cs = __KERNEL_CS;
|
||||||
#else
|
#ifdef CONFIG_X86_32
|
||||||
regs->cs = __KERNEL_CS | get_kernel_rpl();
|
regs->cs |= get_kernel_rpl();
|
||||||
regs->gs = 0;
|
regs->gs = 0;
|
||||||
#endif
|
#endif
|
||||||
regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
|
regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
|
||||||
|
|
|
@ -62,27 +62,21 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
|
||||||
{
|
{
|
||||||
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
|
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
|
||||||
unsigned long d0, d1, d2, d3, d6, d7;
|
unsigned long d0, d1, d2, d3, d6, d7;
|
||||||
unsigned long sp;
|
unsigned short gs;
|
||||||
unsigned short ss, gs;
|
|
||||||
|
|
||||||
if (user_mode(regs)) {
|
if (user_mode(regs))
|
||||||
sp = regs->sp;
|
|
||||||
ss = regs->ss;
|
|
||||||
gs = get_user_gs(regs);
|
gs = get_user_gs(regs);
|
||||||
} else {
|
else
|
||||||
sp = kernel_stack_pointer(regs);
|
|
||||||
savesegment(ss, ss);
|
|
||||||
savesegment(gs, gs);
|
savesegment(gs, gs);
|
||||||
}
|
|
||||||
|
|
||||||
show_ip(regs, KERN_DEFAULT);
|
show_ip(regs, KERN_DEFAULT);
|
||||||
|
|
||||||
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
|
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
|
||||||
regs->ax, regs->bx, regs->cx, regs->dx);
|
regs->ax, regs->bx, regs->cx, regs->dx);
|
||||||
printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
|
printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
|
||||||
regs->si, regs->di, regs->bp, sp);
|
regs->si, regs->di, regs->bp, regs->sp);
|
||||||
printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
|
printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
|
||||||
(u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
|
(u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags);
|
||||||
|
|
||||||
if (mode != SHOW_REGS_ALL)
|
if (mode != SHOW_REGS_ALL)
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -154,35 +154,6 @@ static inline bool invalid_selector(u16 value)
|
||||||
|
|
||||||
#define FLAG_MASK FLAG_MASK_32
|
#define FLAG_MASK FLAG_MASK_32
|
||||||
|
|
||||||
/*
|
|
||||||
* X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
|
|
||||||
* when it traps. The previous stack will be directly underneath the saved
|
|
||||||
* registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
|
|
||||||
*
|
|
||||||
* Now, if the stack is empty, '&regs->sp' is out of range. In this
|
|
||||||
* case we try to take the previous stack. To always return a non-null
|
|
||||||
* stack pointer we fall back to regs as stack if no previous stack
|
|
||||||
* exists.
|
|
||||||
*
|
|
||||||
* This is valid only for kernel mode traps.
|
|
||||||
*/
|
|
||||||
unsigned long kernel_stack_pointer(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
|
|
||||||
unsigned long sp = (unsigned long)&regs->sp;
|
|
||||||
u32 *prev_esp;
|
|
||||||
|
|
||||||
if (context == (sp & ~(THREAD_SIZE - 1)))
|
|
||||||
return sp;
|
|
||||||
|
|
||||||
prev_esp = (u32 *)(context);
|
|
||||||
if (*prev_esp)
|
|
||||||
return (unsigned long)*prev_esp;
|
|
||||||
|
|
||||||
return (unsigned long)regs;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL_GPL(kernel_stack_pointer);
|
|
||||||
|
|
||||||
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
|
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
|
||||||
{
|
{
|
||||||
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
|
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
|
||||||
|
|
|
@ -37,8 +37,7 @@ unsigned long profile_pc(struct pt_regs *regs)
|
||||||
#ifdef CONFIG_FRAME_POINTER
|
#ifdef CONFIG_FRAME_POINTER
|
||||||
return *(unsigned long *)(regs->bp + sizeof(long));
|
return *(unsigned long *)(regs->bp + sizeof(long));
|
||||||
#else
|
#else
|
||||||
unsigned long *sp =
|
unsigned long *sp = (unsigned long *)regs->sp;
|
||||||
(unsigned long *)kernel_stack_pointer(regs);
|
|
||||||
/*
|
/*
|
||||||
* Return address is either directly at stack pointer
|
* Return address is either directly at stack pointer
|
||||||
* or above a saved flags. Eflags has bits 22-31 zero,
|
* or above a saved flags. Eflags has bits 22-31 zero,
|
||||||
|
|
|
@ -70,15 +70,6 @@ static void unwind_dump(struct unwind_state *state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t regs_size(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
/* x86_32 regs from kernel mode are two words shorter: */
|
|
||||||
if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
|
|
||||||
return sizeof(*regs) - 2*sizeof(long);
|
|
||||||
|
|
||||||
return sizeof(*regs);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool in_entry_code(unsigned long ip)
|
static bool in_entry_code(unsigned long ip)
|
||||||
{
|
{
|
||||||
char *addr = (char *)ip;
|
char *addr = (char *)ip;
|
||||||
|
@ -198,12 +189,6 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
|
||||||
#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
|
|
||||||
#else
|
|
||||||
#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static bool update_stack_state(struct unwind_state *state,
|
static bool update_stack_state(struct unwind_state *state,
|
||||||
unsigned long *next_bp)
|
unsigned long *next_bp)
|
||||||
{
|
{
|
||||||
|
@ -214,7 +199,7 @@ static bool update_stack_state(struct unwind_state *state,
|
||||||
size_t len;
|
size_t len;
|
||||||
|
|
||||||
if (state->regs)
|
if (state->regs)
|
||||||
prev_frame_end = (void *)state->regs + regs_size(state->regs);
|
prev_frame_end = (void *)state->regs + sizeof(*state->regs);
|
||||||
else
|
else
|
||||||
prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE;
|
prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE;
|
||||||
|
|
||||||
|
@ -222,7 +207,7 @@ static bool update_stack_state(struct unwind_state *state,
|
||||||
regs = decode_frame_pointer(next_bp);
|
regs = decode_frame_pointer(next_bp);
|
||||||
if (regs) {
|
if (regs) {
|
||||||
frame = (unsigned long *)regs;
|
frame = (unsigned long *)regs;
|
||||||
len = KERNEL_REGS_SIZE;
|
len = sizeof(*regs);
|
||||||
state->got_irq = true;
|
state->got_irq = true;
|
||||||
} else {
|
} else {
|
||||||
frame = next_bp;
|
frame = next_bp;
|
||||||
|
@ -246,14 +231,6 @@ static bool update_stack_state(struct unwind_state *state,
|
||||||
frame < prev_frame_end)
|
frame < prev_frame_end)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/*
|
|
||||||
* On 32-bit with user mode regs, make sure the last two regs are safe
|
|
||||||
* to access:
|
|
||||||
*/
|
|
||||||
if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
|
|
||||||
!on_stack(info, frame, len + 2*sizeof(long)))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* Move state to the next frame: */
|
/* Move state to the next frame: */
|
||||||
if (regs) {
|
if (regs) {
|
||||||
state->regs = regs;
|
state->regs = regs;
|
||||||
|
@ -412,10 +389,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
|
||||||
* Pretend that the frame is complete and that BP points to it, but save
|
* Pretend that the frame is complete and that BP points to it, but save
|
||||||
* the real BP so that we can use it when looking for the next frame.
|
* the real BP so that we can use it when looking for the next frame.
|
||||||
*/
|
*/
|
||||||
if (regs && regs->ip == 0 &&
|
if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) {
|
||||||
(unsigned long *)kernel_stack_pointer(regs) >= first_frame) {
|
|
||||||
state->next_bp = bp;
|
state->next_bp = bp;
|
||||||
bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1;
|
bp = ((unsigned long *)regs->sp) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Initialize stack info and make sure the frame data is accessible: */
|
/* Initialize stack info and make sure the frame data is accessible: */
|
||||||
|
|
|
@ -580,7 +580,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
|
||||||
goto done;
|
goto done;
|
||||||
|
|
||||||
state->ip = regs->ip;
|
state->ip = regs->ip;
|
||||||
state->sp = kernel_stack_pointer(regs);
|
state->sp = regs->sp;
|
||||||
state->bp = regs->bp;
|
state->bp = regs->bp;
|
||||||
state->regs = regs;
|
state->regs = regs;
|
||||||
state->full_regs = true;
|
state->full_regs = true;
|
||||||
|
|
Loading…
Reference in New Issue