// SPDX-License-Identifier: GPL-2.0
/*
 * This code fills the used part of the kernel stack with a poison value
 * before returning to userspace. It's part of the STACKLEAK feature
 * ported from grsecurity/PaX.
 *
 * Author: Alexander Popov <alex.popov@linux.com>
 *
 * STACKLEAK reduces the information which kernel stack leak bugs can
 * reveal and blocks some uninitialized stack variable attacks.
 */

#include <linux/stackleak.h>
#include <linux/kprobes.h>
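
/*
 * Big picture: the STACKLEAK compiler plugin makes instrumented kernel
 * functions call stackleak_track_stack(), which records the deepest
 * stack use in 'current->lowest_stack'. On the way back to userspace,
 * architecture code calls stackleak_erase() (on x86, for instance, via
 * the STACKLEAK_ERASE macro in the entry code) to overwrite that used
 * region with STACKLEAK_POISON.
 */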

#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
#include <linux/jump_label.h>
#include <linux/sysctl.h>

static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);

int stack_erasing_sysctl(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = 0;
	int state = !static_branch_unlikely(&stack_erasing_bypass);
	int prev_state = state;

	table->data = &state;
	table->maxlen = sizeof(int);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	state = !!state;
	if (ret || !write || state == prev_state)
		return ret;

	if (state)
		static_branch_disable(&stack_erasing_bypass);
	else
		static_branch_enable(&stack_erasing_bypass);

	pr_warn("stackleak: kernel stack erasing is %s\n",
		state ? "enabled" : "disabled");
	return ret;
}
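
/*
 * Runtime usage sketch (assuming this handler is wired up as the
 * "stack_erasing" sysctl, as in kernel/sysctl.c):
 *
 *   # echo 0 > /proc/sys/kernel/stack_erasing	(bypass stack erasing)
 *   # echo 1 > /proc/sys/kernel/stack_erasing	(re-enable it)
 *
 * The static key keeps the skip_erasing() check nearly free on the hot
 * exit-to-userspace path.
 */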

#define skip_erasing()	static_branch_unlikely(&stack_erasing_bypass)
#else
#define skip_erasing()	false
#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */

asmlinkage void notrace stackleak_erase(void)
{
	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
	unsigned long kstack_ptr = current->lowest_stack;
	unsigned long boundary = (unsigned long)end_of_stack(current);
	unsigned int poison_count = 0;
	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);

	if (skip_erasing())
		return;

	/* Check that 'lowest_stack' value is sane */
	if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
		kstack_ptr = boundary;

	/* Search for the poison value in the kernel stack */
	while (kstack_ptr > boundary && poison_count <= depth) {
		if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
			poison_count++;
		else
			poison_count = 0;

		kstack_ptr -= sizeof(unsigned long);
	}
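
	/*
	 * Note on the loop above: requiring a run of 'depth' consecutive
	 * poison words, instead of stopping at the first match, guards
	 * against a live stack variable that happens to contain
	 * STACKLEAK_POISON. A single match may be data; a long run is
	 * almost certainly leftover poison.
	 */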

	/*
	 * One 'long int' at the bottom of the thread stack is reserved and
	 * should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).
	 */
	if (kstack_ptr == boundary)
		kstack_ptr += sizeof(unsigned long);

#ifdef CONFIG_STACKLEAK_METRICS
	current->prev_lowest_stack = kstack_ptr;
#endif

	/*
	 * Now write the poison value to the kernel stack. Start from
	 * 'kstack_ptr' and move up till the new 'boundary'. We assume that
	 * the stack pointer doesn't change when we write poison.
	 */
	if (on_thread_stack())
		boundary = current_stack_pointer;
	else
		boundary = current_top_of_stack();

	while (kstack_ptr < boundary) {
		*(unsigned long *)kstack_ptr = STACKLEAK_POISON;
		kstack_ptr += sizeof(unsigned long);
	}
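
	/*
	 * Note on the 'boundary' choice above: when running on the thread
	 * stack itself, poisoning must stop below the live stack pointer
	 * so the frames currently in use survive. When running on a
	 * separate stack (e.g. an entry trampoline stack), the whole
	 * thread stack is idle and can be poisoned up to its top.
	 */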

	/* Reset the 'lowest_stack' value for the next syscall */
	current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
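	/*
	 * The THREAD_SIZE/64 margin above is a heuristic: it parks
	 * 'lowest_stack' slightly below the stack top, leaving room for
	 * the entry frames in use before the first instrumented function
	 * lowers the value again.
	 */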
}
NOKPROBE_SYMBOL(stackleak_erase);
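
/*
 * stackleak_erase() is notrace and marked NOKPROBE because it runs while
 * the task is leaving the kernel: a tracer or kprobe handler firing
 * inside it would build frames on the very stack region being poisoned.
 */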

/*
 * stackleak_track_stack() below must be notrace as well: with function
 * graph tracing enabled, ftrace recurses into the tracking hook (the
 * tracing code itself triggers it), which made the ftrace graph selftest
 * run for up to 90 seconds and trip the softlockup watchdog on arm64.
 */
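
/*
 * Calls to this hook are inserted by the STACKLEAK compiler plugin
 * (CONFIG_GCC_PLUGIN_STACKLEAK) into every function whose stack frame
 * exceeds CONFIG_STACKLEAK_TRACK_MIN_SIZE bytes.
 */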
void __used notrace stackleak_track_stack(void)
{
	/*
	 * N.B. stackleak_erase() fills the kernel stack with the poison value,
	 * which has the register width. That code assumes that the value
	 * of 'lowest_stack' is aligned on the register width boundary.
	 *
	 * That is true for x86 and x86_64 because of the kernel stack
	 * alignment on these platforms (for details, see 'cc_stack_align' in
	 * arch/x86/Makefile). Take care of that when you port STACKLEAK to
	 * new platforms.
	 */
	unsigned long sp = (unsigned long)&sp;
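	/*
	 * The address of the local variable above serves as a portable
	 * approximation of the current stack pointer.
	 */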

	/*
	 * Having CONFIG_STACKLEAK_TRACK_MIN_SIZE larger than
	 * STACKLEAK_SEARCH_DEPTH makes the poison search in
	 * stackleak_erase() unreliable. Let's prevent that.
	 */
	BUILD_BUG_ON(CONFIG_STACKLEAK_TRACK_MIN_SIZE > STACKLEAK_SEARCH_DEPTH);
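
	/*
	 * Rationale: functions with frames below the tracking threshold
	 * are not instrumented and may dirty stack under 'lowest_stack'
	 * without updating it; the poison search in stackleak_erase() can
	 * only see past at most STACKLEAK_SEARCH_DEPTH bytes of such
	 * untracked use.
	 */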

	if (sp < current->lowest_stack &&
	    sp >= (unsigned long)task_stack_page(current) +
					sizeof(unsigned long)) {
		current->lowest_stack = sp;
	}
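
	/*
	 * The lower bound above skips the reserved word at the stack end
	 * (which stackleak_erase() also leaves unpoisoned), and the
	 * 'sp < lowest_stack' test means the value only ever moves down
	 * until the next erase resets it.
	 */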
}
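/*
 * Exported because the plugin instruments modules too, so module code
 * must be able to resolve the tracking hook at load time.
 */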
EXPORT_SYMBOL(stackleak_track_stack);