x86: implement x86_32 stack protector

Impact: stack protector for x86_32

Implement stack protector for x86_32.  GDT entry 28 is used for it.
It's set to point to stack_canary-20 and have the length of 24 bytes.
CONFIG_CC_STACKPROTECTOR turns off CONFIG_X86_32_LAZY_GS and sets %gs
to the stack canary segment on entry.  As %gs is otherwise unused by
the kernel, the canary can be anywhere.  It's defined as a percpu
variable.

x86_32 exception handlers take register frame on stack directly as
struct pt_regs.  With -fstack-protector turned on, gcc copies the
whole structure after the stack canary and (of course) doesn't copy
back on return thus losing all changes.  For now, -fno-stack-protector
is added to all files which contain those functions.  We definitely
need something better.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Tejun Heo 2009-02-09 22:17:40 +09:00 committed by Ingo Molnar
parent ccbeed3a05
commit 60a5317ff0
12 changed files with 180 additions and 16 deletions

View File

@ -209,7 +209,7 @@ config X86_TRAMPOLINE
config X86_32_LAZY_GS config X86_32_LAZY_GS
def_bool y def_bool y
depends on X86_32 depends on X86_32 && !CC_STACKPROTECTOR
config KTIME_SCALAR config KTIME_SCALAR
def_bool X86_32 def_bool X86_32
@ -1356,7 +1356,6 @@ config CC_STACKPROTECTOR_ALL
config CC_STACKPROTECTOR config CC_STACKPROTECTOR
bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
depends on X86_64
select CC_STACKPROTECTOR_ALL select CC_STACKPROTECTOR_ALL
help help
This option turns on the -fstack-protector GCC feature. This This option turns on the -fstack-protector GCC feature. This

View File

@ -396,7 +396,11 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_INIT_PER_CPU(irq_stack_union);
DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(char *, irq_stack_ptr);
#else /* X86_64 */
#ifdef CONFIG_CC_STACKPROTECTOR
DECLARE_PER_CPU(unsigned long, stack_canary);
#endif #endif
#endif /* X86_64 */
extern void print_cpu_info(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *);
extern unsigned int xstate_size; extern unsigned int xstate_size;

View File

@ -61,7 +61,7 @@
* *
* 26 - ESPFIX small SS * 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ] * 27 - per-cpu [ offset to per-cpu data area ]
* 28 - unused * 28 - stack_canary-20 [ for stack protector ]
* 29 - unused * 29 - unused
* 30 - unused * 30 - unused
* 31 - TSS for double fault handler * 31 - TSS for double fault handler
@ -95,6 +95,13 @@
#define __KERNEL_PERCPU 0 #define __KERNEL_PERCPU 0
#endif #endif
/*
 * GDT slot and segment selector for the stack canary segment.  The
 * selector is the GDT index multiplied by 8.  When stack protector is
 * disabled the selector is defined as 0, so code that loads
 * __KERNEL_STACK_CANARY into %gs loads a zero selector instead.
 */
#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16)
#ifdef CONFIG_CC_STACKPROTECTOR
#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8)
#else
#define __KERNEL_STACK_CANARY 0
#endif
#define GDT_ENTRY_DOUBLEFAULT_TSS 31 #define GDT_ENTRY_DOUBLEFAULT_TSS 31
/* /*

View File

@ -1,3 +1,35 @@
/*
* GCC stack protector support.
*
* Stack protector works by putting a predefined pattern at the start
* of the stack frame and verifying that it hasn't been overwritten
* when returning from the function. The pattern is called the stack
* canary and unfortunately gcc requires it to be at a fixed offset
* from %gs. On x86_64, the offset is 40 bytes and on x86_32 20 bytes.
* x86_64 and x86_32 use segment registers differently and thus handle
* this requirement differently.
*
* On x86_64, %gs is shared by percpu area and stack canary. All
* percpu symbols are zero based and %gs points to the base of percpu
* area. The first occupant of the percpu area is always
* irq_stack_union which contains stack_canary at offset 40. Userland
* %gs is always saved and restored on kernel entry and exit using
* swapgs, so stack protector doesn't add any complexity there.
*
* On x86_32, it's slightly more complicated. As in x86_64, %gs is
* used for userland TLS. Unfortunately, some processors are much
* slower at loading segment registers with different value when
* entering and leaving the kernel, so the kernel uses %fs for percpu
* area and manages %gs lazily so that %gs is switched only when
* necessary, usually during task switch.
*
* As gcc requires the stack canary at %gs:20, %gs can't be managed
* lazily if stack protector is enabled, so the kernel saves and
* restores userland %gs on kernel entry and exit. This behavior is
* controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
* system.h to hide the details.
*/
#ifndef _ASM_STACKPROTECTOR_H #ifndef _ASM_STACKPROTECTOR_H
#define _ASM_STACKPROTECTOR_H 1 #define _ASM_STACKPROTECTOR_H 1
@ -6,8 +38,18 @@
#include <asm/tsc.h> #include <asm/tsc.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/percpu.h> #include <asm/percpu.h>
#include <asm/system.h>
#include <asm/desc.h>
#include <linux/random.h> #include <linux/random.h>
/*
 * 24 byte read-only segment initializer for stack canary. Linker
 * can't handle the address bit shifting. Address will be set in
 * head_32 for boot CPU and setup_per_cpu_areas() for others.
 *
 * Decoding the two descriptor words below (x86 segment descriptor
 * layout): the low word holds limit 0x18 with base bits 15:0 left
 * zero; the high word holds access byte 0x90 (present, DPL 0,
 * non-system, read-only data) and flags 0x4 (32-bit, byte
 * granularity), with the remaining base bits zero.  Only the base
 * fields are filled in at runtime.
 */
#define GDT_STACK_CANARY_INIT \
[GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } },
/* /*
* Initialize the stackprotector canary value. * Initialize the stackprotector canary value.
* *
@ -19,12 +61,9 @@ static __always_inline void boot_init_stack_canary(void)
u64 canary; u64 canary;
u64 tsc; u64 tsc;
/* #ifdef CONFIG_X86_64
* Build time only check to make sure the stack_canary is at
* offset 40 in the pda; this is a gcc ABI requirement
*/
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
#endif
/* /*
* We both use the random pool and the current TSC as a source * We both use the random pool and the current TSC as a source
* of randomness. The TSC only matters for very early init, * of randomness. The TSC only matters for very early init,
@ -36,7 +75,49 @@ static __always_inline void boot_init_stack_canary(void)
canary += tsc + (tsc << 32UL); canary += tsc + (tsc << 32UL);
current->stack_canary = canary; current->stack_canary = canary;
#ifdef CONFIG_X86_64
percpu_write(irq_stack_union.stack_canary, canary); percpu_write(irq_stack_union.stack_canary, canary);
#else
percpu_write(stack_canary, canary);
#endif
}
/*
 * Point the stack canary GDT entry of @cpu at that CPU's percpu
 * stack_canary variable.
 *
 * gcc reads the canary from %gs:20, so the segment base must be 20
 * bytes below the variable for %gs:20 to land on the canary itself.
 * Using the variable's own address as the base would make gcc read
 * whatever happens to live 20 bytes past it.
 *
 * Only the three base fields of the descriptor are rewritten; limit
 * and access bits are taken from the entry already present in the
 * GDT.  Compiles to nothing unless CONFIG_X86_32 is set.
 */
static inline void setup_stack_canary_segment(int cpu)
{
#ifdef CONFIG_X86_32
	/* segment base = &stack_canary - 20, so that %gs:20 is the canary */
	unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20;
	struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
	struct desc_struct desc;

	desc = gdt_table[GDT_ENTRY_STACK_CANARY];
	/* scatter the 32-bit base over the descriptor's split base fields */
	desc.base0 = canary & 0xffff;
	desc.base1 = (canary >> 16) & 0xff;
	desc.base2 = (canary >> 24) & 0xff;
	write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
#endif
}
/*
 * Load the stack canary segment selector into %gs.  The "memory"
 * clobber keeps the compiler from moving memory accesses across the
 * segment load.  Compiles to nothing unless CONFIG_X86_32 is set.
 */
static inline void load_stack_canary_segment(void)
{
#ifdef CONFIG_X86_32
	asm volatile("mov %[sel], %%gs"
		     : /* no outputs */
		     : [sel] "r" (__KERNEL_STACK_CANARY)
		     : "memory");
#endif
}
#else /* CC_STACKPROTECTOR */
#define GDT_STACK_CANARY_INIT
/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */
/* Stack protector disabled: there is no canary segment to set up. */
static inline void setup_stack_canary_segment(int cpu)
{ }
static inline void load_stack_canary_segment(void)
{
#ifdef CONFIG_X86_32
asm volatile ("mov %0, %%gs" : : "r" (0));
#endif
} }
#endif /* CC_STACKPROTECTOR */ #endif /* CC_STACKPROTECTOR */

View File

@ -23,6 +23,22 @@ struct task_struct *__switch_to(struct task_struct *prev,
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
#ifdef CONFIG_CC_STACKPROTECTOR
/*
 * Asm fragment spliced into switch_to(): load the percpu current_task
 * pointer, fetch that task's stack_canary field and store it into the
 * percpu stack_canary variable.  %ebx is used as scratch.
 *
 * NOTE(review): switch_to() expands this right after its "1:" label,
 * so it reads current_task only after __switch_to has been jumped to.
 * Confirm that a task resuming at some other next_ip cannot run with
 * the previous task's canary still installed.
 */
#define __switch_canary \
"movl "__percpu_arg([current_task])",%%ebx\n\t" \
"movl %P[task_canary](%%ebx),%%ebx\n\t" \
"movl %%ebx,"__percpu_arg([stack_canary])"\n\t"
/* Extra output operand: the percpu stack_canary slot written by __switch_canary. */
#define __switch_canary_oparam \
, [stack_canary] "=m" (per_cpu_var(stack_canary))
/*
 * Extra input operands: the percpu current_task pointer and the
 * offset of stack_canary within struct task_struct.
 */
#define __switch_canary_iparam \
, [current_task] "m" (per_cpu_var(current_task)) \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
/* Stack protector disabled: all three hooks expand to nothing. */
#define __switch_canary
#define __switch_canary_oparam
#define __switch_canary_iparam
#endif /* CC_STACKPROTECTOR */
/* /*
* Saving eflags is important. It switches not only IOPL between tasks, * Saving eflags is important. It switches not only IOPL between tasks,
* it also protects other tasks from NT leaking through sysenter etc. * it also protects other tasks from NT leaking through sysenter etc.
@ -46,6 +62,7 @@ do { \
"pushl %[next_ip]\n\t" /* restore EIP */ \ "pushl %[next_ip]\n\t" /* restore EIP */ \
"jmp __switch_to\n" /* regparm call */ \ "jmp __switch_to\n" /* regparm call */ \
"1:\t" \ "1:\t" \
__switch_canary \
"popl %%ebp\n\t" /* restore EBP */ \ "popl %%ebp\n\t" /* restore EBP */ \
"popfl\n" /* restore flags */ \ "popfl\n" /* restore flags */ \
\ \
@ -58,6 +75,8 @@ do { \
"=b" (ebx), "=c" (ecx), "=d" (edx), \ "=b" (ebx), "=c" (ecx), "=d" (edx), \
"=S" (esi), "=D" (edi) \ "=S" (esi), "=D" (edi) \
\ \
__switch_canary_oparam \
\
/* input parameters: */ \ /* input parameters: */ \
: [next_sp] "m" (next->thread.sp), \ : [next_sp] "m" (next->thread.sp), \
[next_ip] "m" (next->thread.ip), \ [next_ip] "m" (next->thread.ip), \
@ -66,6 +85,8 @@ do { \
[prev] "a" (prev), \ [prev] "a" (prev), \
[next] "d" (next) \ [next] "d" (next) \
\ \
__switch_canary_iparam \
\
: /* reloaded segment registers */ \ : /* reloaded segment registers */ \
"memory"); \ "memory"); \
} while (0) } while (0)

View File

@ -24,6 +24,24 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
CFLAGS_hpet.o := $(nostackp) CFLAGS_hpet.o := $(nostackp)
CFLAGS_tsc.o := $(nostackp) CFLAGS_tsc.o := $(nostackp)
CFLAGS_paravirt.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp)
#
# On x86_32, register frame is passed verbatim on stack as struct
# pt_regs. gcc considers the parameter to belong to the callee and
# with -fstack-protector it copies pt_regs to the callee's stack frame
# to put the structure after the stack canary causing changes made by
# the exception handlers to be lost. Turn off stack protector for all
# files containing functions which take struct pt_regs from register
# frame.
#
# The proper way to fix this is to teach gcc that the argument belongs
# to the caller for these functions, oh well...
#
ifdef CONFIG_X86_32
CFLAGS_process_32.o := $(nostackp)
CFLAGS_vm86_32.o := $(nostackp)
CFLAGS_signal.o := $(nostackp)
CFLAGS_traps.o := $(nostackp)
endif
obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o

View File

@ -39,6 +39,7 @@
#include <asm/sections.h> #include <asm/sections.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/hypervisor.h> #include <asm/hypervisor.h>
#include <asm/stackprotector.h>
#include "cpu.h" #include "cpu.h"
@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
GDT_STACK_CANARY_INIT
#endif #endif
} }; } };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
@ -261,6 +263,7 @@ void load_percpu_segment(int cpu)
loadsegment(gs, 0); loadsegment(gs, 0);
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
#endif #endif
load_stack_canary_segment();
} }
/* Current gdt points %fs at the "master" per-cpu area: after this, /* Current gdt points %fs at the "master" per-cpu area: after this,
@ -946,16 +949,21 @@ unsigned long kernel_eflags;
*/ */
DEFINE_PER_CPU(struct orig_ist, orig_ist); DEFINE_PER_CPU(struct orig_ist, orig_ist);
#else #else /* x86_64 */
/* Make sure %fs is initialized properly in idle threads */ #ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, stack_canary);
#endif
/* Make sure %fs and %gs are initialized properly in idle threads */
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{ {
memset(regs, 0, sizeof(struct pt_regs)); memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU; regs->fs = __KERNEL_PERCPU;
regs->gs = __KERNEL_STACK_CANARY;
return regs; return regs;
} }
#endif #endif /* x86_64 */
/* /*
* cpu_init() initializes state that is per-CPU. Some data is already * cpu_init() initializes state that is per-CPU. Some data is already
@ -1120,9 +1128,6 @@ void __cpuinit cpu_init(void)
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
#endif #endif
/* Clear %gs. */
asm volatile ("mov %0, %%gs" : : "r" (0));
/* Clear all 6 debug registers: */ /* Clear all 6 debug registers: */
set_debugreg(0, 0); set_debugreg(0, 0);
set_debugreg(0, 1); set_debugreg(0, 1);

View File

@ -186,7 +186,7 @@
/*CFI_REL_OFFSET gs, PT_GS*/ /*CFI_REL_OFFSET gs, PT_GS*/
.endm .endm
.macro SET_KERNEL_GS reg .macro SET_KERNEL_GS reg
xorl \reg, \reg movl $(__KERNEL_STACK_CANARY), \reg
movl \reg, %gs movl \reg, %gs
.endm .endm

View File

@ -19,6 +19,7 @@
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/processor-flags.h> #include <asm/processor-flags.h>
#include <asm/percpu.h>
/* Physical address */ /* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET) #define pa(X) ((X) - __PAGE_OFFSET)
@ -437,8 +438,25 @@ is386: movl $2,%ecx # set MP
movl $(__KERNEL_PERCPU), %eax movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu movl %eax,%fs # set this cpu's percpu
xorl %eax,%eax # Clear GS and LDT #ifdef CONFIG_CC_STACKPROTECTOR
/*
 * The linker can't handle this by relocation. Manually set
 * base address in stack canary segment descriptor.
 *
 * gcc reads the canary at %gs:20, so the base written here is
 * per_cpu__stack_canary - 20, not the address of the variable
 * itself.  Skipped when 'ready' is non-zero.
 */
	cmpb $0,ready
	jne 1f
	movl $per_cpu__gdt_page,%eax
	movl $per_cpu__stack_canary,%ecx
	subl $20, %ecx				# base = &stack_canary - 20
	movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)	# base 15:0
	shrl $16, %ecx
	movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)	# base 23:16
	movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)	# base 31:24
1:
#endif
movl $(__KERNEL_STACK_CANARY),%eax
movl %eax,%gs movl %eax,%gs
xorl %eax,%eax # Clear LDT
lldt %ax lldt %ax
cld # gcc2 wants the direction flag cleared at all times cld # gcc2 wants the direction flag cleared at all times

View File

@ -212,6 +212,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
regs.ds = __USER_DS; regs.ds = __USER_DS;
regs.es = __USER_DS; regs.es = __USER_DS;
regs.fs = __KERNEL_PERCPU; regs.fs = __KERNEL_PERCPU;
regs.gs = __KERNEL_STACK_CANARY;
regs.orig_ax = -1; regs.orig_ax = -1;
regs.ip = (unsigned long) kernel_thread_helper; regs.ip = (unsigned long) kernel_thread_helper;
regs.cs = __KERNEL_CS | get_kernel_rpl(); regs.cs = __KERNEL_CS | get_kernel_rpl();

View File

@ -16,6 +16,7 @@
#include <asm/proto.h> #include <asm/proto.h>
#include <asm/cpumask.h> #include <asm/cpumask.h>
#include <asm/cpu.h> #include <asm/cpu.h>
#include <asm/stackprotector.h>
#ifdef CONFIG_DEBUG_PER_CPU_MAPS #ifdef CONFIG_DEBUG_PER_CPU_MAPS
# define DBG(x...) printk(KERN_DEBUG x) # define DBG(x...) printk(KERN_DEBUG x)
@ -95,6 +96,7 @@ void __init setup_per_cpu_areas(void)
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu; per_cpu(cpu_number, cpu) = cpu;
setup_percpu_segment(cpu); setup_percpu_segment(cpu);
setup_stack_canary_segment(cpu);
/* /*
* Copy data used in early init routines from the * Copy data used in early init routines from the
* initial arrays to the per cpu data areas. These * initial arrays to the per cpu data areas. These

View File

@ -0,0 +1,8 @@
#!/bin/sh
# Usage: <this script> <compiler command>
#
# Feeds a tiny function with a local array to the compiler given as $1
# with -fstack-protector and prints "y" if the generated assembly
# mentions %gs, "n" otherwise.  Compiler diagnostics are discarded, so
# a compiler that fails outright also yields "n".
if echo "int foo(void) { char X[200]; return 3; }" |
	$1 -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null |
	grep -q "%gs"
then
	echo y
else
	echo n
fi