x86: make lazy %gs optional on x86_32
Impact: pt_regs changed, lazy gs handling made optional, slight overhead added to SAVE_ALL, error_code path simplified a bit

On x86_32, %gs hasn't been used by the kernel and is handled lazily: pt_regs doesn't have a place for it and %gs is saved/loaded only when necessary. In preparation for stack protector support, this patch makes lazy %gs handling optional by doing the following.

* Add CONFIG_X86_32_LAZY_GS and a place for gs in pt_regs.

* Save and restore %gs along with the other registers in entry_32.S unless LAZY_GS. Note that this unfortunately adds "pushl $0" to SAVE_ALL even when LAZY_GS. However, it adds no overhead to the common exit path and simplifies the entry path with error code.

* Define different user_gs accessors depending on LAZY_GS and add lazy_save_gs() and lazy_load_gs(), which are no-ops if !LAZY_GS. The lazy_*_gs() ops are used to save, load and clear %gs lazily.

* Define ELF_CORE_COPY_KERNEL_REGS(), which always reads %gs directly.

The xen and lguest changes need to be verified.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent d9a89a26e0
commit ccbeed3a05
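The accessor split described above gives one API two backing stores for the user's %gs: the live register when handling is lazy, or the new pt_regs slot when it is eager. Below is a minimal userspace sketch of that contract; hw_gs and the LAZY_GS switch are illustrative stand-ins (savesegment()/loadsegment() exist only in the kernel), not kernel code:

#include <stdio.h>

#define LAZY_GS 1				/* flip to 0 for the eager variant */

struct pt_regs { unsigned long gs; };		/* the save slot this patch adds */

static unsigned long hw_gs;			/* stands in for the live %gs */

#if LAZY_GS
/* lazy: the user's %gs lives in the real register while the task runs */
#define get_user_gs(regs)	((void)(regs), (unsigned short)hw_gs)
#define set_user_gs(regs, v)	((void)(regs), (void)(hw_gs = (v)))
#define lazy_save_gs(v)		((void)((v) = hw_gs))
#define lazy_load_gs(v)		((void)(hw_gs = (v)))
#else
/* eager: the user's %gs is saved to and restored from pt_regs on entry/exit */
#define get_user_gs(regs)	((unsigned short)(regs)->gs)
#define set_user_gs(regs, v)	((void)((regs)->gs = (v)))
#define lazy_save_gs(v)		do { } while (0)	/* no-op */
#define lazy_load_gs(v)		do { } while (0)	/* no-op */
#endif

int main(void)
{
	struct pt_regs regs = { 0 };

	set_user_gs(&regs, 3);			/* e.g. a TLS segment selector */
	printf("user gs = %hu\n", get_user_gs(&regs));
	return 0;
}

Either way the callers look identical; only where the value lives changes, which is exactly what lets the rest of this patch flip between the two schemes with a config option.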
arch/x86/Kconfig
@@ -207,6 +207,10 @@ config X86_TRAMPOLINE
 	depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
 	default y
 
+config X86_32_LAZY_GS
+	def_bool y
+	depends on X86_32
+
 config KTIME_SCALAR
 	def_bool X86_32
 source "init/Kconfig"
arch/x86/include/asm/elf.h
@@ -112,7 +112,7 @@ extern unsigned int vdso_enabled;
  * now struct_user_regs, they are different)
  */
 
-#define ELF_CORE_COPY_REGS(pr_reg, regs)	\
+#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs)	\
 do {						\
 	pr_reg[0] = regs->bx;			\
 	pr_reg[1] = regs->cx;			\
@@ -124,7 +124,6 @@ do {						\
 	pr_reg[7] = regs->ds & 0xffff;		\
 	pr_reg[8] = regs->es & 0xffff;		\
 	pr_reg[9] = regs->fs & 0xffff;		\
-	pr_reg[10] = get_user_gs(regs);		\
 	pr_reg[11] = regs->orig_ax;		\
 	pr_reg[12] = regs->ip;			\
 	pr_reg[13] = regs->cs & 0xffff;		\
@@ -133,6 +132,18 @@ do {						\
 	pr_reg[16] = regs->ss & 0xffff;		\
 } while (0);
 
+#define ELF_CORE_COPY_REGS(pr_reg, regs)	\
+do {						\
+	ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
+	pr_reg[10] = get_user_gs(regs);		\
+} while (0);
+
+#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs)	\
+do {						\
+	ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
+	savesegment(gs, pr_reg[10]);		\
+} while (0);
+
 #define ELF_PLATFORM	(utsname()->machine)
 #define set_personality_64bit()	do { } while (0)
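The reason for the second macro is visible in the eager case: a user core dump wants the user's %gs as saved in pt_regs, while ELF_CORE_COPY_KERNEL_REGS() always reads %gs straight off the CPU. A small userspace model contrasting the two; hw_gs, savesegment() and the constants are illustrative stand-ins, not the kernel implementation:

#include <stdio.h>

/* stand-ins: hw_gs models the live %gs and savesegment() reads it */
static unsigned long hw_gs = 0x68;		/* arbitrary "currently live" value */
#define savesegment(seg, v)	((v) = hw_gs)

struct pt_regs { unsigned long gs; };

/* shaped like the macros in the hunk above, eager (!LAZY_GS) flavor */
#define get_user_gs(regs)	((unsigned long)(regs)->gs)
#define ELF_CORE_COPY_REGS(pr_reg, regs)	\
	((pr_reg)[10] = get_user_gs(regs))	/* user dump: saved user value */
#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs)	\
	savesegment(gs, (pr_reg)[10])		/* kernel dump: read %gs directly */

int main(void)
{
	struct pt_regs regs = { 0x33 };		/* user %gs as saved at kernel entry */
	unsigned long pr_reg[17];

	ELF_CORE_COPY_REGS(pr_reg, &regs);
	printf("user dump   gs = %#lx\n", pr_reg[10]);	/* 0x33 */

	ELF_CORE_COPY_KERNEL_REGS(pr_reg, &regs);
	printf("kernel dump gs = %#lx\n", pr_reg[10]);	/* 0x68 */
	return 0;
}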
arch/x86/include/asm/mmu_context.h
@@ -79,7 +79,7 @@ do {						\
 #ifdef CONFIG_X86_32
 #define deactivate_mm(tsk, mm)			\
 do {						\
-	set_user_gs(task_pt_regs(tsk), 0);	\
+	lazy_load_gs(0);			\
 } while (0)
 #else
 #define deactivate_mm(tsk, mm)			\
arch/x86/include/asm/ptrace.h
@@ -28,7 +28,7 @@ struct pt_regs {
 	int xds;
 	int xes;
 	int xfs;
-	/* int gs; */
+	int xgs;
 	long orig_eax;
 	long eip;
 	int xcs;
@@ -50,7 +50,7 @@ struct pt_regs {
 	unsigned long ds;
 	unsigned long es;
 	unsigned long fs;
-	/* int gs; */
+	unsigned long gs;
 	unsigned long orig_ax;
 	unsigned long ip;
 	unsigned long cs;
arch/x86/include/asm/system.h
@@ -186,10 +186,20 @@ extern void native_load_gs_index(unsigned);
  * x86_32 user gs accessors.
  */
 #ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_32_LAZY_GS
 #define get_user_gs(regs)	(u16)({unsigned long v; savesegment(gs, v); v;})
 #define set_user_gs(regs, v)	loadsegment(gs, (unsigned long)(v))
 #define task_user_gs(tsk)	((tsk)->thread.gs)
-#endif
+#define lazy_save_gs(v)		savesegment(gs, (v))
+#define lazy_load_gs(v)		loadsegment(gs, (v))
+#else	/* X86_32_LAZY_GS */
+#define get_user_gs(regs)	(u16)((regs)->gs)
+#define set_user_gs(regs, v)	do { (regs)->gs = (v); } while (0)
+#define task_user_gs(tsk)	(task_pt_regs(tsk)->gs)
+#define lazy_save_gs(v)		do { } while (0)
+#define lazy_load_gs(v)		do { } while (0)
+#endif	/* X86_32_LAZY_GS */
+#endif	/* X86_32 */
 
 static inline unsigned long get_limit(unsigned long segment)
 {
arch/x86/kernel/asm-offsets_32.c
@@ -75,6 +75,7 @@ void foo(void)
 	OFFSET(PT_DS,  pt_regs, ds);
 	OFFSET(PT_ES,  pt_regs, es);
 	OFFSET(PT_FS,  pt_regs, fs);
+	OFFSET(PT_GS,  pt_regs, gs);
 	OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
 	OFFSET(PT_EIP, pt_regs, ip);
 	OFFSET(PT_CS,  pt_regs, cs);
arch/x86/kernel/entry_32.S
@@ -30,12 +30,13 @@
  * 1C(%esp) - %ds
  * 20(%esp) - %es
  * 24(%esp) - %fs
- * 28(%esp) - orig_eax
- * 2C(%esp) - %eip
- * 30(%esp) - %cs
- * 34(%esp) - %eflags
- * 38(%esp) - %oldesp
- * 3C(%esp) - %oldss
+ * 28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
+ * 2C(%esp) - orig_eax
+ * 30(%esp) - %eip
+ * 34(%esp) - %cs
+ * 38(%esp) - %eflags
+ * 3C(%esp) - %oldesp
+ * 40(%esp) - %oldss
 *
 * "current" is in register %ebx during any slow entries.
 */
@@ -101,8 +102,99 @@
 #define resume_userspace_sig	resume_userspace
 #endif
 
+/*
+ * User gs save/restore
+ *
+ * %gs is used for userland TLS and kernel only uses it for stack
+ * canary which is required to be at %gs:20 by gcc.  Read the comment
+ * at the top of stackprotector.h for more info.
+ *
+ * Local labels 98 and 99 are used.
+ */
+#ifdef CONFIG_X86_32_LAZY_GS
+
+ /* unfortunately push/pop can't be no-op */
+.macro PUSH_GS
+	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
+.endm
+.macro POP_GS pop=0
+	addl $(4 + \pop), %esp
+	CFI_ADJUST_CFA_OFFSET -(4 + \pop)
+.endm
+.macro POP_GS_EX
+.endm
+
+ /* all the rest are no-op */
+.macro PTGS_TO_GS
+.endm
+.macro PTGS_TO_GS_EX
+.endm
+.macro GS_TO_REG reg
+.endm
+.macro REG_TO_PTGS reg
+.endm
+.macro SET_KERNEL_GS reg
+.endm
+
+#else	/* CONFIG_X86_32_LAZY_GS */
+
+.macro PUSH_GS
+	pushl %gs
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET gs, 0*/
+.endm
+
+.macro POP_GS pop=0
+98:	popl %gs
+	CFI_ADJUST_CFA_OFFSET -4
+	/*CFI_RESTORE gs*/
+	.if \pop <> 0
+	add $\pop, %esp
+	CFI_ADJUST_CFA_OFFSET -\pop
+	.endif
+.endm
+.macro POP_GS_EX
+.pushsection .fixup, "ax"
+99:	movl $0, (%esp)
+	jmp 98b
+.section __ex_table, "a"
+	.align 4
+	.long 98b, 99b
+.popsection
+.endm
+
+.macro PTGS_TO_GS
+98:	mov PT_GS(%esp), %gs
+.endm
+.macro PTGS_TO_GS_EX
+.pushsection .fixup, "ax"
+99:	movl $0, PT_GS(%esp)
+	jmp 98b
+.section __ex_table, "a"
+	.align 4
+	.long 98b, 99b
+.popsection
+.endm
+
+.macro GS_TO_REG reg
+	movl %gs, \reg
+	/*CFI_REGISTER gs, \reg*/
+.endm
+.macro REG_TO_PTGS reg
+	movl \reg, PT_GS(%esp)
+	/*CFI_REL_OFFSET gs, PT_GS*/
+.endm
+.macro SET_KERNEL_GS reg
+	xorl \reg, \reg
+	movl \reg, %gs
+.endm
+
+#endif	/* CONFIG_X86_32_LAZY_GS */
+
 .macro SAVE_ALL
 	cld
+	PUSH_GS
 	pushl %fs
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET fs, 0;*/
@@ -138,6 +230,7 @@
 	movl %edx, %es
 	movl $(__KERNEL_PERCPU), %edx
 	movl %edx, %fs
+	SET_KERNEL_GS %edx
 .endm
 
 .macro RESTORE_INT_REGS
@@ -164,7 +257,7 @@
 	CFI_RESTORE eax
 .endm
 
-.macro RESTORE_REGS
+.macro RESTORE_REGS pop=0
 	RESTORE_INT_REGS
 1:	popl %ds
 	CFI_ADJUST_CFA_OFFSET -4
@@ -175,6 +268,7 @@
 3:	popl %fs
 	CFI_ADJUST_CFA_OFFSET -4
 	/*CFI_RESTORE fs;*/
+	POP_GS \pop
 .pushsection .fixup, "ax"
 4:	movl $0, (%esp)
 	jmp 1b
@@ -188,6 +282,7 @@
 	.long 2b, 5b
 	.long 3b, 6b
 .popsection
+	POP_GS_EX
 .endm
 
 .macro RING0_INT_FRAME
@@ -368,6 +463,7 @@ sysenter_exit:
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
 1:	mov  PT_FS(%esp), %fs
+	PTGS_TO_GS
 	ENABLE_INTERRUPTS_SYSEXIT
 
 #ifdef CONFIG_AUDITSYSCALL
@@ -416,6 +512,7 @@ sysexit_audit:
 	.align 4
 	.long 1b,2b
 .popsection
+	PTGS_TO_GS_EX
 ENDPROC(ia32_sysenter_target)
 
 	# system call handler stub
@@ -458,8 +555,7 @@ restore_all:
 restore_nocheck:
 	TRACE_IRQS_IRET
 restore_nocheck_notrace:
-	RESTORE_REGS
-	addl $4, %esp			# skip orig_eax/error_code
+	RESTORE_REGS 4			# skip orig_eax/error_code
 	CFI_ADJUST_CFA_OFFSET -4
 irq_return:
 	INTERRUPT_RETURN
@@ -1078,7 +1174,10 @@ ENTRY(page_fault)
 	CFI_ADJUST_CFA_OFFSET 4
 	ALIGN
 error_code:
-	/* the function address is in %fs's slot on the stack */
+	/* the function address is in %gs's slot on the stack */
+	pushl %fs
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET fs, 0*/
 	pushl %es
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET es, 0*/
@@ -1107,20 +1206,15 @@ error_code:
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET ebx, 0
 	cld
-	pushl %fs
-	CFI_ADJUST_CFA_OFFSET 4
-	/*CFI_REL_OFFSET fs, 0*/
 	movl $(__KERNEL_PERCPU), %ecx
 	movl %ecx, %fs
 	UNWIND_ESPFIX_STACK
-	popl %ecx
-	CFI_ADJUST_CFA_OFFSET -4
-	/*CFI_REGISTER es, ecx*/
-	movl PT_FS(%esp), %edi		# get the function address
+	GS_TO_REG %ecx
+	movl PT_GS(%esp), %edi		# get the function address
 	movl PT_ORIG_EAX(%esp), %edx	# get the error code
 	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
-	mov  %ecx, PT_FS(%esp)
-	/*CFI_REL_OFFSET fs, ES*/
+	REG_TO_PTGS %ecx
+	SET_KERNEL_GS %ecx
 	movl $(__USER_DS), %ecx
 	movl %ecx, %ds
 	movl %ecx, %es
arch/x86/kernel/process_32.c
@@ -539,7 +539,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * used %fs or %gs (it does not today), or if the kernel is
 	 * running inside of a hypervisor layer.
 	 */
-	savesegment(gs, prev->gs);
+	lazy_save_gs(prev->gs);
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
@@ -585,7 +585,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * Restore %gs if needed (which is common)
 	 */
 	if (prev->gs | next->gs)
-		loadsegment(gs, next->gs);
+		lazy_load_gs(next->gs);
 
 	percpu_write(current_task, next_p);
arch/x86/kernel/ptrace.c
@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value)
 static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
 	BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
-	regno >>= 2;
-	if (regno > FS)
-		--regno;
-	return &regs->bx + regno;
+	return &regs->bx + (regno >> 2);
 }
 
 static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
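The ptrace simplification falls out of the new layout: user_regs_struct and pt_regs now share the same slot order, so the byte offset divides straight into an index and the old skip-one-past-FS adjustment disappears. A standalone sanity check of that mapping, with the layout abbreviated from the pt_regs hunk above (illustrative, not kernel code):

#include <assert.h>
#include <stddef.h>

/* abbreviated 32-bit pt_regs layout after this patch */
struct pt_regs {
	unsigned long bx, cx, dx, si, di, bp, ax;
	unsigned long ds, es, fs, gs;		/* gs is the new slot */
	unsigned long orig_ax, ip, cs, flags, sp, ss;
};

/*
 * The simplified accessor: byte offset -> slot index, with no
 * "skip the missing gs slot" fix-up.  Walking from &regs->bx relies on
 * contiguous member layout, the same assumption the kernel pins down
 * with BUILD_BUG_ON().
 */
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
	return &regs->bx + (regno >> 2);
}

int main(void)
{
	struct pt_regs r;

	/* user_regs_struct places gs at slot 10 (cf. pr_reg[10] in elf.h) */
	assert(offsetof(struct pt_regs, gs) == 10 * sizeof(unsigned long));
	assert(pt_regs_access(&r, 10 * 4) == &r.gs);
	return 0;
}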
arch/x86/lguest/boot.c
@@ -283,7 +283,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
 	/* There's one problem which normal hardware doesn't have: the Host
 	 * can't handle us removing entries we're currently using.  So we clear
 	 * the GS register here: if it's needed it'll be reloaded anyway. */
-	loadsegment(gs, 0);
+	lazy_load_gs(0);
 	lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
 }
arch/x86/xen/enlighten.c
@@ -323,13 +323,14 @@ static void load_TLS_descriptor(struct thread_struct *t,
 static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 {
 	/*
-	 * XXX sleazy hack: If we're being called in a lazy-cpu zone,
-	 * it means we're in a context switch, and %gs has just been
-	 * saved.  This means we can zero it out to prevent faults on
-	 * exit from the hypervisor if the next process has no %gs.
-	 * Either way, it has been saved, and the new value will get
-	 * loaded properly.  This will go away as soon as Xen has been
-	 * modified to not save/restore %gs for normal hypercalls.
+	 * XXX sleazy hack: If we're being called in a lazy-cpu zone
+	 * and lazy gs handling is enabled, it means we're in a
+	 * context switch, and %gs has just been saved.  This means we
+	 * can zero it out to prevent faults on exit from the
+	 * hypervisor if the next process has no %gs.  Either way, it
+	 * has been saved, and the new value will get loaded properly.
+	 * This will go away as soon as Xen has been modified to not
+	 * save/restore %gs for normal hypercalls.
 	 *
 	 * On x86_64, this hack is not used for %gs, because gs points
 	 * to KERNEL_GS_BASE (and uses it for PDA references), so we
@@ -341,7 +342,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 	 */
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
 #ifdef CONFIG_X86_32
-		loadsegment(gs, 0);
+		lazy_load_gs(0);
 #else
 		loadsegment(fs, 0);
 #endif