Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: x86-64: move clts into batch cpu state updates when preloading fpu x86-64: move unlazy_fpu() into lazy cpu state part of context switch x86-32: make sure clts is batched during context switch x86: split out core __math_state_restore
This commit is contained in:
commit
625037cc40
|
@ -26,6 +26,7 @@ extern void fpu_init(void);
|
|||
extern void mxcsr_feature_mask_init(void);
|
||||
extern int init_fpu(struct task_struct *child);
|
||||
extern asmlinkage void math_state_restore(void);
|
||||
extern void __math_state_restore(void);
|
||||
extern void init_thread_xstate(void);
|
||||
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
|
||||
|
||||
|
|
|
@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
*next = &next_p->thread;
|
||||
int cpu = smp_processor_id();
|
||||
struct tss_struct *tss = &per_cpu(init_tss, cpu);
|
||||
bool preload_fpu;
|
||||
|
||||
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
|
||||
|
||||
/*
|
||||
* If the task has used fpu the last 5 timeslices, just do a full
|
||||
* restore of the math state immediately to avoid the trap; the
|
||||
* chances of needing FPU soon are obviously high now
|
||||
*/
|
||||
preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
|
||||
|
||||
__unlazy_fpu(prev_p);
|
||||
|
||||
|
||||
/* we're going to use this soon, after a few expensive things */
|
||||
if (next_p->fpu_counter > 5)
|
||||
if (preload_fpu)
|
||||
prefetch(next->xstate);
|
||||
|
||||
/*
|
||||
|
@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
|
||||
__switch_to_xtra(prev_p, next_p, tss);
|
||||
|
||||
/* If we're going to preload the fpu context, make sure clts
|
||||
is run while we're batching the cpu state updates. */
|
||||
if (preload_fpu)
|
||||
clts();
|
||||
|
||||
/*
|
||||
* Leave lazy mode, flushing any hypercalls made here.
|
||||
* This must be done before restoring TLS segments so
|
||||
|
@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
*/
|
||||
arch_end_context_switch(next_p);
|
||||
|
||||
/* If the task has used fpu the last 5 timeslices, just do a full
|
||||
* restore of the math state immediately to avoid the trap; the
|
||||
* chances of needing FPU soon are obviously high now
|
||||
*
|
||||
* tsk_used_math() checks prevent calling math_state_restore(),
|
||||
* which can sleep in the case of !tsk_used_math()
|
||||
*/
|
||||
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
|
||||
math_state_restore();
|
||||
if (preload_fpu)
|
||||
__math_state_restore();
|
||||
|
||||
/*
|
||||
* Restore %gs if needed (which is common)
|
||||
|
|
|
@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
int cpu = smp_processor_id();
|
||||
struct tss_struct *tss = &per_cpu(init_tss, cpu);
|
||||
unsigned fsindex, gsindex;
|
||||
bool preload_fpu;
|
||||
|
||||
/*
|
||||
* If the task has used fpu the last 5 timeslices, just do a full
|
||||
* restore of the math state immediately to avoid the trap; the
|
||||
* chances of needing FPU soon are obviously high now
|
||||
*/
|
||||
preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
|
||||
|
||||
/* we're going to use this soon, after a few expensive things */
|
||||
if (next_p->fpu_counter > 5)
|
||||
if (preload_fpu)
|
||||
prefetch(next->xstate);
|
||||
|
||||
/*
|
||||
|
@ -419,6 +427,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
|
||||
load_TLS(next, cpu);
|
||||
|
||||
/* Must be after DS reload */
|
||||
unlazy_fpu(prev_p);
|
||||
|
||||
/* Make sure cpu is ready for new context */
|
||||
if (preload_fpu)
|
||||
clts();
|
||||
|
||||
/*
|
||||
* Leave lazy mode, flushing any hypercalls made here.
|
||||
* This must be done before restoring TLS segments so
|
||||
|
@ -459,9 +474,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
|
||||
prev->gsindex = gsindex;
|
||||
|
||||
/* Must be after DS reload */
|
||||
unlazy_fpu(prev_p);
|
||||
|
||||
/*
|
||||
* Switch the PDA and FPU contexts.
|
||||
*/
|
||||
|
@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
|
||||
__switch_to_xtra(prev_p, next_p, tss);
|
||||
|
||||
/* If the task has used fpu the last 5 timeslices, just do a full
|
||||
* restore of the math state immediately to avoid the trap; the
|
||||
* chances of needing FPU soon are obviously high now
|
||||
*
|
||||
* tsk_used_math() checks prevent calling math_state_restore(),
|
||||
* which can sleep in the case of !tsk_used_math()
|
||||
/*
|
||||
* Preload the FPU context, now that we've determined that the
|
||||
* task is likely to be using it.
|
||||
*/
|
||||
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
|
||||
math_state_restore();
|
||||
if (preload_fpu)
|
||||
__math_state_restore();
|
||||
return prev_p;
|
||||
}
|
||||
|
||||
|
|
|
@ -794,6 +794,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
|
|||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* __math_state_restore assumes that cr0.TS is already clear and the
|
||||
* fpu state is all ready for use. Used during context switch.
|
||||
*/
|
||||
void __math_state_restore(void)
|
||||
{
|
||||
struct thread_info *thread = current_thread_info();
|
||||
struct task_struct *tsk = thread->task;
|
||||
|
||||
/*
|
||||
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
|
||||
*/
|
||||
if (unlikely(restore_fpu_checking(tsk))) {
|
||||
stts();
|
||||
force_sig(SIGSEGV, tsk);
|
||||
return;
|
||||
}
|
||||
|
||||
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
|
||||
tsk->fpu_counter++;
|
||||
}
|
||||
|
||||
/*
|
||||
* 'math_state_restore()' saves the current math information in the
|
||||
* old math state array, and gets the new ones from the current task
|
||||
|
@ -825,17 +847,8 @@ asmlinkage void math_state_restore(void)
|
|||
}
|
||||
|
||||
clts(); /* Allow maths ops (or we recurse) */
|
||||
/*
|
||||
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
|
||||
*/
|
||||
if (unlikely(restore_fpu_checking(tsk))) {
|
||||
stts();
|
||||
force_sig(SIGSEGV, tsk);
|
||||
return;
|
||||
}
|
||||
|
||||
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
|
||||
tsk->fpu_counter++;
|
||||
__math_state_restore();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(math_state_restore);
|
||||
|
||||
|
|
Loading…
Reference in New Issue