Commit 625037cc authored by Linus Torvalds

Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86-64: move clts into batch cpu state updates when preloading fpu
  x86-64: move unlazy_fpu() into lazy cpu state part of context switch
  x86-32: make sure clts is batched during context switch
  x86: split out core __math_state_restore
parents 8fafa0a7 17950c5b
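In short, both the 32-bit and 64-bit __switch_to() implementations now decide up front whether the incoming task is likely to need the FPU, clear CR0.TS with clts() while other CPU state updates are still being batched (so a paravirtualized clts can be combined with the rest of the batch flushed by arch_end_context_switch()), and then restore the math state through the new __math_state_restore() helper, which assumes TS is already clear. Below is a condensed sketch of the resulting flow; it is illustrative only, uses only identifiers that appear in the diffs that follow, and omits the segment, TLS and TSS work (on 64-bit, unlazy_fpu() additionally moves to just after the DS reload):

	/*
	 * Illustrative sketch only -- not the literal patched code.
	 * All identifiers used here appear in the hunks below.
	 */
	struct task_struct *
	__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	{
		bool preload_fpu;

		/*
		 * If the task has used the FPU in its last 5 timeslices,
		 * restore the math state eagerly rather than taking a
		 * device-not-available trap on first use.
		 */
		preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;

		__unlazy_fpu(prev_p);		/* save outgoing FPU state if live */

		if (preload_fpu)
			prefetch(next_p->thread.xstate);

		/* ... segment, TLS and TSS updates omitted ... */

		/*
		 * Clear CR0.TS while CPU state updates are still being
		 * batched, so it can be folded into the same batch.
		 */
		if (preload_fpu)
			clts();

		arch_end_context_switch(next_p);	/* flush lazy-mode hypercalls */

		/* TS is already clear, so use the core restore helper. */
		if (preload_fpu)
			__math_state_restore();

		return prev_p;
	}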
arch/x86/include/asm/i387.h
@@ -26,6 +26,7 @@ extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
+extern void __math_state_restore(void);
 extern void init_thread_xstate(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
...
arch/x86/kernel/process_32.c
@@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		*next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	bool preload_fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	__unlazy_fpu(prev_p);
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+
+	__unlazy_fpu(prev_p);
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+	/* If we're going to preload the fpu context, make sure clts
+	   is run while we're batching the cpu state updates. */
+	if (preload_fpu)
+		clts();
+
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	arch_end_context_switch(next_p);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
-	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	if (preload_fpu)
+		__math_state_restore();
 
 	/*
 	 * Restore %gs if needed (which is common)
...
arch/x86/kernel/process_64.c
@@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
+	bool preload_fpu;
+
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -419,6 +427,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	load_TLS(next, cpu);
 
+	/* Must be after DS reload */
+	unlazy_fpu(prev_p);
+
+	/* Make sure cpu is ready for new context */
+	if (preload_fpu)
+		clts();
+
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -459,9 +474,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 	prev->gsindex = gsindex;
 
-	/* Must be after DS reload */
-	unlazy_fpu(prev_p);
-
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
@@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
-	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	/*
+	 * Preload the FPU context, now that we've determined that the
+	 * task is likely to be using it.
+	 */
+	if (preload_fpu)
+		__math_state_restore();
 
 	return prev_p;
 }
...
arch/x86/kernel/traps.c
@@ -794,6 +794,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 {
 }
 
+/*
+ * __math_state_restore assumes that cr0.TS is already clear and the
+ * fpu state is all ready for use.  Used during context switch.
+ */
+void __math_state_restore(void)
+{
+	struct thread_info *thread = current_thread_info();
+	struct task_struct *tsk = thread->task;
+
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		stts();
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+
+	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
+	tsk->fpu_counter++;
+}
+
 /*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
@@ -825,17 +847,8 @@ asmlinkage void math_state_restore(void)
 	}
 
 	clts();				/* Allow maths ops (or we recurse) */
 
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		stts();
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
-
-	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
-	tsk->fpu_counter++;
+	__math_state_restore();
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
...