From a1488f8bf4d72ad724700f6e982469a1240e4264 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 19 Jul 2010 16:05:48 -0700 Subject: [PATCH 01/12] x86, xsave: Track the offset, size of state in the xsave layout Subleaves of the cpuid vector 0xd provides the offset and size of different feature state that are managed by the xsave/xrstor. Track this for the upcoming usage during signal handling. Signed-off-by: Suresh Siddha LKML-Reference: <20100719230205.262987929@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/xsave.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 980149867a19..4993caa4181c 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -21,6 +21,8 @@ struct _fpx_sw_bytes fx_sw_reserved; struct _fpx_sw_bytes fx_sw_reserved_ia32; #endif +static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; + /* * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. @@ -301,6 +303,31 @@ void __cpuinit xsave_init(void) xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); } +/* + * Record the offsets and sizes of different state managed by the xsave + * memory layout. + */ +static void setup_xstate_features(void) +{ + int eax, ebx, ecx, edx, leaf = 0x2; + + xstate_features = fls64(pcntxt_mask); + xstate_offsets = alloc_bootmem(xstate_features * sizeof(int)); + xstate_sizes = alloc_bootmem(xstate_features * sizeof(int)); + + do { + cpuid_count(0xd, leaf, &eax, &ebx, &ecx, &edx); + + if (eax == 0) + break; + + xstate_offsets[leaf] = ebx; + xstate_sizes[leaf] = eax; + + leaf++; + } while (1); +} + /* * setup the xstate image representing the init state */ @@ -308,6 +335,8 @@ static void __init setup_xstate_init(void) { init_xstate_buf = alloc_bootmem(xstate_size); init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; + + setup_xstate_features(); } /* From 29104e101d710dd152f807978884643a52eca8b7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 19 Jul 2010 16:05:49 -0700 Subject: [PATCH 02/12] x86, xsave: Sync xsave memory layout with its header for user handling With xsaveopt, if a processor implementation discern that a processor state component is in its initialized state it may modify the corresponding bit in the xsave_hdr.xstate_bv as '0', with out modifying the corresponding memory layout. Hence wHile presenting the xstate information to the user, we always ensure that the memory layout of a feature will be in the init state if the corresponding header bit is zero. This ensures the consistency and avoids the condition of the user seeing some some stale state in the memory layout during signal handling, debugging etc. Signed-off-by: Suresh Siddha LKML-Reference: <20100719230205.351459480@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 14 ++++++ arch/x86/include/asm/xsave.h | 10 ++++ arch/x86/kernel/i387.c | 11 +++++ arch/x86/kernel/xsave.c | 89 +++++++++++++++++++++++++++++++++++- 4 files changed, 123 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c991b3a7b904..bb370fd0a1c2 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -58,11 +58,25 @@ extern int restore_i387_xstate_ia32(void __user *buf); #define X87_FSW_ES (1 << 7) /* Exception Summary */ +static __always_inline __pure bool use_xsaveopt(void) +{ + return 0; +} + static __always_inline __pure bool use_xsave(void) { return static_cpu_has(X86_FEATURE_XSAVE); } +extern void __sanitize_i387_state(struct task_struct *); + +static inline void sanitize_i387_state(struct task_struct *tsk) +{ + if (!use_xsaveopt()) + return; + __sanitize_i387_state(tsk); +} + #ifdef CONFIG_X86_64 /* Ignore delayed exceptions from user space */ diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 2c4390cae228..0c72adc0cb15 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -111,6 +111,16 @@ static inline void xrstor_state(struct xsave_struct *fx, u64 mask) : "memory"); } +static inline void xsave_state(struct xsave_struct *fx, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + static inline void fpu_xsave(struct fpu *fpu) { /* This, however, we can work around by forcing the compiler to select diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 86cef6b32253..6106af9fd129 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -190,6 +190,8 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; + sanitize_i387_state(target); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fpu.state->fxsave, 0, -1); } @@ -207,6 +209,8 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; + sanitize_i387_state(target); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.fpu.state->fxsave, 0, -1); @@ -446,6 +450,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, -1); } + sanitize_i387_state(target); + if (kbuf && pos == 0 && count == sizeof(env)) { convert_from_fxsr(kbuf, target); return 0; @@ -467,6 +473,8 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; + sanitize_i387_state(target); + if (!HAVE_HWFP) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); @@ -533,6 +541,9 @@ static int save_i387_xsave(void __user *buf) struct _fpstate_ia32 __user *fx = buf; int err = 0; + + sanitize_i387_state(tsk); + /* * For legacy compatible, we always set FP/SSE bits in the bit * vector while saving the state to the user context. diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 4993caa4181c..368047c8d507 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -23,6 +23,76 @@ struct _fpx_sw_bytes fx_sw_reserved_ia32; static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; +/* + * If a processor implementation discern that a processor state component is + * in its initialized state it may modify the corresponding bit in the + * xsave_hdr.xstate_bv as '0', with out modifying the corresponding memory + * layout in the case of xsaveopt. While presenting the xstate information to + * the user, we always ensure that the memory layout of a feature will be in + * the init state if the corresponding header bit is zero. This is to ensure + * that the user doesn't see some stale state in the memory layout during + * signal handling, debugging etc. + */ +void __sanitize_i387_state(struct task_struct *tsk) +{ + u64 xstate_bv; + int feature_bit = 0x2; + struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; + + if (!fx) + return; + + BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU); + + xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; + + /* + * None of the feature bits are in init state. So nothing else + * to do for us, as the memory layout is upto date. + */ + if ((xstate_bv & pcntxt_mask) == pcntxt_mask) + return; + + /* + * FP is in init state + */ + if (!(xstate_bv & XSTATE_FP)) { + fx->cwd = 0x37f; + fx->swd = 0; + fx->twd = 0; + fx->fop = 0; + fx->rip = 0; + fx->rdp = 0; + memset(&fx->st_space[0], 0, 128); + } + + /* + * SSE is in init state + */ + if (!(xstate_bv & XSTATE_SSE)) + memset(&fx->xmm_space[0], 0, 256); + + xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2; + + /* + * Update all the other memory layouts for which the corresponding + * header bit is in the init state. + */ + while (xstate_bv) { + if (xstate_bv & 0x1) { + int offset = xstate_offsets[feature_bit]; + int size = xstate_sizes[feature_bit]; + + memcpy(((void *) fx) + offset, + ((void *) init_xstate_buf) + offset, + size); + } + + xstate_bv >>= 1; + feature_bit++; + } +} + /* * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. @@ -112,6 +182,7 @@ int save_i387_xstate(void __user *buf) task_thread_info(tsk)->status &= ~TS_USEDFPU; stts(); } else { + sanitize_i387_state(tsk); if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, xstate_size)) return -1; @@ -333,10 +404,26 @@ static void setup_xstate_features(void) */ static void __init setup_xstate_init(void) { + setup_xstate_features(); + + /* + * Setup init_xstate_buf to represent the init state of + * all the features managed by the xsave + */ init_xstate_buf = alloc_bootmem(xstate_size); init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; - setup_xstate_features(); + clts(); + /* + * Init all the features state with header_bv being 0x0 + */ + xrstor_state(init_xstate_buf, -1); + /* + * Dump the init state again. This is to identify the init state + * of any feature which is not represented by all zero's. + */ + xsave_state(init_xstate_buf, -1); + stts(); } /* From 6bad06b768920e278c7cedfdda56a0b4c6a35ee9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 19 Jul 2010 16:05:52 -0700 Subject: [PATCH 03/12] x86, xsave: Use xsaveopt in context-switch path when supported xsaveopt is a more optimized form of xsave specifically designed for the context switch usage. xsaveopt doesn't save the state that's not modified from the prior xrstor. And if a specific feature state gets modified to the init state, then xsaveopt just updates the header bit in the xsave memory layout without updating the corresponding memory layout. Signed-off-by: Suresh Siddha LKML-Reference: <20100719230205.604014179@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 2 +- arch/x86/include/asm/xsave.h | 9 ++++++--- arch/x86/kernel/cpu/common.c | 8 ++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index bb370fd0a1c2..59bd93ac7fef 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -60,7 +60,7 @@ extern int restore_i387_xstate_ia32(void __user *buf); static __always_inline __pure bool use_xsaveopt(void) { - return 0; + return static_cpu_has(X86_FEATURE_XSAVEOPT); } static __always_inline __pure bool use_xsave(void) diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 0c72adc0cb15..ec86c5fd6a6e 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -125,8 +125,11 @@ static inline void fpu_xsave(struct fpu *fpu) { /* This, however, we can work around by forcing the compiler to select an addressing mode that doesn't require extended registers. */ - __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" - : : "D" (&(fpu->state->xsave)), - "a" (-1), "d"(-1) : "memory"); + alternative_input( + ".byte " REX_PREFIX "0x0f,0xae,0x27", + ".byte " REX_PREFIX "0x0f,0xae,0x37", + X86_FEATURE_XSAVEOPT, + [fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) : + "memory"); } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c7358303d8cd..3f715efc594d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -140,10 +140,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); static int __init x86_xsave_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_XSAVE); + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); return 1; } __setup("noxsave", x86_xsave_setup); +static int __init x86_xsaveopt_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + return 1; +} +__setup("noxsaveopt", x86_xsaveopt_setup); + #ifdef CONFIG_X86_32 static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; From 7aa2b5f8ec60505160df1c25398e8286c8432689 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 20 Jul 2010 20:50:48 +0200 Subject: [PATCH 04/12] x86, xsave: Do not include asm/i387.h in asm/xsave.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no dependencies to asm/i387.h. Instead, if including only xsave.h the following error occurs: .../arch/x86/include/asm/i387.h:110: error: ‘XSTATE_FP’ undeclared (first use in this function) .../arch/x86/include/asm/i387.h:110: error: (Each undeclared identifier is reported only once .../arch/x86/include/asm/i387.h:110: error: for each function it appears in.) This patch fixes this. Signed-off-by: Robert Richter LKML-Reference: <1279651857-24639-2-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index ec86c5fd6a6e..94d5f84d89f2 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -3,7 +3,6 @@ #include #include -#include #define XSTATE_FP 0x1 #define XSTATE_SSE 0x2 From db10db48b2c530def21bfd76d576702c7df7f620 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 20 Jul 2010 20:50:49 +0200 Subject: [PATCH 05/12] x86, xsave: 32/64 bit boot cpu check unification in initialization Boot cpu id is always 0, thus simplifying and unifying boot cpu check. boot_cpu_id is there for historical reasons and was renamed to boot_cpu_physical_apicid in patch: c70dcb7 x86: change boot_cpu_id to boot_cpu_physical_apicid However, there are some remaining occurrences of boot_cpu_id that are never touched in the kernel and thus its value is always 0. Signed-off-by: Robert Richter LKML-Reference: <1279651857-24639-3-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3f715efc594d..26804b2986b8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1273,7 +1273,7 @@ void __cpuinit cpu_init(void) /* * Boot processor to setup the FP and extended state context info. */ - if (smp_processor_id() == boot_cpu_id) + if (!smp_processor_id()) init_thread_xstate(); xsave_init(); From 82d4150cec83b9775f84810b39a1c0b91585d429 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 20 Jul 2010 20:50:51 +0200 Subject: [PATCH 06/12] x86, xsave: Move boot cpu initialization to xsave_init() This patch moves boot cpu initialization to xsave_init(). Now all cpus are initialized in one single function. Signed-off-by: Robert Richter LKML-Reference: <1279651857-24639-5-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 6 ------ arch/x86/kernel/i387.c | 5 ----- arch/x86/kernel/xsave.c | 14 ++++++++++++-- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 26804b2986b8..40561085d4f3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1270,12 +1270,6 @@ void __cpuinit cpu_init(void) clear_used_math(); mxcsr_feature_mask_init(); - /* - * Boot processor to setup the FP and extended state context info. - */ - if (!smp_processor_id()) - init_thread_xstate(); - xsave_init(); } #endif diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 6106af9fd129..2f32ef05f10e 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -93,11 +93,6 @@ void __cpuinit fpu_init(void) write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ - /* - * Boot processor to setup the FP and extended state context info. - */ - if (!smp_processor_id()) - init_thread_xstate(); xsave_init(); mxcsr_feature_mask_init(); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 368047c8d507..ab9ad48b6530 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -360,7 +360,7 @@ unsigned int sig_xstate_size = sizeof(struct _fpstate); /* * Enable the extended processor state save/restore feature */ -void __cpuinit xsave_init(void) +static void __cpuinit __xsave_init(void) { if (!cpu_has_xsave) return; @@ -446,7 +446,7 @@ void __ref xsave_cntxt_init(void) * Support only the state known to OS. */ pcntxt_mask = pcntxt_mask & XCNTXT_MASK; - xsave_init(); + __xsave_init(); /* * Recompute the context size for enabled features @@ -463,3 +463,13 @@ void __ref xsave_cntxt_init(void) "cntxt size 0x%x\n", pcntxt_mask, xstate_size); } + +void __cpuinit xsave_init(void) +{ + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + __xsave_init(); +} From 0e49bf66d2ca649b167428adddbbbe9d9bd4894c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 21 Jul 2010 19:03:52 +0200 Subject: [PATCH 07/12] x86, xsave: Separate fpu and xsave initialization As xsave also supports other than fpu features, it should be initialized independently of the fpu. This patch moves this out of fpu initialization. There is also a lot of cross referencing between fpu and xsave code. This patch reduces this by making xsave_cntxt_init() and init_thread_xstate() static functions. The patch moves the cpu_has_xsave check at the beginning of xsave_init(). All other checks may removed then. Signed-off-by: Robert Richter LKML-Reference: <1279731838-1522-2-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 1 - arch/x86/include/asm/xsave.h | 1 - arch/x86/kernel/cpu/common.c | 2 ++ arch/x86/kernel/i387.c | 27 +++++++++++++++++++-------- arch/x86/kernel/xsave.c | 10 +++++----- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 59bd93ac7fef..509ddabeae25 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -31,7 +31,6 @@ extern void mxcsr_feature_mask_init(void); extern int init_fpu(struct task_struct *child); extern asmlinkage void math_state_restore(void); extern void __math_state_restore(void); -extern void init_thread_xstate(void); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern user_regset_active_fn fpregs_active, xfpregs_active; diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 94d5f84d89f2..4d3b5d1fc028 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -28,7 +28,6 @@ extern u64 pcntxt_mask; extern struct xsave_struct *init_xstate_buf; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -extern void xsave_cntxt_init(void); extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern int init_fpu(struct task_struct *child); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 40561085d4f3..94c36c7ac183 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1210,6 +1210,7 @@ void __cpuinit cpu_init(void) dbg_restore_debug_regs(); fpu_init(); + xsave_init(); raw_local_save_flags(kernel_eflags); @@ -1270,6 +1271,7 @@ void __cpuinit cpu_init(void) clear_used_math(); mxcsr_feature_mask_init(); + fpu_init(); xsave_init(); } #endif diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 2f32ef05f10e..e73c54ebafce 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -59,18 +59,18 @@ void __cpuinit mxcsr_feature_mask_init(void) stts(); } -void __cpuinit init_thread_xstate(void) +static void __cpuinit init_thread_xstate(void) { + /* + * Note that xstate_size might be overwriten later during + * xsave_init(). + */ + if (!HAVE_HWFP) { xstate_size = sizeof(struct i387_soft_struct); return; } - if (cpu_has_xsave) { - xsave_cntxt_init(); - return; - } - if (cpu_has_fxsr) xstate_size = sizeof(struct i387_fxsave_struct); #ifdef CONFIG_X86_32 @@ -84,6 +84,7 @@ void __cpuinit init_thread_xstate(void) * Called at bootup to set up the initial FPU state that is later cloned * into all processes. */ + void __cpuinit fpu_init(void) { unsigned long oldcr0 = read_cr0(); @@ -93,14 +94,24 @@ void __cpuinit fpu_init(void) write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ - xsave_init(); + if (!smp_processor_id()) + init_thread_xstate(); mxcsr_feature_mask_init(); /* clean state in init */ current_thread_info()->status = 0; clear_used_math(); } -#endif /* CONFIG_X86_64 */ + +#else /* CONFIG_X86_64 */ + +void __cpuinit fpu_init(void) +{ + if (!smp_processor_id()) + init_thread_xstate(); +} + +#endif /* CONFIG_X86_32 */ static void fpu_finit(struct fpu *fpu) { diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index ab9ad48b6530..550bf45236f4 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -362,9 +362,6 @@ unsigned int sig_xstate_size = sizeof(struct _fpstate); */ static void __cpuinit __xsave_init(void) { - if (!cpu_has_xsave) - return; - set_in_cr4(X86_CR4_OSXSAVE); /* @@ -429,7 +426,7 @@ static void __init setup_xstate_init(void) /* * Enable and initialize the xsave feature. */ -void __ref xsave_cntxt_init(void) +static void __cpuinit xsave_cntxt_init(void) { unsigned int eax, ebx, ecx, edx; @@ -466,10 +463,13 @@ void __ref xsave_cntxt_init(void) void __cpuinit xsave_init(void) { + if (!cpu_has_xsave) + return; + /* * Boot processor to setup the FP and extended state context info. */ if (!smp_processor_id()) - init_thread_xstate(); + xsave_cntxt_init(); __xsave_init(); } From 97e80a70db689fb1e876df9f12305cc72f85ca53 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 21 Jul 2010 19:03:53 +0200 Subject: [PATCH 08/12] x86, xsave: Introduce xstate enable functions The patch renames xsave_cntxt_init() and __xsave_init() into xstate_enable_boot_cpu() and xstate_enable() as this names are more meaningful. It also removes the duplicate xcr setup for the boot cpu. Signed-off-by: Robert Richter LKML-Reference: <1279731838-1522-3-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/kernel/xsave.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 550bf45236f4..2322f586c051 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -360,15 +360,10 @@ unsigned int sig_xstate_size = sizeof(struct _fpstate); /* * Enable the extended processor state save/restore feature */ -static void __cpuinit __xsave_init(void) +static inline void xstate_enable(u64 mask) { set_in_cr4(X86_CR4_OSXSAVE); - - /* - * Enable all the features that the HW is capable of - * and the Linux kernel is aware of. - */ - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); + xsetbv(XCR_XFEATURE_ENABLED_MASK, mask); } /* @@ -426,7 +421,7 @@ static void __init setup_xstate_init(void) /* * Enable and initialize the xsave feature. */ -static void __cpuinit xsave_cntxt_init(void) +static void __cpuinit xstate_enable_boot_cpu(void) { unsigned int eax, ebx, ecx, edx; @@ -443,7 +438,8 @@ static void __cpuinit xsave_cntxt_init(void) * Support only the state known to OS. */ pcntxt_mask = pcntxt_mask & XCNTXT_MASK; - __xsave_init(); + + xstate_enable(pcntxt_mask); /* * Recompute the context size for enabled features @@ -470,6 +466,7 @@ void __cpuinit xsave_init(void) * Boot processor to setup the FP and extended state context info. */ if (!smp_processor_id()) - xsave_cntxt_init(); - __xsave_init(); + xstate_enable_boot_cpu(); + else + xstate_enable(pcntxt_mask); } From ee813d53a8e980a3a28318efb8935d45723f5211 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 21 Jul 2010 19:03:54 +0200 Subject: [PATCH 09/12] x86, xsave: Check cpuid level for XSTATE_CPUID (0x0d) The patch introduces the XSTATE_CPUID macro and adds a check that tests if XSTATE_CPUID exists. Signed-off-by: Robert Richter LKML-Reference: <1279731838-1522-4-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 2 ++ arch/x86/kernel/xsave.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 4d3b5d1fc028..d1b5f3a2fa20 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -4,6 +4,8 @@ #include #include +#define XSTATE_CPUID 0x0000000d + #define XSTATE_FP 0x1 #define XSTATE_SSE 0x2 #define XSTATE_YMM 0x4 diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 2322f586c051..5adb7fb408f0 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -379,7 +379,7 @@ static void setup_xstate_features(void) xstate_sizes = alloc_bootmem(xstate_features * sizeof(int)); do { - cpuid_count(0xd, leaf, &eax, &ebx, &ecx, &edx); + cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); if (eax == 0) break; @@ -425,7 +425,12 @@ static void __cpuinit xstate_enable_boot_cpu(void) { unsigned int eax, ebx, ecx, edx; - cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { + WARN(1, KERN_ERR "XSTATE_CPUID missing\n"); + return; + } + + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); pcntxt_mask = eax + ((u64)edx << 32); if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { @@ -444,7 +449,7 @@ static void __cpuinit xstate_enable_boot_cpu(void) /* * Recompute the context size for enabled features */ - cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); xstate_size = ebx; update_regset_xstate_info(xstate_size, pcntxt_mask); From 45c2d7f46211a0b1f6b425c59575c53145afc4b4 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 21 Jul 2010 19:03:55 +0200 Subject: [PATCH 10/12] x86, xsave: Make init_xstate_buf static The pointer is only used in xsave.c. Making it static. Signed-off-by: Robert Richter LKML-Reference: <1279731838-1522-5-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 1 - arch/x86/kernel/xsave.c | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index d1b5f3a2fa20..0ae6b9961985 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -27,7 +27,6 @@ extern unsigned int xstate_size; extern u64 pcntxt_mask; -extern struct xsave_struct *init_xstate_buf; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void xsave_init(void); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 5adb7fb408f0..3b44a9b1eca4 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -16,6 +16,11 @@ */ u64 pcntxt_mask; +/* + * Represents init state for the supported extended state. + */ +static struct xsave_struct *init_xstate_buf; + struct _fpx_sw_bytes fx_sw_reserved; #ifdef CONFIG_IA32_EMULATION struct _fpx_sw_bytes fx_sw_reserved_ia32; @@ -348,11 +353,6 @@ static void prepare_fx_sw_frame(void) #endif } -/* - * Represents init state for the supported extended state. - */ -struct xsave_struct *init_xstate_buf; - #ifdef CONFIG_X86_64 unsigned int sig_xstate_size = sizeof(struct _fpstate); #endif From 4995b9dba908436c1611454f9bd2cb3ddf6babee Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 21 Jul 2010 19:03:56 +0200 Subject: [PATCH 11/12] x86, xsave: Add __init attribute to setup_xstate_features() This is called only from initialization code. Signed-off-by: Robert Richter LKML-Reference: <1279731838-1522-6-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/kernel/xsave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 3b44a9b1eca4..cfc7901ee94e 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -370,7 +370,7 @@ static inline void xstate_enable(u64 mask) * Record the offsets and sizes of different state managed by the xsave * memory layout. */ -static void setup_xstate_features(void) +static void __init setup_xstate_features(void) { int eax, ebx, ecx, edx, leaf = 0x2; From 1cff92d8fdb27684308864d9cdb324bee43b40ab Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 21 Jul 2010 14:23:10 -0700 Subject: [PATCH 12/12] x86, xsave: Make xstate_enable_boot_cpu() __init, protect on CPU 0 xstate_enable_boot_cpu() is, as the name implies, only used on the boot CPU; furthermore, it invokes alloc_bootmem(), which is __init; hence it needs to be tagged __init rather than __cpuinit. Furthermore, it is *not* safe in the long run to rely on CPU 0 only coming online during the early boot -- at some point we're going to support offlining (and re-onlining) the boot CPU, and at that point we must not call xstate_enable_boot_cpu() again. The code is a fair bit more obscure than one would like, because the __ref overrides aren't quite powerful enough. Signed-off-by: H. Peter Anvin Acked-by: Suresh Siddha Cc: Robert Richter LKML-Reference: <4C476236.1020302@zytor.com> --- arch/x86/kernel/xsave.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index cfc7901ee94e..b2549c3eb2c3 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -360,10 +360,10 @@ unsigned int sig_xstate_size = sizeof(struct _fpstate); /* * Enable the extended processor state save/restore feature */ -static inline void xstate_enable(u64 mask) +static inline void xstate_enable(void) { set_in_cr4(X86_CR4_OSXSAVE); - xsetbv(XCR_XFEATURE_ENABLED_MASK, mask); + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); } /* @@ -421,7 +421,7 @@ static void __init setup_xstate_init(void) /* * Enable and initialize the xsave feature. */ -static void __cpuinit xstate_enable_boot_cpu(void) +static void __init xstate_enable_boot_cpu(void) { unsigned int eax, ebx, ecx, edx; @@ -444,7 +444,7 @@ static void __cpuinit xstate_enable_boot_cpu(void) */ pcntxt_mask = pcntxt_mask & XCNTXT_MASK; - xstate_enable(pcntxt_mask); + xstate_enable(); /* * Recompute the context size for enabled features @@ -462,16 +462,22 @@ static void __cpuinit xstate_enable_boot_cpu(void) pcntxt_mask, xstate_size); } +/* + * For the very first instance, this calls xstate_enable_boot_cpu(); + * for all subsequent instances, this calls xstate_enable(). + * + * This is somewhat obfuscated due to the lack of powerful enough + * overrides for the section checks. + */ void __cpuinit xsave_init(void) { + static __refdata void (*next_func)(void) = xstate_enable_boot_cpu; + void (*this_func)(void); + if (!cpu_has_xsave) return; - /* - * Boot processor to setup the FP and extended state context info. - */ - if (!smp_processor_id()) - xstate_enable_boot_cpu(); - else - xstate_enable(pcntxt_mask); + this_func = next_func; + next_func = xstate_enable; + this_func(); }