Most of the changes here are related to 'XSAVES supervisor state' support,

which is a feature that allows kernel-only data to be automatically
 saved/restored by the FPU context switching code.
 
 CPU features that can be supported this way are Intel PT, 'PASID' and
 CET features.
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAl7VMZgRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1jmAQ/7BJpyAHUjFJdChtkvUmLcBgI2qnxP7rc8
 Eh/tSo4PKh484Uqb4WY6XAHIAPBzEt3rHJG3fdaavzlUl98YJCdD9tstfwMPcCQ4
 L4c2Ru+h+mPQCMOZUctOphPjDzGWPzR4IhceH6gqhoS4vg9EqgN4o158x4jW6KFN
 Jlocp9CMfIaGSmaMlRrIUZ4Dj3mgboqqHsuCaibtaKAMK6LqZQDViTEal4mNbESX
 KQPOFpKrhoq6Jtzzer7fLPY2qb6kkLrL03X5IUGFP5UxigSejnfrI9SZpAuPP9S0
 kdN04Jo0T2aBIAikBTVhDWdLMJk19qeu7YXBrFEVbyhZHl1HdDqOhMdWPOp1GH9W
 CtGUalbIvz/5FbXuUImiiNh/bw2FxYjHsrDguW96IvMVFteucrFg9QyL+taYb1cV
 WqWdpIC0VoMuQxQI5FBWu4Bb/cLNV9VCxWAZjZQ806kwmyDxldsw5mucMGmH3+bO
 LD6bwRShSMRzI9bzcJSG+Z3y7Fe8b5IGNjCjzgPb88ezffBEFHzIEKdCL6QTNlRF
 6UgSGbRs41SqXwNw5tdQQNwPpDO73p+KVRGoEzyMJvojLKRGTcOHHUDriGZ30MNX
 3oHvLf5+dNrLC/frbOqUmQ7doBQOplR5VxlZVwwqkdpPw13Jf5zn4ewzriTOmKCq
 mEHMQmbkyi4=
 =M+BC
 -----END PGP SIGNATURE-----

Merge tag 'x86-fpu-2020-06-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 FPU updates from Ingo Molnar:
 "Most of the changes here are related to 'XSAVES supervisor state' support,
  which is a feature that allows kernel-only data to be automatically
  saved/restored by the FPU context switching code.

  CPU features that can be supported this way are Intel PT, 'PASID' and
  CET features"

* tag 'x86-fpu-2020-06-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/fpu/xstate: Restore supervisor states for signal return
  x86/fpu/xstate: Preserve supervisor states for the slow path in __fpu__restore_sig()
  x86/fpu: Introduce copy_supervisor_to_kernel()
  x86/fpu/xstate: Update copy_kernel_to_xregs_err() for supervisor states
  x86/fpu/xstate: Update sanitize_restored_xstate() for supervisor xstates
  x86/fpu/xstate: Define new functions for clearing fpregs and xstates
  x86/fpu/xstate: Introduce XSAVES supervisor states
  x86/fpu/xstate: Separate user and supervisor xfeatures mask
  x86/fpu/xstate: Define new macros for supervisor and user xstates
  x86/fpu/xstate: Rename validate_xstate_header() to validate_user_xstate_header()
This commit is contained in:
Linus Torvalds 2020-06-01 14:09:26 -07:00
commit 0a319ef75d
9 changed files with 336 additions and 131 deletions

View File

@ -31,7 +31,8 @@ extern void fpu__save(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern int fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu); extern void fpu__drop(struct fpu *fpu);
extern int fpu__copy(struct task_struct *dst, struct task_struct *src); extern int fpu__copy(struct task_struct *dst, struct task_struct *src);
extern void fpu__clear(struct fpu *fpu); extern void fpu__clear_user_states(struct fpu *fpu);
extern void fpu__clear_all(struct fpu *fpu);
extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
@ -92,7 +93,7 @@ static inline void fpstate_init_xstate(struct xregs_state *xsave)
* XRSTORS requires these bits set in xcomp_bv, or it will * XRSTORS requires these bits set in xcomp_bv, or it will
* trigger #GP: * trigger #GP:
*/ */
xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask; xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
} }
static inline void fpstate_init_fxstate(struct fxregs_state *fx) static inline void fpstate_init_fxstate(struct fxregs_state *fx)
@ -399,7 +400,10 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
u32 hmask = mask >> 32; u32 hmask = mask >> 32;
int err; int err;
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
return err; return err;
} }

View File

@ -21,19 +21,29 @@
#define XSAVE_YMM_SIZE 256 #define XSAVE_YMM_SIZE 256
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
/* Supervisor features */ /* All currently supported user features */
#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT) #define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \
XFEATURE_MASK_SSE | \
XFEATURE_MASK_YMM | \
XFEATURE_MASK_OPMASK | \
XFEATURE_MASK_ZMM_Hi256 | \
XFEATURE_MASK_Hi16_ZMM | \
XFEATURE_MASK_PKRU | \
XFEATURE_MASK_BNDREGS | \
XFEATURE_MASK_BNDCSR)
/* All currently supported features */ /* All currently supported supervisor features */
#define XCNTXT_MASK (XFEATURE_MASK_FP | \ #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0)
XFEATURE_MASK_SSE | \
XFEATURE_MASK_YMM | \ /*
XFEATURE_MASK_OPMASK | \ * Unsupported supervisor features. When a supervisor feature in this mask is
XFEATURE_MASK_ZMM_Hi256 | \ * supported in the future, move it to the supported supervisor feature mask.
XFEATURE_MASK_Hi16_ZMM | \ */
XFEATURE_MASK_PKRU | \ #define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT)
XFEATURE_MASK_BNDREGS | \
XFEATURE_MASK_BNDCSR) /* All supervisor states including supported and unsupported states. */
#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, " #define REX_PREFIX "0x48, "
@ -41,7 +51,18 @@
#define REX_PREFIX #define REX_PREFIX
#endif #endif
extern u64 xfeatures_mask; extern u64 xfeatures_mask_all;
static inline u64 xfeatures_mask_supervisor(void)
{
return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}
static inline u64 xfeatures_mask_user(void)
{
return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
}
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size, extern void __init update_regset_xstate_info(unsigned int size,
@ -54,8 +75,9 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
void copy_supervisor_to_kernel(struct xregs_state *xsave);
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
extern int validate_xstate_header(const struct xstate_header *hdr); int validate_user_xstate_header(const struct xstate_header *hdr);
#endif #endif

View File

@ -291,15 +291,13 @@ void fpu__drop(struct fpu *fpu)
} }
/* /*
* Clear FPU registers by setting them up from * Clear FPU registers by setting them up from the init fpstate.
* the init fpstate: * Caller must do fpregs_[un]lock() around it.
*/ */
static inline void copy_init_fpstate_to_fpregs(void) static inline void copy_init_fpstate_to_fpregs(u64 features_mask)
{ {
fpregs_lock();
if (use_xsave()) if (use_xsave())
copy_kernel_to_xregs(&init_fpstate.xsave, -1); copy_kernel_to_xregs(&init_fpstate.xsave, features_mask);
else if (static_cpu_has(X86_FEATURE_FXSR)) else if (static_cpu_has(X86_FEATURE_FXSR))
copy_kernel_to_fxregs(&init_fpstate.fxsave); copy_kernel_to_fxregs(&init_fpstate.fxsave);
else else
@ -307,9 +305,6 @@ static inline void copy_init_fpstate_to_fpregs(void)
if (boot_cpu_has(X86_FEATURE_OSPKE)) if (boot_cpu_has(X86_FEATURE_OSPKE))
copy_init_pkru_to_fpregs(); copy_init_pkru_to_fpregs();
fpregs_mark_activate();
fpregs_unlock();
} }
/* /*
@ -318,18 +313,40 @@ static inline void copy_init_fpstate_to_fpregs(void)
* Called by sys_execve(), by the signal handler code and by various * Called by sys_execve(), by the signal handler code and by various
* error paths. * error paths.
*/ */
void fpu__clear(struct fpu *fpu) static void fpu__clear(struct fpu *fpu, bool user_only)
{ {
WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */ WARN_ON_FPU(fpu != &current->thread.fpu);
fpu__drop(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) {
fpu__drop(fpu);
fpu__initialize(fpu);
return;
}
/* fpregs_lock();
* Make sure fpstate is cleared and initialized.
*/ if (user_only) {
fpu__initialize(fpu); if (!fpregs_state_valid(fpu, smp_processor_id()) &&
if (static_cpu_has(X86_FEATURE_FPU)) xfeatures_mask_supervisor())
copy_init_fpstate_to_fpregs(); copy_kernel_to_xregs(&fpu->state.xsave,
xfeatures_mask_supervisor());
copy_init_fpstate_to_fpregs(xfeatures_mask_user());
} else {
copy_init_fpstate_to_fpregs(xfeatures_mask_all);
}
fpregs_mark_activate();
fpregs_unlock();
}
void fpu__clear_user_states(struct fpu *fpu)
{
fpu__clear(fpu, true);
}
void fpu__clear_all(struct fpu *fpu)
{
fpu__clear(fpu, false);
} }
/* /*

View File

@ -224,7 +224,8 @@ static void __init fpu__init_system_xstate_size_legacy(void)
*/ */
u64 __init fpu__get_supported_xfeatures_mask(void) u64 __init fpu__get_supported_xfeatures_mask(void)
{ {
return XCNTXT_MASK; return XFEATURE_MASK_USER_SUPPORTED |
XFEATURE_MASK_SUPERVISOR_SUPPORTED;
} }
/* Legacy code to initialize eager fpu mode. */ /* Legacy code to initialize eager fpu mode. */

View File

@ -139,7 +139,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
} else { } else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
if (!ret) if (!ret)
ret = validate_xstate_header(&xsave->header); ret = validate_user_xstate_header(&xsave->header);
} }
/* /*

View File

@ -211,9 +211,9 @@ retry:
} }
static inline void static inline void
sanitize_restored_xstate(union fpregs_state *state, sanitize_restored_user_xstate(union fpregs_state *state,
struct user_i387_ia32_struct *ia32_env, struct user_i387_ia32_struct *ia32_env,
u64 xfeatures, int fx_only) u64 user_xfeatures, int fx_only)
{ {
struct xregs_state *xsave = &state->xsave; struct xregs_state *xsave = &state->xsave;
struct xstate_header *header = &xsave->header; struct xstate_header *header = &xsave->header;
@ -226,13 +226,22 @@ sanitize_restored_xstate(union fpregs_state *state,
*/ */
/* /*
* Init the state that is not present in the memory * 'user_xfeatures' might have bits clear which are
* layout and not enabled by the OS. * set in header->xfeatures. This represents features that
* were in init state prior to a signal delivery, and need
* to be reset back to the init state. Clear any user
* feature bits which are set in the kernel buffer to get
* them back to the init state.
*
* Supervisor state is unchanged by input from userspace.
* Ensure supervisor state bits stay set and supervisor
* state is not modified.
*/ */
if (fx_only) if (fx_only)
header->xfeatures = XFEATURE_MASK_FPSSE; header->xfeatures = XFEATURE_MASK_FPSSE;
else else
header->xfeatures &= xfeatures; header->xfeatures &= user_xfeatures |
xfeatures_mask_supervisor();
} }
if (use_fxsr()) { if (use_fxsr()) {
@ -252,16 +261,24 @@ sanitize_restored_xstate(union fpregs_state *state,
*/ */
static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
{ {
u64 init_bv;
int r;
if (use_xsave()) { if (use_xsave()) {
if (fx_only) { if (fx_only) {
u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE;
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
return copy_user_to_fxregs(buf); r = copy_user_to_fxregs(buf);
} else { if (!r)
u64 init_bv = xfeatures_mask & ~xbv;
if (unlikely(init_bv))
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
return copy_user_to_xregs(buf, xbv); return r;
} else {
init_bv = xfeatures_mask_user() & ~xbv;
r = copy_user_to_xregs(buf, xbv);
if (!r && unlikely(init_bv))
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
return r;
} }
} else if (use_fxsr()) { } else if (use_fxsr()) {
return copy_user_to_fxregs(buf); return copy_user_to_fxregs(buf);
@ -277,7 +294,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
struct task_struct *tsk = current; struct task_struct *tsk = current;
struct fpu *fpu = &tsk->thread.fpu; struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env; struct user_i387_ia32_struct env;
u64 xfeatures = 0; u64 user_xfeatures = 0;
int fx_only = 0; int fx_only = 0;
int ret = 0; int ret = 0;
@ -285,7 +302,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
IS_ENABLED(CONFIG_IA32_EMULATION)); IS_ENABLED(CONFIG_IA32_EMULATION));
if (!buf) { if (!buf) {
fpu__clear(fpu); fpu__clear_user_states(fpu);
return 0; return 0;
} }
@ -310,32 +327,14 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
trace_x86_fpu_xstate_check_failed(fpu); trace_x86_fpu_xstate_check_failed(fpu);
} else { } else {
state_size = fx_sw_user.xstate_size; state_size = fx_sw_user.xstate_size;
xfeatures = fx_sw_user.xfeatures; user_xfeatures = fx_sw_user.xfeatures;
} }
} }
/*
* The current state of the FPU registers does not matter. By setting
* TIF_NEED_FPU_LOAD unconditionally it is ensured that the our xstate
* is not modified on context switch and that the xstate is considered
* to be loaded again on return to userland (overriding last_cpu avoids
* the optimisation).
*/
set_thread_flag(TIF_NEED_FPU_LOAD);
__fpu_invalidate_fpregs_state(fpu);
if ((unsigned long)buf_fx % 64) if ((unsigned long)buf_fx % 64)
fx_only = 1; fx_only = 1;
/*
* For 32-bit frames with fxstate, copy the fxstate so it can be if (!ia32_fxstate) {
* reconstructed later.
*/
if (ia32_fxstate) {
ret = __copy_from_user(&env, buf, sizeof(env));
if (ret)
goto err_out;
envp = &env;
} else {
/* /*
* Attempt to restore the FPU registers directly from user * Attempt to restore the FPU registers directly from user
* memory. For that to succeed, the user access cannot cause * memory. For that to succeed, the user access cannot cause
@ -345,20 +344,65 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
*/ */
fpregs_lock(); fpregs_lock();
pagefault_disable(); pagefault_disable();
ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only); ret = copy_user_to_fpregs_zeroing(buf_fx, user_xfeatures, fx_only);
pagefault_enable(); pagefault_enable();
if (!ret) { if (!ret) {
/*
* Restore supervisor states: previous context switch
* etc has done XSAVES and saved the supervisor states
* in the kernel buffer from which they can be restored
* now.
*
* We cannot do a single XRSTORS here - which would
* be nice - because the rest of the FPU registers are
* being restored from a user buffer directly. The
* single XRSTORS happens below, when the user buffer
* has been copied to the kernel one.
*/
if (test_thread_flag(TIF_NEED_FPU_LOAD) &&
xfeatures_mask_supervisor())
copy_kernel_to_xregs(&fpu->state.xsave,
xfeatures_mask_supervisor());
fpregs_mark_activate(); fpregs_mark_activate();
fpregs_unlock(); fpregs_unlock();
return 0; return 0;
} }
fpregs_deactivate(fpu);
fpregs_unlock(); fpregs_unlock();
} else {
/*
* For 32-bit frames with fxstate, copy the fxstate so it can
* be reconstructed later.
*/
ret = __copy_from_user(&env, buf, sizeof(env));
if (ret)
goto err_out;
envp = &env;
} }
/*
* By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is
* not modified on context switch and that the xstate is considered
* to be loaded again on return to userland (overriding last_cpu avoids
* the optimisation).
*/
fpregs_lock();
if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
/*
* Supervisor states are not modified by user space input. Save
* current supervisor states first and invalidate the FPU regs.
*/
if (xfeatures_mask_supervisor())
copy_supervisor_to_kernel(&fpu->state.xsave);
set_thread_flag(TIF_NEED_FPU_LOAD);
}
__fpu_invalidate_fpregs_state(fpu);
fpregs_unlock();
if (use_xsave() && !fx_only) { if (use_xsave() && !fx_only) {
u64 init_bv = xfeatures_mask & ~xfeatures; u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
if (using_compacted_format()) { if (using_compacted_format()) {
ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
@ -366,17 +410,24 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
if (!ret && state_size > offsetof(struct xregs_state, header)) if (!ret && state_size > offsetof(struct xregs_state, header))
ret = validate_xstate_header(&fpu->state.xsave.header); ret = validate_user_xstate_header(&fpu->state.xsave.header);
} }
if (ret) if (ret)
goto err_out; goto err_out;
sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
fx_only);
fpregs_lock(); fpregs_lock();
if (unlikely(init_bv)) if (unlikely(init_bv))
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures);
/*
* Restore previously saved supervisor xstates along with
* copied-in user xstates.
*/
ret = copy_kernel_to_xregs_err(&fpu->state.xsave,
user_xfeatures | xfeatures_mask_supervisor());
} else if (use_fxsr()) { } else if (use_fxsr()) {
ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
@ -385,11 +436,14 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
goto err_out; goto err_out;
} }
sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
fx_only);
fpregs_lock(); fpregs_lock();
if (use_xsave()) { if (use_xsave()) {
u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; u64 init_bv;
init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE;
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
} }
@ -410,7 +464,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
err_out: err_out:
if (ret) if (ret)
fpu__clear(fpu); fpu__clear_user_states(fpu);
return ret; return ret;
} }
@ -465,7 +519,7 @@ void fpu__init_prepare_fx_sw_frame(void)
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = size; fx_sw_reserved.extended_size = size;
fx_sw_reserved.xfeatures = xfeatures_mask; fx_sw_reserved.xfeatures = xfeatures_mask_user();
fx_sw_reserved.xstate_size = fpu_user_xstate_size; fx_sw_reserved.xstate_size = fpu_user_xstate_size;
if (IS_ENABLED(CONFIG_IA32_EMULATION) || if (IS_ENABLED(CONFIG_IA32_EMULATION) ||

View File

@ -54,13 +54,15 @@ static short xsave_cpuid_features[] __initdata = {
}; };
/* /*
* Mask of xstate features supported by the CPU and the kernel: * This represents the full set of bits that should ever be set in a kernel
* XSAVE buffer, both supervisor and user xstates.
*/ */
u64 xfeatures_mask __read_mostly; u64 xfeatures_mask_all __read_mostly;
static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
/* /*
* The XSAVE area of kernel can be in standard or compacted format; * The XSAVE area of kernel can be in standard or compacted format;
@ -76,7 +78,7 @@ unsigned int fpu_user_xstate_size;
*/ */
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{ {
u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask; u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask_all;
if (unlikely(feature_name)) { if (unlikely(feature_name)) {
long xfeature_idx, max_idx; long xfeature_idx, max_idx;
@ -150,7 +152,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
* None of the feature bits are in init state. So nothing else * None of the feature bits are in init state. So nothing else
* to do for us, as the memory layout is up to date. * to do for us, as the memory layout is up to date.
*/ */
if ((xfeatures & xfeatures_mask) == xfeatures_mask) if ((xfeatures & xfeatures_mask_all) == xfeatures_mask_all)
return; return;
/* /*
@ -177,7 +179,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
* in a special way already: * in a special way already:
*/ */
feature_bit = 0x2; feature_bit = 0x2;
xfeatures = (xfeatures_mask & ~xfeatures) >> 2; xfeatures = (xfeatures_mask_user() & ~xfeatures) >> 2;
/* /*
* Update all the remaining memory layouts according to their * Update all the remaining memory layouts according to their
@ -205,30 +207,39 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
*/ */
void fpu__init_cpu_xstate(void) void fpu__init_cpu_xstate(void)
{ {
if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) u64 unsup_bits;
if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all)
return; return;
/* /*
* Make it clear that XSAVES supervisor states are not yet * Unsupported supervisor xstates should not be found in
* implemented should anyone expect it to work by changing * the xfeatures mask.
* bits in XFEATURE_MASK_* macros and XCR0.
*/ */
WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR), unsup_bits = xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
"x86/fpu: XSAVES supervisor states are not yet implemented.\n"); WARN_ONCE(unsup_bits, "x86/fpu: Found unsupported supervisor xstates: 0x%llx\n",
unsup_bits);
xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR; xfeatures_mask_all &= ~XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
cr4_set_bits(X86_CR4_OSXSAVE); cr4_set_bits(X86_CR4_OSXSAVE);
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
/*
* XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
* managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
* states can be set here.
*/
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
/*
* MSR_IA32_XSS sets supervisor states managed by XSAVES.
*/
if (boot_cpu_has(X86_FEATURE_XSAVES))
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
} }
/* static bool xfeature_enabled(enum xfeature xfeature)
* Note that in the future we will likely need a pair of
* functions here: one for user xstates and the other for
* system xstates. For now, they are the same.
*/
static int xfeature_enabled(enum xfeature xfeature)
{ {
return !!(xfeatures_mask & (1UL << xfeature)); return xfeatures_mask_all & BIT_ULL(xfeature);
} }
/* /*
@ -382,6 +393,33 @@ static void __init setup_xstate_comp_offsets(void)
} }
} }
/*
* Setup offsets of a supervisor-state-only XSAVES buffer:
*
* The offsets stored in xstate_comp_offsets[] only work for one specific
* value of the Requested Feature BitMap (RFBM). In cases where a different
* RFBM value is used, a different set of offsets is required. This set of
* offsets is for when RFBM=xfeatures_mask_supervisor().
*/
static void __init setup_supervisor_only_offsets(void)
{
unsigned int next_offset;
int i;
next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (!xfeature_enabled(i) || !xfeature_is_supervisor(i))
continue;
if (xfeature_is_aligned(i))
next_offset = ALIGN(next_offset, 64);
xstate_supervisor_only_offsets[i] = next_offset;
next_offset += xstate_sizes[i];
}
}
/* /*
* Print out xstate component offsets and sizes * Print out xstate component offsets and sizes
*/ */
@ -415,7 +453,7 @@ static void __init setup_init_fpu_buf(void)
if (boot_cpu_has(X86_FEATURE_XSAVES)) if (boot_cpu_has(X86_FEATURE_XSAVES))
init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
xfeatures_mask; xfeatures_mask_all;
/* /*
* Init all the features state with header.xfeatures being 0x0 * Init all the features state with header.xfeatures being 0x0
@ -438,7 +476,7 @@ static int xfeature_uncompacted_offset(int xfeature_nr)
* format. Checking a supervisor state's uncompacted offset is * format. Checking a supervisor state's uncompacted offset is
* an error. * an error.
*/ */
if (XFEATURE_MASK_SUPERVISOR & BIT_ULL(xfeature_nr)) { if (XFEATURE_MASK_SUPERVISOR_ALL & BIT_ULL(xfeature_nr)) {
WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr); WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
return -1; return -1;
} }
@ -472,10 +510,10 @@ int using_compacted_format(void)
} }
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
int validate_xstate_header(const struct xstate_header *hdr) int validate_user_xstate_header(const struct xstate_header *hdr)
{ {
/* No unknown or supervisor features may be set */ /* No unknown or supervisor features may be set */
if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR)) if (hdr->xfeatures & ~xfeatures_mask_user())
return -EINVAL; return -EINVAL;
/* Userspace must use the uncompacted format */ /* Userspace must use the uncompacted format */
@ -610,15 +648,12 @@ static void do_extra_xstate_size_checks(void)
/* /*
* Get total size of enabled xstates in XCR0/xfeatures_mask. * Get total size of enabled xstates in XCR0 | IA32_XSS.
* *
* Note the SDM's wording here. "sub-function 0" only enumerates * Note the SDM's wording here. "sub-function 0" only enumerates
* the size of the *user* states. If we use it to size a buffer * the size of the *user* states. If we use it to size a buffer
* that we use 'XSAVES' on, we could potentially overflow the * that we use 'XSAVES' on, we could potentially overflow the
* buffer because 'XSAVES' saves system states too. * buffer because 'XSAVES' saves system states too.
*
* Note that we do not currently set any bits on IA32_XSS so
* 'XCR0 | IA32_XSS == XCR0' for now.
*/ */
static unsigned int __init get_xsaves_size(void) static unsigned int __init get_xsaves_size(void)
{ {
@ -700,7 +735,7 @@ static int __init init_xstate_size(void)
*/ */
static void fpu__init_disable_system_xstate(void) static void fpu__init_disable_system_xstate(void)
{ {
xfeatures_mask = 0; xfeatures_mask_all = 0;
cr4_clear_bits(X86_CR4_OSXSAVE); cr4_clear_bits(X86_CR4_OSXSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVE);
} }
@ -735,16 +770,26 @@ void __init fpu__init_system_xstate(void)
return; return;
} }
/*
* Find user xstates supported by the processor.
*/
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
xfeatures_mask = eax + ((u64)edx << 32); xfeatures_mask_all = eax + ((u64)edx << 32);
if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { /*
* Find supervisor xstates supported by the processor.
*/
cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
xfeatures_mask_all |= ecx + ((u64)edx << 32);
if ((xfeatures_mask_user() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
/* /*
* This indicates that something really unexpected happened * This indicates that something really unexpected happened
* with the enumeration. Disable XSAVE and try to continue * with the enumeration. Disable XSAVE and try to continue
* booting without it. This is too early to BUG(). * booting without it. This is too early to BUG().
*/ */
pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
xfeatures_mask_all);
goto out_disable; goto out_disable;
} }
@ -753,10 +798,10 @@ void __init fpu__init_system_xstate(void)
*/ */
for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
if (!boot_cpu_has(xsave_cpuid_features[i])) if (!boot_cpu_has(xsave_cpuid_features[i]))
xfeatures_mask &= ~BIT(i); xfeatures_mask_all &= ~BIT_ULL(i);
} }
xfeatures_mask &= fpu__get_supported_xfeatures_mask(); xfeatures_mask_all &= fpu__get_supported_xfeatures_mask();
/* Enable xstate instructions to be able to continue with initialization: */ /* Enable xstate instructions to be able to continue with initialization: */
fpu__init_cpu_xstate(); fpu__init_cpu_xstate();
@ -768,15 +813,16 @@ void __init fpu__init_system_xstate(void)
* Update info used for ptrace frames; use standard-format size and no * Update info used for ptrace frames; use standard-format size and no
* supervisor xstates: * supervisor xstates:
*/ */
update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR); update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_user());
fpu__init_prepare_fx_sw_frame(); fpu__init_prepare_fx_sw_frame();
setup_init_fpu_buf(); setup_init_fpu_buf();
setup_xstate_comp_offsets(); setup_xstate_comp_offsets();
setup_supervisor_only_offsets();
print_xstate_offset_size(); print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
xfeatures_mask, xfeatures_mask_all,
fpu_kernel_xstate_size, fpu_kernel_xstate_size,
boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
return; return;
@ -795,7 +841,14 @@ void fpu__resume_cpu(void)
* Restore XCR0 on xsave capable CPUs: * Restore XCR0 on xsave capable CPUs:
*/ */
if (boot_cpu_has(X86_FEATURE_XSAVE)) if (boot_cpu_has(X86_FEATURE_XSAVE))
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
/*
* Restore IA32_XSS. The same CPUID bit enumerates support
* of XSAVES and MSR_IA32_XSS.
*/
if (boot_cpu_has(X86_FEATURE_XSAVES))
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
} }
/* /*
@ -840,10 +893,9 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
/* /*
* We should not ever be requesting features that we * We should not ever be requesting features that we
* have not enabled. Remember that xfeatures_mask is * have not enabled.
* what we write to the XCR0 register.
*/ */
WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)), WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)),
"get of unsupported state"); "get of unsupported state");
/* /*
* This assumes the last 'xsave*' instruction to * This assumes the last 'xsave*' instruction to
@ -1010,7 +1062,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
*/ */
memset(&header, 0, sizeof(header)); memset(&header, 0, sizeof(header));
header.xfeatures = xsave->header.xfeatures; header.xfeatures = xsave->header.xfeatures;
header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; header.xfeatures &= xfeatures_mask_user();
if (header.xfeatures & XFEATURE_MASK_FP) if (header.xfeatures & XFEATURE_MASK_FP)
copy_part(0, off_mxcsr, copy_part(0, off_mxcsr,
@ -1090,7 +1142,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
*/ */
memset(&header, 0, sizeof(header)); memset(&header, 0, sizeof(header));
header.xfeatures = xsave->header.xfeatures; header.xfeatures = xsave->header.xfeatures;
header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; header.xfeatures &= xfeatures_mask_user();
/* /*
* Copy xregs_state->header: * Copy xregs_state->header:
@ -1157,7 +1209,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
memcpy(&hdr, kbuf + offset, size); memcpy(&hdr, kbuf + offset, size);
if (validate_xstate_header(&hdr)) if (validate_user_xstate_header(&hdr))
return -EINVAL; return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) { for (i = 0; i < XFEATURE_MAX; i++) {
@ -1183,7 +1235,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
* The state that came in from userspace was user-state only. * The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures': * Mask all the user states out of 'xfeatures':
*/ */
xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
/* /*
* Add back in the features that came in from userspace: * Add back in the features that came in from userspace:
@ -1211,7 +1263,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
if (__copy_from_user(&hdr, ubuf + offset, size)) if (__copy_from_user(&hdr, ubuf + offset, size))
return -EFAULT; return -EFAULT;
if (validate_xstate_header(&hdr)) if (validate_user_xstate_header(&hdr))
return -EINVAL; return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) { for (i = 0; i < XFEATURE_MAX; i++) {
@ -1239,7 +1291,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
* The state that came in from userspace was user-state only. * The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures': * Mask all the user states out of 'xfeatures':
*/ */
xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
/* /*
* Add back in the features that came in from userspace: * Add back in the features that came in from userspace:
@ -1249,6 +1301,61 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
return 0; return 0;
} }
/*
 * Save only supervisor states to the kernel buffer. This blows away all
 * old states, and is intended to be used only in __fpu__restore_sig(), where
 * user states are restored from the user buffer.
 *
 * @xstate: kernel XSAVE buffer; it is overwritten by XSAVES with the
 *          supervisor states and then rearranged in place.
 *
 * Returns nothing. Nothing is written when XSAVES is unavailable or when
 * no supervisor feature is enabled.
 */
void copy_supervisor_to_kernel(struct xregs_state *xstate)
{
	struct xstate_header *header;
	/*
	 * Keep the bit bounds signed, like the loop index below. With
	 * unsigned 64-bit bounds the 'i >= min_bit' test is evaluated as an
	 * unsigned comparison, which can never become false for min_bit == 0
	 * and would let 'i' wrap to -1 (negative shift, out-of-bounds index).
	 */
	int max_bit, min_bit;
	u32 lmask, hmask;
	int err, i;

	if (WARN_ON(!boot_cpu_has(X86_FEATURE_XSAVES)))
		return;

	if (!xfeatures_mask_supervisor())
		return;

	max_bit = __fls(xfeatures_mask_supervisor());
	min_bit = __ffs(xfeatures_mask_supervisor());

	/* Low and high 32-bit halves of the supervisor feature mask. */
	lmask = xfeatures_mask_supervisor();
	hmask = xfeatures_mask_supervisor() >> 32;
	XSTATE_OP(XSAVES, xstate, lmask, hmask, err);

	/* We should never fault when copying to a kernel buffer: */
	if (WARN_ON_FPU(err))
		return;

	/*
	 * At this point, the buffer has only supervisor states and must be
	 * converted back to normal kernel format.
	 */
	header = &xstate->header;
	header->xcomp_bv |= xfeatures_mask_all;

	/*
	 * This only moves states up in the buffer. Start with
	 * the last state and move backwards so that states are
	 * not overwritten until after they are moved. Note:
	 * memmove() allows overlapping src/dst buffers.
	 */
	for (i = max_bit; i >= min_bit; i--) {
		u8 *xbuf = (u8 *)xstate;

		/* Skip features that are not present in the saved header. */
		if (!((header->xfeatures >> i) & 1))
			continue;

		/* Move xfeature 'i' into its normal location */
		memmove(xbuf + xstate_comp_offsets[i],
			xbuf + xstate_supervisor_only_offsets[i],
			xstate_sizes[i]);
	}
}
#ifdef CONFIG_PROC_PID_ARCH_STATUS #ifdef CONFIG_PROC_PID_ARCH_STATUS
/* /*
* Report the amount of time elapsed in millisecond since last AVX512 * Report the amount of time elapsed in millisecond since last AVX512

View File

@ -191,7 +191,7 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk); flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
fpu__clear(&tsk->thread.fpu); fpu__clear_all(&tsk->thread.fpu);
} }
void disable_TSC(void) void disable_TSC(void)

View File

@ -732,7 +732,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/* /*
* Ensure the signal handler starts with the new fpu state. * Ensure the signal handler starts with the new fpu state.
*/ */
fpu__clear(fpu); fpu__clear_user_states(fpu);
} }
signal_setup_done(failed, ksig, stepping); signal_setup_done(failed, ksig, stepping);
} }