Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fpu changes from Ingo Molnar: "There are two main areas of changes: - Rework of the extended FPU state code to robustify the kernel's usage of cpuid provided xstate sizes - and related changes (Dave Hansen) - math emulation enhancements: new modern FPU instructions support, with testcases, plus cleanups (Denys Vlasenko)" * 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits) x86/fpu: Fixup uninitialized feature_name warning x86/fpu/math-emu: Add support for FISTTP instructions x86/fpu/math-emu, selftests: Add test for FISTTP instructions x86/fpu/math-emu: Add support for FCMOVcc insns x86/fpu/math-emu: Add support for F[U]COMI[P] insns x86/fpu/math-emu: Remove define layer for undocumented opcodes x86/fpu/math-emu, selftests: Add tests for FCMOV and FCOMI insns x86/fpu/math-emu: Remove !NO_UNDOC_CODE x86/fpu: Check CPU-provided sizes against struct declarations x86/fpu: Check to ensure increasing-offset xstate offsets x86/fpu: Correct and check XSAVE xstate size calculations x86/fpu: Add C structures for AVX-512 state components x86/fpu: Rework YMM definition x86/fpu/mpx: Rework MPX 'xstate' types x86/fpu: Add xfeature_enabled() helper instead of test_bit() x86/fpu: Remove 'xfeature_nr' x86/fpu: Rework XSTATE_* macros to remove magic '2' x86/fpu: Rename XFEATURES_NR_MAX x86/fpu: Rename XSAVE macros x86/fpu: Remove partial LWP support definitions ...
This commit is contained in:
commit
ce4d72fac1
|
@ -567,7 +567,8 @@ static int __init camellia_aesni_init(void)
|
|||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
|
||||
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
|
||||
&feature_name)) {
|
||||
pr_info("CPU feature '%s' is not supported.\n", feature_name);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
|
|
@ -559,7 +559,8 @@ static int __init camellia_aesni_init(void)
|
|||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
|
||||
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
|
||||
&feature_name)) {
|
||||
pr_info("CPU feature '%s' is not supported.\n", feature_name);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
|
|
@ -469,7 +469,8 @@ static int __init cast5_init(void)
|
|||
{
|
||||
const char *feature_name;
|
||||
|
||||
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
|
||||
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
|
||||
&feature_name)) {
|
||||
pr_info("CPU feature '%s' is not supported.\n", feature_name);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
|
|
@ -591,7 +591,8 @@ static int __init cast6_init(void)
|
|||
{
|
||||
const char *feature_name;
|
||||
|
||||
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
|
||||
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
|
||||
&feature_name)) {
|
||||
pr_info("CPU feature '%s' is not supported.\n", feature_name);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
|
|
@ -130,7 +130,7 @@ static int __init chacha20_simd_mod_init(void)
|
|||
|
||||
#ifdef CONFIG_AS_AVX2
|
||||
chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
|
||||
cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
|
||||
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
|
||||
#endif
|
||||
return crypto_register_alg(&alg);
|
||||
}
|
||||
|
|
|
@ -184,7 +184,7 @@ static int __init poly1305_simd_mod_init(void)
|
|||
|
||||
#ifdef CONFIG_AS_AVX2
|
||||
poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
|
||||
cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
|
||||
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
|
||||
alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
|
||||
if (poly1305_use_avx2)
|
||||
alg.descsize += 10 * sizeof(u32);
|
||||
|
|
|
@ -542,7 +542,8 @@ static int __init init(void)
|
|||
pr_info("AVX2 instructions are not detected.\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
|
||||
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
|
||||
&feature_name)) {
|
||||
pr_info("CPU feature '%s' is not supported.\n", feature_name);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
|
|
@ -597,7 +597,8 @@ static int __init serpent_init(void)
|
|||
{
|
||||
const char *feature_name;
|
||||
|
||||
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
|
||||
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
|
||||
&feature_name)) {
|
||||
pr_info("CPU feature '%s' is not supported.\n", feature_name);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
|
|
@ -121,7 +121,7 @@ static struct shash_alg alg = {
|
|||
#ifdef CONFIG_AS_AVX
|
||||
static bool __init avx_usable(void)
|
||||
{
|
||||
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
|
||||
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
|
||||
if (cpu_has_avx)
|
||||
pr_info("AVX detected but unusable.\n");
|
||||
return false;
|
||||
|
|
|
@ -130,7 +130,7 @@ static struct shash_alg algs[] = { {
|
|||
#ifdef CONFIG_AS_AVX
|
||||
static bool __init avx_usable(void)
|
||||
{
|
||||
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
|
||||
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
|
||||
if (cpu_has_avx)
|
||||
pr_info("AVX detected but unusable.\n");
|
||||
return false;
|
||||
|
|
|
@ -129,7 +129,7 @@ static struct shash_alg algs[] = { {
|
|||
#ifdef CONFIG_AS_AVX
|
||||
static bool __init avx_usable(void)
|
||||
{
|
||||
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
|
||||
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
|
||||
if (cpu_has_avx)
|
||||
pr_info("AVX detected but unusable.\n");
|
||||
return false;
|
||||
|
|
|
@ -558,7 +558,7 @@ static int __init twofish_init(void)
|
|||
{
|
||||
const char *feature_name;
|
||||
|
||||
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
|
||||
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) {
|
||||
pr_info("CPU feature '%s' is not supported.\n", feature_name);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
|
|
@ -95,63 +95,122 @@ struct swregs_state {
|
|||
/*
|
||||
* List of XSAVE features Linux knows about:
|
||||
*/
|
||||
enum xfeature_bit {
|
||||
XSTATE_BIT_FP,
|
||||
XSTATE_BIT_SSE,
|
||||
XSTATE_BIT_YMM,
|
||||
XSTATE_BIT_BNDREGS,
|
||||
XSTATE_BIT_BNDCSR,
|
||||
XSTATE_BIT_OPMASK,
|
||||
XSTATE_BIT_ZMM_Hi256,
|
||||
XSTATE_BIT_Hi16_ZMM,
|
||||
enum xfeature {
|
||||
XFEATURE_FP,
|
||||
XFEATURE_SSE,
|
||||
/*
|
||||
* Values above here are "legacy states".
|
||||
* Those below are "extended states".
|
||||
*/
|
||||
XFEATURE_YMM,
|
||||
XFEATURE_BNDREGS,
|
||||
XFEATURE_BNDCSR,
|
||||
XFEATURE_OPMASK,
|
||||
XFEATURE_ZMM_Hi256,
|
||||
XFEATURE_Hi16_ZMM,
|
||||
|
||||
XFEATURES_NR_MAX,
|
||||
XFEATURE_MAX,
|
||||
};
|
||||
|
||||
#define XSTATE_FP (1 << XSTATE_BIT_FP)
|
||||
#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
|
||||
#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
|
||||
#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
|
||||
#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
|
||||
#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
|
||||
#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
|
||||
#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
|
||||
#define XFEATURE_MASK_FP (1 << XFEATURE_FP)
|
||||
#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE)
|
||||
#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM)
|
||||
#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS)
|
||||
#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR)
|
||||
#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
|
||||
#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
|
||||
#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
|
||||
|
||||
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
|
||||
#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
|
||||
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
|
||||
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
|
||||
| XFEATURE_MASK_ZMM_Hi256 \
|
||||
| XFEATURE_MASK_Hi16_ZMM)
|
||||
|
||||
#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
|
||||
|
||||
struct reg_128_bit {
|
||||
u8 regbytes[128/8];
|
||||
};
|
||||
struct reg_256_bit {
|
||||
u8 regbytes[256/8];
|
||||
};
|
||||
struct reg_512_bit {
|
||||
u8 regbytes[512/8];
|
||||
};
|
||||
|
||||
/*
|
||||
* State component 2:
|
||||
*
|
||||
* There are 16x 256-bit AVX registers named YMM0-YMM15.
|
||||
* The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
|
||||
* and are stored in 'struct fxregs_state::xmm_space[]'.
|
||||
* and are stored in 'struct fxregs_state::xmm_space[]' in the
|
||||
* "legacy" area.
|
||||
*
|
||||
* The high 128 bits are stored here:
|
||||
* 16x 128 bits == 256 bytes.
|
||||
* The high 128 bits are stored here.
|
||||
*/
|
||||
struct ymmh_struct {
|
||||
u8 ymmh_space[256];
|
||||
};
|
||||
|
||||
/* We don't support LWP yet: */
|
||||
struct lwp_struct {
|
||||
u8 reserved[128];
|
||||
};
|
||||
struct reg_128_bit hi_ymm[16];
|
||||
} __packed;
|
||||
|
||||
/* Intel MPX support: */
|
||||
struct bndreg {
|
||||
|
||||
struct mpx_bndreg {
|
||||
u64 lower_bound;
|
||||
u64 upper_bound;
|
||||
} __packed;
|
||||
/*
|
||||
* State component 3 is used for the 4 128-bit bounds registers
|
||||
*/
|
||||
struct mpx_bndreg_state {
|
||||
struct mpx_bndreg bndreg[4];
|
||||
} __packed;
|
||||
|
||||
struct bndcsr {
|
||||
/*
|
||||
* State component 4 is used for the 64-bit user-mode MPX
|
||||
* configuration register BNDCFGU and the 64-bit MPX status
|
||||
* register BNDSTATUS. We call the pair "BNDCSR".
|
||||
*/
|
||||
struct mpx_bndcsr {
|
||||
u64 bndcfgu;
|
||||
u64 bndstatus;
|
||||
} __packed;
|
||||
|
||||
struct mpx_struct {
|
||||
struct bndreg bndreg[4];
|
||||
struct bndcsr bndcsr;
|
||||
};
|
||||
/*
|
||||
* The BNDCSR state is padded out to be 64-bytes in size.
|
||||
*/
|
||||
struct mpx_bndcsr_state {
|
||||
union {
|
||||
struct mpx_bndcsr bndcsr;
|
||||
u8 pad_to_64_bytes[64];
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/* AVX-512 Components: */
|
||||
|
||||
/*
|
||||
* State component 5 is used for the 8 64-bit opmask registers
|
||||
* k0-k7 (opmask state).
|
||||
*/
|
||||
struct avx_512_opmask_state {
|
||||
u64 opmask_reg[8];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* State component 6 is used for the upper 256 bits of the
|
||||
* registers ZMM0-ZMM15. These 16 256-bit values are denoted
|
||||
* ZMM0_H-ZMM15_H (ZMM_Hi256 state).
|
||||
*/
|
||||
struct avx_512_zmm_uppers_state {
|
||||
struct reg_256_bit zmm_upper[16];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* State component 7 is used for the 16 512-bit registers
|
||||
* ZMM16-ZMM31 (Hi16_ZMM state).
|
||||
*/
|
||||
struct avx_512_hi16_state {
|
||||
struct reg_512_bit hi16_zmm[16];
|
||||
} __packed;
|
||||
|
||||
struct xstate_header {
|
||||
u64 xfeatures;
|
||||
|
@ -159,22 +218,19 @@ struct xstate_header {
|
|||
u64 reserved[6];
|
||||
} __attribute__((packed));
|
||||
|
||||
/* New processor state extensions should be added here: */
|
||||
#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \
|
||||
sizeof(struct lwp_struct) + \
|
||||
sizeof(struct mpx_struct) )
|
||||
/*
|
||||
* This is our most modern FPU state format, as saved by the XSAVE
|
||||
* and restored by the XRSTOR instructions.
|
||||
*
|
||||
* It consists of a legacy fxregs portion, an xstate header and
|
||||
* subsequent fixed size areas as defined by the xstate header.
|
||||
* Not all CPUs support all the extensions.
|
||||
* subsequent areas as defined by the xstate header. Not all CPUs
|
||||
* support all the extensions, so the size of the extended area
|
||||
* can vary quite a bit between CPUs.
|
||||
*/
|
||||
struct xregs_state {
|
||||
struct fxregs_state i387;
|
||||
struct xstate_header header;
|
||||
u8 __reserved[XSTATE_RESERVE];
|
||||
u8 extended_state_area[0];
|
||||
} __attribute__ ((packed, aligned (64)));
|
||||
|
||||
/*
|
||||
|
@ -182,7 +238,9 @@ struct xregs_state {
|
|||
* put together, so that we can pick the right one runtime.
|
||||
*
|
||||
* The size of the structure is determined by the largest
|
||||
* member - which is the xsave area:
|
||||
* member - which is the xsave area. The padding is there
|
||||
* to ensure that statically-allocated task_structs (just
|
||||
* the init_task today) have enough space.
|
||||
*/
|
||||
union fpregs_state {
|
||||
struct fregs_state fsave;
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#include <linux/uaccess.h>
|
||||
|
||||
/* Bit 63 of XCR0 is reserved for future expansion */
|
||||
#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
|
||||
#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
|
||||
|
||||
#define XSTATE_CPUID 0x0000000d
|
||||
|
||||
|
@ -19,14 +19,18 @@
|
|||
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
|
||||
|
||||
/* Supported features which support lazy state saving */
|
||||
#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
|
||||
| XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
|
||||
#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
|
||||
XFEATURE_MASK_SSE | \
|
||||
XFEATURE_MASK_YMM | \
|
||||
XFEATURE_MASK_OPMASK | \
|
||||
XFEATURE_MASK_ZMM_Hi256 | \
|
||||
XFEATURE_MASK_Hi16_ZMM)
|
||||
|
||||
/* Supported features which require eager state saving */
|
||||
#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
|
||||
#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
|
||||
|
||||
/* All currently supported features */
|
||||
#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
|
||||
#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define REX_PREFIX "0x48, "
|
||||
|
@ -40,6 +44,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
|
|||
|
||||
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
|
||||
|
||||
void fpu__xstate_clear_all_cpu_caps(void);
|
||||
void *get_xsave_addr(struct xregs_state *xsave, int xstate);
|
||||
const void *get_xsave_field_ptr(int xstate_field);
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
TRACE_EVENT(mpx_bounds_register_exception,
|
||||
|
||||
TP_PROTO(void *addr_referenced,
|
||||
const struct bndreg *bndreg),
|
||||
const struct mpx_bndreg *bndreg),
|
||||
TP_ARGS(addr_referenced, bndreg),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
|
@ -44,7 +44,7 @@ TRACE_EVENT(mpx_bounds_register_exception,
|
|||
|
||||
TRACE_EVENT(bounds_exception_mpx,
|
||||
|
||||
TP_PROTO(const struct bndcsr *bndcsr),
|
||||
TP_PROTO(const struct mpx_bndcsr *bndcsr),
|
||||
TP_ARGS(bndcsr),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
|
@ -116,7 +116,8 @@ TRACE_EVENT(mpx_new_bounds_table,
|
|||
/*
|
||||
* This gets used outside of MPX-specific code, so we need a stub.
|
||||
*/
|
||||
static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr)
|
||||
static inline
|
||||
void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
@ -290,11 +290,11 @@ static void __init fpu__init_system_ctx_switch(void)
|
|||
if (cpu_has_xsaveopt && eagerfpu != DISABLE)
|
||||
eagerfpu = ENABLE;
|
||||
|
||||
if (xfeatures_mask & XSTATE_EAGER) {
|
||||
if (xfeatures_mask & XFEATURE_MASK_EAGER) {
|
||||
if (eagerfpu == DISABLE) {
|
||||
pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
|
||||
xfeatures_mask & XSTATE_EAGER);
|
||||
xfeatures_mask &= ~XSTATE_EAGER;
|
||||
xfeatures_mask & XFEATURE_MASK_EAGER);
|
||||
xfeatures_mask &= ~XFEATURE_MASK_EAGER;
|
||||
} else {
|
||||
eagerfpu = ENABLE;
|
||||
}
|
||||
|
@ -354,17 +354,7 @@ static int __init x86_noxsave_setup(char *s)
|
|||
if (strlen(s))
|
||||
return 0;
|
||||
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX2);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512F);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
|
||||
setup_clear_cpu_cap(X86_FEATURE_MPX);
|
||||
fpu__xstate_clear_all_cpu_caps();
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -66,7 +66,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
|
|||
* presence of FP and SSE state.
|
||||
*/
|
||||
if (cpu_has_xsave)
|
||||
fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE;
|
||||
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -326,7 +326,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
|
|||
* presence of FP.
|
||||
*/
|
||||
if (cpu_has_xsave)
|
||||
fpu->state.xsave.header.xfeatures |= XSTATE_FP;
|
||||
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -107,7 +107,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
|
|||
* header as well as change any contents in the memory layout.
|
||||
* xrestore as part of sigreturn will capture all the changes.
|
||||
*/
|
||||
xfeatures |= XSTATE_FPSSE;
|
||||
xfeatures |= XFEATURE_MASK_FPSSE;
|
||||
|
||||
err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures);
|
||||
|
||||
|
@ -207,7 +207,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
|
|||
* layout and not enabled by the OS.
|
||||
*/
|
||||
if (fx_only)
|
||||
header->xfeatures = XSTATE_FPSSE;
|
||||
header->xfeatures = XFEATURE_MASK_FPSSE;
|
||||
else
|
||||
header->xfeatures &= (xfeatures_mask & xfeatures);
|
||||
}
|
||||
|
@ -230,7 +230,7 @@ static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_
|
|||
{
|
||||
if (use_xsave()) {
|
||||
if ((unsigned long)buf % 64 || fx_only) {
|
||||
u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE;
|
||||
u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
|
||||
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
|
||||
return copy_user_to_fxregs(buf);
|
||||
} else {
|
||||
|
|
|
@ -31,12 +31,28 @@ static const char *xfeature_names[] =
|
|||
*/
|
||||
u64 xfeatures_mask __read_mostly;
|
||||
|
||||
static unsigned int xstate_offsets[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1};
|
||||
static unsigned int xstate_sizes[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1};
|
||||
static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
|
||||
static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
|
||||
static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
|
||||
|
||||
/* The number of supported xfeatures in xfeatures_mask: */
|
||||
static unsigned int xfeatures_nr;
|
||||
/*
|
||||
* Clear all of the X86_FEATURE_* bits that are unavailable
|
||||
* when the CPU has no XSAVE support.
|
||||
*/
|
||||
void fpu__xstate_clear_all_cpu_caps(void)
|
||||
{
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX2);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512F);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
|
||||
setup_clear_cpu_cap(X86_FEATURE_MPX);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return whether the system supports a given xfeature.
|
||||
|
@ -53,7 +69,7 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
|
|||
/*
|
||||
* So we use FLS here to be able to print the most advanced
|
||||
* feature that was requested but is missing. So if a driver
|
||||
* asks about "XSTATE_SSE | XSTATE_YMM" we'll print the
|
||||
* asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
|
||||
* missing AVX feature - this is the most informative message
|
||||
* to users:
|
||||
*/
|
||||
|
@ -112,7 +128,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
|
|||
/*
|
||||
* FP is in init state
|
||||
*/
|
||||
if (!(xfeatures & XSTATE_FP)) {
|
||||
if (!(xfeatures & XFEATURE_MASK_FP)) {
|
||||
fx->cwd = 0x37f;
|
||||
fx->swd = 0;
|
||||
fx->twd = 0;
|
||||
|
@ -125,7 +141,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
|
|||
/*
|
||||
* SSE is in init state
|
||||
*/
|
||||
if (!(xfeatures & XSTATE_SSE))
|
||||
if (!(xfeatures & XFEATURE_MASK_SSE))
|
||||
memset(&fx->xmm_space[0], 0, 256);
|
||||
|
||||
/*
|
||||
|
@ -168,26 +184,44 @@ void fpu__init_cpu_xstate(void)
|
|||
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note that in the future we will likely need a pair of
|
||||
* functions here: one for user xstates and the other for
|
||||
* system xstates. For now, they are the same.
|
||||
*/
|
||||
static int xfeature_enabled(enum xfeature xfeature)
|
||||
{
|
||||
return !!(xfeatures_mask & (1UL << xfeature));
|
||||
}
|
||||
|
||||
/*
|
||||
* Record the offsets and sizes of various xstates contained
|
||||
* in the XSAVE state memory layout.
|
||||
*
|
||||
* ( Note that certain features might be non-present; their
|
||||
* entries keep the -1 value set by the array initializers. )
|
||||
*/
|
||||
static void __init setup_xstate_features(void)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx, leaf;
|
||||
u32 eax, ebx, ecx, edx, i;
|
||||
/* start at the beginning of the "extended state" */
|
||||
unsigned int last_good_offset = offsetof(struct xregs_state,
|
||||
extended_state_area);
|
||||
|
||||
xfeatures_nr = fls64(xfeatures_mask);
|
||||
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
|
||||
if (!xfeature_enabled(i))
|
||||
continue;
|
||||
|
||||
for (leaf = 2; leaf < xfeatures_nr; leaf++) {
|
||||
cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);
|
||||
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
|
||||
xstate_offsets[i] = ebx;
|
||||
xstate_sizes[i] = eax;
|
||||
/*
|
||||
* In our xstate size checks, we assume that the
|
||||
* highest-numbered xstate feature has the
|
||||
* highest offset in the buffer. Ensure it does.
|
||||
*/
|
||||
WARN_ONCE(last_good_offset > xstate_offsets[i],
|
||||
"x86/fpu: misordered xstate at %d\n", last_good_offset);
|
||||
last_good_offset = xstate_offsets[i];
|
||||
|
||||
xstate_offsets[leaf] = ebx;
|
||||
xstate_sizes[leaf] = eax;
|
||||
|
||||
printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax);
|
||||
printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, ebx, i, eax);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -204,14 +238,14 @@ static void __init print_xstate_feature(u64 xstate_mask)
|
|||
*/
|
||||
static void __init print_xstate_features(void)
|
||||
{
|
||||
print_xstate_feature(XSTATE_FP);
|
||||
print_xstate_feature(XSTATE_SSE);
|
||||
print_xstate_feature(XSTATE_YMM);
|
||||
print_xstate_feature(XSTATE_BNDREGS);
|
||||
print_xstate_feature(XSTATE_BNDCSR);
|
||||
print_xstate_feature(XSTATE_OPMASK);
|
||||
print_xstate_feature(XSTATE_ZMM_Hi256);
|
||||
print_xstate_feature(XSTATE_Hi16_ZMM);
|
||||
print_xstate_feature(XFEATURE_MASK_FP);
|
||||
print_xstate_feature(XFEATURE_MASK_SSE);
|
||||
print_xstate_feature(XFEATURE_MASK_YMM);
|
||||
print_xstate_feature(XFEATURE_MASK_BNDREGS);
|
||||
print_xstate_feature(XFEATURE_MASK_BNDCSR);
|
||||
print_xstate_feature(XFEATURE_MASK_OPMASK);
|
||||
print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
|
||||
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -233,8 +267,8 @@ static void __init setup_xstate_comp(void)
|
|||
xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
|
||||
|
||||
if (!cpu_has_xsaves) {
|
||||
for (i = 2; i < xfeatures_nr; i++) {
|
||||
if (test_bit(i, (unsigned long *)&xfeatures_mask)) {
|
||||
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
|
||||
if (xfeature_enabled(i)) {
|
||||
xstate_comp_offsets[i] = xstate_offsets[i];
|
||||
xstate_comp_sizes[i] = xstate_sizes[i];
|
||||
}
|
||||
|
@ -242,15 +276,16 @@ static void __init setup_xstate_comp(void)
|
|||
return;
|
||||
}
|
||||
|
||||
xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
|
||||
xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
|
||||
FXSAVE_SIZE + XSAVE_HDR_SIZE;
|
||||
|
||||
for (i = 2; i < xfeatures_nr; i++) {
|
||||
if (test_bit(i, (unsigned long *)&xfeatures_mask))
|
||||
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
|
||||
if (xfeature_enabled(i))
|
||||
xstate_comp_sizes[i] = xstate_sizes[i];
|
||||
else
|
||||
xstate_comp_sizes[i] = 0;
|
||||
|
||||
if (i > 2)
|
||||
if (i > FIRST_EXTENDED_XFEATURE)
|
||||
xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
|
||||
+ xstate_comp_sizes[i-1];
|
||||
|
||||
|
@ -290,27 +325,280 @@ static void __init setup_init_fpu_buf(void)
|
|||
copy_xregs_to_kernel_booting(&init_fpstate.xsave);
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate total size of enabled xstates in XCR0/xfeatures_mask.
|
||||
*/
|
||||
static void __init init_xstate_size(void)
|
||||
static int xfeature_is_supervisor(int xfeature_nr)
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
/*
|
||||
* We currently do not support supervisor states, but if
|
||||
* we did, we could find out like this.
|
||||
*
|
||||
* SDM says: If state component i is a user state component,
|
||||
* ECX[0] return 0; if state component i is a supervisor
|
||||
* state component, ECX[0] returns 1.
|
||||
u32 eax, ebx, ecx, edx;
|
||||
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx;
|
||||
return !!(ecx & 1);
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
static int xfeature_is_user(int xfeature_nr)
|
||||
{
|
||||
return !xfeature_is_supervisor(xfeature_nr);
|
||||
}
|
||||
*/
|
||||
|
||||
/*
 * This check is important because it is easy to get the
 * XFEATURE_MASK_* bit masks confused with the XFEATURE_*
 * feature numbers.
 *
 * Warns when @nr is not the number of an extended xfeature,
 * i.e. when it lies outside [FIRST_EXTENDED_XFEATURE, XFEATURE_MAX).
 *
 * @nr is parenthesized so that an expression argument (for
 * example a conditional expression) is compared as a whole.
 */
#define CHECK_XFEATURE(nr) do {				\
	WARN_ON((nr) < FIRST_EXTENDED_XFEATURE);	\
	WARN_ON((nr) >= XFEATURE_MAX);			\
} while (0)
|
||||
|
||||
/*
|
||||
* We could cache this like xstate_size[], but we only use
|
||||
* it here, so it would be a waste of space.
|
||||
*/
|
||||
static int xfeature_is_aligned(int xfeature_nr)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
CHECK_XFEATURE(xfeature_nr);
|
||||
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
|
||||
/*
|
||||
* The value returned by ECX[1] indicates the alignment
|
||||
* of state component i when the compacted format
|
||||
* of the extended region of an XSAVE area is used
|
||||
*/
|
||||
return !!(ecx & 2);
|
||||
}
|
||||
|
||||
static int xfeature_uncompacted_offset(int xfeature_nr)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
CHECK_XFEATURE(xfeature_nr);
|
||||
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
|
||||
return ebx;
|
||||
}
|
||||
|
||||
static int xfeature_size(int xfeature_nr)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
CHECK_XFEATURE(xfeature_nr);
|
||||
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
|
||||
return eax;
|
||||
}
|
||||
|
||||
/*
|
||||
* 'XSAVES' implies two different things:
|
||||
* 1. saving of supervisor/system state
|
||||
* 2. using the compacted format
|
||||
*
|
||||
* Use this function when dealing with the compacted format so
|
||||
* that it is obvious which aspect of 'XSAVES' is being handled
|
||||
* by the calling code.
|
||||
*/
|
||||
static int using_compacted_format(void)
|
||||
{
|
||||
return cpu_has_xsaves;
|
||||
}
|
||||
|
||||
static void __xstate_dump_leaves(void)
|
||||
{
|
||||
int i;
|
||||
u32 eax, ebx, ecx, edx;
|
||||
static int should_dump = 1;
|
||||
|
||||
if (!should_dump)
|
||||
return;
|
||||
should_dump = 0;
|
||||
/*
|
||||
* Dump out a few leaves past the ones that we support
|
||||
* just in case there are some goodies up there
|
||||
*/
|
||||
for (i = 0; i < XFEATURE_MAX + 10; i++) {
|
||||
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
|
||||
pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
|
||||
XSTATE_CPUID, i, eax, ebx, ecx, edx);
|
||||
}
|
||||
}
|
||||
|
||||
#define XSTATE_WARN_ON(x) do { \
|
||||
if (WARN_ONCE(x, "XSAVE consistency problem, dumping leaves")) { \
|
||||
__xstate_dump_leaves(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
 * When xstate number @nr equals @nr_macro, warn once if the size
 * @sz differs from sizeof(@__struct), and then call
 * __xstate_dump_leaves().
 *
 * @sz, @nr and @nr_macro are parenthesized so that expression
 * arguments are compared as a whole; @__struct is a type name and
 * is only ever used inside sizeof().
 */
#define XCHECK_SZ(sz, nr, nr_macro, __struct) do {			\
	if (((nr) == (nr_macro)) &&					\
	    WARN_ONCE((sz) != sizeof(__struct),				\
		"%s: struct is %zu bytes, cpu state %d bytes\n",	\
		__stringify(nr_macro), sizeof(__struct), (sz))) {	\
		__xstate_dump_leaves();					\
	}								\
} while (0)
|
||||
|
||||
/*
|
||||
* We have a C struct for each 'xstate'. We need to ensure
|
||||
* that our software representation matches what the CPU
|
||||
* tells us about the state's size.
|
||||
*/
|
||||
static void check_xstate_against_struct(int nr)
|
||||
{
|
||||
/*
|
||||
* Ask the CPU for the size of the state.
|
||||
*/
|
||||
int sz = xfeature_size(nr);
|
||||
/*
|
||||
* Match each CPU state with the corresponding software
|
||||
* structure.
|
||||
*/
|
||||
XCHECK_SZ(sz, nr, XFEATURE_YMM, struct ymmh_struct);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_BNDREGS, struct mpx_bndreg_state);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_BNDCSR, struct mpx_bndcsr_state);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_OPMASK, struct avx_512_opmask_state);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state);
|
||||
|
||||
/*
|
||||
* Make *SURE* to add any feature numbers in below if
|
||||
* there are "holes" in the xsave state component
|
||||
* numbers.
|
||||
*/
|
||||
if ((nr < XFEATURE_YMM) ||
|
||||
(nr >= XFEATURE_MAX)) {
|
||||
WARN_ONCE(1, "no structure for xstate: %d\n", nr);
|
||||
XSTATE_WARN_ON(1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This essentially double-checks what the cpu told us about
|
||||
* how large the XSAVE buffer needs to be. We are recalculating
|
||||
* it to be safe.
|
||||
*/
|
||||
static void do_extra_xstate_size_checks(void)
|
||||
{
|
||||
int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
|
||||
int i;
|
||||
|
||||
if (!cpu_has_xsaves) {
|
||||
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
|
||||
xstate_size = ebx;
|
||||
return;
|
||||
}
|
||||
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
|
||||
if (!xfeature_enabled(i))
|
||||
continue;
|
||||
|
||||
xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
|
||||
for (i = 2; i < 64; i++) {
|
||||
if (test_bit(i, (unsigned long *)&xfeatures_mask)) {
|
||||
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
|
||||
xstate_size += eax;
|
||||
}
|
||||
check_xstate_against_struct(i);
|
||||
/*
|
||||
* Supervisor state components can be managed only by
|
||||
* XSAVES, which is compacted-format only.
|
||||
*/
|
||||
if (!using_compacted_format())
|
||||
XSTATE_WARN_ON(xfeature_is_supervisor(i));
|
||||
|
||||
/* Align from the end of the previous feature */
|
||||
if (xfeature_is_aligned(i))
|
||||
paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64);
|
||||
/*
|
||||
* The offset of a given state in the non-compacted
|
||||
* format is given to us in a CPUID leaf. We check
|
||||
* them for being ordered (increasing offsets) in
|
||||
* setup_xstate_features().
|
||||
*/
|
||||
if (!using_compacted_format())
|
||||
paranoid_xstate_size = xfeature_uncompacted_offset(i);
|
||||
/*
|
||||
* The compacted-format offset always depends on where
|
||||
* the previous state ended.
|
||||
*/
|
||||
paranoid_xstate_size += xfeature_size(i);
|
||||
}
|
||||
XSTATE_WARN_ON(paranoid_xstate_size != xstate_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate total size of enabled xstates in XCR0/xfeatures_mask.
|
||||
*
|
||||
* Note the SDM's wording here. "sub-function 0" only enumerates
|
||||
* the size of the *user* states. If we use it to size a buffer
|
||||
* that we use 'XSAVES' on, we could potentially overflow the
|
||||
* buffer because 'XSAVES' saves system states too.
|
||||
*
|
||||
* Note that we do not currently set any bits on IA32_XSS so
|
||||
* 'XCR0 | IA32_XSS == XCR0' for now.
|
||||
*/
|
||||
static unsigned int __init calculate_xstate_size(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int subfunction;

	/*
	 * CPUID function 0DH reports the required buffer size in EBX:
	 *
	 * - sub-function 0: size needed by the XSAVE instruction for an
	 *   XSAVE area containing all the *user* state components
	 *   corresponding to bits currently set in XCR0.
	 *
	 * - sub-function 1: size needed by the XSAVES instruction for an
	 *   XSAVE area containing all the state components corresponding
	 *   to bits currently set in XCR0 | IA32_XSS.
	 */
	if (cpu_has_xsaves)
		subfunction = 1;
	else
		subfunction = 0;

	cpuid_count(XSTATE_CPUID, subfunction, &eax, &ebx, &ecx, &edx);

	/* Only EBX is of interest here; the other outputs are ignored. */
	return ebx;
}
|
||||
|
||||
/*
|
||||
* Will the runtime-enumerated 'xstate_size' fit in the init
|
||||
* task's statically-allocated buffer?
|
||||
*/
|
||||
static bool is_supported_xstate_size(unsigned int test_xstate_size)
|
||||
{
|
||||
if (test_xstate_size <= sizeof(union fpregs_state))
|
||||
return true;
|
||||
|
||||
pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n",
|
||||
sizeof(union fpregs_state), test_xstate_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
static int init_xstate_size(void)
|
||||
{
|
||||
/* Recompute the context size for enabled features: */
|
||||
unsigned int possible_xstate_size = calculate_xstate_size();
|
||||
|
||||
/* Ensure we have the space to store all enabled: */
|
||||
if (!is_supported_xstate_size(possible_xstate_size))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* The size is OK, we are definitely going to use xsave,
|
||||
* make it known to the world that we need more space.
|
||||
*/
|
||||
xstate_size = possible_xstate_size;
|
||||
do_extra_xstate_size_checks();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* We enabled the XSAVE hardware, but something went wrong and
|
||||
* we can not use it. Disable it.
|
||||
*/
|
||||
static void fpu__init_disable_system_xstate(void)
|
||||
{
|
||||
xfeatures_mask = 0;
|
||||
cr4_clear_bits(X86_CR4_OSXSAVE);
|
||||
fpu__xstate_clear_all_cpu_caps();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -321,6 +609,7 @@ void __init fpu__init_system_xstate(void)
|
|||
{
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
static int on_boot_cpu = 1;
|
||||
int err;
|
||||
|
||||
WARN_ON_FPU(!on_boot_cpu);
|
||||
on_boot_cpu = 0;
|
||||
|
@ -338,7 +627,7 @@ void __init fpu__init_system_xstate(void)
|
|||
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
|
||||
xfeatures_mask = eax + ((u64)edx << 32);
|
||||
|
||||
if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
|
||||
if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
|
||||
pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
|
||||
BUG();
|
||||
}
|
||||
|
@ -348,16 +637,19 @@ void __init fpu__init_system_xstate(void)
|
|||
|
||||
/* Enable xstate instructions to be able to continue with initialization: */
|
||||
fpu__init_cpu_xstate();
|
||||
|
||||
/* Recompute the context size for enabled features: */
|
||||
init_xstate_size();
|
||||
err = init_xstate_size();
|
||||
if (err) {
|
||||
/* something went wrong, boot without any XSAVE support */
|
||||
fpu__init_disable_system_xstate();
|
||||
return;
|
||||
}
|
||||
|
||||
update_regset_xstate_info(xstate_size, xfeatures_mask);
|
||||
fpu__init_prepare_fx_sw_frame();
|
||||
setup_init_fpu_buf();
|
||||
setup_xstate_comp();
|
||||
|
||||
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n",
|
||||
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
|
||||
xfeatures_mask,
|
||||
xstate_size,
|
||||
cpu_has_xsaves ? "compacted" : "standard");
|
||||
|
@ -388,7 +680,7 @@ void fpu__resume_cpu(void)
|
|||
* Inputs:
|
||||
* xstate: the thread's storage area for all FPU data
|
||||
* xstate_feature: state which is defined in xsave.h (e.g.
|
||||
* XSTATE_FP, XSTATE_SSE, etc...)
|
||||
* XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...)
|
||||
* Output:
|
||||
* address of the state in the xsave area, or NULL if the
|
||||
* field is not present in the xsave buffer.
|
||||
|
@ -439,8 +731,8 @@ EXPORT_SYMBOL_GPL(get_xsave_addr);
|
|||
* Note that this only works on the current task.
|
||||
*
|
||||
* Inputs:
|
||||
* @xsave_state: state which is defined in xsave.h (e.g. XSTATE_FP,
|
||||
* XSTATE_SSE, etc...)
|
||||
* @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
|
||||
* XFEATURE_MASK_SSE, etc...)
|
||||
* Output:
|
||||
* address of the state in the xsave area or NULL if the state
|
||||
* is not present or is in its 'init state'.
|
||||
|
|
|
@ -361,7 +361,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
|
|||
|
||||
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
const struct bndcsr *bndcsr;
|
||||
const struct mpx_bndcsr *bndcsr;
|
||||
siginfo_t *info;
|
||||
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
|
@ -384,7 +384,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
|
|||
* which is all zeros which indicates MPX was not
|
||||
* responsible for the exception.
|
||||
*/
|
||||
bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
|
||||
bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
|
||||
if (!bndcsr)
|
||||
goto exit_trap;
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
|
|||
int feature_bit = 0;
|
||||
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
|
||||
|
||||
xstate_bv &= XSTATE_EXTEND_MASK;
|
||||
xstate_bv &= XFEATURE_MASK_EXTEND;
|
||||
while (xstate_bv) {
|
||||
if (xstate_bv & 0x1) {
|
||||
u32 eax, ebx, ecx, edx, offset;
|
||||
|
@ -51,7 +51,7 @@ u64 kvm_supported_xcr0(void)
|
|||
u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
|
||||
|
||||
if (!kvm_x86_ops->mpx_supported())
|
||||
xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
|
||||
xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
|
||||
|
||||
return xcr0;
|
||||
}
|
||||
|
|
|
@ -663,9 +663,9 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
|
|||
/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
|
||||
if (index != XCR_XFEATURE_ENABLED_MASK)
|
||||
return 1;
|
||||
if (!(xcr0 & XSTATE_FP))
|
||||
if (!(xcr0 & XFEATURE_MASK_FP))
|
||||
return 1;
|
||||
if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
|
||||
if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
|
||||
return 1;
|
||||
|
||||
/*
|
||||
|
@ -673,23 +673,24 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
|
|||
* saving. However, xcr0 bit 0 is always set, even if the
|
||||
* emulated CPU does not support XSAVE (see fx_init).
|
||||
*/
|
||||
valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
|
||||
valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
|
||||
if (xcr0 & ~valid_bits)
|
||||
return 1;
|
||||
|
||||
if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
|
||||
if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
|
||||
(!(xcr0 & XFEATURE_MASK_BNDCSR)))
|
||||
return 1;
|
||||
|
||||
if (xcr0 & XSTATE_AVX512) {
|
||||
if (!(xcr0 & XSTATE_YMM))
|
||||
if (xcr0 & XFEATURE_MASK_AVX512) {
|
||||
if (!(xcr0 & XFEATURE_MASK_YMM))
|
||||
return 1;
|
||||
if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
|
||||
if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
|
||||
return 1;
|
||||
}
|
||||
kvm_put_guest_xcr0(vcpu);
|
||||
vcpu->arch.xcr0 = xcr0;
|
||||
|
||||
if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK)
|
||||
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
|
||||
kvm_update_cpuid(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
@ -2905,7 +2906,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
|
|||
* Copy each region from the possibly compacted offset to the
|
||||
* non-compacted offset.
|
||||
*/
|
||||
valid = xstate_bv & ~XSTATE_FPSSE;
|
||||
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
|
||||
while (valid) {
|
||||
u64 feature = valid & -valid;
|
||||
int index = fls64(feature) - 1;
|
||||
|
@ -2943,7 +2944,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
|
|||
* Copy each region from the non-compacted offset to the
|
||||
* possibly compacted offset.
|
||||
*/
|
||||
valid = xstate_bv & ~XSTATE_FPSSE;
|
||||
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
|
||||
while (valid) {
|
||||
u64 feature = valid & -valid;
|
||||
int index = fls64(feature) - 1;
|
||||
|
@ -2971,7 +2972,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
|
|||
&vcpu->arch.guest_fpu.state.fxsave,
|
||||
sizeof(struct fxregs_state));
|
||||
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
|
||||
XSTATE_FPSSE;
|
||||
XFEATURE_MASK_FPSSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2991,7 +2992,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
|
|||
return -EINVAL;
|
||||
load_xsave(vcpu, (u8 *)guest_xsave->region);
|
||||
} else {
|
||||
if (xstate_bv & ~XSTATE_FPSSE)
|
||||
if (xstate_bv & ~XFEATURE_MASK_FPSSE)
|
||||
return -EINVAL;
|
||||
memcpy(&vcpu->arch.guest_fpu.state.fxsave,
|
||||
guest_xsave->region, sizeof(struct fxregs_state));
|
||||
|
@ -7005,7 +7006,7 @@ static void fx_init(struct kvm_vcpu *vcpu)
|
|||
/*
|
||||
* Ensure guest xcr0 is valid for loading
|
||||
*/
|
||||
vcpu->arch.xcr0 = XSTATE_FP;
|
||||
vcpu->arch.xcr0 = XFEATURE_MASK_FP;
|
||||
|
||||
vcpu->arch.cr0 |= X86_CR0_ET;
|
||||
}
|
||||
|
|
|
@ -180,9 +180,9 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
|
|||
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
|
||||
int page_num);
|
||||
|
||||
#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
|
||||
| XSTATE_BNDREGS | XSTATE_BNDCSR \
|
||||
| XSTATE_AVX512)
|
||||
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
|
||||
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
|
||||
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512)
|
||||
extern u64 host_xcr0;
|
||||
|
||||
extern u64 kvm_supported_xcr0(void);
|
||||
|
|
|
@ -169,6 +169,76 @@ void fxch_i(void)
|
|||
fpu_tag_word = tag_word;
|
||||
}
|
||||
|
||||
static void fcmovCC(void)
|
||||
{
|
||||
/* fcmovCC st(i) */
|
||||
int i = FPU_rm;
|
||||
FPU_REG *st0_ptr = &st(0);
|
||||
FPU_REG *sti_ptr = &st(i);
|
||||
long tag_word = fpu_tag_word;
|
||||
int regnr = top & 7;
|
||||
int regnri = (top + i) & 7;
|
||||
u_char sti_tag = (tag_word >> (regnri * 2)) & 3;
|
||||
|
||||
if (sti_tag == TAG_Empty) {
|
||||
FPU_stack_underflow();
|
||||
clear_C1();
|
||||
return;
|
||||
}
|
||||
reg_copy(sti_ptr, st0_ptr);
|
||||
tag_word &= ~(3 << (regnr * 2));
|
||||
tag_word |= (sti_tag << (regnr * 2));
|
||||
fpu_tag_word = tag_word;
|
||||
}
|
||||
|
||||
void fcmovb(void)
|
||||
{
|
||||
if (FPU_EFLAGS & X86_EFLAGS_CF)
|
||||
fcmovCC();
|
||||
}
|
||||
|
||||
void fcmove(void)
|
||||
{
|
||||
if (FPU_EFLAGS & X86_EFLAGS_ZF)
|
||||
fcmovCC();
|
||||
}
|
||||
|
||||
void fcmovbe(void)
|
||||
{
|
||||
if (FPU_EFLAGS & (X86_EFLAGS_CF|X86_EFLAGS_ZF))
|
||||
fcmovCC();
|
||||
}
|
||||
|
||||
void fcmovu(void)
|
||||
{
|
||||
if (FPU_EFLAGS & X86_EFLAGS_PF)
|
||||
fcmovCC();
|
||||
}
|
||||
|
||||
void fcmovnb(void)
|
||||
{
|
||||
if (!(FPU_EFLAGS & X86_EFLAGS_CF))
|
||||
fcmovCC();
|
||||
}
|
||||
|
||||
void fcmovne(void)
|
||||
{
|
||||
if (!(FPU_EFLAGS & X86_EFLAGS_ZF))
|
||||
fcmovCC();
|
||||
}
|
||||
|
||||
void fcmovnbe(void)
|
||||
{
|
||||
if (!(FPU_EFLAGS & (X86_EFLAGS_CF|X86_EFLAGS_ZF)))
|
||||
fcmovCC();
|
||||
}
|
||||
|
||||
void fcmovnu(void)
|
||||
{
|
||||
if (!(FPU_EFLAGS & X86_EFLAGS_PF))
|
||||
fcmovCC();
|
||||
}
|
||||
|
||||
void ffree_(void)
|
||||
{
|
||||
/* ffree st(i) */
|
||||
|
|
|
@ -40,49 +40,33 @@
|
|||
|
||||
#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */
|
||||
|
||||
#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */
|
||||
/* fcmovCC and f(u)comi(p) are enabled if CPUID(1).EDX(15) "cmov" is set */
|
||||
|
||||
/* WARNING: These codes are not documented by Intel in their 80486 manual
|
||||
and may not work on FPU clones or later Intel FPUs. */
|
||||
|
||||
/* Changes to support the un-doc codes provided by Linus Torvalds. */
|
||||
|
||||
#define _d9_d8_ fstp_i /* unofficial code (19) */
|
||||
#define _dc_d0_ fcom_st /* unofficial code (14) */
|
||||
#define _dc_d8_ fcompst /* unofficial code (1c) */
|
||||
#define _dd_c8_ fxch_i /* unofficial code (0d) */
|
||||
#define _de_d0_ fcompst /* unofficial code (16) */
|
||||
#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */
|
||||
#define _df_c8_ fxch_i /* unofficial code (0f) */
|
||||
#define _df_d0_ fstp_i /* unofficial code (17) */
|
||||
#define _df_d8_ fstp_i /* unofficial code (1f) */
|
||||
/* WARNING: "u" entries are not documented by Intel in their 80486 manual
|
||||
and may not work on FPU clones or later Intel FPUs.
|
||||
Changes to support them provided by Linus Torvalds. */
|
||||
|
||||
static FUNC const st_instr_table[64] = {
|
||||
fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_,
|
||||
fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_,
|
||||
fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_,
|
||||
fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_,
|
||||
fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
|
||||
fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
|
||||
fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
|
||||
fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
|
||||
/* Opcode: d8 d9 da db */
|
||||
/* dc dd de df */
|
||||
/* c0..7 */ fadd__, fld_i_, fcmovb, fcmovnb,
|
||||
/* c0..7 */ fadd_i, ffree_, faddp_, ffreep,/*u*/
|
||||
/* c8..f */ fmul__, fxch_i, fcmove, fcmovne,
|
||||
/* c8..f */ fmul_i, fxch_i,/*u*/ fmulp_, fxch_i,/*u*/
|
||||
/* d0..7 */ fcom_st, fp_nop, fcmovbe, fcmovnbe,
|
||||
/* d0..7 */ fcom_st,/*u*/ fst_i_, fcompst,/*u*/ fstp_i,/*u*/
|
||||
/* d8..f */ fcompst, fstp_i,/*u*/ fcmovu, fcmovnu,
|
||||
/* d8..f */ fcompst,/*u*/ fstp_i, fcompp, fstp_i,/*u*/
|
||||
/* e0..7 */ fsub__, FPU_etc, __BAD__, finit_,
|
||||
/* e0..7 */ fsubri, fucom_, fsubrp, fstsw_,
|
||||
/* e8..f */ fsubr_, fconst, fucompp, fucomi_,
|
||||
/* e8..f */ fsub_i, fucomp, fsubp_, fucomip,
|
||||
/* f0..7 */ fdiv__, FPU_triga, __BAD__, fcomi_,
|
||||
/* f0..7 */ fdivri, __BAD__, fdivrp, fcomip,
|
||||
/* f8..f */ fdivr_, FPU_trigb, __BAD__, __BAD__,
|
||||
/* f8..f */ fdiv_i, __BAD__, fdivp_, __BAD__,
|
||||
};
|
||||
|
||||
#else /* Support only documented FPU op-codes */
|
||||
|
||||
static FUNC const st_instr_table[64] = {
|
||||
fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__,
|
||||
fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__,
|
||||
fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__,
|
||||
fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__,
|
||||
fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
|
||||
fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
|
||||
fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
|
||||
fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
|
||||
};
|
||||
|
||||
#endif /* NO_UNDOC_CODE */
|
||||
|
||||
#define _NONE_ 0 /* Take no special action */
|
||||
#define _REG0_ 1 /* Need to check for not empty st(0) */
|
||||
#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */
|
||||
|
@ -94,36 +78,18 @@ static FUNC const st_instr_table[64] = {
|
|||
#define _REGIc 0 /* Compare st(0) and st(rm) */
|
||||
#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */
|
||||
|
||||
#ifndef NO_UNDOC_CODE
|
||||
|
||||
/* Un-documented FPU op-codes supported by default. (see above) */
|
||||
|
||||
static u_char const type_table[64] = {
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
|
||||
_REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
|
||||
_REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
|
||||
_REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
|
||||
_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
|
||||
_REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
|
||||
/* Opcode: d8 d9 da db dc dd de df */
|
||||
/* c0..7 */ _REGI_, _NONE_, _REGIn, _REGIn, _REGIi, _REGi_, _REGIp, _REGi_,
|
||||
/* c8..f */ _REGI_, _REGIn, _REGIn, _REGIn, _REGIi, _REGI_, _REGIp, _REGI_,
|
||||
/* d0..7 */ _REGIc, _NONE_, _REGIn, _REGIn, _REGIc, _REG0_, _REGIc, _REG0_,
|
||||
/* d8..f */ _REGIc, _REG0_, _REGIn, _REGIn, _REGIc, _REG0_, _REGIc, _REG0_,
|
||||
/* e0..7 */ _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
|
||||
/* e8..f */ _REGI_, _NONE_, _REGIc, _REGIc, _REGIi, _REGIc, _REGIp, _REGIc,
|
||||
/* f0..7 */ _REGI_, _NONE_, _null_, _REGIc, _REGIi, _null_, _REGIp, _REGIc,
|
||||
/* f8..f */ _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
|
||||
};
|
||||
|
||||
#else /* Support only documented FPU op-codes */
|
||||
|
||||
static u_char const type_table[64] = {
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
|
||||
_REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
|
||||
_REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
|
||||
_REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
|
||||
_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
|
||||
_REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
|
||||
};
|
||||
|
||||
#endif /* NO_UNDOC_CODE */
|
||||
|
||||
#ifdef RE_ENTRANT_CHECKING
|
||||
u_char emulating = 0;
|
||||
#endif /* RE_ENTRANT_CHECKING */
|
||||
|
|
|
@ -46,6 +46,14 @@ extern void fstsw_(void);
|
|||
extern void fp_nop(void);
|
||||
extern void fld_i_(void);
|
||||
extern void fxch_i(void);
|
||||
extern void fcmovb(void);
|
||||
extern void fcmove(void);
|
||||
extern void fcmovbe(void);
|
||||
extern void fcmovu(void);
|
||||
extern void fcmovnb(void);
|
||||
extern void fcmovne(void);
|
||||
extern void fcmovnbe(void);
|
||||
extern void fcmovnu(void);
|
||||
extern void ffree_(void);
|
||||
extern void ffreep(void);
|
||||
extern void fst_i_(void);
|
||||
|
@ -108,6 +116,10 @@ extern void fcompp(void);
|
|||
extern void fucom_(void);
|
||||
extern void fucomp(void);
|
||||
extern void fucompp(void);
|
||||
extern void fcomi_(void);
|
||||
extern void fcomip(void);
|
||||
extern void fucomi_(void);
|
||||
extern void fucomip(void);
|
||||
/* reg_constant.c */
|
||||
extern void fconst(void);
|
||||
/* reg_ld_str.c */
|
||||
|
|
|
@ -33,11 +33,12 @@
|
|||
|
||||
#define pop_0() { FPU_settag0(TAG_Empty); top++; }
|
||||
|
||||
/* index is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */
|
||||
static u_char const type_table[32] = {
|
||||
_PUSH_, _PUSH_, _PUSH_, _PUSH_,
|
||||
_null_, _null_, _null_, _null_,
|
||||
_REG0_, _REG0_, _REG0_, _REG0_,
|
||||
_REG0_, _REG0_, _REG0_, _REG0_,
|
||||
_PUSH_, _PUSH_, _PUSH_, _PUSH_, /* /0: d9:fld f32, db:fild m32, dd:fld f64, df:fild m16 */
|
||||
_null_, _REG0_, _REG0_, _REG0_, /* /1: d9:undef, db,dd,df:fisttp m32/64/16 */
|
||||
_REG0_, _REG0_, _REG0_, _REG0_, /* /2: d9:fst f32, db:fist m32, dd:fst f64, df:fist m16 */
|
||||
_REG0_, _REG0_, _REG0_, _REG0_, /* /3: d9:fstp f32, db:fistp m32, dd:fstp f64, df:fistp m16 */
|
||||
_NONE_, _null_, _NONE_, _PUSH_,
|
||||
_NONE_, _PUSH_, _null_, _PUSH_,
|
||||
_NONE_, _null_, _NONE_, _REG0_,
|
||||
|
@ -45,15 +46,19 @@ static u_char const type_table[32] = {
|
|||
};
|
||||
|
||||
u_char const data_sizes_16[32] = {
|
||||
4, 4, 8, 2, 0, 0, 0, 0,
|
||||
4, 4, 8, 2, 4, 4, 8, 2,
|
||||
4, 4, 8, 2,
|
||||
0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp */
|
||||
4, 4, 8, 2,
|
||||
4, 4, 8, 2,
|
||||
14, 0, 94, 10, 2, 10, 0, 8,
|
||||
14, 0, 94, 10, 2, 10, 2, 8
|
||||
};
|
||||
|
||||
static u_char const data_sizes_32[32] = {
|
||||
4, 4, 8, 2, 0, 0, 0, 0,
|
||||
4, 4, 8, 2, 4, 4, 8, 2,
|
||||
4, 4, 8, 2,
|
||||
0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp */
|
||||
4, 4, 8, 2,
|
||||
4, 4, 8, 2,
|
||||
28, 0, 108, 10, 2, 10, 0, 8,
|
||||
28, 0, 108, 10, 2, 10, 2, 8
|
||||
};
|
||||
|
@ -65,6 +70,7 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
|
|||
FPU_REG *st0_ptr;
|
||||
u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */
|
||||
u_char loaded_tag;
|
||||
int sv_cw;
|
||||
|
||||
st0_ptr = NULL; /* Initialized just to stop compiler warnings. */
|
||||
|
||||
|
@ -111,7 +117,8 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
|
|||
}
|
||||
|
||||
switch (type) {
|
||||
case 000: /* fld m32real */
|
||||
/* type is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */
|
||||
case 000: /* fld m32real (d9 /0) */
|
||||
clear_C1();
|
||||
loaded_tag =
|
||||
FPU_load_single((float __user *)data_address, &loaded_data);
|
||||
|
@ -123,13 +130,13 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
|
|||
}
|
||||
FPU_copy_to_reg0(&loaded_data, loaded_tag);
|
||||
break;
|
||||
case 001: /* fild m32int */
|
||||
case 001: /* fild m32int (db /0) */
|
||||
clear_C1();
|
||||
loaded_tag =
|
||||
FPU_load_int32((long __user *)data_address, &loaded_data);
|
||||
FPU_copy_to_reg0(&loaded_data, loaded_tag);
|
||||
break;
|
||||
case 002: /* fld m64real */
|
||||
case 002: /* fld m64real (dd /0) */
|
||||
clear_C1();
|
||||
loaded_tag =
|
||||
FPU_load_double((double __user *)data_address,
|
||||
|
@ -142,12 +149,44 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
|
|||
}
|
||||
FPU_copy_to_reg0(&loaded_data, loaded_tag);
|
||||
break;
|
||||
case 003: /* fild m16int */
|
||||
case 003: /* fild m16int (df /0) */
|
||||
clear_C1();
|
||||
loaded_tag =
|
||||
FPU_load_int16((short __user *)data_address, &loaded_data);
|
||||
FPU_copy_to_reg0(&loaded_data, loaded_tag);
|
||||
break;
|
||||
/* case 004: undefined (d9 /1) */
|
||||
/* fisttp are enabled if CPUID(1).ECX(0) "sse3" is set */
|
||||
case 005: /* fisttp m32int (db /1) */
|
||||
clear_C1();
|
||||
sv_cw = control_word;
|
||||
control_word |= RC_CHOP;
|
||||
if (FPU_store_int32
|
||||
(st0_ptr, st0_tag, (long __user *)data_address))
|
||||
pop_0(); /* pop only if the number was actually stored
|
||||
(see the 80486 manual p16-28) */
|
||||
control_word = sv_cw;
|
||||
break;
|
||||
case 006: /* fisttp m64int (dd /1) */
|
||||
clear_C1();
|
||||
sv_cw = control_word;
|
||||
control_word |= RC_CHOP;
|
||||
if (FPU_store_int64
|
||||
(st0_ptr, st0_tag, (long long __user *)data_address))
|
||||
pop_0(); /* pop only if the number was actually stored
|
||||
(see the 80486 manual p16-28) */
|
||||
control_word = sv_cw;
|
||||
break;
|
||||
case 007: /* fisttp m16int (df /1) */
|
||||
clear_C1();
|
||||
sv_cw = control_word;
|
||||
control_word |= RC_CHOP;
|
||||
if (FPU_store_int16
|
||||
(st0_ptr, st0_tag, (short __user *)data_address))
|
||||
pop_0(); /* pop only if the number was actually stored
|
||||
(see the 80486 manual p16-28) */
|
||||
control_word = sv_cw;
|
||||
break;
|
||||
case 010: /* fst m32real */
|
||||
clear_C1();
|
||||
FPU_store_single(st0_ptr, st0_tag,
|
||||
|
|
|
@ -249,6 +249,54 @@ static int compare_st_st(int nr)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int compare_i_st_st(int nr)
|
||||
{
|
||||
int f, c;
|
||||
FPU_REG *st_ptr;
|
||||
|
||||
if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
|
||||
FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF);
|
||||
/* Stack fault */
|
||||
EXCEPTION(EX_StackUnder);
|
||||
return !(control_word & CW_Invalid);
|
||||
}
|
||||
|
||||
partial_status &= ~SW_C0;
|
||||
st_ptr = &st(nr);
|
||||
c = compare(st_ptr, FPU_gettagi(nr));
|
||||
if (c & COMP_NaN) {
|
||||
FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF);
|
||||
EXCEPTION(EX_Invalid);
|
||||
return !(control_word & CW_Invalid);
|
||||
}
|
||||
|
||||
switch (c & 7) {
|
||||
case COMP_A_lt_B:
|
||||
f = X86_EFLAGS_CF;
|
||||
break;
|
||||
case COMP_A_eq_B:
|
||||
f = X86_EFLAGS_ZF;
|
||||
break;
|
||||
case COMP_A_gt_B:
|
||||
f = 0;
|
||||
break;
|
||||
case COMP_No_Comp:
|
||||
f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF;
|
||||
break;
|
||||
#ifdef PARANOID
|
||||
default:
|
||||
EXCEPTION(EX_INTERNAL | 0x122);
|
||||
f = 0;
|
||||
break;
|
||||
#endif /* PARANOID */
|
||||
}
|
||||
FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f;
|
||||
if (c & COMP_Denormal) {
|
||||
return denormal_operand() < 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int compare_u_st_st(int nr)
|
||||
{
|
||||
int f = 0, c;
|
||||
|
@ -299,6 +347,58 @@ static int compare_u_st_st(int nr)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int compare_ui_st_st(int nr)
|
||||
{
|
||||
int f = 0, c;
|
||||
FPU_REG *st_ptr;
|
||||
|
||||
if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
|
||||
FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF);
|
||||
/* Stack fault */
|
||||
EXCEPTION(EX_StackUnder);
|
||||
return !(control_word & CW_Invalid);
|
||||
}
|
||||
|
||||
partial_status &= ~SW_C0;
|
||||
st_ptr = &st(nr);
|
||||
c = compare(st_ptr, FPU_gettagi(nr));
|
||||
if (c & COMP_NaN) {
|
||||
FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF);
|
||||
if (c & COMP_SNaN) { /* This is the only difference between
|
||||
un-ordered and ordinary comparisons */
|
||||
EXCEPTION(EX_Invalid);
|
||||
return !(control_word & CW_Invalid);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (c & 7) {
|
||||
case COMP_A_lt_B:
|
||||
f = X86_EFLAGS_CF;
|
||||
break;
|
||||
case COMP_A_eq_B:
|
||||
f = X86_EFLAGS_ZF;
|
||||
break;
|
||||
case COMP_A_gt_B:
|
||||
f = 0;
|
||||
break;
|
||||
case COMP_No_Comp:
|
||||
f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF;
|
||||
break;
|
||||
#ifdef PARANOID
|
||||
default:
|
||||
EXCEPTION(EX_INTERNAL | 0x123);
|
||||
f = 0;
|
||||
break;
|
||||
#endif /* PARANOID */
|
||||
}
|
||||
FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f;
|
||||
if (c & COMP_Denormal) {
|
||||
return denormal_operand() < 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
void fcom_st(void)
|
||||
|
@ -348,3 +448,31 @@ void fucompp(void)
|
|||
} else
|
||||
FPU_illegal();
|
||||
}
|
||||
|
||||
/* P6+ compare-to-EFLAGS ops */
|
||||
|
||||
void fcomi_(void)
|
||||
{
|
||||
/* fcomi st(i) */
|
||||
compare_i_st_st(FPU_rm);
|
||||
}
|
||||
|
||||
void fcomip(void)
|
||||
{
|
||||
/* fcomip st(i) */
|
||||
if (!compare_i_st_st(FPU_rm))
|
||||
FPU_pop();
|
||||
}
|
||||
|
||||
void fucomi_(void)
|
||||
{
|
||||
/* fucomi st(i) */
|
||||
compare_ui_st_st(FPU_rm);
|
||||
}
|
||||
|
||||
void fucomip(void)
|
||||
{
|
||||
/* fucomip st(i) */
|
||||
if (!compare_ui_st_st(FPU_rm))
|
||||
FPU_pop();
|
||||
}
|
||||
|
|
|
@ -237,7 +237,8 @@ bad_opcode:
|
|||
*/
|
||||
siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
|
||||
{
|
||||
const struct bndreg *bndregs, *bndreg;
|
||||
const struct mpx_bndreg_state *bndregs;
|
||||
const struct mpx_bndreg *bndreg;
|
||||
siginfo_t *info = NULL;
|
||||
struct insn insn;
|
||||
uint8_t bndregno;
|
||||
|
@ -258,13 +259,13 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
|
|||
goto err_out;
|
||||
}
|
||||
/* get bndregs field from current task's xsave area */
|
||||
bndregs = get_xsave_field_ptr(XSTATE_BNDREGS);
|
||||
bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS);
|
||||
if (!bndregs) {
|
||||
err = -EINVAL;
|
||||
goto err_out;
|
||||
}
|
||||
/* now go select the individual register in the set of 4 */
|
||||
bndreg = &bndregs[bndregno];
|
||||
bndreg = &bndregs->bndreg[bndregno];
|
||||
|
||||
info = kzalloc(sizeof(*info), GFP_KERNEL);
|
||||
if (!info) {
|
||||
|
@ -306,7 +307,7 @@ err_out:
|
|||
|
||||
static __user void *mpx_get_bounds_dir(void)
|
||||
{
|
||||
const struct bndcsr *bndcsr;
|
||||
const struct mpx_bndcsr *bndcsr;
|
||||
|
||||
if (!cpu_feature_enabled(X86_FEATURE_MPX))
|
||||
return MPX_INVALID_BOUNDS_DIR;
|
||||
|
@ -315,7 +316,7 @@ static __user void *mpx_get_bounds_dir(void)
|
|||
* The bounds directory pointer is stored in a register
|
||||
* only accessible if we first do an xsave.
|
||||
*/
|
||||
bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
|
||||
bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
|
||||
if (!bndcsr)
|
||||
return MPX_INVALID_BOUNDS_DIR;
|
||||
|
||||
|
@ -489,10 +490,10 @@ out_unmap:
|
|||
static int do_mpx_bt_fault(void)
|
||||
{
|
||||
unsigned long bd_entry, bd_base;
|
||||
const struct bndcsr *bndcsr;
|
||||
const struct mpx_bndcsr *bndcsr;
|
||||
struct mm_struct *mm = current->mm;
|
||||
|
||||
bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
|
||||
bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
|
||||
if (!bndcsr)
|
||||
return -EINVAL;
|
||||
/*
|
||||
|
|
|
@ -5,7 +5,8 @@ include ../lib.mk
|
|||
.PHONY: all all_32 all_64 warn_32bit_failure clean
|
||||
|
||||
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall
|
||||
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso
|
||||
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \
|
||||
test_FCMOV test_FCOMI test_FISTTP
|
||||
|
||||
TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
|
||||
BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
|
||||
|
@ -35,7 +36,7 @@ clean:
|
|||
$(RM) $(BINARIES_32) $(BINARIES_64)
|
||||
|
||||
$(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c
|
||||
$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
|
||||
$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
|
||||
|
||||
$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c
|
||||
$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
#undef _GNU_SOURCE
|
||||
#define _GNU_SOURCE 1
|
||||
#undef __USE_GNU
|
||||
#define __USE_GNU 1
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/select.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
/*
 * TEST(insn) defines a function named after the instruction.
 *
 * The function loads 'flags' into the flags register (push + popf),
 * pushes pi and then 1.0 onto the x87 stack, executes
 * "insn %st(1), %st", frees st(1) and returns st(0).
 * The result is therefore 1.0 when the conditional move was not
 * performed and pi when it was.
 */
#define TEST(insn)							\
long double __attribute__((noinline)) insn(long flags)			\
{									\
	long double st0;						\
	__asm__ ("\n"							\
		"	push	%1\n"					\
		"	popf\n"						\
		"	fldpi\n"					\
		"	fld1\n"						\
		"	" #insn " %%st(1), %%st\n"			\
		"	ffree	%%st(1)\n"				\
		: "=t" (st0)						\
		: "r" (flags)						\
	);								\
	return st0;							\
}

TEST(fcmovb)
TEST(fcmove)
TEST(fcmovbe)
TEST(fcmovu)
TEST(fcmovnb)
TEST(fcmovne)
TEST(fcmovnbe)
TEST(fcmovnu)
|
||||
|
||||
/* Bits of the flags register that the fcmovCC conditions depend on. */
enum {
	CF = 0x01,	/* bit 0 */
	PF = 0x04,	/* bit 2 */
	ZF = 0x40,	/* bit 6 */
};
|
||||
|
||||
/* Report the unexpected signal on stdout and terminate with status 1. */
void sighandler(int sig)
{
	fprintf(stdout, "[FAIL]\tGot signal %d, exiting\n", sig);
	exit(1);
}
|
||||
|
||||
int main(int argc, char **argv, char **envp)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/* SIGILL triggers on 32-bit kernels w/o fcomi emulation
|
||||
* when run with "no387 nofxsr". Other signals are caught
|
||||
* just in case.
|
||||
*/
|
||||
signal(SIGILL, sighandler);
|
||||
signal(SIGFPE, sighandler);
|
||||
signal(SIGSEGV, sighandler);
|
||||
|
||||
printf("[RUN]\tTesting fcmovCC instructions\n");
|
||||
/* If fcmovCC() returns 1.0, the move wasn't done */
|
||||
err |= !(fcmovb(0) == 1.0); err |= !(fcmovnb(0) != 1.0);
|
||||
err |= !(fcmove(0) == 1.0); err |= !(fcmovne(0) != 1.0);
|
||||
err |= !(fcmovbe(0) == 1.0); err |= !(fcmovnbe(0) != 1.0);
|
||||
err |= !(fcmovu(0) == 1.0); err |= !(fcmovnu(0) != 1.0);
|
||||
|
||||
err |= !(fcmovb(CF) != 1.0); err |= !(fcmovnb(CF) == 1.0);
|
||||
err |= !(fcmove(CF) == 1.0); err |= !(fcmovne(CF) != 1.0);
|
||||
err |= !(fcmovbe(CF) != 1.0); err |= !(fcmovnbe(CF) == 1.0);
|
||||
err |= !(fcmovu(CF) == 1.0); err |= !(fcmovnu(CF) != 1.0);
|
||||
|
||||
err |= !(fcmovb(ZF) == 1.0); err |= !(fcmovnb(ZF) != 1.0);
|
||||
err |= !(fcmove(ZF) != 1.0); err |= !(fcmovne(ZF) == 1.0);
|
||||
err |= !(fcmovbe(ZF) != 1.0); err |= !(fcmovnbe(ZF) == 1.0);
|
||||
err |= !(fcmovu(ZF) == 1.0); err |= !(fcmovnu(ZF) != 1.0);
|
||||
|
||||
err |= !(fcmovb(PF) == 1.0); err |= !(fcmovnb(PF) != 1.0);
|
||||
err |= !(fcmove(PF) == 1.0); err |= !(fcmovne(PF) != 1.0);
|
||||
err |= !(fcmovbe(PF) == 1.0); err |= !(fcmovnbe(PF) != 1.0);
|
||||
err |= !(fcmovu(PF) != 1.0); err |= !(fcmovnu(PF) == 1.0);
|
||||
|
||||
if (!err)
|
||||
printf("[OK]\tfcmovCC\n");
|
||||
else
|
||||
printf("[FAIL]\tfcmovCC errors: %d\n", err);
|
||||
|
||||
return err;
|
||||
}
|
|
@ -0,0 +1,331 @@
|
|||
#undef _GNU_SOURCE
|
||||
#define _GNU_SOURCE 1
|
||||
#undef __USE_GNU
|
||||
#define __USE_GNU 1
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/select.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/wait.h>
|
||||
#include <fenv.h>
|
||||
|
||||
/*
 * EFLAGS bits written by the f[u]comi[p] instructions, plus ARITH, the
 * mask of all three used to extract them from a saved EFLAGS image.
 */
enum {
	CF = 1 << 0,	/* carry flag, bit 0 */
	PF = 1 << 2,	/* parity flag, bit 2 */
	ZF = 1 << 6,	/* zero flag, bit 6 */
	ARITH = CF | PF | ZF,
};
|
||||
|
||||
/*
 * EFLAGS images saved by the inline asm of the tests ("pushf; pop <var>")
 * right after each comparison.  They are globals because the asm refers
 * to them by symbol name.
 */
long res_fcomi_pi_1;
long res_fcomi_1_pi;
long res_fcomi_1_1;
long res_fcomi_nan_1;
/*
 * Single-precision NaN bit patterns.  On x86 the most significant
 * fraction bit is the "quiet" bit:
 *
 *   qNaN is s|111 1111 1|1xx xxxx xxxx xxxx xxxx xxxx
 *   sNaN is s|111 1111 1|0xx xxxx xxxx xxxx xxxx xxxx (some x must be nonzero)
 *
 * NOTE: the variable names below do not match these encodings and are
 * kept only because the inline asm refers to them by name.  `snan`
 * (0x7fc11111) has the quiet bit set, i.e. it is already a quiet NaN.
 * `qnan` (0x7f811111) has the quiet bit clear, i.e. it is a signaling
 * pattern; loading it with flds raises FE_INVALID and leaves a quiet NaN
 * in the 80-bit register, which is why the tests issue fnclex after the
 * load.
 */
int snan = 0x7fc11111;
int qnan = 0x7f811111;
/* 80-bit scratch buffer; not referenced by any active code visible here. */
unsigned short snan1[5];
/* sNaN80 is s|111 1111 1111 1111 |10xx xx...xx (some x must be nonzero) */
unsigned short snan80[5] = { 0x1111, 0x1111, 0x1111, 0x8111, 0x7fff };
|
||||
|
||||
int test(long flags)
|
||||
{
|
||||
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
|
||||
asm ("\n"
|
||||
|
||||
" push %0""\n"
|
||||
" popf""\n"
|
||||
" fld1""\n"
|
||||
" fldpi""\n"
|
||||
" fcomi %%st(1), %%st" "\n"
|
||||
" ffree %%st(0)" "\n"
|
||||
" ffree %%st(1)" "\n"
|
||||
" pushf""\n"
|
||||
" pop res_fcomi_1_pi""\n"
|
||||
|
||||
" push %0""\n"
|
||||
" popf""\n"
|
||||
" fldpi""\n"
|
||||
" fld1""\n"
|
||||
" fcomi %%st(1), %%st" "\n"
|
||||
" ffree %%st(0)" "\n"
|
||||
" ffree %%st(1)" "\n"
|
||||
" pushf""\n"
|
||||
" pop res_fcomi_pi_1""\n"
|
||||
|
||||
" push %0""\n"
|
||||
" popf""\n"
|
||||
" fld1""\n"
|
||||
" fld1""\n"
|
||||
" fcomi %%st(1), %%st" "\n"
|
||||
" ffree %%st(0)" "\n"
|
||||
" ffree %%st(1)" "\n"
|
||||
" pushf""\n"
|
||||
" pop res_fcomi_1_1""\n"
|
||||
:
|
||||
: "r" (flags)
|
||||
);
|
||||
if ((res_fcomi_1_pi & ARITH) != (0)) {
|
||||
printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags);
|
||||
return 1;
|
||||
}
|
||||
if ((res_fcomi_pi_1 & ARITH) != (CF)) {
|
||||
printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH);
|
||||
return 1;
|
||||
}
|
||||
if ((res_fcomi_1_1 & ARITH) != (ZF)) {
|
||||
printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags);
|
||||
return 1;
|
||||
}
|
||||
if (fetestexcept(FE_INVALID) != 0) {
|
||||
printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_qnan(long flags)
|
||||
{
|
||||
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
|
||||
asm ("\n"
|
||||
" push %0""\n"
|
||||
" popf""\n"
|
||||
" flds qnan""\n"
|
||||
" fld1""\n"
|
||||
" fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
|
||||
" fcomi %%st(1), %%st" "\n"
|
||||
" ffree %%st(0)" "\n"
|
||||
" ffree %%st(1)" "\n"
|
||||
" pushf""\n"
|
||||
" pop res_fcomi_nan_1""\n"
|
||||
:
|
||||
: "r" (flags)
|
||||
);
|
||||
if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
|
||||
printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
|
||||
return 1;
|
||||
}
|
||||
if (fetestexcept(FE_INVALID) != FE_INVALID) {
|
||||
printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int testu_qnan(long flags)
|
||||
{
|
||||
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
|
||||
asm ("\n"
|
||||
" push %0""\n"
|
||||
" popf""\n"
|
||||
" flds qnan""\n"
|
||||
" fld1""\n"
|
||||
" fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
|
||||
" fucomi %%st(1), %%st" "\n"
|
||||
" ffree %%st(0)" "\n"
|
||||
" ffree %%st(1)" "\n"
|
||||
" pushf""\n"
|
||||
" pop res_fcomi_nan_1""\n"
|
||||
:
|
||||
: "r" (flags)
|
||||
);
|
||||
if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
|
||||
printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
|
||||
return 1;
|
||||
}
|
||||
if (fetestexcept(FE_INVALID) != 0) {
|
||||
printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int testu_snan(long flags)
|
||||
{
|
||||
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
|
||||
asm ("\n"
|
||||
" push %0""\n"
|
||||
" popf""\n"
|
||||
// " flds snan""\n" // WRONG, this will convert 32-bit fp snan to a *qnan* in 80-bit fp register!
|
||||
// " fstpt snan1""\n" // if uncommented, it prints "snan1:7fff c111 1100 0000 0000" - c111, not 8111!
|
||||
// " fnclex""\n" // flds of a snan raised FE_INVALID, clear it
|
||||
" fldt snan80""\n" // fldt never raise FE_INVALID
|
||||
" fld1""\n"
|
||||
" fucomi %%st(1), %%st" "\n"
|
||||
" ffree %%st(0)" "\n"
|
||||
" ffree %%st(1)" "\n"
|
||||
" pushf""\n"
|
||||
" pop res_fcomi_nan_1""\n"
|
||||
:
|
||||
: "r" (flags)
|
||||
);
|
||||
if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
|
||||
printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
|
||||
return 1;
|
||||
}
|
||||
// printf("snan:%x snan1:%04x %04x %04x %04x %04x\n", snan, snan1[4], snan1[3], snan1[2], snan1[1], snan1[0]);
|
||||
if (fetestexcept(FE_INVALID) != FE_INVALID) {
|
||||
printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int testp(long flags)
|
||||
{
|
||||
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
|
||||
asm ("\n"
|
||||
|
||||
" push %0""\n"
|
||||
" popf""\n"
|
||||
" fld1""\n"
|
||||
" fldpi""\n"
|
||||
" fcomip %%st(1), %%st" "\n"
|
||||
" ffree %%st(0)" "\n"
|
||||
" pushf""\n"
|
||||
" pop res_fcomi_1_pi""\n"
|
||||
|
||||
" push %0""\n"
|
||||
" popf""\n"
|
||||
" fldpi""\n"
|
||||
" fld1""\n"
|
||||
" fcomip %%st(1), %%st" "\n"
|
||||
" ffree %%st(0)" "\n"
|
||||
" pushf""\n"
|
||||
" pop res_fcomi_pi_1""\n"
|
||||
|
||||
" push %0""\n"
|
||||
" popf""\n"
|
||||
" fld1""\n"
|
||||
" fld1""\n"
|
||||
" fcomip %%st(1), %%st" "\n"
|
||||
" ffree %%st(0)" "\n"
|
||||
" pushf""\n"
|
||||
" pop res_fcomi_1_1""\n"
|
||||
:
|
||||
: "r" (flags)
|
||||
);
|
||||
if ((res_fcomi_1_pi & ARITH) != (0)) {
|
||||
printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags);
|
||||
return 1;
|
||||
}
|
||||
if ((res_fcomi_pi_1 & ARITH) != (CF)) {
|
||||
printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH);
|
||||
return 1;
|
||||
}
|
||||
if ((res_fcomi_1_1 & ARITH) != (ZF)) {
|
||||
printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags);
|
||||
return 1;
|
||||
}
|
||||
if (fetestexcept(FE_INVALID) != 0) {
|
||||
printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int testp_qnan(long flags)
|
||||
{
|
||||
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
|
||||
asm ("\n"
|
||||
" push %0""\n"
|
||||
" popf""\n"
|
||||
" flds qnan""\n"
|
||||
" fld1""\n"
|
||||
" fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
|
||||
" fcomip %%st(1), %%st" "\n"
|
||||
" ffree %%st(0)" "\n"
|
||||
" pushf""\n"
|
||||
" pop res_fcomi_nan_1""\n"
|
||||
:
|
||||
: "r" (flags)
|
||||
);
|
||||
if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
|
||||
printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
|
||||
return 1;
|
||||
}
|
||||
if (fetestexcept(FE_INVALID) != FE_INVALID) {
|
||||
printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int testup_qnan(long flags)
|
||||
{
|
||||
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
|
||||
asm ("\n"
|
||||
" push %0""\n"
|
||||
" popf""\n"
|
||||
" flds qnan""\n"
|
||||
" fld1""\n"
|
||||
" fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
|
||||
" fucomip %%st(1), %%st" "\n"
|
||||
" ffree %%st(0)" "\n"
|
||||
" pushf""\n"
|
||||
" pop res_fcomi_nan_1""\n"
|
||||
:
|
||||
: "r" (flags)
|
||||
);
|
||||
if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
|
||||
printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
|
||||
return 1;
|
||||
}
|
||||
if (fetestexcept(FE_INVALID) != 0) {
|
||||
printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Signal handler installed by main(): report the signal number as a test
 * failure and terminate the process with exit status 1.
 *
 * NOTE(review): printf() and exit() are not async-signal-safe.  This is
 * presumably tolerated because the signals of interest are raised
 * synchronously by the instructions under test, not while libc is
 * running - confirm before reusing this handler elsewhere.
 */
void sighandler(int sig)
{
	printf("[FAIL]\tGot signal %d, exiting\n", sig);
	exit(1);
}
|
||||
|
||||
int main(int argc, char **argv, char **envp)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/* SIGILL triggers on 32-bit kernels w/o fcomi emulation
|
||||
* when run with "no387 nofxsr". Other signals are caught
|
||||
* just in case.
|
||||
*/
|
||||
signal(SIGILL, sighandler);
|
||||
signal(SIGFPE, sighandler);
|
||||
signal(SIGSEGV, sighandler);
|
||||
|
||||
printf("[RUN]\tTesting f[u]comi[p] instructions\n");
|
||||
err |= test(0);
|
||||
err |= test_qnan(0);
|
||||
err |= testu_qnan(0);
|
||||
err |= testu_snan(0);
|
||||
err |= test(CF|ZF|PF);
|
||||
err |= test_qnan(CF|ZF|PF);
|
||||
err |= testu_qnan(CF|ZF|PF);
|
||||
err |= testu_snan(CF|ZF|PF);
|
||||
err |= testp(0);
|
||||
err |= testp_qnan(0);
|
||||
err |= testup_qnan(0);
|
||||
err |= testp(CF|ZF|PF);
|
||||
err |= testp_qnan(CF|ZF|PF);
|
||||
err |= testup_qnan(CF|ZF|PF);
|
||||
if (!err)
|
||||
printf("[OK]\tf[u]comi[p]\n");
|
||||
else
|
||||
printf("[FAIL]\tf[u]comi[p] errors: %d\n", err);
|
||||
|
||||
return err;
|
||||
}
|
|
@ -0,0 +1,137 @@
|
|||
#undef _GNU_SOURCE
|
||||
#define _GNU_SOURCE 1
|
||||
#undef __USE_GNU
|
||||
#define __USE_GNU 1
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/select.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/wait.h>
|
||||
#include <fenv.h>
|
||||
|
||||
/*
 * Destinations of the 64-, 32- and 16-bit fisttp stores; the inline asm
 * refers to them by symbol name.  They start out as all-ones (-1
 * converted to the unsigned type).
 */
unsigned long long res64 = -1;
unsigned int res32 = -1;
unsigned short res16 = -1;
|
||||
|
||||
int test(void)
|
||||
{
|
||||
int ex;
|
||||
|
||||
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
asm volatile ("\n"
|
||||
" fld1""\n"
|
||||
" fisttp res16""\n"
|
||||
" fld1""\n"
|
||||
" fisttpl res32""\n"
|
||||
" fld1""\n"
|
||||
" fisttpll res64""\n"
|
||||
: : : "memory"
|
||||
);
|
||||
if (res16 != 1 || res32 != 1 || res64 != 1) {
|
||||
printf("[BAD]\tfisttp 1\n");
|
||||
return 1;
|
||||
}
|
||||
ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
if (ex != 0) {
|
||||
printf("[BAD]\tfisttp 1: wrong exception state\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
asm volatile ("\n"
|
||||
" fldpi""\n"
|
||||
" fisttp res16""\n"
|
||||
" fldpi""\n"
|
||||
" fisttpl res32""\n"
|
||||
" fldpi""\n"
|
||||
" fisttpll res64""\n"
|
||||
: : : "memory"
|
||||
);
|
||||
if (res16 != 3 || res32 != 3 || res64 != 3) {
|
||||
printf("[BAD]\tfisttp pi\n");
|
||||
return 1;
|
||||
}
|
||||
ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
if (ex != FE_INEXACT) {
|
||||
printf("[BAD]\tfisttp pi: wrong exception state\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
asm volatile ("\n"
|
||||
" fldpi""\n"
|
||||
" fchs""\n"
|
||||
" fisttp res16""\n"
|
||||
" fldpi""\n"
|
||||
" fchs""\n"
|
||||
" fisttpl res32""\n"
|
||||
" fldpi""\n"
|
||||
" fchs""\n"
|
||||
" fisttpll res64""\n"
|
||||
: : : "memory"
|
||||
);
|
||||
if (res16 != 0xfffd || res32 != 0xfffffffd || res64 != 0xfffffffffffffffdULL) {
|
||||
printf("[BAD]\tfisttp -pi\n");
|
||||
return 1;
|
||||
}
|
||||
ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
if (ex != FE_INEXACT) {
|
||||
printf("[BAD]\tfisttp -pi: wrong exception state\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
asm volatile ("\n"
|
||||
" fldln2""\n"
|
||||
" fisttp res16""\n"
|
||||
" fldln2""\n"
|
||||
" fisttpl res32""\n"
|
||||
" fldln2""\n"
|
||||
" fisttpll res64""\n"
|
||||
: : : "memory"
|
||||
);
|
||||
/* Test truncation to zero (round-to-nearest would give 1 here) */
|
||||
if (res16 != 0 || res32 != 0 || res64 != 0) {
|
||||
printf("[BAD]\tfisttp ln2\n");
|
||||
return 1;
|
||||
}
|
||||
ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
|
||||
if (ex != FE_INEXACT) {
|
||||
printf("[BAD]\tfisttp ln2: wrong exception state\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Signal handler installed by main(): report the signal number as a test
 * failure and terminate the process with exit status 1.
 *
 * NOTE(review): printf() and exit() are not async-signal-safe.  This is
 * presumably tolerated because the signals of interest are raised
 * synchronously by the instructions under test, not while libc is
 * running - confirm before reusing this handler elsewhere.
 */
void sighandler(int sig)
{
	printf("[FAIL]\tGot signal %d, exiting\n", sig);
	exit(1);
}
|
||||
|
||||
int main(int argc, char **argv, char **envp)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/* SIGILL triggers on 32-bit kernels w/o fisttp emulation
|
||||
* when run with "no387 nofxsr". Other signals are caught
|
||||
* just in case.
|
||||
*/
|
||||
signal(SIGILL, sighandler);
|
||||
signal(SIGFPE, sighandler);
|
||||
signal(SIGSEGV, sighandler);
|
||||
|
||||
printf("[RUN]\tTesting fisttp instructions\n");
|
||||
err |= test();
|
||||
if (!err)
|
||||
printf("[OK]\tfisttp\n");
|
||||
else
|
||||
printf("[FAIL]\tfisttp errors: %d\n", err);
|
||||
|
||||
return err;
|
||||
}
|
Loading…
Reference in New Issue