[X86] Model MXCSR for AVX instructions other than AVX512

Summary: Model MXCSR for AVX instructions other than AVX512

Reviewers: craig.topper, RKSimon

Subscribers: hiraditya, llvm-commits, LuoYuanke, LiuChen3

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70875
This commit is contained in:
Wang, Pengfei 2019-11-27 21:09:17 +08:00
parent f61099af9e
commit cf81714a7e
4 changed files with 42 additions and 15 deletions

View File

@ -95,7 +95,8 @@ multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1,
Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy, string Suff,
PatFrag MemFrag128, PatFrag MemFrag256,
@ -237,7 +238,7 @@ multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
}
let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
hasSideEffects = 0 in
hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
SDNode OpNode, RegisterClass RC,
@ -263,7 +264,8 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
// the lowest element of the FMA*_Int instruction. Even though such analysis
// may be not implemented yet we allow the routines doing the actual commute
// transformation to decide if one or another instruction is commutable or not.
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
Operand memopr, RegisterClass RC,
X86FoldableSchedWrite sched> {
@ -384,6 +386,7 @@ defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR6
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
PatFrag mem_frag, X86FoldableSchedWrite sched> {
@ -425,7 +428,8 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
ValueType VT, X86FoldableSchedWrite sched> {
let isCodeGenOnly = 1, hasSideEffects = 0 in {
let isCodeGenOnly = 1, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@ -458,6 +462,7 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
} // isCodeGenOnly = 1
}
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256,

View File

@ -5542,7 +5542,7 @@ let ExeDomain = SSEPackedDouble in {
// FP round - roundss, roundps, roundsd, roundpd
let Predicates = [HasAVX, NoVLX] in {
let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {
// Intrinsic form
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,
@ -5552,7 +5552,7 @@ let Predicates = [HasAVX, NoVLX] in {
VEX, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
@ -5564,9 +5564,9 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [UseAVX] in {
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales, 0>,
VEX_4V, VEX_LIG, VEX_WIG;
VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
VEX_4V, VEX_LIG, VEX_WIG;
VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
}
let Predicates = [UseAVX] in {
@ -7326,12 +7326,12 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
}
let Predicates = [HasF16C, NoVLX] in {
defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>;
defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L;
defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>, SIMD_EXC;
defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L, SIMD_EXC;
defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
WriteCvtPS2PHSt>;
WriteCvtPS2PHSt>, SIMD_EXC;
defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
WriteCvtPS2PHYSt>, VEX_L;
WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;
// Pattern match vcvtph2ps of a scalar i64 load.
def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),

View File

@ -1,4 +1,4 @@
; RUN: llc -march=x86-64 -mattr=+mmx -stop-after finalize-isel -o - %s | FileCheck %s
; RUN: llc -march=x86-64 -mattr=+mmx,+fma,+f16c -stop-after finalize-isel -o - %s | FileCheck %s
; This test ensures that the MXCSR is implicitly used by MMX FP instructions.
define x86_mmx @mxcsr_mmx(<4 x float> %a0) {
@ -15,8 +15,31 @@ define x86_mmx @mxcsr_mmx(<4 x float> %a0) {
ret x86_mmx %5
}
define half @mxcsr_f16c(float %a) {
; CHECK: VCVTPS2PH{{.*}}mxcsr
; CHECK: VCVTPH2PS{{.*}}mxcsr
%res = fptrunc float %a to half
ret half %res
}
define <4 x float> @mxcsr_fma_ss(<4 x float> %a, <4 x float> %b) {
; CHECK: VFMADD{{.*}}mxcsr
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float>
%a)
ret <4 x float> %res
}
define <4 x float> @mxcsr_fma_ps(<4 x float> %a, <4 x float> %b) {
; CHECK: VFMADD{{.*}}mxcsr
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float>
%a)
ret <4 x float> %res
}
declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>)
declare<4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx)
declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>)
declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx)
declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>)
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)

View File

@ -6,5 +6,4 @@ CHECK-NEXT: key:
CHECK-NEXT: instructions:
CHECK-NEXT: VFMADDSS4rm
CHECK: register_initial_values:
# FIXME: This will be changed to CHECK by the following patch that modeling MXCSR to VFMADDSS.
CHECK-NOT: MXCSR
CHECK: MXCSR