[X86] Model MXCSR for AVX instructions other than AVX512
Summary: Model MXCSR for AVX instructions other than AVX512

Reviewers: craig.topper, RKSimon

Subscribers: hiraditya, llvm-commits, LuoYuanke, LiuChen3

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70875
parent f61099af9e
commit cf81714a7e
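The change applies one pattern throughout: each affected AVX, FMA and F16C instruction definition gains an implicit use of MXCSR and is marked as possibly raising a SIMD floating-point exception, either by widening the enclosing let block or by appending the SIMD_EXC mix-in, which, as used in the hunks below, supplies the same two settings. The following standalone TableGen sketch illustrates both forms; the Register/Inst stand-ins and the *_SKETCH names are illustrative assumptions and do not come from the patch.

// mxcsr_sketch.td -- hypothetical, self-contained illustration; the real patch
// edits X86 .td files whose base classes already declare these fields.
// Dump the resulting records with: llvm-tblgen mxcsr_sketch.td
class Register<string n> { string Name = n; }  // stand-in for LLVM's Register class
def MXCSR : Register<"mxcsr">;                 // stand-in for the real MXCSR register

class Inst {                                   // stand-in for LLVM's Instruction class
  list<Register> Uses = [];                    // implicit register uses
  bit mayRaiseFPException = 0;                 // may raise a floating-point exception
}

// Mix-in supplying both properties at once (the role SIMD_EXC plays below).
class SIMD_EXC {
  list<Register> Uses = [MXCSR];
  bit mayRaiseFPException = 1;
}

// Form 1: widen the enclosing let block, as done for the FMA3/FMA4 multiclasses.
let Uses = [MXCSR], mayRaiseFPException = 1 in
def VFMADD_SKETCH : Inst;

// Form 2: append the mix-in to an individual def/defm, as done for
// VROUND*, VCVTPH2PS* and VCVTPS2PH*.
def VROUND_SKETCH : Inst, SIMD_EXC;

Either way the generated instruction records end up with Uses = [MXCSR] and mayRaiseFPException = 1, which is what lets later passes model these instructions as reading the MXCSR control/status register.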
@@ -95,7 +95,8 @@ multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
            Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
 }

-let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
+let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1,
+    Uses = [MXCSR], mayRaiseFPException = 1 in
 multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                        string OpcodeStr, string PackTy, string Suff,
                        PatFrag MemFrag128, PatFrag MemFrag256,

@@ -237,7 +238,7 @@ multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
 }

 let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
-    hasSideEffects = 0 in
+    hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in
 multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                        string OpStr, string PackTy, string Suff,
                        SDNode OpNode, RegisterClass RC,

@@ -263,7 +264,8 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
 // the lowest element of the FMA*_Int instruction. Even though such analysis
 // may be not implemented yet we allow the routines doing the actual commute
 // transformation to decide if one or another instruction is commutable or not.
-let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
+let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0,
+    Uses = [MXCSR], mayRaiseFPException = 1 in
 multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
                         Operand memopr, RegisterClass RC,
                         X86FoldableSchedWrite sched> {

@@ -384,6 +386,7 @@ defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR6
 // FMA4 - AMD 4 operand Fused Multiply-Add instructions
 //===----------------------------------------------------------------------===//

+let Uses = [MXCSR], mayRaiseFPException = 1 in
 multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                  X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
                  PatFrag mem_frag, X86FoldableSchedWrite sched> {

@@ -425,7 +428,8 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in

 multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
                      ValueType VT, X86FoldableSchedWrite sched> {
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0,
+    Uses = [MXCSR], mayRaiseFPException = 1 in {
   def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2, VR128:$src3),
                !strconcat(OpcodeStr,

@@ -458,6 +462,7 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
 } // isCodeGenOnly = 1
 }

+let Uses = [MXCSR], mayRaiseFPException = 1 in
 multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                  ValueType OpVT128, ValueType OpVT256,
                  PatFrag ld_frag128, PatFrag ld_frag256,
@@ -5542,7 +5542,7 @@ let ExeDomain = SSEPackedDouble in {

 // FP round - roundss, roundps, roundsd, roundpd
 let Predicates = [HasAVX, NoVLX] in {
-let ExeDomain = SSEPackedSingle in {
+let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {
   // Intrinsic form
   defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
                                   loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,

@@ -5552,7 +5552,7 @@ let Predicates = [HasAVX, NoVLX] in {
                                   VEX, VEX_L, VEX_WIG;
 }

-let ExeDomain = SSEPackedDouble in {
+let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
                                   loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
                                   VEX, VEX_WIG;

@@ -5564,9 +5564,9 @@ let Predicates = [HasAVX, NoVLX] in {
 let Predicates = [UseAVX] in {
   defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
                                  v4f32, v2f64, X86RndScales, 0>,
-                                 VEX_4V, VEX_LIG, VEX_WIG;
+                                 VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
   defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
-                               VEX_4V, VEX_LIG, VEX_WIG;
+                               VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
 }

 let Predicates = [UseAVX] in {

@@ -7326,12 +7326,12 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
 }

 let Predicates = [HasF16C, NoVLX] in {
-  defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>;
-  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L;
+  defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>, SIMD_EXC;
+  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L, SIMD_EXC;
   defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
-                              WriteCvtPS2PHSt>;
+                              WriteCvtPS2PHSt>, SIMD_EXC;
   defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
-                              WriteCvtPS2PHYSt>, VEX_L;
+                              WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;

   // Pattern match vcvtph2ps of a scalar i64 load.
   def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 -mattr=+mmx -stop-after finalize-isel -o - %s | FileCheck %s
+; RUN: llc -march=x86-64 -mattr=+mmx,+fma,+f16c -stop-after finalize-isel -o - %s | FileCheck %s
 ; This test ensures that the MXCSR is implicitly used by MMX FP instructions.

 define x86_mmx @mxcsr_mmx(<4 x float> %a0) {

@@ -15,8 +15,31 @@ define x86_mmx @mxcsr_mmx(<4 x float> %a0) {
   ret x86_mmx %5
 }

+define half @mxcsr_f16c(float %a) {
+; CHECK: VCVTPS2PH{{.*}}mxcsr
+; CHECK: VCVTPH2PS{{.*}}mxcsr
+  %res = fptrunc float %a to half
+  ret half %res
+}
+
+define <4 x float> @mxcsr_fma_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK: VFMADD{{.*}}mxcsr
+  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float>
+%a)
+  ret <4 x float> %res
+}
+
+define <4 x float> @mxcsr_fma_ps(<4 x float> %a, <4 x float> %b) {
+; CHECK: VFMADD{{.*}}mxcsr
+  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float>
+%a)
+  ret <4 x float> %res
+}
+
 declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>)
 declare<4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx)
 declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>)
 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx)
 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>)
+declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
@@ -6,5 +6,4 @@ CHECK-NEXT: key:
 CHECK-NEXT:   instructions:
 CHECK-NEXT:     VFMADDSS4rm
 CHECK: register_initial_values:
-# FIXME: This will be changed to CHECK by the following patch that modeling MXCSR to VFMADDSS.
-CHECK-NOT: MXCSR
+CHECK: MXCSR