forked from OSchip/llvm-project
[X86] Remove NotMemoryFoldable from some AVX/AVX512 scalar instructions.
Some of these instructions are already in the manual folding table, so we should have them in the automatically generated table too. (llvm-svn: 334725)
This commit is contained in:
parent
b7788ebb4a
commit
9f829f76e8
|
@ -7337,10 +7337,10 @@ multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNo
|
|||
}
|
||||
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
|
||||
X86froundRnd, WriteCvtSD2SS, f64x_info,
|
||||
f32x_info>, NotMemoryFoldable;
|
||||
f32x_info>;
|
||||
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
|
||||
X86fpextRnd, WriteCvtSS2SD, f32x_info,
|
||||
f64x_info>, NotMemoryFoldable;
|
||||
f64x_info>;
|
||||
|
||||
def : Pat<(f64 (fpextend FR32X:$src)),
|
||||
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
|
||||
|
@ -8294,16 +8294,16 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
|
||||
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
|
||||
f32x_info>, EVEX_CD8<32, CD8VT1>,
|
||||
T8PD, NotMemoryFoldable;
|
||||
T8PD;
|
||||
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
|
||||
f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
|
||||
T8PD, NotMemoryFoldable;
|
||||
T8PD;
|
||||
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
|
||||
SchedWriteFRsqrt.Scl, f32x_info>,
|
||||
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
EVEX_CD8<32, CD8VT1>, T8PD;
|
||||
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
|
||||
SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
|
||||
EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
EVEX_CD8<64, CD8VT1>, T8PD;
|
||||
|
||||
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
|
||||
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
@ -8596,11 +8596,10 @@ multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
|
|||
X86SchedWriteSizes sched> {
|
||||
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS",
|
||||
int_x86_sse_sqrt_ss>,
|
||||
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
|
||||
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
|
||||
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD",
|
||||
int_x86_sse2_sqrt_sd>,
|
||||
EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
|
||||
NotMemoryFoldable;
|
||||
EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
|
||||
}
|
||||
|
||||
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
|
||||
|
|
|
@ -1240,13 +1240,13 @@ let hasSideEffects = 0, Predicates = [UseAVX] in {
|
|||
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
|
||||
(ins FR32:$src1, FR64:$src2),
|
||||
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||
VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
|
||||
VEX_4V, VEX_LIG, VEX_WIG,
|
||||
Sched<[WriteCvtSD2SS]>;
|
||||
let mayLoad = 1 in
|
||||
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
|
||||
(ins FR32:$src1, f64mem:$src2),
|
||||
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||
XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
|
||||
XD, VEX_4V, VEX_LIG, VEX_WIG,
|
||||
Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
|
@ -1302,13 +1302,13 @@ let hasSideEffects = 0 in {
|
|||
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
|
||||
(ins FR64:$src1, FR32:$src2),
|
||||
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||
XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
|
||||
XS, VEX_4V, VEX_LIG, VEX_WIG,
|
||||
Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>;
|
||||
let mayLoad = 1 in
|
||||
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
|
||||
(ins FR64:$src1, f32mem:$src2),
|
||||
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||
XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
|
||||
XS, VEX_4V, VEX_LIG, VEX_WIG,
|
||||
Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>,
|
||||
Requires<[UseAVX, OptForSize]>;
|
||||
}
|
||||
|
@ -2945,7 +2945,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
f32mem, ssmem, sse_load_f32,
|
||||
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
|
||||
SSEPackedSingle, sched.Scl, AVXTarget>, XS, VEX_4V,
|
||||
VEX_LIG, VEX_WIG, NotMemoryFoldable;
|
||||
VEX_LIG, VEX_WIG;
|
||||
}
|
||||
|
||||
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
@ -2958,7 +2958,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
f64mem, sdmem, sse_load_f64,
|
||||
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
|
||||
OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
|
||||
XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
|
||||
XD, VEX_4V, VEX_LIG, VEX_WIG;
|
||||
}
|
||||
|
||||
// Square root.
|
||||
|
|
Loading…
Reference in New Issue