forked from OSchip/llvm-project
[X86] Split WriteCvtF2F into F32->F64 and F64->F32 scheduler classes
BtVer2 - Fixes schedules for (V)CVTPS2PD instructions.
A lot of the Intel models still have too many InstRW overrides for these new classes - this needs cleaning up, but I wanted to get the classes in first.
llvm-svn: 332376
This commit is contained in:
parent
3c35290c58
commit
be9a206883
|
@ -7183,10 +7183,10 @@ multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNo
|
|||
}
|
||||
}
|
||||
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
|
||||
X86froundRnd, WriteCvtF2F, f64x_info,
|
||||
X86froundRnd, WriteCvtSD2SS, f64x_info,
|
||||
f32x_info>, NotMemoryFoldable;
|
||||
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
|
||||
X86fpextRnd, WriteCvtF2F, f32x_info,
|
||||
X86fpextRnd, WriteCvtSS2SD, f32x_info,
|
||||
f64x_info>, NotMemoryFoldable;
|
||||
|
||||
def : Pat<(f64 (fpextend FR32X:$src)),
|
||||
|
@ -7277,33 +7277,33 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
|||
|
||||
// Extend Float to Double
|
||||
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
|
||||
X86FoldableSchedWrite sched> {
|
||||
X86SchedWriteWidths sched> {
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
|
||||
fpextend, sched>,
|
||||
fpextend, sched.ZMM>,
|
||||
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
|
||||
X86vfpextRnd, sched>, EVEX_V512;
|
||||
X86vfpextRnd, sched.ZMM>, EVEX_V512;
|
||||
}
|
||||
let Predicates = [HasVLX] in {
|
||||
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
|
||||
X86vfpext, sched, "{1to2}", "", f64mem>, EVEX_V128;
|
||||
X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
|
||||
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
|
||||
sched>, EVEX_V256;
|
||||
sched.YMM>, EVEX_V256;
|
||||
}
|
||||
}
|
||||
|
||||
// Truncate Double to Float
|
||||
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched> {
|
||||
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched>,
|
||||
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
|
||||
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
|
||||
X86vfproundRnd, sched>, EVEX_V512;
|
||||
X86vfproundRnd, sched.ZMM>, EVEX_V512;
|
||||
}
|
||||
let Predicates = [HasVLX] in {
|
||||
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
|
||||
X86vfpround, sched, "{1to2}", "{x}">, EVEX_V128;
|
||||
X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
|
||||
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
|
||||
sched, "{1to4}", "{y}">, EVEX_V256;
|
||||
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
|
||||
|
||||
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
|
||||
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
|
||||
|
@ -7316,9 +7316,9 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite
|
|||
}
|
||||
}
|
||||
|
||||
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", WriteCvtF2F>,
|
||||
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
|
||||
VEX_W, PD, EVEX_CD8<64, CD8VF>;
|
||||
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", WriteCvtF2F>,
|
||||
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
|
||||
PS, EVEX_CD8<32, CD8VH>;
|
||||
|
||||
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
|
||||
|
@ -7864,7 +7864,7 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
|
|||
let Predicates = [HasAVX512] in
|
||||
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
|
||||
WriteCvtPH2PSY>,
|
||||
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtF2F>,
|
||||
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSY>,
|
||||
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
|
||||
|
||||
let Predicates = [HasVLX] in {
|
||||
|
|
|
@ -1241,16 +1241,16 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
|
|||
// Convert scalar double to scalar single
|
||||
let hasSideEffects = 0, Predicates = [UseAVX] in {
|
||||
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
|
||||
(ins FR32:$src1, FR64:$src2),
|
||||
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||
VEX_4V, VEX_LIG,
|
||||
Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable;
|
||||
(ins FR32:$src1, FR64:$src2),
|
||||
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||
VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
|
||||
Sched<[WriteCvtSD2SS]>;
|
||||
let mayLoad = 1 in
|
||||
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
|
||||
(ins FR32:$src1, f64mem:$src2),
|
||||
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||
XD, VEX_4V, VEX_LIG,
|
||||
Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable;
|
||||
(ins FR32:$src1, f64mem:$src2),
|
||||
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||
XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
|
||||
Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
def : Pat<(f32 (fpround FR64:$src)),
|
||||
|
@ -1260,12 +1260,12 @@ def : Pat<(f32 (fpround FR64:$src)),
|
|||
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
|
||||
"cvtsd2ss\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (fpround FR64:$src))]>,
|
||||
Sched<[WriteCvtF2F]>;
|
||||
Sched<[WriteCvtSD2SS]>;
|
||||
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
|
||||
"cvtsd2ss\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
|
||||
XD,
|
||||
Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
|
||||
"cvtsd2ss\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
|
||||
XD, Requires<[UseSSE2, OptForSize]>,
|
||||
Sched<[WriteCvtSD2SS.Folded]>;
|
||||
|
||||
let isCodeGenOnly = 1 in {
|
||||
def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
|
||||
|
@ -1273,30 +1273,29 @@ def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
|
|||
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
|
||||
XD, VEX_4V, VEX_WIG,
|
||||
Requires<[HasAVX]>, Sched<[WriteCvtF2F]>;
|
||||
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
|
||||
Sched<[WriteCvtSD2SS]>;
|
||||
def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
|
||||
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
|
||||
VR128:$src1, sse_load_f64:$src2))]>,
|
||||
XD, VEX_4V, VEX_WIG,
|
||||
Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
|
||||
|
||||
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
|
||||
Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
|
||||
XD, Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
|
||||
XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
|
||||
def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
|
||||
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
|
||||
VR128:$src1, sse_load_f64:$src2))]>,
|
||||
XD, Requires<[UseSSE2]>,
|
||||
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
|
||||
Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
|
||||
}
|
||||
} // isCodeGenOnly = 1
|
||||
|
||||
|
@ -1306,14 +1305,14 @@ let hasSideEffects = 0, Predicates = [UseAVX] in {
|
|||
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
|
||||
(ins FR64:$src1, FR32:$src2),
|
||||
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||
XS, VEX_4V, VEX_LIG,
|
||||
Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable;
|
||||
XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
|
||||
Sched<[WriteCvtSS2SD]>;
|
||||
let mayLoad = 1 in
|
||||
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
|
||||
(ins FR64:$src1, f32mem:$src2),
|
||||
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||
XS, VEX_4V, VEX_LIG,
|
||||
Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable;
|
||||
XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
|
||||
Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
def : Pat<(f64 (fpextend FR32:$src)),
|
||||
|
@ -1331,11 +1330,12 @@ def : Pat<(extloadf32 addr:$src),
|
|||
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
|
||||
"cvtss2sd\t{$src, $dst|$dst, $src}",
|
||||
[(set FR64:$dst, (fpextend FR32:$src))]>,
|
||||
XS, Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
|
||||
XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
|
||||
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
|
||||
"cvtss2sd\t{$src, $dst|$dst, $src}",
|
||||
[(set FR64:$dst, (extloadf32 addr:$src))]>,
|
||||
XS, Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
|
||||
XS, Requires<[UseSSE2, OptForSize]>,
|
||||
Sched<[WriteCvtSS2SD.Folded]>;
|
||||
|
||||
// extload f32 -> f64. This matches load+fpextend because we have a hack in
|
||||
// the isel (PreprocessForFPConvert) that can introduce loads after dag
|
||||
|
@ -1352,25 +1352,25 @@ def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
|
|||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[]>, XS, VEX_4V, VEX_WIG,
|
||||
Requires<[HasAVX]>, Sched<[WriteCvtF2F]>;
|
||||
Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
|
||||
let mayLoad = 1 in
|
||||
def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
|
||||
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[]>, XS, VEX_4V, VEX_WIG,
|
||||
Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
|
||||
[]>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
|
||||
Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
|
||||
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
|
||||
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
|
||||
[]>, XS, Requires<[UseSSE2]>,
|
||||
Sched<[WriteCvtF2F]>;
|
||||
Sched<[WriteCvtSS2SD]>;
|
||||
let mayLoad = 1 in
|
||||
def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
|
||||
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
|
||||
[]>, XS, Requires<[UseSSE2]>,
|
||||
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
|
||||
Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
|
||||
}
|
||||
} // isCodeGenOnly = 1
|
||||
|
||||
|
@ -1699,30 +1699,30 @@ let Predicates = [HasAVX, NoVLX] in {
|
|||
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"vcvtps2pd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
|
||||
PS, VEX, Sched<[WriteCvtF2F]>, VEX_WIG;
|
||||
PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
|
||||
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
|
||||
"vcvtps2pd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
|
||||
PS, VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG;
|
||||
PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
|
||||
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
|
||||
"vcvtps2pd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))]>,
|
||||
PS, VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG;
|
||||
PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
|
||||
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
|
||||
"vcvtps2pd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
|
||||
PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG;
|
||||
PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG;
|
||||
}
|
||||
|
||||
let Predicates = [UseSSE2] in {
|
||||
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"cvtps2pd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
|
||||
PS, Sched<[WriteCvtF2F]>;
|
||||
PS, Sched<[WriteCvtPS2PD]>;
|
||||
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
|
||||
"cvtps2pd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
|
||||
PS, Sched<[WriteCvtF2FLd]>;
|
||||
PS, Sched<[WriteCvtPS2PD.Folded]>;
|
||||
}
|
||||
|
||||
// Convert Packed DW Integers to Packed Double FP
|
||||
|
@ -1787,7 +1787,7 @@ let Predicates = [HasAVX, NoVLX] in
|
|||
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"cvtpd2ps\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
|
||||
VEX, Sched<[WriteCvtF2F]>, VEX_WIG;
|
||||
VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
|
||||
|
||||
// XMM only
|
||||
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
|
||||
|
@ -1796,7 +1796,7 @@ let Predicates = [HasAVX, NoVLX] in
|
|||
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>,
|
||||
VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG;
|
||||
VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
|
||||
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
|
||||
(VCVTPD2PSrm VR128:$dst, f128mem:$src), 0, "intel">;
|
||||
|
||||
|
@ -1805,11 +1805,11 @@ let Predicates = [HasAVX, NoVLX] in {
|
|||
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
|
||||
"cvtpd2ps\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (fpround VR256:$src))]>,
|
||||
VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG;
|
||||
VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
|
||||
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
|
||||
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (fpround (loadv4f64 addr:$src)))]>,
|
||||
VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG;
|
||||
VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
|
||||
}
|
||||
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
|
||||
(VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
|
||||
|
@ -1819,11 +1819,11 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
|
|||
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"cvtpd2ps\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
|
||||
Sched<[WriteCvtF2F]>;
|
||||
Sched<[WriteCvtPD2PS]>;
|
||||
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
"cvtpd2ps\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
|
||||
Sched<[WriteCvtF2FLd]>;
|
||||
Sched<[WriteCvtPD2PS.Folded]>;
|
||||
|
||||
// AVX 256-bit register conversion intrinsics
|
||||
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
|
||||
|
|
|
@ -343,9 +343,16 @@ def : WriteRes<WriteVecExtractSt, [BWPort4,BWPort5,BWPort237]> {
|
|||
}
|
||||
|
||||
// Conversion between integer and float.
|
||||
defm : BWWriteResPair<WriteCvtF2I, [BWPort1], 3>; // Float -> Integer.
|
||||
defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
|
||||
defm : BWWriteResPair<WriteCvtF2F, [BWPort1], 3>; // Float -> Float size conversion.
|
||||
defm : BWWriteResPair<WriteCvtF2I, [BWPort1], 3>; // Float -> Integer.
|
||||
defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
|
||||
|
||||
defm : BWWriteResPair<WriteCvtSS2SD, [BWPort1], 3>;
|
||||
defm : BWWriteResPair<WriteCvtPS2PD, [BWPort1], 3>;
|
||||
defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>;
|
||||
|
||||
defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1], 3>;
|
||||
defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1], 3>;
|
||||
defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1], 3>;
|
||||
|
||||
defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
|
||||
|
|
|
@ -222,9 +222,6 @@ defm : HWWriteResPair<WriteFSqrt64Y, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28
|
|||
defm : HWWriteResPair<WriteFSqrt64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>;
|
||||
defm : HWWriteResPair<WriteFSqrt80, [HWPort0,HWFPDivider], 23, [1,17]>;
|
||||
|
||||
defm : HWWriteResPair<WriteCvtF2I, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
|
||||
defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 5>;
|
||||
defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>;
|
||||
defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
|
||||
|
@ -251,6 +248,18 @@ defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3, [1], 1, 7>;
|
|||
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
|
||||
defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
|
||||
|
||||
// Conversion between integer and float.
|
||||
defm : HWWriteResPair<WriteCvtF2I, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
|
||||
|
||||
defm : HWWriteResPair<WriteCvtSS2SD, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteCvtPS2PD, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort1], 3>;
|
||||
|
||||
defm : HWWriteResPair<WriteCvtSD2SS, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteCvtPD2PS, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1], 3>;
|
||||
|
||||
defm : X86WriteRes<WriteCvtPH2PS, [HWPort0,HWPort5], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteCvtPH2PSY, [HWPort0,HWPort5], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort0,HWPort23], 6, [1,1], 2>;
|
||||
|
|
|
@ -217,9 +217,6 @@ defm : SBWriteResPair<WriteFSqrt80, [SBPort0,SBFPDivider], 24, [1,24], 1, 6>;
|
|||
defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>;
|
||||
defm : SBWriteResPair<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>;
|
||||
defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>;
|
||||
defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
|
||||
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
|
||||
defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
|
||||
|
@ -236,8 +233,20 @@ defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
|
|||
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
|
||||
defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
|
||||
|
||||
defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
|
||||
// Conversion between integer and float.
|
||||
defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
|
||||
|
||||
defm : SBWriteResPair<WriteCvtSS2SD, [SBPort0], 1, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteCvtPS2PD, [SBPort0,SBPort5], 2, [1,1], 2>;
|
||||
defm : SBWriteResPair<WriteCvtPS2PDY, [SBPort0,SBPort5], 2, [1,1], 2>;
|
||||
|
||||
defm : SBWriteResPair<WriteCvtSD2SS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
|
||||
defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
|
||||
defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
|
||||
|
||||
defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
|
||||
|
||||
defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>;
|
||||
defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>;
|
||||
|
@ -450,13 +459,6 @@ defm : SBWriteResPair<WriteFMAY, [SBPort01], 5>;
|
|||
|
||||
// Remaining SNB instrs.
|
||||
|
||||
def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> {
|
||||
let Latency = 1;
|
||||
let NumMicroOps = 1;
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup0], (instregex "(V?)CVTSS2SDrr")>;
|
||||
|
||||
def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
|
||||
let Latency = 1;
|
||||
let NumMicroOps = 1;
|
||||
|
@ -555,13 +557,6 @@ def: InstRW<[SBWriteResGroup12], (instregex "(V?)COMISDrr",
|
|||
"(V?)UCOMISDrr",
|
||||
"(V?)UCOMISSrr")>;
|
||||
|
||||
def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort5]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup13], (instregex "(V?)CVTPS2PD(Y?)rr")>;
|
||||
|
||||
def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
|
@ -699,8 +694,6 @@ def SBWriteResGroup28 : SchedWriteRes<[SBPort1,SBPort5]> {
|
|||
def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPI2PDirr",
|
||||
"MMX_CVT(T?)PD2PIirr",
|
||||
"(V?)CVTDQ2PD(Y?)rr",
|
||||
"(V?)CVTPD2PS(Y?)rr",
|
||||
"(V?)CVTSD2SSrr",
|
||||
"(V?)CVTSI(64)?2SDrr",
|
||||
"(V?)CVT(T?)PD2DQ(Y?)rr")>;
|
||||
|
||||
|
@ -903,8 +896,7 @@ def SBWriteResGroup55 : SchedWriteRes<[SBPort0,SBPort23]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup55], (instregex "(V?)CVTPS2PD(Y?)rm",
|
||||
"(V?)CVTSS2SDrm")>;
|
||||
def: InstRW<[SBWriteResGroup55], (instregex "(V?)CVTPS2PD(Y?)rm")>;
|
||||
|
||||
def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
|
||||
let Latency = 7;
|
||||
|
@ -1180,8 +1172,6 @@ def SBWriteResGroup103 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
|
|||
def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPI2PDirm",
|
||||
"MMX_CVT(T?)PD2PIirm",
|
||||
"(V?)CVTDQ2PD(Y?)rm",
|
||||
"(V?)CVTPD2PSrm",
|
||||
"(V?)CVTSD2SSrm",
|
||||
"(V?)CVTSI(64)?2SSrm",
|
||||
"(V?)CVT(T?)PD2DQrm")>;
|
||||
|
||||
|
@ -1212,8 +1202,7 @@ def SBWriteResGroup107 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
|
|||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,1,1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2PSYrm",
|
||||
"VCVT(T?)PD2DQYrm")>;
|
||||
def: InstRW<[SBWriteResGroup107], (instregex "VCVT(T?)PD2DQYrm")>;
|
||||
|
||||
def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
|
||||
let Latency = 12;
|
||||
|
|
|
@ -338,7 +338,14 @@ def : WriteRes<WriteVecExtractSt, [SKLPort4,SKLPort5,SKLPort237]> {
|
|||
// Conversion between integer and float.
|
||||
defm : SKLWriteResPair<WriteCvtF2I, [SKLPort1], 3>; // Float -> Integer.
|
||||
defm : SKLWriteResPair<WriteCvtI2F, [SKLPort1], 4>; // Integer -> Float.
|
||||
defm : SKLWriteResPair<WriteCvtF2F, [SKLPort1], 3>; // Float -> Float size conversion.
|
||||
|
||||
defm : SKLWriteResPair<WriteCvtSS2SD, [SKLPort1], 3>;
|
||||
defm : SKLWriteResPair<WriteCvtPS2PD, [SKLPort1], 3>;
|
||||
defm : SKLWriteResPair<WriteCvtPS2PDY, [SKLPort1], 3>;
|
||||
|
||||
defm : SKLWriteResPair<WriteCvtSD2SS, [SKLPort1], 3>;
|
||||
defm : SKLWriteResPair<WriteCvtPD2PS, [SKLPort1], 3>;
|
||||
defm : SKLWriteResPair<WriteCvtPD2PSY, [SKLPort1], 3>;
|
||||
|
||||
defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort015], 5, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteCvtPH2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
|
||||
|
|
|
@ -336,9 +336,16 @@ def : WriteRes<WriteVecExtractSt, [SKXPort4,SKXPort5,SKXPort237]> {
|
|||
}
|
||||
|
||||
// Conversion between integer and float.
|
||||
defm : SKXWriteResPair<WriteCvtF2I, [SKXPort1], 3>; // Float -> Integer.
|
||||
defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>; // Integer -> Float.
|
||||
defm : SKXWriteResPair<WriteCvtF2F, [SKXPort1], 3>; // Float -> Float size conversion.
|
||||
defm : SKXWriteResPair<WriteCvtF2I, [SKXPort1], 3>;
|
||||
defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>;
|
||||
|
||||
defm : SKXWriteResPair<WriteCvtSS2SD, [SKXPort1], 3>;
|
||||
defm : SKXWriteResPair<WriteCvtPS2PD, [SKXPort1], 3>;
|
||||
defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort1], 3>;
|
||||
|
||||
defm : SKXWriteResPair<WriteCvtSD2SS, [SKXPort1], 3>;
|
||||
defm : SKXWriteResPair<WriteCvtPD2PS, [SKXPort1], 3>;
|
||||
defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort1], 3>;
|
||||
|
||||
defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort015], 5, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort015], 7, [1,1], 2>;
|
||||
|
|
|
@ -298,7 +298,14 @@ def WriteMMXMOVMSK : SchedWrite;
|
|||
// Conversion between integer and float.
|
||||
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
|
||||
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
|
||||
defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
|
||||
|
||||
defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
|
||||
defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
|
||||
defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM/ZMM).
|
||||
|
||||
defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
|
||||
defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
|
||||
defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM/ZMM).
|
||||
|
||||
defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
|
||||
defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM/ZMM).
|
||||
|
@ -451,6 +458,13 @@ def SchedWriteFVarBlend
|
|||
: X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
|
||||
WriteFVarBlendY, WriteFVarBlendY>;
|
||||
|
||||
def SchedWriteCvtPS2PD
|
||||
: X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
|
||||
WriteCvtPS2PDY, WriteCvtPS2PDY>;
|
||||
def SchedWriteCvtPD2PS
|
||||
: X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
|
||||
WriteCvtPD2PSY, WriteCvtPD2PSY>;
|
||||
|
||||
def SchedWriteVecALU
|
||||
: X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUY>;
|
||||
def SchedWritePHAdd
|
||||
|
|
|
@ -273,9 +273,16 @@ defm : AtomWriteResPair<WriteFVarShuffle256, [AtomPort0], [AtomPort0]>; // NOTE
|
|||
// Conversions.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
|
||||
defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
|
||||
defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
|
||||
defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
|
||||
defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
|
||||
|
||||
defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
|
||||
defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
|
||||
defm : AtomWriteResPair<WriteCvtPS2PDY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
|
||||
|
||||
defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
|
||||
defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
|
||||
defm : AtomWriteResPair<WriteCvtPD2PSY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
|
||||
|
||||
defm : AtomWriteResPair<WriteCvtPH2PS, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
|
||||
defm : AtomWriteResPair<WriteCvtPH2PSY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
|
||||
|
@ -571,9 +578,7 @@ def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
|
|||
def : InstRW<[AtomWrite01_7], (instrs AAD8i8,
|
||||
CVTDQ2PDrr,
|
||||
CVTPD2DQrr,
|
||||
CVTPD2PSrr,
|
||||
CVTPS2DQrm,
|
||||
CVTPS2PDrr,
|
||||
CVTTPD2DQrr,
|
||||
CVTTPS2DQrm,
|
||||
MMX_CVTPD2PIirr,
|
||||
|
@ -590,8 +595,6 @@ def : InstRW<[AtomWrite01_8], (instrs LOOPE,
|
|||
FNSTCW16m,
|
||||
CVTDQ2PDrm,
|
||||
CVTPD2DQrm,
|
||||
CVTPD2PSrm,
|
||||
CVTPS2PDrm,
|
||||
CVTTPD2DQrm,
|
||||
MMX_CVTPD2PIirm,
|
||||
MMX_CVTPI2PDirm,
|
||||
|
|
|
@ -360,9 +360,16 @@ defm : JWriteResFpuPair<WriteFVarShuffle256, [JFPU01, JFPX], 1>; // NOTE: Doesn
|
|||
// Conversions.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
defm : JWriteResFpuPair<WriteCvtF2I, [JFPU1, JSTC], 3>; // Float -> Integer.
|
||||
defm : JWriteResFpuPair<WriteCvtI2F, [JFPU1, JSTC], 3>; // Integer -> Float.
|
||||
defm : JWriteResFpuPair<WriteCvtF2F, [JFPU1, JSTC], 3>; // Float -> Float size conversion.
|
||||
defm : JWriteResFpuPair<WriteCvtF2I, [JFPU1, JSTC], 3>;
|
||||
defm : JWriteResFpuPair<WriteCvtI2F, [JFPU1, JSTC], 3>;
|
||||
|
||||
defm : JWriteResFpuPair<WriteCvtSS2SD, [JFPU1, JSTC], 7, [1,2], 2>;
|
||||
defm : JWriteResFpuPair<WriteCvtPS2PD, [JFPU1, JSTC], 2, [1,1], 1>;
|
||||
defm : JWriteResYMMPair<WriteCvtPS2PDY, [JFPU1, JSTC], 2, [2,2], 2>;
|
||||
|
||||
defm : JWriteResFpuPair<WriteCvtSD2SS, [JFPU1, JSTC], 7, [1,2], 2>;
|
||||
defm : JWriteResFpuPair<WriteCvtPD2PS, [JFPU1, JSTC], 3, [1,1], 1>;
|
||||
defm : JWriteResYMMPair<WriteCvtPD2PSY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
|
||||
|
||||
defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>;
|
||||
defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
|
||||
|
@ -372,20 +379,6 @@ defm : X86WriteRes<WriteCvtPS2PHY, [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
|
|||
defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
|
||||
defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
|
||||
|
||||
def JWriteCVTF2F : SchedWriteRes<[JFPU1, JSTC]> {
|
||||
let Latency = 7;
|
||||
let ResourceCycles = [1, 2];
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteCVTF2F], (instregex "(V)?CVTS(D|S)2S(D|S)rr")>;
|
||||
|
||||
def JWriteCVTF2FLd : SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
|
||||
let Latency = 12;
|
||||
let ResourceCycles = [1, 1, 2];
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteCVTF2FLd], (instregex "(V)?CVTS(D|S)2S(D|S)rm")>;
|
||||
|
||||
def JWriteCVTF2SI : SchedWriteRes<[JFPU1, JSTC, JFPA, JALU0]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
|
@ -560,14 +553,14 @@ def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
|
|||
let ResourceCycles = [2, 2, 4];
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr, VCVTPD2PSYrr)>;
|
||||
def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr)>;
|
||||
|
||||
def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> {
|
||||
let Latency = 11;
|
||||
let ResourceCycles = [2, 2, 2, 4];
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
|
||||
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm)>;
|
||||
|
||||
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
||||
let Latency = 6;
|
||||
|
|
|
@ -197,9 +197,6 @@ defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
|
|||
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
|
||||
|
@ -213,6 +210,18 @@ defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
|
|||
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
|
||||
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
|
||||
|
||||
// Conversion between integer and float.
|
||||
defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
|
||||
|
||||
defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
|
||||
|
||||
defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
|
||||
|
||||
// Vector integer operations.
|
||||
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
|
|
|
@ -227,7 +227,6 @@ defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1>;
|
|||
defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;
|
||||
defm : ZnWriteResFpuPair<WriteVarBlendY, [ZnFPU0], 1>;
|
||||
defm : ZnWriteResFpuPair<WriteCvtI2F, [ZnFPU3], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteCvtF2F, [ZnFPU3], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>;
|
||||
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
|
||||
defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>;
|
||||
|
@ -1186,9 +1185,16 @@ def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPDYrm")>;
|
|||
def ZnWriteCVTPD2PSr: SchedWriteRes<[ZnFPU3]> {
|
||||
let Latency = 4;
|
||||
}
|
||||
def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> {
|
||||
let Latency = 5;
|
||||
}
|
||||
|
||||
// CVTPD2PS.
|
||||
// x,x.
|
||||
def : InstRW<[ZnWriteCVTPD2PSr], (instregex "(V?)CVTPD2PSrr")>;
|
||||
def : SchedAlias<WriteCvtPD2PS, ZnWriteCVTPD2PSr>;
|
||||
|
||||
// y,y.
|
||||
def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>;
|
||||
|
||||
def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> {
|
||||
let Latency = 11;
|
||||
|
@ -1196,28 +1202,28 @@ def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> {
|
|||
let ResourceCycles = [1,2];
|
||||
}
|
||||
// x,m128.
|
||||
def : InstRW<[ZnWriteCVTPD2PSLd], (instregex "(V?)CVTPD2PS(X?)rm")>;
|
||||
def : SchedAlias<WriteCvtPD2PSLd, ZnWriteCVTPD2PSLd>;
|
||||
|
||||
// x,m256.
|
||||
def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
|
||||
let Latency = 11;
|
||||
}
|
||||
def : InstRW<[ZnWriteCVTPD2PSYLd], (instregex "(V?)CVTPD2PSYrm")>;
|
||||
def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
|
||||
|
||||
// CVTSD2SS.
|
||||
// x,x.
|
||||
// Same as WriteCVTPD2PSr
|
||||
def : InstRW<[ZnWriteCVTPD2PSr], (instregex "(V)?CVTSD2SSrr")>;
|
||||
def : SchedAlias<WriteCvtSD2SS, ZnWriteCVTPD2PSr>;
|
||||
|
||||
// x,m64.
|
||||
def : InstRW<[ZnWriteCVTPD2PSLd], (instregex "(V)?CVTSD2SSrm")>;
|
||||
def : SchedAlias<WriteCvtSD2SSLd, ZnWriteCVTPD2PSLd>;
|
||||
|
||||
// CVTPS2PD.
|
||||
// x,x.
|
||||
def ZnWriteCVTPS2PDr : SchedWriteRes<[ZnFPU3]> {
|
||||
let Latency = 3;
|
||||
}
|
||||
def : InstRW<[ZnWriteCVTPS2PDr], (instregex "(V?)CVTPS2PDrr")>;
|
||||
def : SchedAlias<WriteCvtPS2PD, ZnWriteCVTPS2PDr>;
|
||||
|
||||
// x,m64.
|
||||
// y,m128.
|
||||
|
@ -1225,20 +1231,21 @@ def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
|
|||
let Latency = 10;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[ZnWriteCVTPS2PDLd], (instregex "(V?)CVTPS2PD(Y?)rm")>;
|
||||
def : SchedAlias<WriteCvtPS2PDLd, ZnWriteCVTPS2PDLd>;
|
||||
def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>;
|
||||
|
||||
// y,x.
|
||||
def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> {
|
||||
let Latency = 3;
|
||||
}
|
||||
def : InstRW<[ZnWriteVCVTPS2PDY], (instregex "VCVTPS2PDYrr")>;
|
||||
def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>;
|
||||
|
||||
// CVTSS2SD.
|
||||
// x,x.
|
||||
def ZnWriteCVTSS2SDr : SchedWriteRes<[ZnFPU3]> {
|
||||
let Latency = 4;
|
||||
}
|
||||
def : InstRW<[ZnWriteCVTSS2SDr], (instregex "(V?)CVTSS2SDrr")>;
|
||||
def : SchedAlias<WriteCvtSS2SD, ZnWriteCVTSS2SDr>;
|
||||
|
||||
// x,m32.
|
||||
def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
|
||||
|
@ -1246,7 +1253,7 @@ def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1, 2];
|
||||
}
|
||||
def : InstRW<[ZnWriteCVTSS2SDLd], (instregex "(V?)CVTSS2SDrm")>;
|
||||
def : SchedAlias<WriteCvtSS2SDLd, ZnWriteCVTSS2SDLd>;
|
||||
|
||||
def ZnWriteCVTDQ2PDr: SchedWriteRes<[ZnFPU12,ZnFPU3]> {
|
||||
let Latency = 5;
|
||||
|
|
|
@ -1752,8 +1752,8 @@ define <4 x i32> @f64to4si(<4 x double> %a) {
|
|||
define <16 x float> @f64to16f32(<16 x double> %b) nounwind {
|
||||
; GENERIC-LABEL: f64to16f32:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
|
||||
; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [4:1.00]
|
||||
; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -1788,7 +1788,7 @@ define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) {
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00]
|
||||
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -1823,7 +1823,7 @@ define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
|
|||
define <8 x double> @f32to8f64(<8 x float> %b) nounwind {
|
||||
; GENERIC-LABEL: f32to8f64:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: f32to8f64:
|
||||
|
@ -1838,7 +1838,7 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x doubl
|
|||
; GENERIC-LABEL: f32to4f64_mask:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [2:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: f32to4f64_mask:
|
||||
|
@ -4216,7 +4216,7 @@ define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind
|
|||
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
|
||||
; GENERIC-LABEL: fptrunc_test:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: fptrunc_test:
|
||||
|
@ -4230,7 +4230,7 @@ define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
|
|||
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
|
||||
; GENERIC-LABEL: fpext_test:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: fpext_test:
|
||||
|
|
|
@ -1818,15 +1818,15 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
|
|||
;
|
||||
; BTVER2-SSE-LABEL: test_cvtps2pd:
|
||||
; BTVER2-SSE: # %bb.0:
|
||||
; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
|
||||
; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [8:1.00]
|
||||
; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
|
||||
; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
|
||||
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
|
||||
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; BTVER2-LABEL: test_cvtps2pd:
|
||||
; BTVER2: # %bb.0:
|
||||
; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [8:1.00]
|
||||
; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
|
||||
; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
|
|
|
@ -1122,10 +1122,10 @@ vzeroupper
|
|||
# CHECK-NEXT: 1 8 1.00 * vcvtps2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 3 2.00 vcvtps2dq %ymm0, %ymm2
|
||||
# CHECK-NEXT: 2 8 2.00 * vcvtps2dq (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 3 1.00 vcvtps2pd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * vcvtps2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 vcvtps2pd %xmm0, %ymm2
|
||||
# CHECK-NEXT: 1 8 1.00 * vcvtps2pd (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 2 1.00 vcvtps2pd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * vcvtps2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 2 2.00 vcvtps2pd %xmm0, %ymm2
|
||||
# CHECK-NEXT: 2 7 2.00 * vcvtps2pd (%rax), %ymm2
|
||||
# CHECK-NEXT: 2 7 1.00 vcvtsd2si %xmm0, %ecx
|
||||
# CHECK-NEXT: 2 7 1.00 vcvtsd2si %xmm0, %rcx
|
||||
# CHECK-NEXT: 2 12 1.00 * vcvtsd2si (%rax), %ecx
|
||||
|
@ -1720,7 +1720,7 @@ vzeroupper
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: 48.00 2.00 - 349.50 909.50 397.00 409.00 381.00 - 43.00 122.00 118.50 118.50 38.00
|
||||
# CHECK-NEXT: 48.00 2.00 - 349.50 909.50 397.00 411.00 382.00 - 43.00 124.00 118.50 118.50 38.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
|
@ -1830,8 +1830,8 @@ vzeroupper
|
|||
# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvtps2dq (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2pd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtps2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2pd %xmm0, %ymm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtps2pd (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vcvtps2pd %xmm0, %ymm2
|
||||
# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvtps2pd (%rax), %ymm2
|
||||
# CHECK-NEXT: 1.00 - - 1.00 - - 1.00 - - - 1.00 - - - vcvtsd2si %xmm0, %ecx
|
||||
# CHECK-NEXT: 1.00 - - 1.00 - - 1.00 - - - 1.00 - - - vcvtsd2si %xmm0, %rcx
|
||||
# CHECK-NEXT: 1.00 - - 1.00 - - 1.00 1.00 - - 1.00 - - - vcvtsd2si (%rax), %ecx
|
||||
|
|
|
@ -431,8 +431,8 @@ xorpd (%rax), %xmm2
|
|||
# CHECK-NEXT: 1 8 1.00 * cvtpi2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * cvtps2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 cvtps2pd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * cvtps2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 cvtps2pd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * cvtps2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 7 1.00 cvtsd2si %xmm0, %ecx
|
||||
# CHECK-NEXT: 2 7 1.00 cvtsd2si %xmm0, %rcx
|
||||
# CHECK-NEXT: 2 12 1.00 * cvtsd2si (%rax), %ecx
|
||||
|
|
Loading…
Reference in New Issue