[X86] Split WriteCvtF2F into F32->F64 and F64->F32 scheduler classes

BtVer2 - Fixes schedules for (V)CVTPS2PD instructions

A lot of the Intel models still have too many InstRW overrides for these new classes; this needs cleaning up, but I wanted to get the classes in first.

llvm-svn: 332376
This commit is contained in:
Simon Pilgrim 2018-05-15 17:36:49 +00:00
parent 3c35290c58
commit be9a206883
16 changed files with 201 additions and 156 deletions

View File

@ -7183,10 +7183,10 @@ multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNo
}
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
X86froundRnd, WriteCvtF2F, f64x_info,
X86froundRnd, WriteCvtSD2SS, f64x_info,
f32x_info>, NotMemoryFoldable;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
X86fpextRnd, WriteCvtF2F, f32x_info,
X86fpextRnd, WriteCvtSS2SD, f32x_info,
f64x_info>, NotMemoryFoldable;
def : Pat<(f64 (fpextend FR32X:$src)),
@ -7277,33 +7277,33 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched> {
X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
fpextend, sched>,
fpextend, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
X86vfpextRnd, sched>, EVEX_V512;
X86vfpextRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
X86vfpext, sched, "{1to2}", "", f64mem>, EVEX_V128;
X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
sched>, EVEX_V256;
sched.YMM>, EVEX_V256;
}
}
// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched> {
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched>,
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
X86vfproundRnd, sched>, EVEX_V512;
X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
X86vfpround, sched, "{1to2}", "{x}">, EVEX_V128;
X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
sched, "{1to4}", "{y}">, EVEX_V256;
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
@ -7316,9 +7316,9 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite
}
}
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", WriteCvtF2F>,
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", WriteCvtF2F>,
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
PS, EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
@ -7864,7 +7864,7 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
let Predicates = [HasAVX512] in
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
WriteCvtPH2PSY>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtF2F>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSY>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {

View File

@ -1241,16 +1241,16 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
// Convert scalar double to scalar single
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX_4V, VEX_LIG,
Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable;
(ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
Sched<[WriteCvtSD2SS]>;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XD, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable;
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
}
def : Pat<(f32 (fpround FR64:$src)),
@ -1260,12 +1260,12 @@ def : Pat<(f32 (fpround FR64:$src)),
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fpround FR64:$src))]>,
Sched<[WriteCvtF2F]>;
Sched<[WriteCvtSD2SS]>;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
XD,
Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
XD, Requires<[UseSSE2, OptForSize]>,
Sched<[WriteCvtSD2SS.Folded]>;
let isCodeGenOnly = 1 in {
def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
@ -1273,30 +1273,29 @@ def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
XD, VEX_4V, VEX_WIG,
Requires<[HasAVX]>, Sched<[WriteCvtF2F]>;
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
Sched<[WriteCvtSD2SS]>;
def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, VEX_4V, VEX_WIG,
Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
XD, Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, Requires<[UseSSE2]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
}
} // isCodeGenOnly = 1
@ -1306,14 +1305,14 @@ let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable;
XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable;
XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
}
def : Pat<(f64 (fpextend FR32:$src)),
@ -1331,11 +1330,12 @@ def : Pat<(extloadf32 addr:$src),
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fpextend FR32:$src))]>,
XS, Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))]>,
XS, Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
XS, Requires<[UseSSE2, OptForSize]>,
Sched<[WriteCvtSS2SD.Folded]>;
// extload f32 -> f64. This matches load+fpextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
@ -1352,25 +1352,25 @@ def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, VEX_WIG,
Requires<[HasAVX]>, Sched<[WriteCvtF2F]>;
Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, VEX_WIG,
Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
[]>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[]>, XS, Requires<[UseSSE2]>,
Sched<[WriteCvtF2F]>;
Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[]>, XS, Requires<[UseSSE2]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
}
} // isCodeGenOnly = 1
@ -1699,30 +1699,30 @@ let Predicates = [HasAVX, NoVLX] in {
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
PS, VEX, Sched<[WriteCvtF2F]>, VEX_WIG;
PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
PS, VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG;
PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))]>,
PS, VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG;
PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG;
PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG;
}
let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
PS, Sched<[WriteCvtF2F]>;
PS, Sched<[WriteCvtPS2PD]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
PS, Sched<[WriteCvtF2FLd]>;
PS, Sched<[WriteCvtPS2PD.Folded]>;
}
// Convert Packed DW Integers to Packed Double FP
@ -1787,7 +1787,7 @@ let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
VEX, Sched<[WriteCvtF2F]>, VEX_WIG;
VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
@ -1796,7 +1796,7 @@ let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>,
VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG;
VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
(VCVTPD2PSrm VR128:$dst, f128mem:$src), 0, "intel">;
@ -1805,11 +1805,11 @@ let Predicates = [HasAVX, NoVLX] in {
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (fpround VR256:$src))]>,
VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG;
VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (fpround (loadv4f64 addr:$src)))]>,
VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG;
VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
}
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
@ -1819,11 +1819,11 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
Sched<[WriteCvtF2F]>;
Sched<[WriteCvtPD2PS]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
Sched<[WriteCvtF2FLd]>;
Sched<[WriteCvtPD2PS.Folded]>;
// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below

View File

@ -343,9 +343,16 @@ def : WriteRes<WriteVecExtractSt, [BWPort4,BWPort5,BWPort237]> {
}
// Conversion between integer and float.
defm : BWWriteResPair<WriteCvtF2I, [BWPort1], 3>; // Float -> Integer.
defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
defm : BWWriteResPair<WriteCvtF2F, [BWPort1], 3>; // Float -> Float size conversion.
defm : BWWriteResPair<WriteCvtF2I, [BWPort1], 3>; // Float -> Integer.
defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
defm : BWWriteResPair<WriteCvtSS2SD, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPS2PD, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1], 3>;
defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;

View File

@ -222,9 +222,6 @@ defm : HWWriteResPair<WriteFSqrt64Y, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28
defm : HWWriteResPair<WriteFSqrt64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>;
defm : HWWriteResPair<WriteFSqrt80, [HWPort0,HWFPDivider], 23, [1,17]>;
defm : HWWriteResPair<WriteCvtF2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
@ -251,6 +248,18 @@ defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
// Conversion between integer and float.
defm : HWWriteResPair<WriteCvtF2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtSS2SD, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2PD, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtSD2SS, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2PS, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1], 3>;
defm : X86WriteRes<WriteCvtPH2PS, [HWPort0,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [HWPort0,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort0,HWPort23], 6, [1,1], 2>;

View File

@ -217,9 +217,6 @@ defm : SBWriteResPair<WriteFSqrt80, [SBPort0,SBFPDivider], 24, [1,24], 1, 6>;
defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>;
defm : SBWriteResPair<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>;
defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>;
defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
@ -236,8 +233,20 @@ defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
// Conversion between integer and float.
defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
defm : SBWriteResPair<WriteCvtSS2SD, [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteCvtPS2PD, [SBPort0,SBPort5], 2, [1,1], 2>;
defm : SBWriteResPair<WriteCvtPS2PDY, [SBPort0,SBPort5], 2, [1,1], 2>;
defm : SBWriteResPair<WriteCvtSD2SS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>;
@ -450,13 +459,6 @@ defm : SBWriteResPair<WriteFMAY, [SBPort01], 5>;
// Remaining SNB instrs.
def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> {
let Latency = 1;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup0], (instregex "(V?)CVTSS2SDrr")>;
def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
let Latency = 1;
let NumMicroOps = 1;
@ -555,13 +557,6 @@ def: InstRW<[SBWriteResGroup12], (instregex "(V?)COMISDrr",
"(V?)UCOMISDrr",
"(V?)UCOMISSrr")>;
def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup13], (instregex "(V?)CVTPS2PD(Y?)rr")>;
def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
let Latency = 2;
let NumMicroOps = 2;
@ -699,8 +694,6 @@ def SBWriteResGroup28 : SchedWriteRes<[SBPort1,SBPort5]> {
def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPI2PDirr",
"MMX_CVT(T?)PD2PIirr",
"(V?)CVTDQ2PD(Y?)rr",
"(V?)CVTPD2PS(Y?)rr",
"(V?)CVTSD2SSrr",
"(V?)CVTSI(64)?2SDrr",
"(V?)CVT(T?)PD2DQ(Y?)rr")>;
@ -903,8 +896,7 @@ def SBWriteResGroup55 : SchedWriteRes<[SBPort0,SBPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup55], (instregex "(V?)CVTPS2PD(Y?)rm",
"(V?)CVTSS2SDrm")>;
def: InstRW<[SBWriteResGroup55], (instregex "(V?)CVTPS2PD(Y?)rm")>;
def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 7;
@ -1180,8 +1172,6 @@ def SBWriteResGroup103 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPI2PDirm",
"MMX_CVT(T?)PD2PIirm",
"(V?)CVTDQ2PD(Y?)rm",
"(V?)CVTPD2PSrm",
"(V?)CVTSD2SSrm",
"(V?)CVTSI(64)?2SSrm",
"(V?)CVT(T?)PD2DQrm")>;
@ -1212,8 +1202,7 @@ def SBWriteResGroup107 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2PSYrm",
"VCVT(T?)PD2DQYrm")>;
def: InstRW<[SBWriteResGroup107], (instregex "VCVT(T?)PD2DQYrm")>;
def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 12;

View File

@ -338,7 +338,14 @@ def : WriteRes<WriteVecExtractSt, [SKLPort4,SKLPort5,SKLPort237]> {
// Conversion between integer and float.
defm : SKLWriteResPair<WriteCvtF2I, [SKLPort1], 3>; // Float -> Integer.
defm : SKLWriteResPair<WriteCvtI2F, [SKLPort1], 4>; // Integer -> Float.
defm : SKLWriteResPair<WriteCvtF2F, [SKLPort1], 3>; // Float -> Float size conversion.
defm : SKLWriteResPair<WriteCvtSS2SD, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPS2PD, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPS2PDY, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtSD2SS, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPD2PS, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPD2PSY, [SKLPort1], 3>;
defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort015], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2>;

View File

@ -336,9 +336,16 @@ def : WriteRes<WriteVecExtractSt, [SKXPort4,SKXPort5,SKXPort237]> {
}
// Conversion between integer and float.
defm : SKXWriteResPair<WriteCvtF2I, [SKXPort1], 3>; // Float -> Integer.
defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>; // Integer -> Float.
defm : SKXWriteResPair<WriteCvtF2F, [SKXPort1], 3>; // Float -> Float size conversion.
defm : SKXWriteResPair<WriteCvtF2I, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtSS2SD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtSD2SS, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPD2PS, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort1], 3>;
defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort015], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort015], 7, [1,1], 2>;

View File

@ -298,7 +298,14 @@ def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM/ZMM).
defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM/ZMM).
defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM/ZMM).
@ -451,6 +458,13 @@ def SchedWriteFVarBlend
: X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
WriteFVarBlendY, WriteFVarBlendY>;
def SchedWriteCvtPS2PD
: X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
WriteCvtPS2PDY, WriteCvtPS2PDY>;
def SchedWriteCvtPD2PS
: X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
WriteCvtPD2PSY, WriteCvtPD2PSY>;
def SchedWriteVecALU
: X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUY>;
def SchedWritePHAdd

View File

@ -273,9 +273,16 @@ defm : AtomWriteResPair<WriteFVarShuffle256, [AtomPort0], [AtomPort0]>; // NOTE
// Conversions.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtPS2PDY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtPD2PSY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtPH2PS, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteCvtPH2PSY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
@ -571,9 +578,7 @@ def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
def : InstRW<[AtomWrite01_7], (instrs AAD8i8,
CVTDQ2PDrr,
CVTPD2DQrr,
CVTPD2PSrr,
CVTPS2DQrm,
CVTPS2PDrr,
CVTTPD2DQrr,
CVTTPS2DQrm,
MMX_CVTPD2PIirr,
@ -590,8 +595,6 @@ def : InstRW<[AtomWrite01_8], (instrs LOOPE,
FNSTCW16m,
CVTDQ2PDrm,
CVTPD2DQrm,
CVTPD2PSrm,
CVTPS2PDrm,
CVTTPD2DQrm,
MMX_CVTPD2PIirm,
MMX_CVTPI2PDirm,

View File

@ -360,9 +360,16 @@ defm : JWriteResFpuPair<WriteFVarShuffle256, [JFPU01, JFPX], 1>; // NOTE: Doesn
// Conversions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteCvtF2I, [JFPU1, JSTC], 3>; // Float -> Integer.
defm : JWriteResFpuPair<WriteCvtI2F, [JFPU1, JSTC], 3>; // Integer -> Float.
defm : JWriteResFpuPair<WriteCvtF2F, [JFPU1, JSTC], 3>; // Float -> Float size conversion.
defm : JWriteResFpuPair<WriteCvtF2I, [JFPU1, JSTC], 3>;
defm : JWriteResFpuPair<WriteCvtI2F, [JFPU1, JSTC], 3>;
defm : JWriteResFpuPair<WriteCvtSS2SD, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPS2PD, [JFPU1, JSTC], 2, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2PDY, [JFPU1, JSTC], 2, [2,2], 2>;
defm : JWriteResFpuPair<WriteCvtSD2SS, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPD2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2PSY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
@ -372,20 +379,6 @@ defm : X86WriteRes<WriteCvtPS2PHY, [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
def JWriteCVTF2F : SchedWriteRes<[JFPU1, JSTC]> {
let Latency = 7;
let ResourceCycles = [1, 2];
let NumMicroOps = 2;
}
def : InstRW<[JWriteCVTF2F], (instregex "(V)?CVTS(D|S)2S(D|S)rr")>;
def JWriteCVTF2FLd : SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
let Latency = 12;
let ResourceCycles = [1, 1, 2];
let NumMicroOps = 2;
}
def : InstRW<[JWriteCVTF2FLd], (instregex "(V)?CVTS(D|S)2S(D|S)rm")>;
def JWriteCVTF2SI : SchedWriteRes<[JFPU1, JSTC, JFPA, JALU0]> {
let Latency = 7;
let NumMicroOps = 2;
@ -560,14 +553,14 @@ def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
let ResourceCycles = [2, 2, 4];
let NumMicroOps = 3;
}
def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr, VCVTPD2PSYrr)>;
def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr)>;
def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> {
let Latency = 11;
let ResourceCycles = [2, 2, 2, 4];
let NumMicroOps = 3;
}
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm)>;
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;

View File

@ -197,9 +197,6 @@ defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
@ -213,6 +210,18 @@ defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
// Conversion between integer and float.
defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }

View File

@ -227,7 +227,6 @@ defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;
defm : ZnWriteResFpuPair<WriteVarBlendY, [ZnFPU0], 1>;
defm : ZnWriteResFpuPair<WriteCvtI2F, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtF2F, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>;
@ -1186,9 +1185,16 @@ def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPDYrm")>;
def ZnWriteCVTPD2PSr: SchedWriteRes<[ZnFPU3]> {
let Latency = 4;
}
def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> {
let Latency = 5;
}
// CVTPD2PS.
// x,x.
def : InstRW<[ZnWriteCVTPD2PSr], (instregex "(V?)CVTPD2PSrr")>;
def : SchedAlias<WriteCvtPD2PS, ZnWriteCVTPD2PSr>;
// y,y.
def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>;
def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> {
let Latency = 11;
@ -1196,28 +1202,28 @@ def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> {
let ResourceCycles = [1,2];
}
// x,m128.
def : InstRW<[ZnWriteCVTPD2PSLd], (instregex "(V?)CVTPD2PS(X?)rm")>;
def : SchedAlias<WriteCvtPD2PSLd, ZnWriteCVTPD2PSLd>;
// x,m256.
def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let Latency = 11;
}
def : InstRW<[ZnWriteCVTPD2PSYLd], (instregex "(V?)CVTPD2PSYrm")>;
def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
// CVTSD2SS.
// x,x.
// Same as WriteCVTPD2PSr
def : InstRW<[ZnWriteCVTPD2PSr], (instregex "(V)?CVTSD2SSrr")>;
def : SchedAlias<WriteCvtSD2SS, ZnWriteCVTPD2PSr>;
// x,m64.
def : InstRW<[ZnWriteCVTPD2PSLd], (instregex "(V)?CVTSD2SSrm")>;
def : SchedAlias<WriteCvtSD2SSLd, ZnWriteCVTPD2PSLd>;
// CVTPS2PD.
// x,x.
def ZnWriteCVTPS2PDr : SchedWriteRes<[ZnFPU3]> {
let Latency = 3;
}
def : InstRW<[ZnWriteCVTPS2PDr], (instregex "(V?)CVTPS2PDrr")>;
def : SchedAlias<WriteCvtPS2PD, ZnWriteCVTPS2PDr>;
// x,m64.
// y,m128.
@ -1225,20 +1231,21 @@ def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let Latency = 10;
let NumMicroOps = 2;
}
def : InstRW<[ZnWriteCVTPS2PDLd], (instregex "(V?)CVTPS2PD(Y?)rm")>;
def : SchedAlias<WriteCvtPS2PDLd, ZnWriteCVTPS2PDLd>;
def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>;
// y,x.
def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> {
let Latency = 3;
}
def : InstRW<[ZnWriteVCVTPS2PDY], (instregex "VCVTPS2PDYrr")>;
def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>;
// CVTSS2SD.
// x,x.
def ZnWriteCVTSS2SDr : SchedWriteRes<[ZnFPU3]> {
let Latency = 4;
}
def : InstRW<[ZnWriteCVTSS2SDr], (instregex "(V?)CVTSS2SDrr")>;
def : SchedAlias<WriteCvtSS2SD, ZnWriteCVTSS2SDr>;
// x,m32.
def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
@ -1246,7 +1253,7 @@ def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let NumMicroOps = 2;
let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWriteCVTSS2SDLd], (instregex "(V?)CVTSS2SDrm")>;
def : SchedAlias<WriteCvtSS2SDLd, ZnWriteCVTSS2SDLd>;
def ZnWriteCVTDQ2PDr: SchedWriteRes<[ZnFPU12,ZnFPU3]> {
let Latency = 5;

View File

@ -1752,8 +1752,8 @@ define <4 x i32> @f64to4si(<4 x double> %a) {
define <16 x float> @f64to16f32(<16 x double> %b) nounwind {
; GENERIC-LABEL: f64to16f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [3:1.00]
; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [4:1.00]
; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -1788,7 +1788,7 @@ define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -1823,7 +1823,7 @@ define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
define <8 x double> @f32to8f64(<8 x float> %b) nounwind {
; GENERIC-LABEL: f32to8f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to8f64:
@ -1838,7 +1838,7 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x doubl
; GENERIC-LABEL: f32to4f64_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to4f64_mask:
@ -4216,7 +4216,7 @@ define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
; GENERIC-LABEL: fptrunc_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fptrunc_test:
@ -4230,7 +4230,7 @@ define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
; GENERIC-LABEL: fpext_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fpext_test:

View File

@ -1818,15 +1818,15 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
;
; BTVER2-SSE-LABEL: test_cvtps2pd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtps2pd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;

View File

@ -1122,10 +1122,10 @@ vzeroupper
# CHECK-NEXT: 1 8 1.00 * vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: 2 8 2.00 * vcvtps2dq (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtps2pd %xmm0, %ymm2
# CHECK-NEXT: 1 8 1.00 * vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: 1 2 1.00 vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: 2 2 2.00 vcvtps2pd %xmm0, %ymm2
# CHECK-NEXT: 2 7 2.00 * vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: 2 7 1.00 vcvtsd2si %xmm0, %ecx
# CHECK-NEXT: 2 7 1.00 vcvtsd2si %xmm0, %rcx
# CHECK-NEXT: 2 12 1.00 * vcvtsd2si (%rax), %ecx
@ -1720,7 +1720,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: 48.00 2.00 - 349.50 909.50 397.00 409.00 381.00 - 43.00 122.00 118.50 118.50 38.00
# CHECK-NEXT: 48.00 2.00 - 349.50 909.50 397.00 411.00 382.00 - 43.00 124.00 118.50 118.50 38.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@ -1830,8 +1830,8 @@ vzeroupper
# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvtps2dq (%rax), %ymm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2pd %xmm0, %ymm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vcvtps2pd %xmm0, %ymm2
# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: 1.00 - - 1.00 - - 1.00 - - - 1.00 - - - vcvtsd2si %xmm0, %ecx
# CHECK-NEXT: 1.00 - - 1.00 - - 1.00 - - - 1.00 - - - vcvtsd2si %xmm0, %rcx
# CHECK-NEXT: 1.00 - - 1.00 - - 1.00 1.00 - - 1.00 - - - vcvtsd2si (%rax), %ecx

View File

@ -431,8 +431,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 8 1.00 * cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * cvtps2dq (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * cvtps2pd (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * cvtps2pd (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 cvtsd2si %xmm0, %ecx
# CHECK-NEXT: 2 7 1.00 cvtsd2si %xmm0, %rcx
# CHECK-NEXT: 2 12 1.00 * cvtsd2si (%rax), %ecx