[X86] Add NT load/store scheduler classes

llvm-svn: 332274
This commit is contained in:
Simon Pilgrim 2018-05-14 18:37:19 +00:00
parent 62f7af712c
commit 215ce4a1ca
13 changed files with 148 additions and 84 deletions

View File

@ -4373,11 +4373,11 @@ multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
}
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
SchedWriteVecMoveLS>, PD;
SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
SchedWriteFMoveLS>, PD, VEX_W;
SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
SchedWriteFMoveLS>, PS;
SchedWriteFMoveLSNT>, PS;
let Predicates = [HasAVX512], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),

View File

@ -272,7 +272,7 @@ let Predicates = [HasSSE1] in
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>,
Sched<[SchedWriteVecMoveLS.MMX.MR]>;
Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;
let Predicates = [HasMMX] in {
let AddedComplexity = 15 in

View File

@ -3017,7 +3017,7 @@ defm : scalar_unary_math_patterns<int_x86_sse2_sqrt_sd, "SQRTSD", X86Movsd,
let AddedComplexity = 400 in { // Prefer non-temporal versions
let Predicates = [HasAVX, NoVLX] in {
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@ -3030,7 +3030,7 @@ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
addr:$dst)]>, VEX, VEX_WIG;
} // SchedRW
let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntps\t{$src, $dst|$dst, $src}",
@ -3049,17 +3049,17 @@ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src),
addr:$dst)]>, VEX, VEX_WIG,
Sched<[SchedWriteVecMoveLS.XMM.MR]>;
Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4i64 VR256:$src),
addr:$dst)]>, VEX, VEX_L, VEX_WIG,
Sched<[SchedWriteVecMoveLS.YMM.MR]>;
Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
} // ExeDomain
} // Predicates
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
@ -3068,12 +3068,12 @@ def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;
let SchedRW = [WriteStore] in {
let SchedRW = [WriteStoreNT] in {
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
@ -6409,14 +6409,14 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.XMM.RM]>;
Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v8f32 (alignednontemporalload addr:$src)),
@ -6935,7 +6935,7 @@ def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
// Non-temporal (unaligned) scalar stores.
let AddedComplexity = 400 in { // Prefer non-temporal versions
let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in {
let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
"movntss\t{$src, $dst|$dst, $src}", []>, XS;

View File

@ -147,9 +147,10 @@ defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>;
defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
def : WriteRes<WriteLoad, [BWPort23]> { let Latency = 5; }
def : WriteRes<WriteStore, [BWPort237, BWPort4]>;
def : WriteRes<WriteMove, [BWPort0156]>;
defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteStore, [BWPort237, BWPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteStoreNT, [BWPort237, BWPort4], 1, [1,1], 2>;
// Register moves use a single port group, so ResourceCycles carries exactly
// one entry (each cycle count pairs with the resource at the same index).
defm : X86WriteRes<WriteMove, [BWPort0156], 1, [1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
@ -171,6 +172,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
@ -266,11 +270,15 @@ def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [BWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [BWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
@ -578,8 +586,6 @@ def BWWriteResGroup10 : SchedWriteRes<[BWPort4,BWPort237]> {
}
def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm",
"MMX_MOVD64mr",
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",

View File

@ -107,9 +107,10 @@ def : WriteRes<WriteRMW, [HWPort237,HWPort4]>;
// Store_addr on 237.
// Store_data on 4.
def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 5; }
def : WriteRes<WriteMove, [HWPort0156]>;
defm : X86WriteRes<WriteStore, [HWPort237, HWPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteStoreNT, [HWPort237, HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>;
def : WriteRes<WriteZero, []>;
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
@ -161,6 +162,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
@ -257,11 +261,15 @@ def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> {
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [HWPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [HWPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
@ -754,8 +762,6 @@ def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
}
def: InstRW<[HWWriteResGroup1], (instregex "FBSTPm",
"MMX_MOVD64mr",
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",

View File

@ -97,6 +97,7 @@ multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<WriteRMW, [SBPort23,SBPort4]>;
def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
def : WriteRes<WriteMove, [SBPort015]>;
def : WriteRes<WriteZero, []>;
@ -153,6 +154,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
@ -237,11 +241,15 @@ def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4;
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SBPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [SBPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;

View File

@ -145,9 +145,10 @@ defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>;
defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
def : WriteRes<WriteLoad, [SKLPort23]> { let Latency = 5; }
def : WriteRes<WriteStore, [SKLPort237, SKLPort4]>;
def : WriteRes<WriteMove, [SKLPort0156]>;
defm : X86WriteRes<WriteLoad, [SKLPort23], 5, [1], 1>;
defm : X86WriteRes<WriteStore, [SKLPort237, SKLPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteStoreNT, [SKLPort237, SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteMove, [SKLPort0156], 1, [1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
@ -166,6 +167,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
@ -258,11 +262,15 @@ def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
defm : X86WriteRes<WriteVecLoad, [SKLPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKLPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKLPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [SKLPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [SKLPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>;
@ -580,8 +588,6 @@ def SKLWriteResGroup11 : SchedWriteRes<[SKLPort4,SKLPort237]> {
}
def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm",
"MMX_MOVD64mr",
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",

View File

@ -145,9 +145,10 @@ defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>;
defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
def : WriteRes<WriteLoad, [SKXPort23]> { let Latency = 5; }
def : WriteRes<WriteStore, [SKXPort237, SKXPort4]>;
def : WriteRes<WriteMove, [SKXPort0156]>;
defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteStore, [SKXPort237, SKXPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteStoreNT, [SKXPort237, SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteMove, [SKXPort0156], 1, [1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
@ -166,6 +167,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
@ -258,11 +262,15 @@ def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>;
@ -605,8 +613,6 @@ def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> {
def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm",
"KMOV(B|D|Q|W)mk",
"MMX_MOVD64mr",
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP(32|64|80)m",
"VMOV(H|L)(PD|PS)Z128mr(b?)",
"(V?)MOV(H|L)(PD|PS)mr",

View File

@ -92,6 +92,7 @@ class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
// Loads, stores, and moves, not folded with other operations.
def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
def WriteStoreNT : SchedWrite;
def WriteMove : SchedWrite;
// Arithmetic.
@ -145,6 +146,9 @@ def WriteFMaskedLoadY : SchedWrite;
def WriteFStore : SchedWrite;
def WriteFStoreX : SchedWrite;
def WriteFStoreY : SchedWrite;
def WriteFStoreNT : SchedWrite;
def WriteFStoreNTX : SchedWrite;
def WriteFStoreNTY : SchedWrite;
def WriteFMaskedStore : SchedWrite;
def WriteFMaskedStoreY : SchedWrite;
def WriteFMove : SchedWrite;
@ -229,11 +233,15 @@ defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
def WriteVecLoad : SchedWrite;
def WriteVecLoadX : SchedWrite;
def WriteVecLoadY : SchedWrite;
def WriteVecLoadNT : SchedWrite;
def WriteVecLoadNTY : SchedWrite;
def WriteVecMaskedLoad : SchedWrite;
def WriteVecMaskedLoadY : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecStoreX : SchedWrite;
def WriteVecStoreY : SchedWrite;
def WriteVecStoreNT : SchedWrite;
def WriteVecStoreNTY : SchedWrite;
def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove : SchedWrite;
@ -352,6 +360,16 @@ def SchedWriteFMoveLS
: X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
WriteFMoveLSY, WriteFMoveLSY>;
// Non-temporal floating-point move/load/store bundles. Only the store slot
// uses a dedicated NT class (WriteFStoreNT*); the move and load slots reuse
// the regular WriteFMove*/WriteFLoad* classes.
// NOTE(review): the three X86SchedWriteMoveLS arguments are presumably
// (reg-reg move, load, store) - confirm against the class definition.
def WriteFMoveLSNT
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
// NOTE(review): the X variant pairs WriteFStoreNTX with the unsuffixed
// WriteFMove/WriteFLoad classes - confirm this is intended.
def WriteFMoveLSNTX
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
def WriteFMoveLSNTY
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
// Width wrapper over the three bundles above. The Y bundle is passed for
// both of the last two slots; presumably the final slot is the widest
// (ZMM) variant - confirm against X86SchedWriteMoveLSWidths.
def SchedWriteFMoveLSNT
: X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
WriteFMoveLSNTY, WriteFMoveLSNTY>;
def WriteVecMoveLS
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
def WriteVecMoveLSX
@ -362,6 +380,16 @@ def SchedWriteVecMoveLS
: X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
WriteVecMoveLSY, WriteVecMoveLSY>;
// Non-temporal vector-integer move/load/store bundles. Both the load and the
// store slots use dedicated NT classes; the move slot reuses the regular
// WriteVecMove* classes.
// NOTE(review): the three X86SchedWriteMoveLS arguments are presumably
// (reg-reg move, load, store) - confirm against the class definition.
def WriteVecMoveLSNT
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
// The X variant shares WriteVecLoadNT/WriteVecStoreNT with the bundle above;
// only the move class differs (WriteVecMoveX).
def WriteVecMoveLSNTX
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTY
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
// Width wrapper over the three bundles above. The Y bundle is passed for
// both of the last two slots; presumably the final slot is the widest
// (ZMM) variant - confirm against X86SchedWriteMoveLSWidths.
def SchedWriteVecMoveLSNT
: X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddY>;

View File

@ -146,6 +146,7 @@ defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
def : WriteRes<WriteLoad, [AtomPort0]>;
def : WriteRes<WriteStore, [AtomPort0]>;
def : WriteRes<WriteStoreNT, [AtomPort0]>;
def : WriteRes<WriteMove, [AtomPort01]>;
// Treat misc copies as a move.
@ -190,6 +191,9 @@ def : WriteRes<WriteFMaskedLoadY, [AtomPort0]>;
def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFStoreX, [AtomPort0]>;
def : WriteRes<WriteFStoreY, [AtomPort0]>;
def : WriteRes<WriteFStoreNT, [AtomPort0]>;
def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
def : WriteRes<WriteFStoreNTY, [AtomPort0]>;
def : WriteRes<WriteFMaskedStore, [AtomPort0]>;
def : WriteRes<WriteFMaskedStoreY, [AtomPort0]>;
@ -281,12 +285,16 @@ def : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
def : WriteRes<WriteVecLoad, [AtomPort0]>;
def : WriteRes<WriteVecLoadX, [AtomPort0]>;
def : WriteRes<WriteVecLoadY, [AtomPort0]>;
def : WriteRes<WriteVecLoadNT, [AtomPort0]>;
def : WriteRes<WriteVecLoadNTY, [AtomPort0]>;
def : WriteRes<WriteVecMaskedLoad, [AtomPort0]>;
def : WriteRes<WriteVecMaskedLoadY, [AtomPort0]>;
def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecStoreX, [AtomPort0]>;
def : WriteRes<WriteVecStoreY, [AtomPort0]>;
def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
def : WriteRes<WriteVecStoreNTY, [AtomPort0]>;
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
def : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>;

View File

@ -228,6 +228,7 @@ def: InstRW<[JWriteSHLDm],(instrs SHLD16mri8, SHLD32mri8, SHLD64mri8,
def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteStore, [JSAGU]>;
def : WriteRes<WriteStoreNT, [JSAGU]>;
def : WriteRes<WriteMove, [JALU01]>;
// Load/store MXCSR.
@ -277,6 +278,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
@ -407,12 +411,16 @@ def : InstRW<[JWriteCVTSI2FLd], (instregex "(V)?CVTSI(64)?2S(D|S)rm")>;
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
@ -572,28 +580,6 @@ def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
VCVTPS2DQYrm, VCVTTPS2DQYrm)>;
def JWriteVMOVNTDQSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
let Latency = 2;
}
def : InstRW<[JWriteVMOVNTDQSt], (instrs MMX_MOVNTQmr, MOVNTDQmr, VMOVNTDQmr)>;
def JWriteVMOVNTDQYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
let Latency = 2;
let ResourceCycles = [2, 2, 2];
}
def : InstRW<[JWriteVMOVNTDQYSt], (instrs VMOVNTDQYmr)>;
def JWriteMOVNTSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
let Latency = 3;
}
def : InstRW<[JWriteMOVNTSt], (instrs MOVNTPDmr, MOVNTPSmr, MOVNTSD, MOVNTSS, VMOVNTPDmr, VMOVNTPSmr)>;
def JWriteVMOVNTPYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
let Latency = 3;
let ResourceCycles = [2, 2, 2];
}
def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTPDYmr, VMOVNTPSYmr)>;
def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
let Latency = 6;
let ResourceCycles = [2, 2, 4];

View File

@ -80,6 +80,7 @@ multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
def : WriteRes<WriteZero, []>;
@ -141,6 +142,9 @@ def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
@ -214,11 +218,15 @@ def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNT, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNTY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;

View File

@ -144,6 +144,7 @@ multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<WriteRMW, [ZnAGU]>;
def : WriteRes<WriteStore, [ZnAGU]>;
def : WriteRes<WriteStoreNT, [ZnAGU]>;
def : WriteRes<WriteMove, [ZnALU]>;
def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
@ -197,6 +198,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
// FP stores. Entries that list only ZnAGU carry a single ResourceCycles
// value so that each cycle count pairs with exactly one resource.
defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1], 1>;
// WriteFStoreNT additionally occupies ZnFPU2 and has latency 8, so it keeps
// two ResourceCycles entries.
defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
@ -274,11 +278,15 @@ def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>;
// Vector-integer stores (regular and non-temporal) only list ZnAGU, so
// ResourceCycles carries a single entry to pair one-to-one with it.
defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
@ -1344,12 +1352,6 @@ def ZnWriteINSERTQ: SchedWriteRes<[ZnFPU03,ZnFPU1]> {
}
def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>;
// MOVNTSS/MOVNTSD
def ZnWriteMOVNT: SchedWriteRes<[ZnAGU,ZnFPU2]> {
let Latency = 8;
}
def : InstRW<[ZnWriteMOVNT], (instregex "MOVNTS(S|D)")>;
//-- SHA instructions --//
// SHA256MSG2
def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;