forked from OSchip/llvm-project
[X86] Add NT load/store scheduler classes
llvm-svn: 332274
This commit is contained in:
parent
62f7af712c
commit
215ce4a1ca
|
@ -4373,11 +4373,11 @@ multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
|
|||
}
|
||||
|
||||
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
|
||||
SchedWriteVecMoveLS>, PD;
|
||||
SchedWriteVecMoveLSNT>, PD;
|
||||
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
|
||||
SchedWriteFMoveLS>, PD, VEX_W;
|
||||
SchedWriteFMoveLSNT>, PD, VEX_W;
|
||||
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
|
||||
SchedWriteFMoveLS>, PS;
|
||||
SchedWriteFMoveLSNT>, PS;
|
||||
|
||||
let Predicates = [HasAVX512], AddedComplexity = 400 in {
|
||||
def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
|
||||
|
|
|
@ -272,7 +272,7 @@ let Predicates = [HasSSE1] in
|
|||
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
|
||||
"movntq\t{$src, $dst|$dst, $src}",
|
||||
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>,
|
||||
Sched<[SchedWriteVecMoveLS.MMX.MR]>;
|
||||
Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;
|
||||
|
||||
let Predicates = [HasMMX] in {
|
||||
let AddedComplexity = 15 in
|
||||
|
|
|
@ -3017,7 +3017,7 @@ defm : scalar_unary_math_patterns<int_x86_sse2_sqrt_sd, "SQRTSD", X86Movsd,
|
|||
|
||||
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
|
||||
let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
|
||||
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
|
||||
(ins f128mem:$dst, VR128:$src),
|
||||
"movntps\t{$src, $dst|$dst, $src}",
|
||||
|
@ -3030,7 +3030,7 @@ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
|
|||
addr:$dst)]>, VEX, VEX_WIG;
|
||||
} // SchedRW
|
||||
|
||||
let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
|
||||
let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
|
||||
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
|
||||
(ins f256mem:$dst, VR256:$src),
|
||||
"movntps\t{$src, $dst|$dst, $src}",
|
||||
|
@ -3049,17 +3049,17 @@ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
|
|||
"movntdq\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v2i64 VR128:$src),
|
||||
addr:$dst)]>, VEX, VEX_WIG,
|
||||
Sched<[SchedWriteVecMoveLS.XMM.MR]>;
|
||||
Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
|
||||
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
|
||||
(ins i256mem:$dst, VR256:$src),
|
||||
"movntdq\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v4i64 VR256:$src),
|
||||
addr:$dst)]>, VEX, VEX_L, VEX_WIG,
|
||||
Sched<[SchedWriteVecMoveLS.YMM.MR]>;
|
||||
Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
|
||||
} // ExeDomain
|
||||
} // Predicates
|
||||
|
||||
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
|
||||
let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
|
||||
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||
"movntps\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
|
||||
|
@ -3068,12 +3068,12 @@ def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
|||
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
|
||||
} // SchedRW
|
||||
|
||||
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in
|
||||
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
|
||||
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||
"movntdq\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;
|
||||
|
||||
let SchedRW = [WriteStore] in {
|
||||
let SchedRW = [WriteStoreNT] in {
|
||||
// There is no AVX form for instructions below this point
|
||||
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
||||
"movnti{l}\t{$src, $dst|$dst, $src}",
|
||||
|
@ -6409,14 +6409,14 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
|
|||
let Predicates = [HasAVX, NoVLX] in
|
||||
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
|
||||
Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
|
||||
Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
|
||||
let Predicates = [HasAVX2, NoVLX] in
|
||||
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
|
||||
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
|
||||
Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
|
||||
Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
|
||||
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
"movntdqa\t{$src, $dst|$dst, $src}", []>,
|
||||
Sched<[SchedWriteVecMoveLS.XMM.RM]>;
|
||||
Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;
|
||||
|
||||
let Predicates = [HasAVX2, NoVLX] in {
|
||||
def : Pat<(v8f32 (alignednontemporalload addr:$src)),
|
||||
|
@ -6935,7 +6935,7 @@ def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
|
|||
|
||||
// Non-temporal (unaligned) scalar stores.
|
||||
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||
let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in {
|
||||
let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
|
||||
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
|
||||
"movntss\t{$src, $dst|$dst, $src}", []>, XS;
|
||||
|
||||
|
|
|
@ -147,9 +147,10 @@ defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>;
|
|||
defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
|
||||
|
||||
// Loads, stores, and moves, not folded with other operations.
|
||||
def : WriteRes<WriteLoad, [BWPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteStore, [BWPort237, BWPort4]>;
|
||||
def : WriteRes<WriteMove, [BWPort0156]>;
|
||||
defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteStore, [BWPort237, BWPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteStoreNT, [BWPort237, BWPort4], 1, [1,1], 2>;
|
||||
// Register moves use the single BWPort0156 resource group, so ResourceCycles
// carries exactly one entry (one per listed resource).
defm : X86WriteRes<WriteMove, [BWPort0156], 1, [1], 1>;
|
||||
|
||||
// Idioms that clear a register, like xorps %xmm0, %xmm0.
|
||||
// These can often bypass execution ports completely.
|
||||
|
@ -171,6 +172,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
|
|||
defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNTX, [BWPort237,BWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
|
||||
|
@ -266,11 +270,15 @@ def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
|
|||
defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadX, [BWPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadY, [BWPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNT, [BWPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNTY, [BWPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
|
||||
|
@ -578,8 +586,6 @@ def BWWriteResGroup10 : SchedWriteRes<[BWPort4,BWPort237]> {
|
|||
}
|
||||
def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm",
|
||||
"MMX_MOVD64mr",
|
||||
"MOVNTI_64mr",
|
||||
"MOVNTImr",
|
||||
"ST_FP(32|64|80)m",
|
||||
"(V?)MOV(H|L)(PD|PS)mr",
|
||||
"(V?)MOVPDI2DImr",
|
||||
|
|
|
@ -107,10 +107,11 @@ def : WriteRes<WriteRMW, [HWPort237,HWPort4]>;
|
|||
|
||||
// Store_addr on 237.
|
||||
// Store_data on 4.
|
||||
def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
|
||||
def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteMove, [HWPort0156]>;
|
||||
def : WriteRes<WriteZero, []>;
|
||||
defm : X86WriteRes<WriteStore, [HWPort237, HWPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteStoreNT, [HWPort237, HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>;
|
||||
def : WriteRes<WriteZero, []>;
|
||||
|
||||
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
|
||||
defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
|
||||
|
@ -161,6 +162,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
|
|||
defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNTX, [HWPort237,HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
|
||||
|
@ -257,11 +261,15 @@ def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> {
|
|||
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadX, [HWPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadY, [HWPort23], 7, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNT, [HWPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNTY, [HWPort23], 7, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
|
||||
|
@ -754,8 +762,6 @@ def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
|
|||
}
|
||||
def: InstRW<[HWWriteResGroup1], (instregex "FBSTPm",
|
||||
"MMX_MOVD64mr",
|
||||
"MOVNTI_64mr",
|
||||
"MOVNTImr",
|
||||
"ST_FP(32|64|80)m",
|
||||
"(V?)MOV(H|L)(PD|PS)mr",
|
||||
"(V?)MOVPDI2DImr",
|
||||
|
|
|
@ -96,10 +96,11 @@ multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
|
|||
// 2/3 cycle to recompute the address.
|
||||
def : WriteRes<WriteRMW, [SBPort23,SBPort4]>;
|
||||
|
||||
def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
|
||||
def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteMove, [SBPort015]>;
|
||||
def : WriteRes<WriteZero, []>;
|
||||
def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
|
||||
def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
|
||||
def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteMove, [SBPort015]>;
|
||||
def : WriteRes<WriteZero, []>;
|
||||
|
||||
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
|
||||
defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
|
||||
|
@ -153,6 +154,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
|
|||
defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
|
||||
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
|
||||
|
@ -237,11 +241,15 @@ def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4;
|
|||
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadX, [SBPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadY, [SBPort23], 7, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNT, [SBPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNTY, [SBPort23], 7, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
|
||||
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
|
||||
|
|
|
@ -145,9 +145,10 @@ defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>;
|
|||
defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>;
|
||||
|
||||
// Loads, stores, and moves, not folded with other operations.
|
||||
def : WriteRes<WriteLoad, [SKLPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteStore, [SKLPort237, SKLPort4]>;
|
||||
def : WriteRes<WriteMove, [SKLPort0156]>;
|
||||
defm : X86WriteRes<WriteLoad, [SKLPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteStore, [SKLPort237, SKLPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteStoreNT, [SKLPort237, SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteMove, [SKLPort0156], 1, [1], 1>;
|
||||
|
||||
// Idioms that clear a register, like xorps %xmm0, %xmm0.
|
||||
// These can often bypass execution ports completely.
|
||||
|
@ -166,6 +167,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
|
|||
defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNTX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
|
||||
|
@ -258,11 +262,15 @@ def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
|
|||
defm : X86WriteRes<WriteVecLoad, [SKLPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadX, [SKLPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadY, [SKLPort23], 7, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNT, [SKLPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNTY, [SKLPort23], 7, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>;
|
||||
|
@ -580,8 +588,6 @@ def SKLWriteResGroup11 : SchedWriteRes<[SKLPort4,SKLPort237]> {
|
|||
}
|
||||
def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm",
|
||||
"MMX_MOVD64mr",
|
||||
"MOVNTI_64mr",
|
||||
"MOVNTImr",
|
||||
"ST_FP(32|64|80)m",
|
||||
"(V?)MOV(H|L)(PD|PS)mr",
|
||||
"(V?)MOVPDI2DImr",
|
||||
|
|
|
@ -145,9 +145,10 @@ defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>;
|
|||
defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
|
||||
|
||||
// Loads, stores, and moves, not folded with other operations.
|
||||
def : WriteRes<WriteLoad, [SKXPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteStore, [SKXPort237, SKXPort4]>;
|
||||
def : WriteRes<WriteMove, [SKXPort0156]>;
|
||||
defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteStore, [SKXPort237, SKXPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteStoreNT, [SKXPort237, SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteMove, [SKXPort0156], 1, [1], 1>;
|
||||
|
||||
// Idioms that clear a register, like xorps %xmm0, %xmm0.
|
||||
// These can often bypass execution ports completely.
|
||||
|
@ -166,6 +167,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
|
|||
defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
|
||||
|
@ -258,11 +262,15 @@ def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
|
|||
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadX, [SKXPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadY, [SKXPort23], 7, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNT, [SKXPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNTY, [SKXPort23], 7, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>;
|
||||
|
@ -605,8 +613,6 @@ def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> {
|
|||
def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm",
|
||||
"KMOV(B|D|Q|W)mk",
|
||||
"MMX_MOVD64mr",
|
||||
"MOVNTI_64mr",
|
||||
"MOVNTImr",
|
||||
"ST_FP(32|64|80)m",
|
||||
"VMOV(H|L)(PD|PS)Z128mr(b?)",
|
||||
"(V?)MOV(H|L)(PD|PS)mr",
|
||||
|
|
|
@ -90,9 +90,10 @@ class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
|
|||
}
|
||||
|
||||
// Loads, stores, and moves, not folded with other operations.
|
||||
def WriteLoad : SchedWrite;
|
||||
def WriteStore : SchedWrite;
|
||||
def WriteMove : SchedWrite;
|
||||
def WriteLoad : SchedWrite;
|
||||
def WriteStore : SchedWrite;
|
||||
def WriteStoreNT : SchedWrite;
|
||||
def WriteMove : SchedWrite;
|
||||
|
||||
// Arithmetic.
|
||||
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
|
||||
|
@ -145,6 +146,9 @@ def WriteFMaskedLoadY : SchedWrite;
|
|||
def WriteFStore : SchedWrite;
|
||||
def WriteFStoreX : SchedWrite;
|
||||
def WriteFStoreY : SchedWrite;
|
||||
def WriteFStoreNT : SchedWrite;
|
||||
def WriteFStoreNTX : SchedWrite;
|
||||
def WriteFStoreNTY : SchedWrite;
|
||||
def WriteFMaskedStore : SchedWrite;
|
||||
def WriteFMaskedStoreY : SchedWrite;
|
||||
def WriteFMove : SchedWrite;
|
||||
|
@ -229,11 +233,15 @@ defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
|
|||
def WriteVecLoad : SchedWrite;
|
||||
def WriteVecLoadX : SchedWrite;
|
||||
def WriteVecLoadY : SchedWrite;
|
||||
def WriteVecLoadNT : SchedWrite;
|
||||
def WriteVecLoadNTY : SchedWrite;
|
||||
def WriteVecMaskedLoad : SchedWrite;
|
||||
def WriteVecMaskedLoadY : SchedWrite;
|
||||
def WriteVecStore : SchedWrite;
|
||||
def WriteVecStoreX : SchedWrite;
|
||||
def WriteVecStoreY : SchedWrite;
|
||||
def WriteVecStoreNT : SchedWrite;
|
||||
def WriteVecStoreNTY : SchedWrite;
|
||||
def WriteVecMaskedStore : SchedWrite;
|
||||
def WriteVecMaskedStoreY : SchedWrite;
|
||||
def WriteVecMove : SchedWrite;
|
||||
|
@ -352,6 +360,16 @@ def SchedWriteFMoveLS
|
|||
: X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
|
||||
WriteFMoveLSY, WriteFMoveLSY>;
|
||||
|
||||
// Non-temporal floating-point move/load/store bundles.
// Each X86SchedWriteMoveLS below groups a move class, a load class and a
// store class; only the store slot uses a non-temporal class
// (WriteFStoreNT / WriteFStoreNTX / WriteFStoreNTY), while the move and load
// slots reuse the regular FP classes.
// SchedWriteFMoveLSNT wraps the three bundles per vector width; instruction
// definitions select from it with e.g. SchedWriteFMoveLSNT.XMM.MR,
// SchedWriteFMoveLSNT.YMM.MR or SchedWriteFMoveLSNT.Scl.MR.
// NOTE(review): the fourth width argument repeats WriteFMoveLSNTY -- presumably
// the widest slot shares the Y classes; confirm against
// X86SchedWriteMoveLSWidths.
def WriteFMoveLSNT
|
||||
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
|
||||
def WriteFMoveLSNTX
|
||||
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
|
||||
def WriteFMoveLSNTY
|
||||
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
|
||||
def SchedWriteFMoveLSNT
|
||||
: X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
|
||||
WriteFMoveLSNTY, WriteFMoveLSNTY>;
|
||||
|
||||
def WriteVecMoveLS
|
||||
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
|
||||
def WriteVecMoveLSX
|
||||
|
@ -362,6 +380,16 @@ def SchedWriteVecMoveLS
|
|||
: X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
|
||||
WriteVecMoveLSY, WriteVecMoveLSY>;
|
||||
|
||||
// Non-temporal vector-integer move/load/store bundles.
// Unlike the FP variants, both the load and the store slots use non-temporal
// classes here (WriteVecLoadNT* / WriteVecStoreNT*); the move slot keeps the
// regular WriteVecMove* classes.
// The X bundle references WriteVecLoadNT / WriteVecStoreNT (no X-specific
// non-temporal class is used), and the Y bundle uses the NTY classes.
// SchedWriteVecMoveLSNT wraps the bundles per vector width; instruction
// definitions select from it with e.g. SchedWriteVecMoveLSNT.XMM.MR,
// SchedWriteVecMoveLSNT.YMM.RM or SchedWriteVecMoveLSNT.MMX.MR.
// NOTE(review): the fourth width argument repeats WriteVecMoveLSNTY --
// presumably the widest slot shares the Y classes; confirm against
// X86SchedWriteMoveLSWidths.
def WriteVecMoveLSNT
|
||||
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
|
||||
def WriteVecMoveLSNTX
|
||||
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
|
||||
def WriteVecMoveLSNTY
|
||||
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
|
||||
def SchedWriteVecMoveLSNT
|
||||
: X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
|
||||
WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
|
||||
|
||||
// Vector width wrappers.
|
||||
def SchedWriteFAdd
|
||||
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddY>;
|
||||
|
|
|
@ -144,9 +144,10 @@ defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
|
|||
// Loads, stores, and moves, not folded with other operations.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
def : WriteRes<WriteLoad, [AtomPort0]>;
|
||||
def : WriteRes<WriteStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteMove, [AtomPort01]>;
|
||||
def : WriteRes<WriteLoad, [AtomPort0]>;
|
||||
def : WriteRes<WriteStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteStoreNT, [AtomPort0]>;
|
||||
def : WriteRes<WriteMove, [AtomPort01]>;
|
||||
|
||||
// Treat misc copies as a move.
|
||||
def : InstRW<[WriteMove], (instrs COPY)>;
|
||||
|
@ -190,6 +191,9 @@ def : WriteRes<WriteFMaskedLoadY, [AtomPort0]>;
|
|||
def : WriteRes<WriteFStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteFStoreX, [AtomPort0]>;
|
||||
def : WriteRes<WriteFStoreY, [AtomPort0]>;
|
||||
def : WriteRes<WriteFStoreNT, [AtomPort0]>;
|
||||
def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
|
||||
def : WriteRes<WriteFStoreNTY, [AtomPort0]>;
|
||||
def : WriteRes<WriteFMaskedStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteFMaskedStoreY, [AtomPort0]>;
|
||||
|
||||
|
@ -281,12 +285,16 @@ def : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
|
|||
def : WriteRes<WriteVecLoad, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecLoadX, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecLoadY, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecLoadNT, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecLoadNTY, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecMaskedLoad, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecMaskedLoadY, [AtomPort0]>;
|
||||
|
||||
def : WriteRes<WriteVecStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecStoreX, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecStoreY, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecStoreNTY, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>;
|
||||
|
||||
|
|
|
@ -226,9 +226,10 @@ def: InstRW<[JWriteSHLDm],(instrs SHLD16mri8, SHLD32mri8, SHLD64mri8,
|
|||
// Loads, stores, and moves, not folded with other operations.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
|
||||
def : WriteRes<WriteStore, [JSAGU]>;
|
||||
def : WriteRes<WriteMove, [JALU01]>;
|
||||
def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
|
||||
def : WriteRes<WriteStore, [JSAGU]>;
|
||||
def : WriteRes<WriteStoreNT, [JSAGU]>;
|
||||
def : WriteRes<WriteMove, [JALU01]>;
|
||||
|
||||
// Load/store MXCSR.
|
||||
// FIXME: These are copied and pasted from WriteLoad/Store.
|
||||
|
@ -277,6 +278,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
|
|||
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
|
||||
|
||||
|
@ -407,12 +411,16 @@ def : InstRW<[JWriteCVTSI2FLd], (instregex "(V)?CVTSI(64)?2S(D|S)rm")>;
|
|||
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
|
||||
|
||||
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
|
||||
|
||||
|
@ -572,28 +580,6 @@ def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
|
|||
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
|
||||
VCVTPS2DQYrm, VCVTTPS2DQYrm)>;
|
||||
|
||||
def JWriteVMOVNTDQSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
|
||||
let Latency = 2;
|
||||
}
|
||||
def : InstRW<[JWriteVMOVNTDQSt], (instrs MMX_MOVNTQmr, MOVNTDQmr, VMOVNTDQmr)>;
|
||||
|
||||
def JWriteVMOVNTDQYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [2, 2, 2];
|
||||
}
|
||||
def : InstRW<[JWriteVMOVNTDQYSt], (instrs VMOVNTDQYmr)>;
|
||||
|
||||
def JWriteMOVNTSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
|
||||
let Latency = 3;
|
||||
}
|
||||
def : InstRW<[JWriteMOVNTSt], (instrs MOVNTPDmr, MOVNTPSmr, MOVNTSD, MOVNTSS, VMOVNTPDmr, VMOVNTPSmr)>;
|
||||
|
||||
def JWriteVMOVNTPYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
|
||||
let Latency = 3;
|
||||
let ResourceCycles = [2, 2, 2];
|
||||
}
|
||||
def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTPDYmr, VMOVNTPSYmr)>;
|
||||
|
||||
def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [2, 2, 4];
|
||||
|
|
|
@ -79,10 +79,11 @@ multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
|
|||
// need an extra port cycle to recompute the address.
|
||||
def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
|
||||
|
||||
def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
|
||||
def : WriteRes<WriteZero, []>;
|
||||
def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
|
||||
def : WriteRes<WriteZero, []>;
|
||||
|
||||
// Load/store MXCSR.
|
||||
// FIXME: These are probably wrong. They are copied and pasted from WriteStore/Load.
|
||||
|
@ -141,6 +142,9 @@ def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
|
|||
def : WriteRes<WriteFStore, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFStoreX, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
|
||||
|
@ -214,11 +218,15 @@ def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
|||
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecLoadNT, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecLoadNTY, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecStore, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
|
||||
|
|
|
@ -143,9 +143,10 @@ multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
|
|||
// operation in codegen
|
||||
def : WriteRes<WriteRMW, [ZnAGU]>;
|
||||
|
||||
def : WriteRes<WriteStore, [ZnAGU]>;
|
||||
def : WriteRes<WriteMove, [ZnALU]>;
|
||||
def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
|
||||
def : WriteRes<WriteStore, [ZnAGU]>;
|
||||
def : WriteRes<WriteStoreNT, [ZnAGU]>;
|
||||
def : WriteRes<WriteMove, [ZnALU]>;
|
||||
def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
|
||||
|
||||
def : WriteRes<WriteZero, []>;
|
||||
def : WriteRes<WriteLEA, [ZnALU]>;
|
||||
|
@ -197,6 +198,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
|
|||
// FP stores. Each ResourceCycles list has one entry per listed resource:
// the plain/X/Y and NTX/NTY stores use only ZnAGU (one entry), while
// WriteFStoreNT lists ZnAGU and ZnFPU2 (two entries, latency 8).
defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreX, [ZnAGU], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
|
||||
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
|
||||
|
@ -274,11 +278,15 @@ def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
|
|||
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadX, [ZnAGU], 8, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadY, [ZnAGU], 8, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNT, [ZnAGU], 8, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecLoadNTY, [ZnAGU], 8, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>;
|
||||
// Vector-integer stores, regular and non-temporal. All of them use only
// ZnAGU, so each ResourceCycles list carries exactly one entry.
defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreX, [ZnAGU], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
|
||||
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
|
||||
|
@ -1344,12 +1352,6 @@ def ZnWriteINSERTQ: SchedWriteRes<[ZnFPU03,ZnFPU1]> {
|
|||
}
|
||||
def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>;
|
||||
|
||||
// MOVNTSS/MOVNTSD
|
||||
def ZnWriteMOVNT: SchedWriteRes<[ZnAGU,ZnFPU2]> {
|
||||
let Latency = 8;
|
||||
}
|
||||
def : InstRW<[ZnWriteMOVNT], (instregex "MOVNTS(S|D)")>;
|
||||
|
||||
//-- SHA instructions --//
|
||||
// SHA256MSG2
|
||||
def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;
|
||||
|
|
Loading…
Reference in New Issue