From f107b7275c7f4dfc6aea1461201ff30f421bd0e9 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Fri, 3 Aug 2018 10:43:05 +0000 Subject: [PATCH] [SystemZ] Improve handling of instructions which expand to several groups Some instructions expand to more than one decoder group. This has been hitherto ignored, but is handled with this patch. Review: Ulrich Weigand https://reviews.llvm.org/D50187 llvm-svn: 338849 --- .../SystemZ/SystemZHazardRecognizer.cpp | 32 ++++--- llvm/lib/Target/SystemZ/SystemZSchedule.td | 12 +-- llvm/lib/Target/SystemZ/SystemZScheduleZ13.td | 60 +++++++------ llvm/lib/Target/SystemZ/SystemZScheduleZ14.td | 63 +++++++------ .../lib/Target/SystemZ/SystemZScheduleZ196.td | 66 +++++++------- .../Target/SystemZ/SystemZScheduleZEC12.td | 64 +++++++------ .../SystemZ/postra-sched-expandedops.mir | 90 +++++++++++++++++++ 7 files changed, 259 insertions(+), 128 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/postra-sched-expandedops.mir diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index b9e5788cf018..2e3e56280f9d 100644 --- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -49,14 +49,14 @@ getNumDecoderSlots(SUnit *SU) const { if (!SC->isValid()) return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. - if (SC->BeginGroup) { - if (!SC->EndGroup) - return 2; // Cracked instruction - else - return 3; // Expanded/group-alone instruction - } + assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) && + "Only cracked instruction can have 2 uops."); + assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) && + "Expanded instructions always group alone."); + assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) && + "Expanded instructions fill the group(s)."); - return 1; // Normal instruction + return SC->NumMicroOps; } unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const { @@ -139,16 +139,22 @@ void SystemZHazardRecognizer::nextGroup() { LLVM_DEBUG(dumpCurrGroup("Completed decode group")); LLVM_DEBUG(CurGroupDbg = "";); - GrpCount++; + int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1); + assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) && + "Current decoder group bad."); // Reset counter for next group. CurrGroupSize = 0; CurrGroupHas4RegOps = false; + GrpCount += ((unsigned) NumGroups); + // Decrease counters for execution units by one. for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) if (ProcResourceCounters[i] > 0) - ProcResourceCounters[i]--; + ProcResourceCounters[i] = + ((ProcResourceCounters[i] > NumGroups) ? + (ProcResourceCounters[i] - NumGroups) : 0); // Clear CriticalResourceIdx if it is now below the threshold. if (CriticalResourceIdx != UINT_MAX && @@ -323,13 +329,13 @@ EmitInstruction(SUnit *SU) { // in current group. CurrGroupSize += getNumDecoderSlots(SU); CurrGroupHas4RegOps |= has4RegOps(SU->getInstr()); - unsigned GroupLim = - ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3); - assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!"); + unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3); + assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU)) + && "SU does not fit into decoder group!"); // Check if current group is now full/ended. If so, move on to next // group to be ready to evaluate more candidates. - if (CurrGroupSize == GroupLim || SC->EndGroup) + if (CurrGroupSize >= GroupLim || SC->EndGroup) nextGroup(); } diff --git a/llvm/lib/Target/SystemZ/SystemZSchedule.td b/llvm/lib/Target/SystemZ/SystemZSchedule.td index 385a94b5d6a9..83bf97e6841a 100644 --- a/llvm/lib/Target/SystemZ/SystemZSchedule.td +++ b/llvm/lib/Target/SystemZ/SystemZSchedule.td @@ -12,11 +12,13 @@ // These resources are used to express decoder grouping rules. The number of // decoder slots needed by an instructions is normally one, but there are // exceptions. -def NormalGr : SchedWrite; -def Cracked : SchedWrite; -def GroupAlone : SchedWrite; -def BeginGroup : SchedWrite; -def EndGroup : SchedWrite; +def NormalGr : SchedWrite; +def Cracked : SchedWrite; +def GroupAlone : SchedWrite; +def GroupAlone2 : SchedWrite; +def GroupAlone3 : SchedWrite; +def BeginGroup : SchedWrite; +def EndGroup : SchedWrite; // A SchedWrite added to other SchedWrites to make LSU latency parameterizable. def LSULatency : SchedWrite; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td index 5d32232107af..1a87db6e6605 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -48,6 +48,16 @@ def : WriteRes { let BeginGroup = 1; let EndGroup = 1; } +def : WriteRes { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} // Incoming latency removed from the register operand which is used together // with a memory operand by the instruction. @@ -131,7 +141,7 @@ def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; -def : InstRW<[WLat1, FXa2, FXb2, GroupAlone], +def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2], (instregex "B(R)?X(H|L).*$")>; // Compare and branch @@ -474,13 +484,13 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXa2, LSU, GroupAlone], //===----------------------------------------------------------------------===// def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; -def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "D$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; -def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone], +def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], (instregex "DSG(F)?$")>; def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; -def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "DL(G)?$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "DL(G)?$")>; //===----------------------------------------------------------------------===// // Shifts @@ -490,7 +500,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; -def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone], +def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], (instregex "S(L|R)D(A|L)$")>; // Rotate @@ -597,7 +607,7 @@ def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], (instregex "CS(G|Y)?$")>; // Compare double and swap -def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone], +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], (instregex "CDS(Y)?$")>; def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, GroupAlone], (instregex "CDSG$")>; @@ -620,7 +630,7 @@ def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; //===----------------------------------------------------------------------===// def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; -def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone], +def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], (instregex "TRT$")>; def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; @@ -643,12 +653,12 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], // Decimal arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone], +def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], (instregex "CVBG$")>; -def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone], +def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], (instregex "CVB(Y)?$")>; -def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone], (instregex "CVDG$")>; -def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; @@ -704,7 +714,7 @@ def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; //===----------------------------------------------------------------------===// // Transaction begin -def : InstRW<[WLat9, LSU2, FXb5, GroupAlone], (instregex "TBEGIN(C)?$")>; +def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; // Transaction end def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; @@ -813,9 +823,9 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>; // Convert from fixed / logical def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; // Convert to fixed / logical def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], @@ -941,7 +951,7 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; // Convert from fixed def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; // Convert to fixed def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; @@ -1054,9 +1064,9 @@ def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; // Convert from fixed / logical def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CD(F|G)TR(A)?$")>; -def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)TR(A)?$")>; def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDL(F|G)TR$")>; -def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)TR$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)TR$")>; // Convert to fixed / logical def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], @@ -1068,19 +1078,19 @@ def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; // Convert from / to signed / unsigned packed def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; -def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>; def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; -def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone], (instregex "C(S|U)XTR$")>; +def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; // Convert from / to zoned def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; -def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXZT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; // Convert from / to packed def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; -def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXPT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; @@ -1129,7 +1139,7 @@ def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; // Reround def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; -def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone], (instregex "RRXTR$")>; +def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; // Shift significand left/right def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; @@ -1137,7 +1147,7 @@ def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; // Insert biased exponent def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "IEXTR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; //===----------------------------------------------------------------------===// // DFP: Comparisons @@ -1491,8 +1501,8 @@ def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; -def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone], (instregex "STCK(F)?$")>; -def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone], (instregex "STCKE$")>; +def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td index 515f968e5091..0f26ffce0e09 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -48,6 +48,16 @@ def : WriteRes { let BeginGroup = 1; let EndGroup = 1; } +def : WriteRes { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} // Incoming latency removed from the register operand which is used together // with a memory operand by the instruction. @@ -132,7 +142,7 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>; def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; -def : InstRW<[WLat1, FXa2, FXb2, GroupAlone], +def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2], (instregex "B(R)?X(H|L).*$")>; // Compare and branch @@ -483,13 +493,14 @@ def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>; //===----------------------------------------------------------------------===// def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; -def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "D$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; -def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone], +def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], (instregex "DSG(F)?$")>; def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; -def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "DL(G)?$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], + (instregex "DL(G)?$")>; //===----------------------------------------------------------------------===// // Shifts @@ -499,7 +510,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; -def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone], +def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], (instregex "S(L|R)D(A|L)$")>; // Rotate @@ -606,7 +617,7 @@ def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], (instregex "CS(G|Y)?$")>; // Compare double and swap -def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone], +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], (instregex "CDS(Y)?$")>; def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, GroupAlone], (instregex "CDSG$")>; @@ -629,7 +640,7 @@ def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; //===----------------------------------------------------------------------===// def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; -def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone], +def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], (instregex "TRT$")>; def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; @@ -662,12 +673,12 @@ def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>; // Decimal arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone], +def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], (instregex "CVBG$")>; -def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone], +def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], (instregex "CVB(Y)?$")>; -def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone], (instregex "CVDG$")>; -def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; @@ -723,7 +734,7 @@ def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; //===----------------------------------------------------------------------===// // Transaction begin -def : InstRW<[WLat9, LSU2, FXb5, GroupAlone], (instregex "TBEGIN(C)?$")>; +def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; // Transaction end def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; @@ -832,9 +843,9 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>; // Convert from fixed / logical def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; // Convert to fixed / logical def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], @@ -960,7 +971,7 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; // Convert from fixed def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; // Convert to fixed def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; @@ -1071,9 +1082,9 @@ def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; // Convert from fixed / logical def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CD(F|G)TR(A)?$")>; -def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)TR(A)?$")>; def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDL(F|G)TR$")>; -def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)TR$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)TR$")>; // Convert to fixed / logical def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], @@ -1085,19 +1096,19 @@ def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; // Convert from / to signed / unsigned packed def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; -def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>; def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; -def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone], (instregex "C(S|U)XTR$")>; +def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; // Convert from / to zoned def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; -def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXZT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; // Convert from / to packed def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; -def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXPT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; @@ -1146,7 +1157,7 @@ def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; // Reround def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; -def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone], (instregex "RRXTR$")>; +def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; // Shift significand left/right def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; @@ -1154,7 +1165,7 @@ def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; // Insert biased exponent def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "IEXTR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; //===----------------------------------------------------------------------===// // DFP: Comparisons @@ -1469,7 +1480,7 @@ def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>; //===----------------------------------------------------------------------===// def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "VLIP$")>; -def : InstRW<[WLat6, VecDFX, LSU, GroupAlone], (instregex "VPKZ$")>; +def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>; def : InstRW<[WLat1, VecDFX, FXb, LSU, Cracked], (instregex "VUPKZ$")>; def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], (instregex "VCVB(G)?$")>; def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], (instregex "VCVD(G)?$")>; @@ -1580,8 +1591,8 @@ def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; -def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone], (instregex "STCK(F)?$")>; -def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone], (instregex "STCKE$")>; +def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td index 3012b565d5ef..7535739f813a 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -39,15 +39,21 @@ let NumMicroOps = 1 in { def : WriteRes { let BeginGroup = 1; } def : WriteRes { let EndGroup = 1; } } -def : WriteRes { - let NumMicroOps = 2; - let BeginGroup = 1; -} def : WriteRes { let NumMicroOps = 3; let BeginGroup = 1; let EndGroup = 1; } +def : WriteRes { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} // Incoming latency removed from the register operand which is used together // with a memory operand by the instruction. @@ -114,7 +120,7 @@ def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?BC(R)?(Asm.*)?$")>; def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?B(R)?(Asm.*)?$")>; def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BRCT(G|H)?$")>; def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BCT(G)?(R)?$")>; -def : InstRW<[WLat1, FXU3, LSU, GroupAlone], +def : InstRW<[WLat1, FXU3, LSU, GroupAlone2], (instregex "B(R)?X(H|L).*$")>; // Compare and branch @@ -439,14 +445,14 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXU2, LSU, GroupAlone], // Division and remainder //===----------------------------------------------------------------------===// -def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DR$")>; -def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone], +def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3], (instregex "D$")>; -def : InstRW<[WLat30, FPU4, FXU4, GroupAlone], (instregex "DSG(F)?R$")>; -def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone], +def : InstRW<[WLat30, FPU4, FXU4, GroupAlone3], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone3], (instregex "DSG(F)?$")>; -def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DL(G)?R$")>; -def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone], +def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DL(G)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3], (instregex "DL(G)?$")>; //===----------------------------------------------------------------------===// @@ -457,7 +463,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLL(G|K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRL(G|K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRA(G|K)?$")>; def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "SLA(G|K)?$")>; -def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone], +def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone2], (instregex "S(L|R)D(A|L)$")>; // Rotate @@ -560,7 +566,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, FXU2, LSU, GroupAlone], (instregex "CS(G|Y)?$")>; // Compare double and swap -def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone], +def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone2], (instregex "CDS(Y)?$")>; def : InstRW<[WLat12, WLat12, FXU6, LSU2, GroupAlone], (instregex "CDSG$")>; @@ -604,12 +610,12 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(KIMD|KLMD|KMAC|PCC)$")> // Decimal arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone], +def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone2], (instregex "CVBG$")>; -def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone], +def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone2], (instregex "CVB(Y)?$")>; -def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone], (instregex "CVDG$")>; -def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone3], (instregex "CVD(Y)?$")>; def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; def : InstRW<[WLat10, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; @@ -701,13 +707,13 @@ def : InstRW<[], (instregex "Insn.*")>; // Load zero def : InstRW<[WLat1, FXU, NormalGr], (instregex "LZ(DR|ER)$")>; -def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LZXR$")>; +def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LZXR$")>; // Load def : InstRW<[WLat1, FXU, NormalGr], (instregex "LER$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "LD(R|R32|GR)$")>; def : InstRW<[WLat3, FXU, NormalGr], (instregex "LGDR$")>; -def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LXR$")>; +def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LXR$")>; // Load and Test def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>; @@ -747,10 +753,10 @@ def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "LX(E|D)BR$")>; // Convert from fixed / logical def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)BR(A)?$")>; -def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CX(F|G)BR(A?)$")>; +def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)BR(A?)$")>; def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CEL(F|G)BR$")>; def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CDL(F|G)BR$")>; -def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CXL(F|G)BR$")>; +def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CXL(F|G)BR$")>; // Convert to fixed / logical def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone], @@ -874,7 +880,7 @@ def : InstRW<[WLat9, FPU4, GroupAlone], (instregex "LX(E|D)R$")>; // Convert from fixed def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)R$")>; -def : InstRW<[WLat10, FXU, FPU4, GroupAlone], (instregex "CX(F|G)R$")>; +def : InstRW<[WLat10, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)R$")>; // Convert to fixed def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone], @@ -986,11 +992,11 @@ def : InstRW<[WLat6, DFU4, GroupAlone], (instregex "LXDTR$")>; // Convert from fixed / logical def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDFTR$")>; def : InstRW<[WLat30, FXU, DFU, GroupAlone], (instregex "CDGTR(A)?$")>; -def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXFTR(A)?$")>; -def : InstRW<[WLat30, FXU, DFU4, GroupAlone], (instregex "CXGTR(A)?$")>; +def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXFTR(A)?$")>; +def : InstRW<[WLat30, FXU, DFU4, GroupAlone2], (instregex "CXGTR(A)?$")>; def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDL(F|G)TR$")>; -def : InstRW<[WLat9, FXU, DFU4, GroupAlone], (instregex "CXLFTR$")>; -def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXLGTR$")>; +def : InstRW<[WLat9, FXU, DFU4, GroupAlone2], (instregex "CXLFTR$")>; +def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXLGTR$")>; // Convert to fixed / logical def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CFDTR(A)?$")>; @@ -1002,9 +1008,9 @@ def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CL(F|G)XTR$")>; // Convert from / to signed / unsigned packed def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "CD(S|U)TR$")>; -def : InstRW<[WLat8, FXU2, DFU4, GroupAlone], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat8, FXU2, DFU4, GroupAlone2], (instregex "CX(S|U)TR$")>; def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "C(S|U)DTR$")>; -def : InstRW<[WLat12, FXU2, DFU4, GroupAlone], (instregex "C(S|U)XTR$")>; +def : InstRW<[WLat12, FXU2, DFU4, GroupAlone2], (instregex "C(S|U)XTR$")>; // Perform floating-point operation def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; @@ -1051,7 +1057,7 @@ def : InstRW<[WLat10, WLat10, DFU4, GroupAlone], (instregex "QAXTR$")>; // Reround def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "RRDTR$")>; -def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone], (instregex "RRXTR$")>; +def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone2], (instregex "RRXTR$")>; // Shift significand left/right def : InstRW<[WLat7LSU, LSU, DFU, GroupAlone], (instregex "S(L|R)DT$")>; @@ -1059,7 +1065,7 @@ def : InstRW<[WLat11LSU, LSU, DFU4, GroupAlone], (instregex "S(L|R)XT$")>; // Insert biased exponent def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "IEDTR$")>; -def : InstRW<[WLat7, FXU, DFU4, GroupAlone], (instregex "IEXTR$")>; +def : InstRW<[WLat7, FXU, DFU4, GroupAlone2], (instregex "IEXTR$")>; //===----------------------------------------------------------------------===// // DFP: Comparisons diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td index 892f493570d1..a21d2c4cef70 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -39,15 +39,21 @@ let NumMicroOps = 1 in { def : WriteRes { let BeginGroup = 1; } def : WriteRes { let EndGroup = 1; } } -def : WriteRes { - let NumMicroOps = 2; - let BeginGroup = 1; -} def : WriteRes { let NumMicroOps = 3; let BeginGroup = 1; let EndGroup = 1; } +def : WriteRes { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} // Incoming latency removed from the register operand which is used together // with a memory operand by the instruction. @@ -119,7 +125,7 @@ def : InstRW<[WLat1, LSU, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; def : InstRW<[WLat1, FXU, EndGroup], (instregex "BRCT(G)?$")>; def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BRCTH$")>; def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BCT(G)?(R)?$")>; -def : InstRW<[WLat1, FXU3, LSU, GroupAlone], +def : InstRW<[WLat1, FXU3, LSU, GroupAlone2], (instregex "B(R)?X(H|L).*$")>; // Compare and branch @@ -450,14 +456,14 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXU2, LSU, GroupAlone], // Division and remainder //===----------------------------------------------------------------------===// -def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DR$")>; -def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone], +def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3], (instregex "D$")>; -def : InstRW<[WLat30, FPU4, FXU4, GroupAlone], (instregex "DSG(F)?R$")>; -def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone], +def : InstRW<[WLat30, FPU4, FXU4, GroupAlone3], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone3], (instregex "DSG(F)?$")>; -def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DL(G)?R$")>; -def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone], +def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DL(G)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3], (instregex "DL(G)?$")>; //===----------------------------------------------------------------------===// @@ -468,7 +474,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLL(G|K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRL(G|K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRA(G|K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLA(G|K)?$")>; -def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone], +def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone2], (instregex "S(L|R)D(A|L)$")>; // Rotate @@ -572,7 +578,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, FXU2, LSU, GroupAlone], (instregex "CS(G|Y)?$")>; // Compare double and swap -def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone], +def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone2], (instregex "CDS(Y)?$")>; def : InstRW<[WLat12, WLat12, FXU6, LSU2, GroupAlone], (instregex "CDSG$")>; @@ -595,7 +601,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; //===----------------------------------------------------------------------===// def : InstRW<[WLat1, LSU, GroupAlone], (instregex "TR$")>; -def : InstRW<[WLat30, WLat30, WLat30, FXU3, LSU2, GroupAlone], +def : InstRW<[WLat30, WLat30, WLat30, FXU3, LSU2, GroupAlone2], (instregex "TRT$")>; def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; @@ -617,11 +623,11 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(KIMD|KLMD|KMAC|PCC)$")> // Decimal arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone], +def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone2], (instregex "CVBG$")>; def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone], (instregex "CVB(Y)?$")>; -def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone3], (instregex "CVDG$")>; def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone], (instregex "CVD(Y)?$")>; def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; @@ -785,10 +791,10 @@ def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "LX(E|D)BR$")>; // Convert from fixed / logical def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)BR(A)?$")>; -def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CX(F|G)BR(A?)$")>; +def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)BR(A?)$")>; def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CEL(F|G)BR$")>; def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CDL(F|G)BR$")>; -def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CXL(F|G)BR$")>; +def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CXL(F|G)BR$")>; // Convert to fixed / logical def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone], @@ -912,7 +918,7 @@ def : InstRW<[WLat9, FPU4, GroupAlone], (instregex "LX(E|D)R$")>; // Convert from fixed def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)R$")>; -def : InstRW<[WLat10, FXU, FPU4, GroupAlone], (instregex "CX(F|G)R$")>; +def : InstRW<[WLat10, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)R$")>; // Convert to fixed def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone], @@ -1024,11 +1030,11 @@ def : InstRW<[WLat6, DFU4, GroupAlone], (instregex "LXDTR$")>; // Convert from fixed / logical def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDFTR$")>; def : InstRW<[WLat30, FXU, DFU, GroupAlone], (instregex "CDGTR(A)?$")>; -def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXFTR(A)?$")>; -def : InstRW<[WLat30, FXU, DFU4, GroupAlone], (instregex "CXGTR(A)?$")>; +def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXFTR(A)?$")>; +def : InstRW<[WLat30, FXU, DFU4, GroupAlone2], (instregex "CXGTR(A)?$")>; def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDL(F|G)TR$")>; -def : InstRW<[WLat9, FXU, DFU4, GroupAlone], (instregex "CXLFTR$")>; -def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXLGTR$")>; +def : InstRW<[WLat9, FXU, DFU4, GroupAlone2], (instregex "CXLFTR$")>; +def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXLGTR$")>; // Convert to fixed / logical def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CFDTR(A)?$")>; @@ -1040,13 +1046,13 @@ def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CL(F|G)XTR$")>; // Convert from / to signed / unsigned packed def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "CD(S|U)TR$")>; -def : InstRW<[WLat8, FXU2, DFU4, GroupAlone], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat8, FXU2, DFU4, GroupAlone2], (instregex "CX(S|U)TR$")>; def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "C(S|U)DTR$")>; -def : InstRW<[WLat12, FXU2, DFU4, GroupAlone], (instregex "C(S|U)XTR$")>; +def : InstRW<[WLat12, FXU2, DFU4, GroupAlone2], (instregex "C(S|U)XTR$")>; // Convert from / to zoned def : InstRW<[WLat4LSU, LSU, DFU2, GroupAlone], (instregex "CDZT$")>; -def : InstRW<[WLat11LSU, LSU2, DFU4, GroupAlone], (instregex "CXZT$")>; +def : InstRW<[WLat11LSU, LSU2, DFU4, GroupAlone3], (instregex "CXZT$")>; def : InstRW<[WLat1, FXU, LSU, DFU2, GroupAlone], (instregex "CZDT$")>; def : InstRW<[WLat1, FXU, LSU, DFU2, GroupAlone], (instregex "CZXT$")>; @@ -1095,7 +1101,7 @@ def : InstRW<[WLat10, WLat10, DFU4, GroupAlone], (instregex "QAXTR$")>; // Reround def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "RRDTR$")>; -def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone], (instregex "RRXTR$")>; +def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone2], (instregex "RRXTR$")>; // Shift significand left/right def : InstRW<[WLat7LSU, LSU, DFU, GroupAlone], (instregex "S(L|R)DT$")>; @@ -1103,7 +1109,7 @@ def : InstRW<[WLat11LSU, LSU, DFU4, GroupAlone], (instregex "S(L|R)XT$")>; // Insert biased exponent def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "IEDTR$")>; -def : InstRW<[WLat7, FXU, DFU4, GroupAlone], (instregex "IEXTR$")>; +def : InstRW<[WLat7, FXU, DFU4, GroupAlone2], (instregex "IEXTR$")>; //===----------------------------------------------------------------------===// // DFP: Comparisons @@ -1223,7 +1229,7 @@ def : InstRW<[WLat30, MCD], (instregex "SCKPF$")>; def : InstRW<[WLat30, MCD], (instregex "SCKC$")>; def : InstRW<[WLat30, MCD], (instregex "SPT$")>; def : InstRW<[WLat9, FXU, LSU2, GroupAlone], (instregex "STCK(F)?$")>; -def : InstRW<[WLat20, LSU4, FXU2, GroupAlone], (instregex "STCKE$")>; +def : InstRW<[WLat20, LSU4, FXU2, GroupAlone2], (instregex "STCKE$")>; def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; def : InstRW<[WLat30, MCD], (instregex "STPT$")>; diff --git a/llvm/test/CodeGen/SystemZ/postra-sched-expandedops.mir b/llvm/test/CodeGen/SystemZ/postra-sched-expandedops.mir new file mode 100644 index 000000000000..43b9a1b8a132 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/postra-sched-expandedops.mir @@ -0,0 +1,90 @@ +# RUN: llc %s -mtriple=s390x-linux-gnu -mcpu=z13 -start-before=postmisched \ +# RUN: -debug-only=machine-scheduler -o - 2>&1 | FileCheck %s +# REQUIRES: asserts + +# Test that the cycle index is the same before and after scheduling an +# instruction with 6 decoder slots. + +# CHECK: ++ | Current cycle index: 3 +# CHECK-NEXT: ++ | Resource counters: Z13_FXaUnit:2 +# CHECK-NEXT: ** ScheduleDAGMI::schedule picking next node +# CHECK-NEXT: ** Available: {SU(3):DL/FXa(4cyc)/LSU/6uops/GroupsAlone, SU(6):LARL/FXa} +# CHECK-NEXT: ** Best so far: SU(3):DL/FXa(4cyc)/LSU/6uops/GroupsAlone Grouping cost:-1 Height:43 +# CHECK-NEXT: ** Tried : SU(6):LARL/FXa Height:14 +# CHECK-NEXT: ** Scheduling SU(3) Grouping cost:-1 +# CHECK-NEXT: ++ HazardRecognizer emitting SU(3):DL/FXa(4cyc)/LSU/6uops/GroupsAlone +# CHECK-NEXT: ++ Decode group before emission: +# CHECK-NEXT: ++ Completed decode group: { SU(3):DL/FXa(4cyc)/LSU/6uops/GroupsAlone } (6 decoder slots) +# CHECK-NEXT: ++ | Current decoder group: +# CHECK-NEXT: ++ | Current cycle index: 3 + + +--- | + ; ModuleID = '' + source_filename = "" + target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" + target triple = "s390x--linux-gnu" + + %0 = type { i8, i8, i8, i8, i16, i32, i32, i32 } + + @TTSize = external dso_local local_unnamed_addr global i32, align 4 + @AS_TTable = external dso_local local_unnamed_addr global %0*, align 8 + @Variant = external dso_local local_unnamed_addr global i32, align 4 + + define dso_local void @LearnStoreTT(i32 signext %arg, i32 zeroext %arg1, i32 signext %arg2) #0 { + bb: + %tmp = load i32, i32* @TTSize, align 4 + %tmp3 = urem i32 %arg1, %tmp + %tmp4 = load %0*, %0** @AS_TTable, align 8 + %tmp5 = zext i32 %tmp3 to i64 + %tmp6 = load i32, i32* @Variant, align 4 + %tmp7 = add i32 %tmp6, -3 + %tmp8 = icmp ugt i32 %tmp7, 1 + %tmp9 = select i1 %tmp8, i8 3, i8 1 + store i8 %tmp9, i8* undef, align 1 + store i32 %arg, i32* undef, align 4 + %tmp10 = trunc i32 %arg2 to i8 + store i8 %tmp10, i8* null, align 1 + %tmp11 = getelementptr inbounds %0, %0* %tmp4, i64 %tmp5, i32 2 + store i8 0, i8* %tmp11, align 2 + ret void + } + + attributes #0 = { "target-cpu"="z13" } + +... +--- +name: LearnStoreTT +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$r2d' } + - { reg: '$r3d' } + - { reg: '$r4d' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.bb: + liveins: $r2d, $r3d, $r4d + + $r1d = LGR $r3d, implicit-def $r0q + renamable $r3d = LARL @TTSize + renamable $r0d = LLILL 0, implicit killed $r0q, implicit-def $r0q + renamable $r0q = DL killed renamable $r0q, killed renamable $r3d, 0, $noreg :: (dereferenceable load 4 from @TTSize) + renamable $r3d = LGRL @AS_TTable :: (dereferenceable load 8 from @AS_TTable) + renamable $r1d = LLGFR renamable $r0l, implicit killed $r0q + renamable $r5d = LARL @Variant + renamable $r0l = LHI -3 + renamable $r0l = A killed renamable $r0l, killed renamable $r5d, 0, $noreg, implicit-def dead $cc :: (dereferenceable load 4 from @Variant) + CLFI killed renamable $r0l, 1, implicit-def $cc + renamable $r0l = LHI 1 + renamable $r0l = LOCHI killed renamable $r0l, 3, 14, 2, implicit killed $cc + STC killed renamable $r0l, undef renamable $r1d, 0, $noreg :: (store 1 into `i8* undef`) + ST renamable $r2l, undef renamable $r1d, 0, $noreg, implicit killed $r2d :: (store 4 into `i32* undef`) + STC renamable $r4l, $noreg, 0, $noreg, implicit killed $r4d :: (store 1 into `i8* null`) + renamable $r1d = MGHI killed renamable $r1d, 20 + renamable $r0l = LHI 0 + STC killed renamable $r0l, killed renamable $r3d, 2, killed renamable $r1d :: (store 1 into %ir.tmp11, align 2) + Return + +...