diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h index 7e0ca1478e5f..2b6429fbebac 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -57,7 +57,8 @@ namespace llvm { Barrier, ///< An unknown scheduling barrier. MayAliasMem, ///< Nonvolatile load/Store instructions that may alias. MustAliasMem, ///< Nonvolatile load/Store instructions that must alias. - Artificial ///< Arbitrary weak DAG edge (no actual dependence). + Artificial, ///< Arbitrary weak DAG edge (no actual dependence). + Cluster ///< Weak DAG edge linking a chain of clustered instrs. }; private: @@ -200,12 +201,27 @@ namespace llvm { return getKind() == Order && Contents.OrdKind == MustAliasMem; } + /// isWeak - Test if this a weak dependence. Weak dependencies are + /// considered DAG edges for height computation and other heuristics, but do + /// not force ordering. Breaking a weak edge may require the scheduler to + /// compensate, for example by inserting a copy. + bool isWeak() const { + return getKind() == Order + && (Contents.OrdKind == Artificial || Contents.OrdKind == Cluster); + } + /// isArtificial - Test if this is an Order dependence that is marked /// as "artificial", meaning it isn't necessary for correctness. bool isArtificial() const { return getKind() == Order && Contents.OrdKind == Artificial; } + /// isCluster - Test if this is an Order dependence that is marked + /// as "cluster", meaning it is artificial and wants to be adjacent. + bool isCluster() const { + return getKind() == Order && Contents.OrdKind == Cluster; + } + /// isAssignedRegDep - Test if this is a Data dependence that is /// associated with a register. bool isAssignedRegDep() const { @@ -267,6 +283,8 @@ namespace llvm { unsigned NumSuccs; // # of SDep::Data sucss. unsigned NumPredsLeft; // # of preds not scheduled. unsigned NumSuccsLeft; // # of succs not scheduled. + unsigned WeakPredsLeft; // # of weak preds not scheduled. + unsigned WeakSuccsLeft; // # of weak succs not scheduled. unsigned short NumRegDefsLeft; // # of reg defs with no scheduled use. unsigned short Latency; // Node latency. bool isVRegCycle : 1; // May use and def the same vreg. @@ -301,12 +319,12 @@ namespace llvm { SUnit(SDNode *node, unsigned nodenum) : Node(node), Instr(0), OrigNode(0), SchedClass(0), NodeNum(nodenum), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), - NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false), - isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), - isPending(false), isAvailable(false), isScheduled(false), - isScheduleHigh(false), isScheduleLow(false), isCloned(false), - SchedulingPref(Sched::None), + NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0), + Latency(0), isVRegCycle(false), isCall(false), isCallOp(false), + isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), + hasPhysRegClobbers(false), isPending(false), isAvailable(false), + isScheduled(false), isScheduleHigh(false), isScheduleLow(false), + isCloned(false), SchedulingPref(Sched::None), isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} @@ -315,12 +333,12 @@ namespace llvm { SUnit(MachineInstr *instr, unsigned nodenum) : Node(0), Instr(instr), OrigNode(0), SchedClass(0), NodeNum(nodenum), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), - NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false), - isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), - isPending(false), isAvailable(false), isScheduled(false), - isScheduleHigh(false), isScheduleLow(false), isCloned(false), - SchedulingPref(Sched::None), + NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0), + Latency(0), isVRegCycle(false), isCall(false), isCallOp(false), + isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), + hasPhysRegClobbers(false), isPending(false), isAvailable(false), + isScheduled(false), isScheduleHigh(false), isScheduleLow(false), + isCloned(false), SchedulingPref(Sched::None), isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} @@ -328,12 +346,12 @@ namespace llvm { SUnit() : Node(0), Instr(0), OrigNode(0), SchedClass(0), NodeNum(~0u), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), - NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false), - isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), - isPending(false), isAvailable(false), isScheduled(false), - isScheduleHigh(false), isScheduleLow(false), isCloned(false), - SchedulingPref(Sched::None), + NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0), + Latency(0), isVRegCycle(false), isCall(false), isCallOp(false), + isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), + hasPhysRegClobbers(false), isPending(false), isAvailable(false), + isScheduled(false), isScheduleHigh(false), isScheduleLow(false), + isCloned(false), SchedulingPref(Sched::None), isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} @@ -372,7 +390,7 @@ namespace llvm { /// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. - bool addPred(const SDep &D); + bool addPred(const SDep &D, bool Required = true); /// removePred - This removes the specified edge as a pred of the current /// node if it exists. It also removes the current node as a successor of @@ -654,6 +672,7 @@ namespace llvm { class ScheduleDAGTopologicalSort { /// SUnits - A reference to the ScheduleDAG's SUnits. std::vector &SUnits; + SUnit *ExitSU; /// Index2Node - Maps topological index to the node number. std::vector Index2Node; @@ -675,7 +694,7 @@ namespace llvm { void Allocate(int n, int index); public: - explicit ScheduleDAGTopologicalSort(std::vector &SUnits); + ScheduleDAGTopologicalSort(std::vector &SUnits, SUnit *ExitSU); /// InitDAGTopologicalSorting - create the initial topological /// ordering from the DAG to be scheduled. diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index a4817d09c0d3..71cc072d47e4 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -310,6 +310,10 @@ void ReadyQueue::dump() { void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); + if (SuccEdge->isWeak()) { + --SuccSU->WeakPredsLeft; + return; + } #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; @@ -338,6 +342,10 @@ void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); + if (PredEdge->isWeak()) { + --PredSU->WeakSuccsLeft; + return; + } #ifndef NDEBUG if (PredSU->NumSuccsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; @@ -530,17 +538,20 @@ void ScheduleDAGMI::postprocessDAG() { } // Release all DAG roots for scheduling. +// +// Nodes with unreleased weak edges can still be roots. void ScheduleDAGMI::releaseRoots() { SmallVector BotRoots; for (std::vector::iterator I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { + SUnit *SU = &(*I); // A SUnit is ready to top schedule if it has no predecessors. - if (I->Preds.empty()) - SchedImpl->releaseTopNode(&(*I)); + if (!I->NumPredsLeft && SU != &EntrySU) + SchedImpl->releaseTopNode(SU); // A SUnit is ready to bottom schedule if it has no successors. - if (I->Succs.empty()) - BotRoots.push_back(&(*I)); + if (!I->NumSuccsLeft && SU != &ExitSU) + BotRoots.push_back(SU); } // Release bottom roots in reverse order so the higher priority nodes appear // first. This is more natural and slightly more efficient. @@ -555,13 +566,12 @@ void ScheduleDAGMI::initQueues() { // Initialize the strategy before modifying the DAG. SchedImpl->initialize(this); - // Release edges from the special Entry node or to the special Exit node. + // Release all DAG roots for scheduling, not including EntrySU/ExitSU. + releaseRoots(); + releaseSuccessors(&EntrySU); releasePredecessors(&ExitSU); - // Release all DAG roots for scheduling. - releaseRoots(); - SchedImpl->registerRoots(); CurrentTop = nextIfDebug(RegionBegin, RegionEnd); diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp index d57bc7362de9..4284c42eb261 100644 --- a/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -111,9 +111,6 @@ namespace { /// added to the AvailableQueue. std::vector PendingQueue; - /// Topo - A topological ordering for SUnits. - ScheduleDAGTopologicalSort Topo; - /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; @@ -198,7 +195,7 @@ SchedulePostRATDList::SchedulePostRATDList( AliasAnalysis *AA, const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl &CriticalPathRCs) - : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA), + : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), LiveRegs(TRI->getNumRegs()) { const TargetMachine &TM = MF.getTarget(); @@ -580,10 +577,14 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { //===----------------------------------------------------------------------===// /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to -/// the PendingQueue if the count reaches zero. Also update its cycle bound. +/// the PendingQueue if the count reaches zero. void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); + if (SuccEdge->isArtificial()) { + --SuccSU->WeakPredsLeft; + return; + } #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; @@ -653,8 +654,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { // Add all leaves to Available queue. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { // It is available if it has no predecessors. - bool available = SUnits[i].Preds.empty(); - if (available) { + if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) { AvailableQueue.push(&SUnits[i]); SUnits[i].isAvailable = true; } diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp index 9a6507100170..622403666373 100644 --- a/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -62,10 +62,14 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { /// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. -bool SUnit::addPred(const SDep &D) { +bool SUnit::addPred(const SDep &D, bool Required) { // If this node already has this depenence, don't add a redundant one. for (SmallVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { + // Zero-latency weak edges may be added purely for heuristic ordering. Don't + // add them if another kind of edge already exists. + if (!Required && I->getSUnit() == D.getSUnit()) + return false; if (I->overlaps(D)) { // Extend the latency if needed. Equivalent to removePred(I) + addPred(D). if (I->getLatency() < D.getLatency()) { @@ -96,13 +100,26 @@ bool SUnit::addPred(const SDep &D) { ++NumPreds; ++N->NumSuccs; } + // SD scheduler relies on artificial edges to enforce physreg + // antidependence, so it doesn't treat them as weak edges. + bool isWeak = D.isWeak() && N->isInstr(); if (!N->isScheduled) { - assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); - ++NumPredsLeft; + if (isWeak) { + ++WeakPredsLeft; + } + else { + assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); + ++NumPredsLeft; + } } if (!isScheduled) { - assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); - ++N->NumSuccsLeft; + if (isWeak) { + ++N->WeakSuccsLeft; + } + else { + assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); + ++N->NumSuccsLeft; + } } Preds.push_back(D); N->Succs.push_back(P); @@ -143,13 +160,22 @@ void SUnit::removePred(const SDep &D) { --NumPreds; --N->NumSuccs; } + bool isWeak = D.isWeak() && N->isInstr(); if (!N->isScheduled) { - assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); - --NumPredsLeft; + if (isWeak) + --WeakPredsLeft; + else { + assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); + --NumPredsLeft; + } } if (!isScheduled) { - assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); - --N->NumSuccsLeft; + if (isWeak) + --N->WeakSuccsLeft; + else { + assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); + --N->NumSuccsLeft; + } } if (P.getLatency() != 0) { this->setDepthDirty(); @@ -292,6 +318,10 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { dbgs() << " # preds left : " << NumPredsLeft << "\n"; dbgs() << " # succs left : " << NumSuccsLeft << "\n"; + if (WeakPredsLeft) + dbgs() << " # weak preds left : " << WeakPredsLeft << "\n"; + if (WeakSuccsLeft) + dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n"; dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n"; dbgs() << " Latency : " << Latency << "\n"; dbgs() << " Depth : " << Depth << "\n"; @@ -429,6 +459,8 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { Node2Index.resize(DAGSize); // Initialize the data structures. + if (ExitSU) + WorkList.push_back(ExitSU); for (unsigned i = 0, e = DAGSize; i != e; ++i) { SUnit *SU = &SUnits[i]; int NodeNum = SU->NodeNum; @@ -448,11 +480,12 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { while (!WorkList.empty()) { SUnit *SU = WorkList.back(); WorkList.pop_back(); - Allocate(SU->NodeNum, --Id); + if (SU->NodeNum < DAGSize) + Allocate(SU->NodeNum, --Id); for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { SUnit *SU = I->getSUnit(); - if (!--Node2Index[SU->NodeNum]) + if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum]) // If all dependencies of the node are processed already, // then the node can be computed now. WorkList.push_back(SU); @@ -513,7 +546,10 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, WorkList.pop_back(); Visited.set(SU->NodeNum); for (int I = SU->Succs.size()-1; I >= 0; --I) { - int s = SU->Succs[I].getSUnit()->NodeNum; + unsigned s = SU->Succs[I].getSUnit()->NodeNum; + // Edges to non-SUnits are allowed but ignored (e.g. ExitSU). + if (s >= Node2Index.size()) + continue; if (Node2Index[s] == UpperBound) { HasLoop = true; return; @@ -554,15 +590,16 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, } -/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will -/// create a cycle. -bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) { - if (IsReachable(TargetSU, SU)) +/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will +/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU). +bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) { + // Is SU reachable from TargetSU via successor edges? + if (IsReachable(SU, TargetSU)) return true; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) + for (SUnit::pred_iterator + I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I) if (I->isAssignedRegDep() && - IsReachable(TargetSU, I->getSUnit())) + IsReachable(SU, I->getSUnit())) return true; return false; } @@ -592,6 +629,7 @@ void ScheduleDAGTopologicalSort::Allocate(int n, int index) { } ScheduleDAGTopologicalSort:: -ScheduleDAGTopologicalSort(std::vector &sunits) : SUnits(sunits) {} +ScheduleDAGTopologicalSort(std::vector &sunits, SUnit *exitsu) + : SUnits(sunits), ExitSU(exitsu) {} ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {} diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index a4d4a93e6dd5..836349f6b78a 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -245,21 +245,26 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { if (UseSU == SU) continue; - SDep dep(SU, SDep::Data, *Alias); - // Adjust the dependence latency using operand def/use information, // then allow the target to perform its own adjustments. int UseOp = UseList[i].OpIdx; - MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr(); - dep.setLatency( + MachineInstr *RegUse = 0; + SDep Dep; + if (UseOp < 0) + Dep = SDep(SU, SDep::Artificial); + else { + Dep = SDep(SU, SDep::Data, *Alias); + RegUse = UseSU->getInstr(); + Dep.setMinLatency( + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, + RegUse, UseOp, /*FindMin=*/true)); + } + Dep.setLatency( SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, UseOp, /*FindMin=*/false)); - dep.setMinLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/true)); - ST.adjustSchedDependency(SU, UseSU, dep); - UseSU->addPred(dep); + ST.adjustSchedDependency(SU, UseSU, Dep); + UseSU->addPred(Dep); } } } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index c55456902c87..dc8f0ee4a27b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -156,7 +156,7 @@ public: CodeGenOpt::Level OptLevel) : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), - Topo(SUnits) { + Topo(SUnits, NULL) { const TargetMachine &tm = mf.getTarget(); if (DisableSchedCycles || !NeedLatency) diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 30f03ac737b9..f8ca7b1d407b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -123,6 +123,8 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { llvm_unreachable(0); } #endif + assert(!D.isWeak() && "unexpected artificial DAG edge"); + --SuccSU->NumPredsLeft; SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); diff --git a/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll index b05ec6367ee4..ca0964a05933 100644 --- a/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll +++ b/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll @@ -13,6 +13,7 @@ ; CHECK-NOT: ch SU ; CHECK: ch SU(2): Latency=1 ; CHECK-NOT: ch SU +; CHECK: Successors: ; CHECK: ** List Scheduling ; CHECK: SU(2){{.*}}STR{{.*}} ; CHECK-NOT: ch SU @@ -22,6 +23,7 @@ ; CHECK-NOT: ch SU ; CHECK: ch SU(2): Latency=1 ; CHECK-NOT: ch SU +; CHECK: Successors: define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind { entry: store volatile i32 65540, i32* %p1, align 4, !tbaa !0