diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp index f77905336b26..ce4e45841151 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -539,21 +539,30 @@ void SIScheduleBlock::addPred(SIScheduleBlock *Pred) { Preds.push_back(Pred); assert(none_of(Succs, - [=](SIScheduleBlock *S) { return PredID == S->getID(); }) && + [=](std::pair S) { + return PredID == S.first->getID(); + }) && "Loop in the Block Graph!"); } -void SIScheduleBlock::addSucc(SIScheduleBlock *Succ) { +void SIScheduleBlock::addSucc(SIScheduleBlock *Succ, + SIScheduleBlockLinkKind Kind) { unsigned SuccID = Succ->getID(); // Check if not already predecessor. - for (SIScheduleBlock* S : Succs) { - if (SuccID == S->getID()) + for (std::pair &S : Succs) { + if (SuccID == S.first->getID()) { + if (S.second == SIScheduleBlockLinkKind::NoData && + Kind == SIScheduleBlockLinkKind::Data) + S.second = Kind; return; + } } if (Succ->isHighLatencyBlock()) ++NumHighLatencySuccessors; - Succs.push_back(Succ); + Succs.push_back(std::make_pair(Succ, Kind)); + assert(none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"); @@ -573,8 +582,10 @@ void SIScheduleBlock::printDebug(bool full) { } dbgs() << "\nSuccessors:\n"; - for (SIScheduleBlock* S : Succs) { - S->printDebug(false); + for (std::pair S : Succs) { + if (S.second == SIScheduleBlockLinkKind::Data) + dbgs() << "(Data Dep) "; + S.first->printDebug(false); } if (Scheduled) { @@ -1096,7 +1107,8 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) continue; if (Node2CurrentBlock[Succ->NodeNum] != SUID) - CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]]); + CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]], + SuccDep.isCtrl() ? NoData : Data); } for (SDep& PredDep : SU->Preds) { SUnit *Pred = PredDep.getSUnit(); @@ -1290,10 +1302,8 @@ void SIScheduleBlockCreator::fillStats() { Block->Height = 0; else { unsigned Height = 0; - for (SIScheduleBlock *Succ : Block->getSuccs()) { - if (Height < Succ->Height + 1) - Height = Succ->Height + 1; - } + for (const auto &Succ : Block->getSuccs()) + Height = std::min(Height, Succ.first->Height + 1); Block->Height = Height; } } @@ -1574,17 +1584,13 @@ void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block, } void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) { - for (SIScheduleBlock* Block : Parent->getSuccs()) { - --BlockNumPredsLeft[Block->getID()]; - if (BlockNumPredsLeft[Block->getID()] == 0) { - ReadyBlocks.push_back(Block); - } - // TODO: Improve check. When the dependency between the high latency - // instructions and the instructions of the other blocks are WAR or WAW - // there will be no wait triggered. We would like these cases to not - // update LastPosHighLatencyParentScheduled. - if (Parent->isHighLatencyBlock()) - LastPosHighLatencyParentScheduled[Block->getID()] = NumBlockScheduled; + for (const auto &Block : Parent->getSuccs()) { + if (--BlockNumPredsLeft[Block.first->getID()] == 0) + ReadyBlocks.push_back(Block.first); + + if (Parent->isHighLatencyBlock() && + Block.second == SIScheduleBlockLinkKind::Data) + LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled; } } diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h index 6978b83910d0..085ffcdc9958 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h @@ -54,6 +54,11 @@ struct SISchedulerCandidate { class SIScheduleDAGMI; class SIScheduleBlockCreator; +enum SIScheduleBlockLinkKind { + NoData, + Data +}; + class SIScheduleBlock { SIScheduleDAGMI *DAG; SIScheduleBlockCreator *BC; @@ -92,7 +97,8 @@ class SIScheduleBlock { unsigned ID; std::vector Preds; // All blocks predecessors. - std::vector Succs; // All blocks successors. + // All blocks successors, and the kind of link + std::vector> Succs; unsigned NumHighLatencySuccessors = 0; public: @@ -112,10 +118,11 @@ public: // Add block pred, which has instruction predecessor of SU. void addPred(SIScheduleBlock *Pred); - void addSucc(SIScheduleBlock *Succ); + void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind); const std::vector& getPreds() const { return Preds; } - const std::vector& getSuccs() const { return Succs; } + ArrayRef> + getSuccs() const { return Succs; } unsigned Height; // Maximum topdown path length to block without outputs unsigned Depth; // Maximum bottomup path length to block without inputs