From 3b2f5df12c886674d592c6ce1aa51d418e95b751 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Tue, 18 Jun 2019 12:59:46 +0000 Subject: [PATCH] [MCA] Slightly refactor the bottleneck analysis view. NFCI This patch slightly refactors data structures internally used by the bottleneck analysis to track data and resource dependencies. This patch also updates methods used to print out information about dependency edges when in debug mode. This is the last of a sequence of commits done in preparation for an upcoming patch that fixes PR37494. No functional change intended. llvm-svn: 363677 --- llvm/include/llvm/MCA/HardwareUnits/LSUnit.h | 2 +- .../llvm-mca/Views/BottleneckAnalysis.cpp | 104 +++++++++--------- .../tools/llvm-mca/Views/BottleneckAnalysis.h | 51 ++++++--- llvm/tools/llvm-mca/llvm-mca.cpp | 6 +- 4 files changed, 92 insertions(+), 71 deletions(-) diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h index f2a5cf86ca49..ae9a49c64855 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h +++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -158,7 +158,7 @@ public: } void cycleEvent() { - if (CriticalPredecessor.Cycles) + if (isWaiting() && CriticalPredecessor.Cycles) CriticalPredecessor.Cycles--; } }; diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp index c10e47d5165d..8c825271e4fc 100644 --- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp +++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -143,114 +143,118 @@ void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { } #ifndef NDEBUG -void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, unsigned FromIID, - const DependencyEdge &DE, +void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, + const DependencyEdge &DepEdge, MCInstPrinter &MCIP) const { - bool LoopCarried = FromIID >= DE.IID; - OS << " FROM: " << FromIID << " TO: " << DE.IID - << (LoopCarried ? 
" (loop carried)" : " "); - if (DE.Type == DT_REGISTER) { + unsigned FromIID = DepEdge.FromIID; + unsigned ToIID = DepEdge.ToIID; + assert(FromIID < ToIID && "Graph should be acyclic!"); + + const DependencyEdge::Dependency &DE = DepEdge.Dep; + assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); + + OS << " FROM: " << FromIID << " TO: " << ToIID << " "; + if (DE.Type == DependencyEdge::DT_REGISTER) { OS << " - REGISTER: "; MCIP.printRegName(OS, DE.ResourceOrRegID); - } else if (DE.Type == DT_MEMORY) { + } else if (DE.Type == DependencyEdge::DT_MEMORY) { OS << " - MEMORY"; } else { - assert(DE.Type == DT_RESOURCE && "Unexpected unsupported dependency type!"); + assert(DE.Type == DependencyEdge::DT_RESOURCE && + "Unexpected unsupported dependency type!"); OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; } - OS << " - CYCLES: " << DE.Cycles << '\n'; + OS << " - CYCLES: " << DE.Cost << '\n'; } void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { OS << "\nREG DEPS\n"; - for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { - const DGNode &Node = Nodes[I]; - for (const DependencyEdge &DE : Node.OutgoingEdges) { - if (DE.Type == DT_REGISTER) - dumpDependencyEdge(OS, I, DE, MCIP); - } - } + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_REGISTER) + dumpDependencyEdge(OS, DE, MCIP); OS << "\nMEM DEPS\n"; - for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { - const DGNode &Node = Nodes[I]; - for (const DependencyEdge &DE : Node.OutgoingEdges) { - if (DE.Type == DT_MEMORY) - dumpDependencyEdge(OS, I, DE, MCIP); - } - } + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_MEMORY) + dumpDependencyEdge(OS, DE, MCIP); OS << "\nRESOURCE DEPS\n"; - for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { - const DGNode &Node = Nodes[I]; - for (const DependencyEdge &DE : 
Node.OutgoingEdges) { - if (DE.Type == DT_RESOURCE) - dumpDependencyEdge(OS, I, DE, MCIP); - } - } + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) + dumpDependencyEdge(OS, DE, MCIP); } #endif // NDEBUG -void DependencyGraph::addDependency(unsigned From, DependencyEdge &&Dep) { +void DependencyGraph::addDependency(unsigned From, unsigned To, + DependencyEdge::Dependency &&Dep) { DGNode &NodeFrom = Nodes[From]; - DGNode &NodeTo = Nodes[Dep.IID]; + DGNode &NodeTo = Nodes[To]; SmallVectorImpl &Vec = NodeFrom.OutgoingEdges; - auto It = find_if(Vec, [Dep](DependencyEdge &DE) { - return DE.IID == Dep.IID && DE.ResourceOrRegID == Dep.ResourceOrRegID; + auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { + return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; }); if (It != Vec.end()) { - It->Cycles += Dep.Cycles; + It->Dep.Cost += Dep.Cost; return; } - Vec.emplace_back(Dep); + DependencyEdge DE = {Dep, From, To}; + Vec.emplace_back(DE); NodeTo.NumPredecessors++; } BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, MCInstPrinter &Printer, - ArrayRef S) + ArrayRef S, unsigned NumIter) : STI(sti), Tracker(STI.getSchedModel()), DG(S.size() * 3), - Source(S), TotalCycles(0), PressureIncreasedBecauseOfResources(false), + Source(S), Iterations(NumIter), TotalCycles(0), + PressureIncreasedBecauseOfResources(false), PressureIncreasedBecauseOfRegisterDependencies(false), PressureIncreasedBecauseOfMemoryDependencies(false), SeenStallCycles(false), BPI() {} void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, - unsigned RegID, unsigned Cy) { + unsigned RegID, unsigned Cost) { bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - DG.addRegisterDep(From, To + SourceSize, RegID, Cy); - DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cy); + Cost *= Iterations / 2; + DG.addRegisterDep(From, To 
+ SourceSize, RegID, Cost); + DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); return; } - DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cy); + DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); } -void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, unsigned Cy) { +void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, + unsigned Cost) { bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - DG.addMemoryDep(From, To + SourceSize, Cy); - DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cy); + Cost *= Iterations / 2; + DG.addMemoryDep(From, To + SourceSize, Cost); + DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); return; } - DG.addMemoryDep(From + SourceSize, To + SourceSize, Cy); + DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); } void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, - uint64_t Mask, unsigned Cy) { + uint64_t Mask, unsigned Cost) { bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - DG.addResourceDep(From, To + SourceSize, Mask, Cy); - DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cy); + Cost *= Iterations / 2; + DG.addResourceDep(From, To + SourceSize, Mask, Cost); + DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); return; } - DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cy); + DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); } void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h index 9268e4935c11..f8302496cefb 100644 --- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h +++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h @@ -115,16 +115,27 @@ public: void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event); }; -class 
DependencyGraph { - enum DependencyType { DT_REGISTER, DT_MEMORY, DT_RESOURCE }; +// An edge of a dependency graph. +// Vertices of the graph are instructions identified by their ID. +struct DependencyEdge { + enum DependencyType { DT_INVALID, DT_REGISTER, DT_MEMORY, DT_RESOURCE }; - struct DependencyEdge { + // Dependency edge descriptor. + // + // It describes the dependency reason, as well as the edge cost in cycles. + struct Dependency { DependencyType Type; - unsigned IID; uint64_t ResourceOrRegID; - uint64_t Cycles; + uint64_t Cost; }; + Dependency Dep; + // Pair of vertices connected by this edge. + unsigned FromIID; + unsigned ToIID; +}; + +class DependencyGraph { struct DGNode { unsigned NumPredecessors; SmallVector OutgoingEdges; @@ -134,26 +145,29 @@ class DependencyGraph { DependencyGraph(const DependencyGraph &) = delete; DependencyGraph &operator=(const DependencyGraph &) = delete; - void addDependency(unsigned From, DependencyEdge &&DE); + void addDependency(unsigned From, unsigned To, + DependencyEdge::Dependency &&DE); #ifndef NDEBUG - void dumpDependencyEdge(raw_ostream &OS, unsigned FromIID, - const DependencyEdge &DE, MCInstPrinter &MCIP) const; + void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE, + MCInstPrinter &MCIP) const; #endif public: DependencyGraph(unsigned Size) : Nodes(Size) {} - void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy) { - addDependency(From, {DT_REGISTER, To, RegID, Cy}); + void addRegisterDep(unsigned From, unsigned To, unsigned RegID, + unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_REGISTER, RegID, Cost}); } - void addMemoryDep(unsigned From, unsigned To, unsigned Cy) { - addDependency(From, {DT_MEMORY, To, /* unused */ 0, Cy}); + void addMemoryDep(unsigned From, unsigned To, unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_MEMORY, /* unused */ 0, Cost}); } - void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy) { - 
addDependency(From, {DT_RESOURCE, To, Mask, Cy}); + void addResourceDep(unsigned From, unsigned To, uint64_t Mask, + unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_RESOURCE, Mask, Cost}); } #ifndef NDEBUG @@ -168,6 +182,7 @@ class BottleneckAnalysis : public View { DependencyGraph DG; ArrayRef Source; + unsigned Iterations; unsigned TotalCycles; bool PressureIncreasedBecauseOfResources; @@ -190,17 +205,17 @@ class BottleneckAnalysis : public View { }; BackPressureInfo BPI; - // Prints a bottleneck message to OS. - void printBottleneckHints(raw_ostream &OS) const; - // Used to populate the dependency graph DG. void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy); void addMemoryDep(unsigned From, unsigned To, unsigned Cy); void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy); + // Prints a bottleneck message to OS. + void printBottleneckHints(raw_ostream &OS) const; + public: BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP, - ArrayRef Sequence); + ArrayRef Sequence, unsigned Iterations); void onCycleEnd() override; void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; } diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index afe06ba9da2c..b3590b5910ec 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -487,8 +487,10 @@ int main(int argc, char **argv) { Printer.addView( llvm::make_unique(SM, Insts, DispatchWidth)); - if (EnableBottleneckAnalysis) - Printer.addView(llvm::make_unique(*STI, *IP, Insts)); + if (EnableBottleneckAnalysis) { + Printer.addView(llvm::make_unique( + *STI, *IP, Insts, S.getNumIterations())); + } if (PrintInstructionInfoView) Printer.addView(