//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the functionalities used by the BottleneckAnalysis
/// to report bottleneck info.
///
//===----------------------------------------------------------------------===//

#include "Views/BottleneckAnalysis.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MCA/Support.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"

namespace llvm {
namespace mca {

#define DEBUG_TYPE "llvm-mca"

PressureTracker::PressureTracker(const MCSchedModel &Model)
    : SM(Model),
      ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
      ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
      ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
      ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
  computeProcResourceMasks(SM, ProcResID2Mask);

  // Ignore the invalid resource at index zero.
  unsigned NextResourceUsersIdx = 0;
  for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
    const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
    ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
    NextResourceUsersIdx += ProcResource.NumUnits;
    uint64_t ResourceMask = ProcResID2Mask[I];
    ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
  }

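  // ResourceUsers is a flat array with one slot per individual resource unit.
  // The slot for unit U of processor resource PR lives at index
  // ProcResID2ResourceUsersIndex[PR] + U. Each slot records the last
  // instruction (IID) issued to that unit, together with the consumed
  // resource cycles.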
  ResourceUsers.resize(NextResourceUsersIdx);
  std::fill(ResourceUsers.begin(), ResourceUsers.end(),
            std::make_pair<unsigned, unsigned>(~0U, 0U));
}

void PressureTracker::getResourceUsers(uint64_t ResourceMask,
                                       SmallVectorImpl<User> &Users) const {
  unsigned Index = getResourceStateIndex(ResourceMask);
  unsigned ProcResID = ResIdx2ProcResID[Index];
  const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
  for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
    const User U = getResourceUser(ProcResID, I);
    if (U.second && IPI.find(U.first) != IPI.end())
      Users.emplace_back(U);
  }
}

void PressureTracker::onInstructionDispatched(unsigned IID) {
  IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
}

void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }

void PressureTracker::handleInstructionIssuedEvent(
    const HWInstructionIssuedEvent &Event) {
  unsigned IID = Event.IR.getSourceIndex();
  using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
  using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
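  // Remember, for every consumed resource unit, which instruction used it
  // last and the consumed cycle count. RR.first names the processor resource,
  // while RR.second is a one-hot mask selecting a specific unit of that
  // resource, so countTrailingZeros() yields the unit index within the group.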
  for (const ResourceUse &Use : Event.UsedResources) {
    const ResourceRef &RR = Use.first;
    unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
    Index += countTrailingZeros(RR.second);
    ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
  }
}

void PressureTracker::updateResourcePressureDistribution(
    uint64_t CumulativeMask) {
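  // Visit the set bits of CumulativeMask one at a time; the expression
  // `Mask & (-Mask)` isolates the lowest set bit. For example, with
  // CumulativeMask == 0b0110 the loop processes 0b0010 first and 0b0100 next.
  // Pressure reported against a resource group is attributed to each of its
  // sub-units.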
  while (CumulativeMask) {
    uint64_t Current = CumulativeMask & (-CumulativeMask);
    unsigned ResIdx = getResourceStateIndex(Current);
    unsigned ProcResID = ResIdx2ProcResID[ResIdx];
    uint64_t Mask = ProcResID2Mask[ProcResID];

    if (Mask == Current) {
      ResourcePressureDistribution[ProcResID]++;
      CumulativeMask ^= Current;
      continue;
    }

    Mask ^= Current;
    while (Mask) {
      uint64_t SubUnit = Mask & (-Mask);
      ResIdx = getResourceStateIndex(SubUnit);
      ProcResID = ResIdx2ProcResID[ResIdx];
      ResourcePressureDistribution[ProcResID]++;
      Mask ^= SubUnit;
    }

    CumulativeMask ^= Current;
  }
}

void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
  assert(Event.Reason != HWPressureEvent::INVALID &&
         "Unexpected invalid event!");

  switch (Event.Reason) {
  default:
    break;

  case HWPressureEvent::RESOURCES: {
    const uint64_t ResourceMask = Event.ResourceMask;
    updateResourcePressureDistribution(Event.ResourceMask);

    for (const InstRef &IR : Event.AffectedInstructions) {
      const Instruction &IS = *IR.getInstruction();
      unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
      if (!BusyResources)
        continue;

      unsigned IID = IR.getSourceIndex();
      IPI[IID].ResourcePressureCycles++;
    }
    break;
  }

  case HWPressureEvent::REGISTER_DEPS:
    for (const InstRef &IR : Event.AffectedInstructions) {
      unsigned IID = IR.getSourceIndex();
      IPI[IID].RegisterPressureCycles++;
    }
    break;

  case HWPressureEvent::MEMORY_DEPS:
    for (const InstRef &IR : Event.AffectedInstructions) {
      unsigned IID = IR.getSourceIndex();
      IPI[IID].MemoryPressureCycles++;
    }
  }
}

#ifndef NDEBUG
void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
                                         const DependencyEdge &DepEdge,
                                         MCInstPrinter &MCIP) const {
  unsigned FromIID = DepEdge.FromIID;
  unsigned ToIID = DepEdge.ToIID;
  assert(FromIID < ToIID && "Graph should be acyclic!");

  const DependencyEdge::Dependency &DE = DepEdge.Dep;
  assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");

  OS << " FROM: " << FromIID << " TO: " << ToIID << " ";
  if (DE.Type == DependencyEdge::DT_REGISTER) {
    OS << " - REGISTER: ";
    MCIP.printRegName(OS, DE.ResourceOrRegID);
  } else if (DE.Type == DependencyEdge::DT_MEMORY) {
    OS << " - MEMORY";
  } else {
    assert(DE.Type == DependencyEdge::DT_RESOURCE &&
           "Unsupported dependency type!");
    OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
  }
  OS << " - COST: " << DE.Cost << '\n';
}
#endif // NDEBUG

void DependencyGraph::pruneEdges(unsigned Iterations) {
  for (DGNode &N : Nodes) {
    unsigned NumPruned = 0;
    const unsigned Size = N.OutgoingEdges.size();
    // Use a cut-off threshold to prune edges with a low frequency.
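    // An edge that shows up in every iteration is always kept. Otherwise, the
    // edge is dropped when it occurs in at most 10% of the iterations; pruned
    // edges are swapped to the end of the vector and discarded by the final
    // resize below.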
    for (unsigned I = 0, E = Size; I < E; ++I) {
      DependencyEdge &Edge = N.OutgoingEdges[I];
      if (Edge.Frequency == Iterations)
        continue;
      double Factor = (double)Edge.Frequency / Iterations;
      if (0.10 < Factor)
        continue;
      Nodes[Edge.ToIID].NumPredecessors--;
      std::swap(Edge, N.OutgoingEdges[E - 1]);
      --E;
      ++NumPruned;
    }

    if (NumPruned)
      N.OutgoingEdges.resize(Size - NumPruned);
  }
}

void DependencyGraph::initializeRootSet(
    SmallVectorImpl<unsigned> &RootSet) const {
  for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
    const DGNode &N = Nodes[I];
    if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
      RootSet.emplace_back(I);
  }
}

void DependencyGraph::propagateThroughEdges(
    SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) {
  SmallVector<unsigned, 8> ToVisit;

  // A critical sequence is computed as the longest path from a node of the
  // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
  // composed of nodes with at least one successor, and no predecessors.
  //
  // Each node of the graph starts with an initial default cost of zero. The
  // cost of a node is a measure of criticality: the higher the cost, the
  // bigger the performance impact.
  // For register and memory dependencies, the cost is a function of the write
  // latency as well as the actual delay (in cycles) caused to users.
  // For processor resource dependencies, the cost is a function of the
  // resource pressure. Resource interferences with low frequency values are
  // ignored.
  //
  // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
  // the inner loop selects (i.e. visits) a node N from a set of `unvisited
  // nodes`, and then propagates the cost of N to all its neighbors.
  //
  // The `unvisited nodes` set initially contains all the nodes from the
  // RootSet. A node N is added to the `unvisited nodes` if all its
  // predecessors have been visited already.
  //
  // For simplicity, every node tracks the number of visited predecessors in
  // field `NumVisitedPredecessors`. As soon as that counter matches
  // `NumPredecessors`, the node is appended to the `ToVisit` set.
  //
  // At the end of every iteration of the outer loop, set `ToVisit` becomes our
  // new `unvisited nodes` set.
  //
  // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
  // is empty. This algorithm works under the assumption that the graph is
  // acyclic.
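  // Example: if the graph only contains edges A->B (cost 2) and B->C (cost 3),
  // then A forms the RootSet and propagation yields Cost(B) = 2, Cost(C) = 5,
  // and Depth(C) = 2; node C ends the critical sequence A->B->C.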
  do {
    for (unsigned IID : RootSet) {
      const DGNode &N = Nodes[IID];
      for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
        unsigned ToIID = DepEdge.ToIID;
        DGNode &To = Nodes[ToIID];
        uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
        // Check if this is the most expensive incoming edge seen so far. If
        // so, update the total cost of the destination node (ToIID), as well
        // as its field `CriticalPredecessor`.
        if (Cost > To.Cost) {
          To.CriticalPredecessor = DepEdge;
          To.Cost = Cost;
          To.Depth = N.Depth + 1;
        }
        To.NumVisitedPredecessors++;
        if (To.NumVisitedPredecessors == To.NumPredecessors)
          ToVisit.emplace_back(ToIID);
      }
    }

    std::swap(RootSet, ToVisit);
    ToVisit.clear();
  } while (!RootSet.empty());
}

void DependencyGraph::getCriticalSequence(
    SmallVectorImpl<const DependencyEdge *> &Seq) const {
  // At this stage, nodes of the graph have already been visited, and costs
  // have been propagated through the edges (see method
  // `propagateThroughEdges()`).

  // Identify the node N with the highest cost in the graph. By construction,
  // that node is the last instruction of our critical sequence.
  // Field N.Depth would tell us the total length of the sequence.
  //
  // To obtain the sequence of critical edges, we simply follow the chain of
  // critical predecessors starting from node N (field
  // DGNode::CriticalPredecessor).
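  // Note that Seq is filled backwards: the entry at index Depth - 1 is the
  // edge into the most expensive node, while the entry at index 0 is the
  // first edge of the critical sequence.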
  const auto It = std::max_element(
      Nodes.begin(), Nodes.end(),
      [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
  unsigned IID = std::distance(Nodes.begin(), It);
  Seq.resize(Nodes[IID].Depth);
  for (unsigned I = Seq.size(), E = 0; I > E; --I) {
    const DGNode &N = Nodes[IID];
    Seq[I - 1] = &N.CriticalPredecessor;
    IID = N.CriticalPredecessor.FromIID;
  }
}

static void printInstruction(formatted_raw_ostream &FOS,
                             const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
                             const MCInst &MCI,
                             bool UseDifferentColor = false) {
  std::string Instruction;
  raw_string_ostream InstrStream(Instruction);

  FOS.PadToColumn(14);

  MCIP.printInst(&MCI, 0, "", STI, InstrStream);
  InstrStream.flush();

  if (UseDifferentColor)
    FOS.changeColor(raw_ostream::CYAN, true, false);
  FOS << StringRef(Instruction).ltrim();
  if (UseDifferentColor)
    FOS.resetColor();
}

void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
  // Early exit if no bottlenecks were found during the simulation.
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
    return;

  SmallVector<const DependencyEdge *, 16> Seq;
  DG.getCriticalSequence(Seq);
  if (Seq.empty())
    return;

  OS << "\nCritical sequence based on the simulation:\n\n";

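  // Nodes of the dependency graph are numbered across three back-to-back
  // copies of the input sequence, so `% Source.size()` maps a node ID back to
  // its source instruction. A first edge whose mapped source index is not
  // smaller than its destination index denotes a loop-carried dependency.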
  const DependencyEdge &FirstEdge = *Seq[0];
  unsigned FromIID = FirstEdge.FromIID % Source.size();
  unsigned ToIID = FirstEdge.ToIID % Source.size();
  bool IsLoopCarried = FromIID >= ToIID;

  formatted_raw_ostream FOS(OS);
  FOS.PadToColumn(14);
  FOS << "Instruction";
  FOS.PadToColumn(58);
  FOS << "Dependency Information";

  bool HasColors = FOS.has_colors();

  unsigned CurrentIID = 0;
  if (IsLoopCarried) {
    FOS << "\n +----< " << FromIID << ".";
    printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors);
    FOS << "\n |\n | < loop carried > \n |";
  } else {
    while (CurrentIID < FromIID) {
      FOS << "\n " << CurrentIID << ".";
      printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
      CurrentIID++;
    }

    FOS << "\n +----< " << CurrentIID << ".";
    printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
    CurrentIID++;
  }

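  // Walk the critical sequence edge by edge. Instructions before and after
  // the sequence are printed with plain indentation, instructions inside it
  // are prefixed with '|'; the source of the first edge is marked with
  // "+----<", each edge destination with "+---->", and "< loop carried >" is
  // emitted whenever the sequence wraps around to the next iteration.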
  for (const DependencyEdge *&DE : Seq) {
    ToIID = DE->ToIID % Source.size();
    unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;

    while (CurrentIID < LastIID) {
      FOS << "\n | " << CurrentIID << ".";
      printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
      CurrentIID++;
    }

    if (CurrentIID == ToIID) {
      FOS << "\n +----> " << ToIID << ".";
      printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
    } else {
      FOS << "\n |\n | < loop carried > \n |"
          << "\n +----> " << ToIID << ".";
      printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors);
    }
    FOS.PadToColumn(58);

    const DependencyEdge::Dependency &Dep = DE->Dep;
    if (HasColors)
      FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);

    if (Dep.Type == DependencyEdge::DT_REGISTER) {
      FOS << "## REGISTER dependency: ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      MCIP.printRegName(FOS, Dep.ResourceOrRegID);
    } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
      FOS << "## MEMORY dependency.";
    } else {
      assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
             "Unsupported dependency type!");
      FOS << "## RESOURCE interference: ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
      if (HasColors) {
        FOS.resetColor();
        FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
      }
      FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
          << "% ]";
    }
    if (HasColors)
      FOS.resetColor();
    ++CurrentIID;
  }

  while (CurrentIID < Source.size()) {
    FOS << "\n " << CurrentIID << ".";
    printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
    CurrentIID++;
  }

  FOS << '\n';
  FOS.flush();
}

#ifndef NDEBUG
void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
  OS << "\nREG DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
        dumpDependencyEdge(OS, DE, MCIP);

  OS << "\nMEM DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
        dumpDependencyEdge(OS, DE, MCIP);

  OS << "\nRESOURCE DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
        dumpDependencyEdge(OS, DE, MCIP);
}
#endif // NDEBUG

void DependencyGraph::addDependency(unsigned From, unsigned To,
                                    DependencyEdge::Dependency &&Dep) {
  DGNode &NodeFrom = Nodes[From];
  DGNode &NodeTo = Nodes[To];
  SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;

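  // If there is already an edge to the same destination for the same register
  // (or resource), fold this occurrence into it: the cost accumulates across
  // iterations, and the frequency counts how often the dependency was seen.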
  auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
    return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
  });

  if (It != Vec.end()) {
    It->Dep.Cost += Dep.Cost;
    It->Frequency++;
    return;
  }

  DependencyEdge DE = {Dep, From, To, 1};
  Vec.emplace_back(DE);
  NodeTo.NumPredecessors++;
}

BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
                                       MCInstPrinter &Printer,
                                       ArrayRef<MCInst> S, unsigned NumIter)
    : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3),
      Source(S), Iterations(NumIter), TotalCycles(0),
      PressureIncreasedBecauseOfResources(false),
      PressureIncreasedBecauseOfRegisterDependencies(false),
      PressureIncreasedBecauseOfMemoryDependencies(false),
      SeenStallCycles(false), BPI() {}

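// The dependency graph is allocated with three times as many nodes as there
// are instructions in the source block (see `DG(S.size() * 3)` above). The
// helpers below treat those nodes as three consecutive copies of the block:
// regular dependencies are added within the middle copy, while a loop-carried
// dependency (From >= To) is added twice, once from the first copy into the
// middle one and once from the middle copy into the last one.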
void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
                                        unsigned RegID, unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = Source.size();
  if (IsLoopCarried) {
    DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
    DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
    return;
  }
  DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
}

void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
                                      unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = Source.size();
  if (IsLoopCarried) {
    DG.addMemoryDep(From, To + SourceSize, Cost);
    DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
    return;
  }
  DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
}

void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
                                        uint64_t Mask, unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = Source.size();
  if (IsLoopCarried) {
    DG.addResourceDep(From, To + SourceSize, Mask, Cost);
    DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
    return;
  }
  DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
}

void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
  const unsigned IID = Event.IR.getSourceIndex();
  if (Event.Type == HWInstructionEvent::Dispatched) {
    Tracker.onInstructionDispatched(IID);
    return;
  }
  if (Event.Type == HWInstructionEvent::Executed) {
    Tracker.onInstructionExecuted(IID);
    return;
  }

  if (Event.Type != HWInstructionEvent::Issued)
    return;

  const Instruction &IS = *Event.IR.getInstruction();
  unsigned To = IID % Source.size();

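  // Note: cycles spent under backend pressure are weighted twice when
  // computing edge costs. This is a heuristic that biases the critical
  // sequence towards instructions that were also responsible for pressure
  // increases.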
  unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
  uint64_t ResourceMask = IS.getCriticalResourceMask();
  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
  while (ResourceMask) {
    uint64_t Current = ResourceMask & (-ResourceMask);
    Tracker.getResourceUsers(Current, Users);
    for (const std::pair<unsigned, unsigned> &U : Users)
      addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
    Users.clear();
    ResourceMask ^= Current;
  }

  const CriticalDependency &RegDep = IS.getCriticalRegDep();
  if (RegDep.Cycles) {
    Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
    unsigned From = RegDep.IID % Source.size();
    addRegisterDep(From, To, RegDep.RegID, Cycles);
  }

  const CriticalDependency &MemDep = IS.getCriticalMemDep();
  if (MemDep.Cycles) {
    Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
    unsigned From = MemDep.IID % Source.size();
    addMemoryDep(From, To, Cycles);
  }

  Tracker.handleInstructionIssuedEvent(
      static_cast<const HWInstructionIssuedEvent &>(Event));

  // Check if this is the last simulated instruction.
  if (IID == ((Iterations * Source.size()) - 1))
    DG.finalizeGraph(Iterations);
}

void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
  assert(Event.Reason != HWPressureEvent::INVALID &&
         "Unexpected invalid event!");

  Tracker.handlePressureEvent(Event);

  switch (Event.Reason) {
  default:
    break;

  case HWPressureEvent::RESOURCES:
    PressureIncreasedBecauseOfResources = true;
    break;
  case HWPressureEvent::REGISTER_DEPS:
    PressureIncreasedBecauseOfRegisterDependencies = true;
    break;
  case HWPressureEvent::MEMORY_DEPS:
    PressureIncreasedBecauseOfMemoryDependencies = true;
    break;
  }
}

void BottleneckAnalysis::onCycleEnd() {
  ++TotalCycles;

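  // Fold the per-cycle pressure flags into the cumulative counters, then reset
  // the flags for the next cycle.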
  bool PressureIncreasedBecauseOfDataDependencies =
      PressureIncreasedBecauseOfRegisterDependencies ||
      PressureIncreasedBecauseOfMemoryDependencies;
  if (!PressureIncreasedBecauseOfResources &&
      !PressureIncreasedBecauseOfDataDependencies)
    return;

  ++BPI.PressureIncreaseCycles;
  if (PressureIncreasedBecauseOfRegisterDependencies)
    ++BPI.RegisterDependencyCycles;
  if (PressureIncreasedBecauseOfMemoryDependencies)
    ++BPI.MemoryDependencyCycles;
  if (PressureIncreasedBecauseOfDataDependencies)
    ++BPI.DataDependencyCycles;
  if (PressureIncreasedBecauseOfResources)
    ++BPI.ResourcePressureCycles;
  PressureIncreasedBecauseOfResources = false;
  PressureIncreasedBecauseOfRegisterDependencies = false;
  PressureIncreasedBecauseOfMemoryDependencies = false;
}

void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
    OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
    return;
  }

  double PressurePerCycle =
      (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
  double ResourcePressurePerCycle =
      (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
  double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
  double RegDepPressurePerCycle =
      (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
  double MemDepPressurePerCycle =
      (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;

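  // The expression floor((X * 100) + 0.5) / 100 rounds each percentage to two
  // decimal digits before printing (e.g. 12.346 becomes 12.35).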
  OS << "\n\nCycles with backend pressure increase [ "
     << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";

  OS << "\nThroughput Bottlenecks: "
     << "\n Resource Pressure [ "
     << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
     << "% ]";

  if (BPI.PressureIncreaseCycles) {
    ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
    const MCSchedModel &SM = STI.getSchedModel();
    for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
      unsigned ResourceCycles = Distribution[I];
      if (ResourceCycles) {
        double Frequency = (double)ResourceCycles * 100 / TotalCycles;
        const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
        OS << "\n - " << PRDesc.Name << " [ "
           << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
      }
    }
  }

  OS << "\n Data Dependencies: [ "
     << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
  OS << "\n - Register Dependencies [ "
     << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
     << "% ]";
  OS << "\n - Memory Dependencies [ "
     << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
     << "% ]\n";
}

void BottleneckAnalysis::printView(raw_ostream &OS) const {
  std::string Buffer;
  raw_string_ostream TempStream(Buffer);
  printBottleneckHints(TempStream);
  TempStream.flush();
  OS << Buffer;
  printCriticalSequence(OS);
}

} // namespace mca.
} // namespace llvm