[MCA] Further refactor the bottleneck analysis view. NFCI.

llvm-svn: 362933
This commit is contained in:
Andrea Di Biagio 2019-06-10 12:50:08 +00:00
parent b67333f283
commit 47db08dbb1
4 changed files with 175 additions and 113 deletions

View File

@ -198,7 +198,8 @@ InstRef Scheduler::select() {
Strategy->compare(IR, ReadySet[QueueIndex])) {
Instruction &IS = *IR.getInstruction();
uint64_t BusyResourceMask = Resources->checkAvailability(IS.getDesc());
IS.setCriticalResourceMask(BusyResourceMask);
if (BusyResourceMask)
IS.setCriticalResourceMask(BusyResourceMask);
BusyResourceUnits |= BusyResourceMask;
if (!BusyResourceMask)
QueueIndex = I;

View File

@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "Views/BottleneckAnalysis.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MCA/Support.h"
#include "llvm/Support/Format.h"
@ -40,43 +41,38 @@ PressureTracker::PressureTracker(const MCSchedModel &Model)
}
ResourceUsers.resize(NextResourceUsersIdx);
std::fill(ResourceUsers.begin(), ResourceUsers.end(), ~0U);
std::fill(ResourceUsers.begin(), ResourceUsers.end(),
std::make_pair<unsigned, unsigned>(~0U, 0U));
}
void PressureTracker::getUniqueUsers(
uint64_t ResourceMask, SmallVectorImpl<unsigned> &UniqueUsers) const {
void PressureTracker::getResourceUsers(uint64_t ResourceMask,
SmallVectorImpl<User> &Users) const {
unsigned Index = getResourceStateIndex(ResourceMask);
unsigned ProcResID = ResIdx2ProcResID[Index];
const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
unsigned From = getResourceUser(ProcResID, I);
if (find(UniqueUsers, From) == UniqueUsers.end())
UniqueUsers.emplace_back(From);
const User U = getResourceUser(ProcResID, I);
if (U.second && IPI.find(U.first) != IPI.end())
Users.emplace_back(U);
}
}
void PressureTracker::handleInstructionEvent(const HWInstructionEvent &Event) {
// Called when instruction IID is dispatched: registers IID in the IPI map with
// a default-constructed InstructionPressureInfo record.
void PressureTracker::onInstructionDispatched(unsigned IID) {
IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
}
// Called when instruction IID has executed: removes the IPI entry keyed by IID.
void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
void PressureTracker::handleInstructionIssuedEvent(
const HWInstructionIssuedEvent &Event) {
unsigned IID = Event.IR.getSourceIndex();
switch (Event.Type) {
default:
break;
case HWInstructionEvent::Dispatched:
IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
break;
case HWInstructionEvent::Executed:
IPI.erase(IID);
break;
case HWInstructionEvent::Issued: {
const auto &IIE = static_cast<const HWInstructionIssuedEvent &>(Event);
using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
for (const ResourceUse &Use : IIE.UsedResources) {
const ResourceRef &RR = Use.first;
unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
Index += countTrailingZeros(RR.second);
ResourceUsers[Index] = IID;
}
}
using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
for (const ResourceUse &Use : Event.UsedResources) {
const ResourceRef &RR = Use.first;
unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
Index += countTrailingZeros(RR.second);
ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
}
}
@ -125,7 +121,8 @@ void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
if (!BusyResources)
continue;
IPI[IR.getSourceIndex()].ResourcePressureCycles++;
unsigned IID = IR.getSourceIndex();
IPI[IID].ResourcePressureCycles++;
}
break;
}
@ -146,51 +143,59 @@ void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
}
#ifndef NDEBUG
void DependencyGraph::dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const {
// Prints a one-line description of the dependency edge DE, which originates
// from node FromIID, to OS.
//
// The line reports the source and destination node identifiers, a
// "(loop carried)" marker when FromIID >= DE.IID, a type-specific payload
// (register name, memory tag, or resource mask), and the cycle count.
// MCIP is only used to print the register name of register dependencies.
void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, unsigned FromIID,
                                         const DependencyEdge &DE,
                                         MCInstPrinter &MCIP) const {
  OS << " FROM: " << FromIID << " TO: " << DE.IID;

  // An edge whose destination does not come after its source is reported as
  // loop carried.
  const bool IsLoopCarried = FromIID >= DE.IID;
  if (IsLoopCarried)
    OS << " (loop carried)";
  else
    OS << " ";

  switch (DE.Type) {
  case DT_REGISTER:
    OS << " - REGISTER: ";
    MCIP.printRegName(OS, DE.ResourceOrRegID);
    break;
  case DT_MEMORY:
    OS << " - MEMORY";
    break;
  default:
    // Anything that is neither a register nor a memory edge must be a
    // resource edge.
    assert(DE.Type == DT_RESOURCE && "Unexpected unsupported dependency type!");
    OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
    break;
  }

  OS << " - CYCLES: " << DE.Cycles << '\n';
}
void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
OS << "\nREG DEPS\n";
for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
const DGNode &Node = Nodes[I];
for (const DependencyEdge &DE : Node.RegDeps) {
bool LoopCarried = I >= DE.IID;
OS << " FROM: " << I << " TO: " << DE.IID
<< (LoopCarried ? " (loop carried)" : " ")
<< " - REGISTER: ";
MCIP.printRegName(OS, DE.ResourceOrRegID);
OS << " - CYCLES: " << DE.Cycles << '\n';
for (const DependencyEdge &DE : Node.OutgoingEdges) {
if (DE.Type == DT_REGISTER)
dumpDependencyEdge(OS, I, DE, MCIP);
}
}
}
void DependencyGraph::dumpMemDeps(raw_ostream &OS) const {
OS << "\nMEM DEPS\n";
for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
const DGNode &Node = Nodes[I];
for (const DependencyEdge &DE : Node.MemDeps) {
bool LoopCarried = I >= DE.IID;
OS << " FROM: " << I << " TO: " << DE.IID
<< (LoopCarried ? " (loop carried)" : " ")
<< " - MEMORY - CYCLES: " << DE.Cycles << '\n';
for (const DependencyEdge &DE : Node.OutgoingEdges) {
if (DE.Type == DT_MEMORY)
dumpDependencyEdge(OS, I, DE, MCIP);
}
}
}
void DependencyGraph::dumpResDeps(raw_ostream &OS) const {
OS << "\nRESOURCE DEPS\n";
for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
const DGNode &Node = Nodes[I];
for (const DependencyEdge &DE : Node.ResDeps) {
bool LoopCarried = I >= DE.IID;
OS << " FROM: " << I << " TO: " << DE.IID
<< (LoopCarried ? "(loop carried)" : " ")
<< " - RESOURCE MASK: " << DE.ResourceOrRegID;
OS << " - CYCLES: " << DE.Cycles << '\n';
for (const DependencyEdge &DE : Node.OutgoingEdges) {
if (DE.Type == DT_RESOURCE)
dumpDependencyEdge(OS, I, DE, MCIP);
}
}
}
#endif // NDEBUG
void DependencyGraph::addDepImpl(SmallVectorImpl<DependencyEdge> &Vec,
DependencyEdge &&Dep) {
void DependencyGraph::addDependency(unsigned From, DependencyEdge &&Dep) {
DGNode &NodeFrom = Nodes[From];
DGNode &NodeTo = Nodes[Dep.IID];
SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
auto It = find_if(Vec, [Dep](DependencyEdge &DE) {
return DE.IID == Dep.IID && DE.ResourceOrRegID == Dep.ResourceOrRegID;
});
@ -201,38 +206,102 @@ void DependencyGraph::addDepImpl(SmallVectorImpl<DependencyEdge> &Vec,
}
Vec.emplace_back(Dep);
Nodes[Dep.IID].NumPredecessors++;
NodeTo.NumPredecessors++;
}
BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
ArrayRef<MCInst> Sequence)
: STI(sti), Tracker(STI.getSchedModel()), DG(Sequence.size()),
Source(Sequence), TotalCycles(0),
PressureIncreasedBecauseOfResources(false),
MCInstPrinter &Printer,
ArrayRef<MCInst> S)
: STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3),
Source(S), TotalCycles(0), PressureIncreasedBecauseOfResources(false),
PressureIncreasedBecauseOfRegisterDependencies(false),
PressureIncreasedBecauseOfMemoryDependencies(false),
SeenStallCycles(false), BPI() {}
// Records a register dependency on RegID, costing Cy cycles, between the
// instructions at positions From and To of the input sequence.
//
// Node indices passed to DG are offset by multiples of Source.size():
//  - When From < To, one edge is added with both endpoints shifted by one
//    sequence length.
//  - Otherwise (From >= To, i.e. the dependency is loop carried), two edges
//    are added: one from the unshifted From to To shifted by one sequence
//    length, and one from From shifted by one sequence length to To shifted
//    by two sequence lengths.
void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
                                        unsigned RegID, unsigned Cy) {
  const unsigned NumInsts = Source.size();

  if (From < To) {
    DG.addRegisterDep(From + NumInsts, To + NumInsts, RegID, Cy);
    return;
  }

  // Loop-carried dependency.
  DG.addRegisterDep(From, To + NumInsts, RegID, Cy);
  DG.addRegisterDep(From + NumInsts, To + (NumInsts * 2), RegID, Cy);
}
// Records a memory dependency, costing Cy cycles, between the instructions at
// positions From and To of the input sequence.
//
// Node indices passed to DG are offset by multiples of Source.size():
//  - When From < To, one edge is added with both endpoints shifted by one
//    sequence length.
//  - Otherwise (From >= To, i.e. the dependency is loop carried), two edges
//    are added: one from the unshifted From to To shifted by one sequence
//    length, and one from From shifted by one sequence length to To shifted
//    by two sequence lengths.
void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, unsigned Cy) {
  const unsigned NumInsts = Source.size();

  if (From < To) {
    DG.addMemoryDep(From + NumInsts, To + NumInsts, Cy);
    return;
  }

  // Loop-carried dependency.
  DG.addMemoryDep(From, To + NumInsts, Cy);
  DG.addMemoryDep(From + NumInsts, To + (NumInsts * 2), Cy);
}
// Records a dependency on the processor resource(s) identified by Mask,
// costing Cy cycles, between the instructions at positions From and To of the
// input sequence.
//
// Node indices passed to DG are offset by multiples of Source.size():
//  - When From < To, one edge is added with both endpoints shifted by one
//    sequence length.
//  - Otherwise (From >= To, i.e. the dependency is loop carried), two edges
//    are added: one from the unshifted From to To shifted by one sequence
//    length, and one from From shifted by one sequence length to To shifted
//    by two sequence lengths.
void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
                                        uint64_t Mask, unsigned Cy) {
  const unsigned NumInsts = Source.size();

  if (From < To) {
    DG.addResourceDep(From + NumInsts, To + NumInsts, Mask, Cy);
    return;
  }

  // Loop-carried dependency.
  DG.addResourceDep(From, To + NumInsts, Mask, Cy);
  DG.addResourceDep(From + NumInsts, To + (NumInsts * 2), Mask, Cy);
}
void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
Tracker.handleInstructionEvent(Event);
const unsigned IID = Event.IR.getSourceIndex();
if (Event.Type == HWInstructionEvent::Dispatched) {
Tracker.onInstructionDispatched(IID);
return;
}
if (Event.Type == HWInstructionEvent::Executed) {
Tracker.onInstructionExecuted(IID);
return;
}
if (Event.Type != HWInstructionEvent::Issued)
return;
const unsigned IID = Event.IR.getSourceIndex();
const Instruction &IS = *Event.IR.getInstruction();
unsigned Cycles = Tracker.getRegisterPressureCycles(IID);
unsigned To = IID % Source.size();
unsigned Cycles = Tracker.getResourcePressureCycles(IID);
if (Cycles) {
uint64_t ResourceMask = IS.getCriticalResourceMask();
SmallVector<std::pair<unsigned, unsigned>, 4> Users;
while (ResourceMask) {
uint64_t Current = ResourceMask & (-ResourceMask);
Tracker.getResourceUsers(Current, Users);
for (const std::pair<unsigned, unsigned> &U : Users) {
unsigned Cost = std::min(U.second, Cycles);
addResourceDep(U.first % Source.size(), To, Current, Cost);
}
Users.clear();
ResourceMask ^= Current;
}
}
Cycles = Tracker.getRegisterPressureCycles(IID);
if (Cycles) {
const CriticalDependency &RegDep = IS.getCriticalRegDep();
unsigned From = RegDep.IID % Source.size();
DG.addRegDep(From, To, RegDep.RegID, Cycles);
addRegisterDep(From, To, RegDep.RegID, Cycles);
}
Cycles = Tracker.getMemoryPressureCycles(IID);
if (Cycles) {
const CriticalDependency &MemDep = IS.getCriticalMemDep();
unsigned From = MemDep.IID % Source.size();
DG.addMemDep(From, To, Cycles);
addMemoryDep(From, To, Cycles);
}
Tracker.handleInstructionIssuedEvent(
static_cast<const HWInstructionIssuedEvent &>(Event));
}
void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
@ -245,28 +314,9 @@ void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
default:
break;
case HWPressureEvent::RESOURCES: {
case HWPressureEvent::RESOURCES:
PressureIncreasedBecauseOfResources = true;
SmallVector<unsigned, 4> UniqueUsers;
for (const InstRef &IR : Event.AffectedInstructions) {
const Instruction &IS = *IR.getInstruction();
unsigned To = IR.getSourceIndex() % Source.size();
unsigned BusyResources =
IS.getCriticalResourceMask() & Event.ResourceMask;
while (BusyResources) {
uint64_t Current = BusyResources & (-BusyResources);
Tracker.getUniqueUsers(Current, UniqueUsers);
for (unsigned User : UniqueUsers)
DG.addResourceDep(User % Source.size(), To, Current, 1);
BusyResources ^= Current;
}
UniqueUsers.clear();
}
break;
}
case HWPressureEvent::REGISTER_DEPS:
PressureIncreasedBecauseOfRegisterDependencies = true;
break;

View File

@ -63,7 +63,8 @@ class PressureTracker {
// There is one entry for every processor resource unit declared by the
// processor model. An all_ones value is treated like an invalid instruction
// identifier.
SmallVector<unsigned, 4> ResourceUsers;
using User = std::pair<unsigned, unsigned>;
SmallVector<User, 4> ResourceUsers;
struct InstructionPressureInfo {
unsigned RegisterPressureCycles;
@ -74,7 +75,7 @@ class PressureTracker {
void updateResourcePressureDistribution(uint64_t CumulativeMask);
unsigned getResourceUser(unsigned ProcResID, unsigned UnitID) const {
User getResourceUser(unsigned ProcResID, unsigned UnitID) const {
unsigned Index = ProcResID2ResourceUsersIndex[ProcResID];
return ResourceUsers[Index + UnitID];
}
@ -86,8 +87,8 @@ public:
return ResourcePressureDistribution;
}
void getUniqueUsers(uint64_t ResourceMask,
SmallVectorImpl<unsigned> &Users) const;
void getResourceUsers(uint64_t ResourceMask,
SmallVectorImpl<User> &Users) const;
unsigned getRegisterPressureCycles(unsigned IID) const {
assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
@ -107,12 +108,18 @@ public:
return Info.ResourcePressureCycles;
}
void onInstructionDispatched(unsigned IID);
void onInstructionExecuted(unsigned IID);
void handlePressureEvent(const HWPressureEvent &Event);
void handleInstructionEvent(const HWInstructionEvent &Event);
void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event);
};
class DependencyGraph {
enum DependencyType { DT_REGISTER, DT_MEMORY, DT_RESOURCE };
struct DependencyEdge {
DependencyType Type;
unsigned IID;
uint64_t ResourceOrRegID;
uint64_t Cycles;
@ -120,46 +127,44 @@ class DependencyGraph {
struct DGNode {
unsigned NumPredecessors;
SmallVector<DependencyEdge, 8> RegDeps;
SmallVector<DependencyEdge, 8> MemDeps;
SmallVector<DependencyEdge, 8> ResDeps;
SmallVector<DependencyEdge, 8> OutgoingEdges;
};
SmallVector<DGNode, 16> Nodes;
void addDepImpl(SmallVectorImpl<DependencyEdge> &Vec, DependencyEdge &&DE);
DependencyGraph(const DependencyGraph &) = delete;
DependencyGraph &operator=(const DependencyGraph &) = delete;
public:
DependencyGraph(unsigned NumNodes) : Nodes(NumNodes, DGNode()) {}
void addDependency(unsigned From, DependencyEdge &&DE);
void addRegDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy) {
addDepImpl(Nodes[From].RegDeps, {To, RegID, Cy});
#ifndef NDEBUG
void dumpDependencyEdge(raw_ostream &OS, unsigned FromIID,
const DependencyEdge &DE, MCInstPrinter &MCIP) const;
#endif
public:
DependencyGraph(unsigned Size) : Nodes(Size) {}
void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy) {
addDependency(From, {DT_REGISTER, To, RegID, Cy});
}
void addMemDep(unsigned From, unsigned To, unsigned Cy) {
addDepImpl(Nodes[From].MemDeps, {To, /* unused */ 0, Cy});
void addMemoryDep(unsigned From, unsigned To, unsigned Cy) {
addDependency(From, {DT_MEMORY, To, /* unused */ 0, Cy});
}
void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy) {
addDepImpl(Nodes[From].ResDeps, {To, Mask, Cy});
addDependency(From, {DT_RESOURCE, To, Mask, Cy});
}
#ifndef NDEBUG
void dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const;
void dumpMemDeps(raw_ostream &OS) const;
void dumpResDeps(raw_ostream &OS) const;
void dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
dumpRegDeps(OS, MCIP);
dumpMemDeps(OS);
dumpResDeps(OS);
}
void dump(raw_ostream &OS, MCInstPrinter &MCIP) const;
#endif
};
/// A view that collects and prints a few performance numbers.
class BottleneckAnalysis : public View {
const MCSubtargetInfo &STI;
MCInstPrinter &MCIP;
PressureTracker Tracker;
DependencyGraph DG;
@ -189,8 +194,14 @@ class BottleneckAnalysis : public View {
// Prints a bottleneck message to OS.
void printBottleneckHints(raw_ostream &OS) const;
// Used to populate the dependency graph DG.
void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy);
void addMemoryDep(unsigned From, unsigned To, unsigned Cy);
void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy);
public:
BottleneckAnalysis(const MCSubtargetInfo &STI, ArrayRef<MCInst> Sequence);
BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
ArrayRef<MCInst> Sequence);
void onCycleEnd() override;
void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; }
@ -200,7 +211,7 @@ public:
void printView(raw_ostream &OS) const override;
#ifndef NDEBUG
void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); }
void dump(raw_ostream &OS) const { DG.dump(OS, MCIP); }
#endif
};

View File

@ -487,7 +487,7 @@ int main(int argc, char **argv) {
llvm::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth));
if (EnableBottleneckAnalysis)
Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(*STI, Insts));
Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(*STI, *IP, Insts));
if (PrintInstructionInfoView)
Printer.addView(