llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

294 lines
10 KiB
C++
Raw Normal View History

//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the TimelineView interface.
///
//===----------------------------------------------------------------------===//
#include "Views/TimelineView.h"
namespace llvm {
namespace mca {
/// Constructs a timeline view over the instruction sequence \p S.
///
/// \param sti        Subtarget info; later queried for the scheduling model.
/// \param Printer    Instruction printer used when rendering the reports.
/// \param S          The source instructions being simulated.
/// \param Iterations Number of times \p S is iterated; must be non-zero.
/// \param Cycles     Upper bound stored in MaxCycle; a value of 0 selects the
///                   default of 80.
///
/// The timeline holds one entry per instruction per iteration, while WaitTime
/// and UsedBuffer hold one entry per source instruction.
TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
                           llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
                           unsigned Cycles)
    : STI(sti), MCIP(Printer), Source(S), CurrentCycle(0),
      MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0), WaitTime(S.size()),
      UsedBuffer(S.size()) {
  unsigned NumInstructions = Source.size();
  assert(Iterations && "Invalid number of iterations specified!");
  NumInstructions *= Iterations;
  Timeline.resize(NumInstructions);

  // Every timeline slot starts out as "invalid". The leading -1 is presumably
  // the CycleDispatched field, which onEvent() compares against -1 to detect
  // the first dispatch event (field order is declared in the header).
  TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0};
  std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry);

  // Wait-time accumulators start from zero.
  WaitTimeEntry NullWTEntry = {0, 0, 0};
  std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);

  // No buffer has been recorded yet for any source instruction.
  std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0,
                                                  /* unknown buffer size */ -1};
  std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry);
}
void TimelineView::onReservedBuffers(const InstRef &IR,
ArrayRef<unsigned> Buffers) {
if (IR.getSourceIndex() >= Source.size())
return;
const MCSchedModel &SM = STI.getSchedModel();
std::pair<unsigned, int> BufferInfo = {0, -1};
for (const unsigned Buffer : Buffers) {
const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer);
if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) {
BufferInfo.first = Buffer;
BufferInfo.second = MCDesc.BufferSize;
}
}
UsedBuffer[IR.getSourceIndex()] = BufferInfo;
}
void TimelineView::onEvent(const HWInstructionEvent &Event) {
const unsigned Index = Event.IR.getSourceIndex();
if (Index >= Timeline.size())
return;
switch (Event.Type) {
case HWInstructionEvent::Retired: {
TimelineViewEntry &TVEntry = Timeline[Index];
if (CurrentCycle < MaxCycle)
TVEntry.CycleRetired = CurrentCycle;
// Update the WaitTime entry which corresponds to this Index.
[llvm-mca] Report the number of dispatched micro opcodes in the DispatchStatistics view. This patch introduces the following changes to the DispatchStatistics view: * DispatchStatistics now reports the number of dispatched opcodes instead of the number of dispatched instructions. * The "Dynamic Dispatch Stall Cycles" table now also reports the percentage of stall cycles against the total simulated cycles. This change allows users to easily compare dispatch group sizes with the processor DispatchWidth. Before this change, it was difficult to correlate the two numbers, since DispatchStatistics view reported numbers of instructions (instead of opcodes). DispatchWidth defines the maximum size of a dispatch group in terms of number of micro opcodes. The other change introduced by this patch is related to how DispatchStage generates "instruction dispatch" events. In particular: * There can be multiple dispatch events associated with a same instruction * Each dispatch event now encapsulates the number of dispatched micro opcodes. The number of micro opcodes declared by an instruction may exceed the processor DispatchWidth. Therefore, we cannot assume that instructions are always fully dispatched in a single cycle. DispatchStage knows already how to handle instructions declaring a number of opcodes bigger that DispatchWidth. However, DispatchStage always emitted a single instruction dispatch event (during the first simulated dispatch cycle) for instructions dispatched. With this patch, DispatchStage now correctly notifies multiple dispatch events for instructions that cannot be dispatched in a single cycle. A few views had to be modified. Views can no longer assume that there can only be one dispatch event per instruction. Tests (and docs) have been updated. Differential Revision: https://reviews.llvm.org/D51430 llvm-svn: 341055
2018-08-30 18:50:20 +08:00
assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!");
unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched);
WaitTimeEntry &WTEntry = WaitTime[Index % Source.size()];
WTEntry.CyclesSpentInSchedulerQueue +=
[llvm-mca] Report the number of dispatched micro opcodes in the DispatchStatistics view. This patch introduces the following changes to the DispatchStatistics view: * DispatchStatistics now reports the number of dispatched opcodes instead of the number of dispatched instructions. * The "Dynamic Dispatch Stall Cycles" table now also reports the percentage of stall cycles against the total simulated cycles. This change allows users to easily compare dispatch group sizes with the processor DispatchWidth. Before this change, it was difficult to correlate the two numbers, since DispatchStatistics view reported numbers of instructions (instead of opcodes). DispatchWidth defines the maximum size of a dispatch group in terms of number of micro opcodes. The other change introduced by this patch is related to how DispatchStage generates "instruction dispatch" events. In particular: * There can be multiple dispatch events associated with a same instruction * Each dispatch event now encapsulates the number of dispatched micro opcodes. The number of micro opcodes declared by an instruction may exceed the processor DispatchWidth. Therefore, we cannot assume that instructions are always fully dispatched in a single cycle. DispatchStage knows already how to handle instructions declaring a number of opcodes bigger that DispatchWidth. However, DispatchStage always emitted a single instruction dispatch event (during the first simulated dispatch cycle) for instructions dispatched. With this patch, DispatchStage now correctly notifies multiple dispatch events for instructions that cannot be dispatched in a single cycle. A few views had to be modified. Views can no longer assume that there can only be one dispatch event per instruction. Tests (and docs) have been updated. Differential Revision: https://reviews.llvm.org/D51430 llvm-svn: 341055
2018-08-30 18:50:20 +08:00
TVEntry.CycleIssued - CycleDispatched;
assert(CycleDispatched <= TVEntry.CycleReady &&
"Instruction cannot be ready if it hasn't been dispatched yet!");
WTEntry.CyclesSpentInSQWhileReady +=
TVEntry.CycleIssued - TVEntry.CycleReady;
WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
(CurrentCycle - 1) - TVEntry.CycleExecuted;
break;
}
case HWInstructionEvent::Ready:
Timeline[Index].CycleReady = CurrentCycle;
break;
case HWInstructionEvent::Issued:
Timeline[Index].CycleIssued = CurrentCycle;
break;
case HWInstructionEvent::Executed:
Timeline[Index].CycleExecuted = CurrentCycle;
break;
case HWInstructionEvent::Dispatched:
[llvm-mca] Report the number of dispatched micro opcodes in the DispatchStatistics view. This patch introduces the following changes to the DispatchStatistics view: * DispatchStatistics now reports the number of dispatched opcodes instead of the number of dispatched instructions. * The "Dynamic Dispatch Stall Cycles" table now also reports the percentage of stall cycles against the total simulated cycles. This change allows users to easily compare dispatch group sizes with the processor DispatchWidth. Before this change, it was difficult to correlate the two numbers, since DispatchStatistics view reported numbers of instructions (instead of opcodes). DispatchWidth defines the maximum size of a dispatch group in terms of number of micro opcodes. The other change introduced by this patch is related to how DispatchStage generates "instruction dispatch" events. In particular: * There can be multiple dispatch events associated with a same instruction * Each dispatch event now encapsulates the number of dispatched micro opcodes. The number of micro opcodes declared by an instruction may exceed the processor DispatchWidth. Therefore, we cannot assume that instructions are always fully dispatched in a single cycle. DispatchStage knows already how to handle instructions declaring a number of opcodes bigger that DispatchWidth. However, DispatchStage always emitted a single instruction dispatch event (during the first simulated dispatch cycle) for instructions dispatched. With this patch, DispatchStage now correctly notifies multiple dispatch events for instructions that cannot be dispatched in a single cycle. A few views had to be modified. Views can no longer assume that there can only be one dispatch event per instruction. Tests (and docs) have been updated. Differential Revision: https://reviews.llvm.org/D51430 llvm-svn: 341055
2018-08-30 18:50:20 +08:00
// There may be multiple dispatch events. Microcoded instructions that are
// expanded into multiple uOps may require multiple dispatch cycles. Here,
// we want to capture the first dispatch cycle.
if (Timeline[Index].CycleDispatched == -1)
Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle);
break;
default:
return;
}
if (CurrentCycle < MaxCycle)
LastCycle = std::max(LastCycle, CurrentCycle);
}
/// Selects a highlight color for a wait-time figure.
///
/// MAGENTA is returned when cycles were accumulated but \p BufferSize is
/// negative. Otherwise \p CumulativeCycles is compared against
/// BufferSize * Executions (computed in unsigned arithmetic): RED when it
/// reaches that product, YELLOW when twice its value does, and SAVEDCOLOR
/// otherwise.
static raw_ostream::Colors chooseColor(unsigned CumulativeCycles,
                                       unsigned Executions, int BufferSize) {
  if (CumulativeCycles && BufferSize < 0)
    return raw_ostream::MAGENTA;

  const unsigned Threshold = static_cast<unsigned>(BufferSize) * Executions;
  if (CumulativeCycles >= Threshold)
    return raw_ostream::RED;
  if ((CumulativeCycles * 2) >= Threshold)
    return raw_ostream::YELLOW;
  return raw_ostream::SAVEDCOLOR;
}
/// Switches \p OS to the color chosen by chooseColor() for the given figures.
///
/// Does nothing when \p OS reports no color support. A SAVEDCOLOR result
/// resets the stream color; any other result is applied in bold on the
/// foreground.
static void tryChangeColor(raw_ostream &OS, unsigned Cycles,
                           unsigned Executions, int BufferSize) {
  if (!OS.has_colors())
    return;

  const raw_ostream::Colors Picked =
      chooseColor(Cycles, Executions, BufferSize);
  if (Picked != raw_ostream::SAVEDCOLOR) {
    OS.changeColor(Picked, /* bold */ true, /* BG */ false);
    return;
  }
  OS.resetColor();
}
/// Prints one row of the average wait-time table: the source index, the
/// execution count, and the three per-execution averages taken from \p Entry,
/// each rounded to one decimal place and colored via tryChangeColor().
///
/// The first two averages are colored against the buffer size recorded for
/// \p SourceIndex; the third against the scheduling model's
/// MicroOpBufferSize.
void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
                                      const WaitTimeEntry &Entry,
                                      unsigned SourceIndex,
                                      unsigned Executions) const {
  // Round half up to the nearest tenth before formatting.
  const auto RoundToTenth = [](double Value) {
    return floor((Value * 10) + 0.5) / 10;
  };

  const double AvgInQueue =
      static_cast<double>(Entry.CyclesSpentInSchedulerQueue) / Executions;
  const double AvgInQueueReady =
      static_cast<double>(Entry.CyclesSpentInSQWhileReady) / Executions;
  const double AvgBeforeRetire =
      static_cast<double>(Entry.CyclesSpentAfterWBAndBeforeRetire) /
      Executions;

  OS << SourceIndex << '.';
  OS.PadToColumn(7);
  OS << Executions;
  OS.PadToColumn(13);

  const int QueueSize = UsedBuffer[SourceIndex].second;

  tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, QueueSize);
  OS << format("%.1f", RoundToTenth(AvgInQueue));
  OS.PadToColumn(20);

  tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, QueueSize);
  OS << format("%.1f", RoundToTenth(AvgInQueueReady));
  OS.PadToColumn(27);

  tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions,
                 STI.getSchedModel().MicroOpBufferSize);
  OS << format("%.1f", RoundToTenth(AvgBeforeRetire));

  if (OS.has_colors())
    OS.resetColor();
  OS.PadToColumn(34);
}
/// Prints the "Average Wait times" table: a legend and column header,
/// followed by one row per source instruction with the instruction text
/// appended at the end of the row.
void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
  const std::string Legend =
      "\n\nAverage Wait times (based on the timeline view):\n"
      "[0]: Executions\n"
      "[1]: Average time spent waiting in a scheduler's queue\n"
      "[2]: Average time spent waiting in a scheduler's queue while ready\n"
      "[3]: Average time elapsed from WB until retire stage\n\n"
      " [0] [1] [2] [3]\n";
  OS << Legend;

  // Instructions are printed into a scratch string so that leading
  // whitespace can be stripped before they reach the output.
  std::string Scratch;
  raw_string_ostream ScratchOS(Scratch);

  formatted_raw_ostream FOS(OS);
  const unsigned Executions = Timeline.size() / Source.size();

  unsigned Row = 0;
  for (const MCInst &Inst : Source) {
    printWaitTimeEntry(FOS, WaitTime[Row], Row, Executions);

    // Append the instruction text, minus any leading tabs or spaces.
    MCIP.printInst(&Inst, ScratchOS, "", STI);
    ScratchOS.flush();
    FOS << " " << StringRef(Scratch).ltrim() << '\n';
    FOS.flush();

    Scratch = "";
    ++Row;
  }
}
/// Prints one row of the timeline: the "[Iteration,SourceIndex]" label
/// followed by one character per cycle describing the state of the
/// instruction described by \p Entry.
///
/// The row consists of filler up to the dispatch cycle, the Dispatched
/// marker, Waiting markers until issue, Executing markers until the Executed
/// marker, RetireLag markers until the Retired marker, and filler up to
/// LastCycle. A blank line is emitted before the very first row.
void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
                                          const TimelineViewEntry &Entry,
                                          unsigned Iteration,
                                          unsigned SourceIndex) const {
  if (Iteration == 0 && SourceIndex == 0)
    OS << '\n';
  OS << '[' << Iteration << ',' << SourceIndex << ']';
  OS.PadToColumn(10);

  assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!");
  unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched);

  // Cycles before dispatch: a dot every fifth column, spaces elsewhere.
  for (unsigned I = 0, E = CycleDispatched; I < E; ++I)
    OS << ((I % 5 == 0) ? '.' : ' ');
  OS << TimelineView::DisplayChar::Dispatched;

  if (CycleDispatched != Entry.CycleExecuted) {
    // Zero latency instructions have the same value for CycleDispatched,
    // CycleIssued and CycleExecuted.
    for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I)
      OS << TimelineView::DisplayChar::Waiting;
    if (Entry.CycleIssued == Entry.CycleExecuted)
      OS << TimelineView::DisplayChar::Executed;
    else {
      // The issue cycle gets its own marker only when it differs from the
      // dispatch cycle, whose column was already printed above.
      if (CycleDispatched != Entry.CycleIssued)
        OS << TimelineView::DisplayChar::Executing;
      for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;
           ++I)
        OS << TimelineView::DisplayChar::Executing;
      OS << TimelineView::DisplayChar::Executed;
    }
  }

  for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
    OS << TimelineView::DisplayChar::RetireLag;
  OS << TimelineView::DisplayChar::Retired;

  // Skip other columns.
  for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)
    OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' ');
}
/// Prints the "Timeline view:" title and the cycle ruler above the timeline.
///
/// The ruler shows the last digit of each cycle number from 0 to \p Cycles.
/// Digits for even decades (0-9, 20-29, ...) go on the "Index" row; digits
/// for odd decades go on an extra row above it, which is only printed when
/// \p Cycles is at least 10.
static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {
  // Emits one ruler row; a column shows its digit only when the parity of
  // its decade matches the requested one, and a space otherwise.
  const auto EmitRulerRow = [&OS, Cycles](bool DigitsOnOddDecades) {
    for (unsigned Col = 0; Col <= Cycles; ++Col) {
      const bool OddDecade = ((Col / 10) & 1) != 0;
      if (OddDecade == DigitsOnOddDecades)
        OS << Col % 10;
      else
        OS << ' ';
    }
    OS << '\n';
  };

  OS << "\n\nTimeline view:\n";
  if (Cycles >= 10) {
    OS.PadToColumn(10);
    EmitRulerRow(/* DigitsOnOddDecades */ true);
  }

  OS << "Index";
  OS.PadToColumn(10);
  EmitRulerRow(/* DigitsOnOddDecades */ false);
}
/// Prints the timeline header followed by one row per instruction per
/// iteration, each row ending with the instruction text.
///
/// Printing stops at the first timeline entry whose CycleRetired is zero.
void TimelineView::printTimeline(raw_ostream &OS) const {
  formatted_raw_ostream FOS(OS);
  printTimelineHeader(FOS, LastCycle);
  FOS.flush();

  // Instructions are printed into a scratch string so that leading
  // whitespace can be stripped before they reach the output.
  std::string Scratch;
  raw_string_ostream ScratchOS(Scratch);

  const unsigned Iterations = Timeline.size() / Source.size();
  unsigned Slot = 0;
  for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) {
    for (const MCInst &Inst : Source) {
      const TimelineViewEntry &Entry = Timeline[Slot];
      if (Entry.CycleRetired == 0)
        return;

      printTimelineViewEntry(FOS, Entry, Iteration, Slot % Source.size());

      // Append the instruction text, minus any leading tabs or spaces.
      MCIP.printInst(&Inst, ScratchOS, "", STI);
      ScratchOS.flush();
      FOS << " " << StringRef(Scratch).ltrim() << '\n';
      FOS.flush();

      Scratch = "";
      ++Slot;
    }
  }
}
} // namespace mca
} // namespace llvm