forked from OSchip/llvm-project
[llvm-mca][scheduler-stats] Print issued micro opcodes per cycle. NFCI
It makes more sense to print out the number of micro opcodes that are issued every cycle rather than the number of instructions issued per cycle. This behavior is also consistent with the dispatch-stats: numbers from the two views can now be easily compared. llvm-svn: 357919
This commit is contained in:
parent
5058ca6d9b
commit
f6a60f1f80
|
@ -498,7 +498,7 @@ sections.
|
|||
2, 314 (51.5%)
|
||||
|
||||
|
||||
Schedulers - number of cycles where we saw N instructions issued:
|
||||
Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
[# issued], [# cycles]
|
||||
0, 7 (1.1%)
|
||||
1, 306 (50.2%)
|
||||
|
@ -552,9 +552,9 @@ dispatch statistics are displayed by either using the command option
|
|||
``-all-stats`` or ``-dispatch-stats``.
|
||||
|
||||
The next table, *Schedulers*, presents a histogram displaying a count,
|
||||
representing the number of instructions issued on some number of cycles. In
|
||||
this case, of the 610 simulated cycles, single instructions were issued 306
|
||||
times (50.2%) and there were 7 cycles where no instructions were issued.
|
||||
representing the number of micro opcodes issued on some number of cycles. In
|
||||
this case, of the 610 simulated cycles, single micro opcodes were issued 306 times
|
||||
(50.2%) and there were 7 cycles where no micro opcodes were issued.
|
||||
|
||||
The *Scheduler's queue usage* table shows the average and maximum number of
|
||||
buffer entries (i.e., scheduler queue entries) used at runtime. Resource JFPU01
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
# M4-NEXT: IPC: 0.50
|
||||
# M4-NEXT: Block RThroughput: 0.2
|
||||
|
||||
# ALL: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# ALL: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# ALL-NEXT: [# issued], [# cycles]
|
||||
# ALL-NEXT: 0, 1 (50.0%)
|
||||
# ALL-NEXT: 1, 1 (50.0%)
|
||||
|
|
|
@ -90,7 +90,7 @@ vmovaps (%rbx), %ymm3
|
|||
# CHECK-NEXT: 2, 172 (83.1%)
|
||||
# CHECK-NEXT: 4, 14 (6.8%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 7 (3.4%)
|
||||
# CHECK-NEXT: 2, 200 (96.6%)
|
||||
|
@ -203,7 +203,7 @@ vmovaps (%rbx), %ymm3
|
|||
# CHECK-NEXT: 2, 172 (83.1%)
|
||||
# CHECK-NEXT: 4, 14 (6.8%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 7 (3.4%)
|
||||
# CHECK-NEXT: 2, 200 (96.6%)
|
||||
|
@ -316,7 +316,7 @@ vmovaps (%rbx), %ymm3
|
|||
# CHECK-NEXT: 2, 172 (83.1%)
|
||||
# CHECK-NEXT: 4, 14 (6.8%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 7 (3.4%)
|
||||
# CHECK-NEXT: 2, 200 (96.6%)
|
||||
|
@ -429,7 +429,7 @@ vmovaps (%rbx), %ymm3
|
|||
# CHECK-NEXT: 2, 172 (83.1%)
|
||||
# CHECK-NEXT: 4, 14 (6.8%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 7 (3.4%)
|
||||
# CHECK-NEXT: 2, 200 (96.6%)
|
||||
|
@ -542,7 +542,7 @@ vmovaps (%rbx), %ymm3
|
|||
# CHECK-NEXT: 2, 172 (83.1%)
|
||||
# CHECK-NEXT: 4, 14 (6.8%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 7 (3.4%)
|
||||
# CHECK-NEXT: 2, 200 (96.6%)
|
||||
|
@ -655,7 +655,7 @@ vmovaps (%rbx), %ymm3
|
|||
# CHECK-NEXT: 2, 172 (83.1%)
|
||||
# CHECK-NEXT: 4, 14 (6.8%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 7 (3.4%)
|
||||
# CHECK-NEXT: 2, 200 (96.6%)
|
||||
|
@ -767,10 +767,10 @@ vmovaps (%rbx), %ymm3
|
|||
# CHECK-NEXT: 0, 7 (3.4%)
|
||||
# CHECK-NEXT: 4, 200 (96.6%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 7 (3.4%)
|
||||
# CHECK-NEXT: 2, 200 (96.6%)
|
||||
# CHECK-NEXT: 4, 200 (96.6%)
|
||||
|
||||
# CHECK: Scheduler's queue usage:
|
||||
# CHECK-NEXT: [1] Resource name.
|
||||
|
|
|
@ -26,7 +26,7 @@ add %rsi, %rsi
|
|||
# CHECK-NEXT: 1 10 1.00 * vmulps (%rsi), %xmm0, %xmm0
|
||||
# CHECK-NEXT: 1 1 0.50 addq %rsi, %rsi
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 12 (92.3%)
|
||||
# CHECK-NEXT: 2, 1 (7.7%)
|
||||
|
|
|
@ -91,7 +91,7 @@ vmovaps %ymm3, (%rbx)
|
|||
# CHECK-NEXT: 2, 1 (0.2%)
|
||||
# CHECK-NEXT: 4, 7 (1.7%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 3 (0.7%)
|
||||
# CHECK-NEXT: 1, 400 (99.3%)
|
||||
|
@ -205,7 +205,7 @@ vmovaps %ymm3, (%rbx)
|
|||
# CHECK-NEXT: 2, 1 (0.2%)
|
||||
# CHECK-NEXT: 4, 7 (1.7%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 3 (0.7%)
|
||||
# CHECK-NEXT: 1, 400 (99.3%)
|
||||
|
@ -319,7 +319,7 @@ vmovaps %ymm3, (%rbx)
|
|||
# CHECK-NEXT: 2, 1 (0.2%)
|
||||
# CHECK-NEXT: 4, 7 (1.7%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 3 (0.7%)
|
||||
# CHECK-NEXT: 1, 400 (99.3%)
|
||||
|
@ -433,7 +433,7 @@ vmovaps %ymm3, (%rbx)
|
|||
# CHECK-NEXT: 2, 1 (0.2%)
|
||||
# CHECK-NEXT: 4, 7 (1.7%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 3 (0.7%)
|
||||
# CHECK-NEXT: 1, 400 (99.3%)
|
||||
|
@ -547,7 +547,7 @@ vmovaps %ymm3, (%rbx)
|
|||
# CHECK-NEXT: 2, 1 (0.1%)
|
||||
# CHECK-NEXT: 4, 6 (0.7%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 403 (50.2%)
|
||||
# CHECK-NEXT: 1, 400 (49.8%)
|
||||
|
@ -662,7 +662,7 @@ vmovaps %ymm3, (%rbx)
|
|||
# CHECK-NEXT: 2, 1 (0.2%)
|
||||
# CHECK-NEXT: 4, 7 (1.7%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 3 (0.7%)
|
||||
# CHECK-NEXT: 1, 400 (99.3%)
|
||||
|
@ -774,10 +774,10 @@ vmovaps %ymm3, (%rbx)
|
|||
# CHECK-NEXT: 0, 3 (0.7%)
|
||||
# CHECK-NEXT: 4, 400 (99.3%)
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 3 (0.7%)
|
||||
# CHECK-NEXT: 1, 400 (99.3%)
|
||||
# CHECK-NEXT: 4, 400 (99.3%)
|
||||
|
||||
# CHECK: Scheduler's queue usage:
|
||||
# CHECK-NEXT: [1] Resource name.
|
||||
|
|
|
@ -26,7 +26,7 @@ add %rsi, %rsi
|
|||
# CHECK-NEXT: 1 7 1.00 * vmulps (%rsi), %xmm0, %xmm0
|
||||
# CHECK-NEXT: 1 1 0.50 addq %rsi, %rsi
|
||||
|
||||
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# CHECK-NEXT: [# issued], [# cycles]
|
||||
# CHECK-NEXT: 0, 9 (90.0%)
|
||||
# CHECK-NEXT: 2, 1 (10.0%)
|
||||
|
|
|
@ -41,7 +41,7 @@ add %eax, %eax
|
|||
# FULLREPORT-NEXT: 1, 62 (60.2%)
|
||||
# FULLREPORT-NEXT: 2, 19 (18.4%)
|
||||
|
||||
# FULLREPORT: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# FULLREPORT: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# FULLREPORT-NEXT: [# issued], [# cycles]
|
||||
# FULLREPORT-NEXT: 0, 3 (2.9%)
|
||||
# FULLREPORT-NEXT: 1, 100 (97.1%)
|
||||
|
|
|
@ -42,7 +42,7 @@ add %eax, %eax
|
|||
# FULL-NEXT: 1, 62 (60.2%)
|
||||
# FULL-NEXT: 2, 19 (18.4%)
|
||||
|
||||
# ALL: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# ALL: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# ALL-NEXT: [# issued], [# cycles]
|
||||
# ALL-NEXT: 0, 3 (2.9%)
|
||||
# ALL-NEXT: 1, 100 (97.1%)
|
||||
|
|
|
@ -43,7 +43,7 @@ add %eax, %eax
|
|||
# FULLREPORT-NEXT: 1, 62 (60.2%)
|
||||
# FULLREPORT-NEXT: 2, 19 (18.4%)
|
||||
|
||||
# FULLREPORT: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# FULLREPORT: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# FULLREPORT-NEXT: [# issued], [# cycles]
|
||||
# FULLREPORT-NEXT: 0, 3 (2.9%)
|
||||
# FULLREPORT-NEXT: 1, 100 (97.1%)
|
||||
|
|
|
@ -42,7 +42,7 @@ add %eax, %eax
|
|||
# ALL-NEXT: 1, 62 (60.2%)
|
||||
# ALL-NEXT: 2, 19 (18.4%)
|
||||
|
||||
# ALL: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# ALL: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# ALL-NEXT: [# issued], [# cycles]
|
||||
# ALL-NEXT: 0, 3 (2.9%)
|
||||
# ALL-NEXT: 1, 100 (97.1%)
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
xor %eax, %ebx
|
||||
|
||||
# ALL: Schedulers - number of cycles where we saw N instructions issued:
|
||||
# ALL: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# ALL-NEXT: [# issued], [# cycles]
|
||||
# ALL-NEXT: 0, 3 (75.0%)
|
||||
# ALL-NEXT: 1, 1 (25.0%)
|
||||
|
|
|
@ -22,7 +22,6 @@ SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
|
|||
: SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0),
|
||||
NumCycles(0), MostRecentLoadDispatched(~0U),
|
||||
MostRecentStoreDispatched(~0U),
|
||||
IssuedPerCycle(STI.getSchedModel().NumProcResourceKinds, 0),
|
||||
Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) {
|
||||
if (SM.hasExtraProcessorInfo()) {
|
||||
const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
|
||||
|
@ -43,9 +42,10 @@ SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
|
|||
// In the future we should add a new "memory queue" event type, so that we stop
|
||||
// making assumptions on how LSUnit internally works (See PR39828).
|
||||
void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
|
||||
if (Event.Type == HWInstructionEvent::Issued)
|
||||
++NumIssued;
|
||||
else if (Event.Type == HWInstructionEvent::Dispatched) {
|
||||
if (Event.Type == HWInstructionEvent::Issued) {
|
||||
const Instruction &Inst = *Event.IR.getInstruction();
|
||||
NumIssued += Inst.getDesc().NumMicroOps;
|
||||
} else if (Event.Type == HWInstructionEvent::Dispatched) {
|
||||
const Instruction &Inst = *Event.IR.getInstruction();
|
||||
const unsigned Index = Event.IR.getSourceIndex();
|
||||
if (LQResourceID && Inst.getDesc().MayLoad &&
|
||||
|
@ -95,29 +95,25 @@ void SchedulerStatistics::updateHistograms() {
|
|||
BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
|
||||
}
|
||||
|
||||
IssuedPerCycle[NumIssued]++;
|
||||
IssueWidthPerCycle[NumIssued]++;
|
||||
NumIssued = 0;
|
||||
}
|
||||
|
||||
void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const {
|
||||
OS << "\n\nSchedulers - "
|
||||
<< "number of cycles where we saw N instructions issued:\n";
|
||||
<< "number of cycles where we saw N micro opcodes issued:\n";
|
||||
OS << "[# issued], [# cycles]\n";
|
||||
|
||||
const auto It =
|
||||
std::max_element(IssuedPerCycle.begin(), IssuedPerCycle.end());
|
||||
unsigned Index = std::distance(IssuedPerCycle.begin(), It);
|
||||
|
||||
bool HasColors = OS.has_colors();
|
||||
for (unsigned I = 0, E = IssuedPerCycle.size(); I < E; ++I) {
|
||||
unsigned IPC = IssuedPerCycle[I];
|
||||
if (!IPC)
|
||||
continue;
|
||||
|
||||
if (I == Index && HasColors)
|
||||
const auto It =
|
||||
std::max_element(IssueWidthPerCycle.begin(), IssueWidthPerCycle.end());
|
||||
for (const std::pair<unsigned, unsigned> &Entry : IssueWidthPerCycle) {
|
||||
unsigned NumIssued = Entry.first;
|
||||
if (NumIssued == It->first && HasColors)
|
||||
OS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
|
||||
|
||||
OS << " " << I << ", " << IPC << " ("
|
||||
unsigned IPC = Entry.second;
|
||||
OS << " " << NumIssued << ", " << IPC << " ("
|
||||
<< format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n";
|
||||
if (HasColors)
|
||||
OS.resetColor();
|
||||
|
|
|
@ -62,7 +62,9 @@ class SchedulerStatistics final : public View {
|
|||
uint64_t CumulativeNumUsedSlots;
|
||||
};
|
||||
|
||||
std::vector<unsigned> IssuedPerCycle;
|
||||
using Histogram = std::map<unsigned, unsigned>;
|
||||
Histogram IssueWidthPerCycle;
|
||||
|
||||
std::vector<BufferUsage> Usage;
|
||||
|
||||
void updateHistograms();
|
||||
|
|
Loading…
Reference in New Issue