forked from OSchip/llvm-project
[llvm-mca] Correctly set the ReadAdvance information for register use operands.
The tool was passing the wrong operand index to method MCSubtargetInfo::getReadAdvanceCycles(). That method requires a "UseIdx", and not the operand index. This was found when testing X86 code where instructions had a memory folded operand. This patch fixes the issue and adds test read-advance-1.s to ensure that the ReadAfterLd (a ReadAdvance of 3cy) information is correctly used. llvm-svn: 328790
This commit is contained in:
parent
fe1d346f99
commit
0a837ef6b1
|
@ -0,0 +1,46 @@
|
||||||
|
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
|
||||||
|
|
||||||
|
# The vmul can start executing 3cy in advance. That is because the first use
|
||||||
|
# operand (i.e. %xmm1) is a ReadAfterLd. That means, the memory operand is
|
||||||
|
# evaluated before %xmm1.
|
||||||
|
|
||||||
|
|
||||||
|
vaddps %xmm0, %xmm0, %xmm1
|
||||||
|
vmulps (%rdi), %xmm1, %xmm2
|
||||||
|
|
||||||
|
|
||||||
|
# CHECK: Iterations: 1
|
||||||
|
# CHECK-NEXT: Instructions: 2
|
||||||
|
# CHECK-NEXT: Total Cycles: 10
|
||||||
|
# CHECK-NEXT: Dispatch Width: 2
|
||||||
|
|
||||||
|
|
||||||
|
# CHECK: Instruction Info:
|
||||||
|
# CHECK-NEXT: [1]: #uOps
|
||||||
|
# CHECK-NEXT: [2]: Latency
|
||||||
|
# CHECK-NEXT: [3]: RThroughput
|
||||||
|
# CHECK-NEXT: [4]: MayLoad
|
||||||
|
# CHECK-NEXT: [5]: MayStore
|
||||||
|
# CHECK-NEXT: [6]: HasSideEffects
|
||||||
|
|
||||||
|
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||||
|
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm1
|
||||||
|
# CHECK-NEXT: 1 7 1.00 * vmulps (%rdi), %xmm1, %xmm2
|
||||||
|
|
||||||
|
|
||||||
|
# CHECK: Timeline view:
|
||||||
|
|
||||||
|
# CHECK: Index 0123456789
|
||||||
|
# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
|
||||||
|
# CHECK-NEXT: [0,1] DeeeeeeeER vmulps (%rdi), %xmm1, %xmm2
|
||||||
|
|
||||||
|
|
||||||
|
# CHECK: Average Wait times (based on the timeline view):
|
||||||
|
# CHECK-NEXT: [0]: Executions
|
||||||
|
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||||
|
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||||
|
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||||
|
|
||||||
|
# CHECK: [0] [1] [2] [3]
|
||||||
|
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
|
||||||
|
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmulps (%rdi), %xmm1, %xmm2
|
|
@ -343,7 +343,7 @@ void DispatchUnit::updateRAWDependencies(ReadState &RS,
|
||||||
const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
|
const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
|
||||||
for (WriteState *WS : DependentWrites) {
|
for (WriteState *WS : DependentWrites) {
|
||||||
unsigned WriteResID = WS->getWriteResourceID();
|
unsigned WriteResID = WS->getWriteResourceID();
|
||||||
int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.OpIndex, WriteResID);
|
int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
|
||||||
WS->addUser(&RS, ReadAdvance);
|
WS->addUser(&RS, ReadAdvance);
|
||||||
}
|
}
|
||||||
// Prepare the set for another round.
|
// Prepare the set for another round.
|
||||||
|
|
|
@ -340,6 +340,7 @@ static void populateReads(InstrDesc &ID, const MCInst &MCI,
|
||||||
for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) {
|
for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) {
|
||||||
ReadDescriptor &Read = ID.Reads[CurrentUse];
|
ReadDescriptor &Read = ID.Reads[CurrentUse];
|
||||||
Read.OpIndex = i + CurrentUse;
|
Read.OpIndex = i + CurrentUse;
|
||||||
|
Read.UseIndex = CurrentUse;
|
||||||
Read.HasReadAdvanceEntries = HasReadAdvanceEntries;
|
Read.HasReadAdvanceEntries = HasReadAdvanceEntries;
|
||||||
Read.SchedClassID = SchedClassID;
|
Read.SchedClassID = SchedClassID;
|
||||||
DEBUG(dbgs() << "\t\tOpIdx=" << Read.OpIndex);
|
DEBUG(dbgs() << "\t\tOpIdx=" << Read.OpIndex);
|
||||||
|
@ -348,6 +349,7 @@ static void populateReads(InstrDesc &ID, const MCInst &MCI,
|
||||||
for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) {
|
for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) {
|
||||||
ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse];
|
ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse];
|
||||||
Read.OpIndex = -1;
|
Read.OpIndex = -1;
|
||||||
|
Read.UseIndex = -1;
|
||||||
Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse];
|
Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse];
|
||||||
Read.HasReadAdvanceEntries = false;
|
Read.HasReadAdvanceEntries = false;
|
||||||
Read.SchedClassID = SchedClassID;
|
Read.SchedClassID = SchedClassID;
|
||||||
|
|
|
@ -98,9 +98,7 @@ void Instruction::dispatch(unsigned RCUToken) {
|
||||||
RCUTokenID = RCUToken;
|
RCUTokenID = RCUToken;
|
||||||
|
|
||||||
// Check if input operands are already available.
|
// Check if input operands are already available.
|
||||||
if (std::all_of(Uses.begin(), Uses.end(),
|
update();
|
||||||
[](const UniqueUse &Use) { return Use->isReady(); }))
|
|
||||||
Stage = IS_READY;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::execute() {
|
void Instruction::execute() {
|
||||||
|
@ -122,19 +120,22 @@ bool Instruction::isZeroLatency() const {
|
||||||
return Desc.MaxLatency == 0 && Defs.size() == 0 && Uses.size() == 0;
|
return Desc.MaxLatency == 0 && Defs.size() == 0 && Uses.size() == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Instruction::update() {
|
||||||
|
if (!isDispatched())
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (llvm::all_of(Uses, [](const UniqueUse &Use) { return Use->isReady(); }))
|
||||||
|
Stage = IS_READY;
|
||||||
|
}
|
||||||
|
|
||||||
void Instruction::cycleEvent() {
|
void Instruction::cycleEvent() {
|
||||||
if (isReady())
|
if (isReady())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (isDispatched()) {
|
if (isDispatched()) {
|
||||||
bool IsReady = true;
|
for (UniqueUse &Use : Uses)
|
||||||
for (UniqueUse &Use : Uses) {
|
|
||||||
Use->cycleEvent();
|
Use->cycleEvent();
|
||||||
IsReady &= Use->isReady();
|
update();
|
||||||
}
|
|
||||||
|
|
||||||
if (IsReady)
|
|
||||||
Stage = IS_READY;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -60,8 +60,12 @@ struct WriteDescriptor {
|
||||||
|
|
||||||
/// \brief A register read descriptor.
|
/// \brief A register read descriptor.
|
||||||
struct ReadDescriptor {
|
struct ReadDescriptor {
|
||||||
// This field defaults to -1 if this is an implicit read.
|
// A MCOperand index. This is used by the Dispatch logic to identify register
|
||||||
|
// reads. This field defaults to -1 if this is an implicit read.
|
||||||
int OpIndex;
|
int OpIndex;
|
||||||
|
// The actual "UseIdx". This field defaults to -1 if this is an implicit read.
|
||||||
|
// This is used by the scheduler to solve ReadAdvance queries.
|
||||||
|
int UseIndex;
|
||||||
// This field is only set if this is an implicit read.
|
// This field is only set if this is an implicit read.
|
||||||
unsigned RegisterID;
|
unsigned RegisterID;
|
||||||
// Scheduling Class Index. It is used to query the scheduling model for the
|
// Scheduling Class Index. It is used to query the scheduling model for the
|
||||||
|
@ -296,6 +300,14 @@ public:
|
||||||
// all the definitions.
|
// all the definitions.
|
||||||
void execute();
|
void execute();
|
||||||
|
|
||||||
|
// Force a transition from the IS_AVAILABLE state to the IS_READY state if
|
||||||
|
// input operands are all ready. State transitions normally occur at the
|
||||||
|
// beginning of a new cycle (see method cycleEvent()). However, the scheduler
|
||||||
|
// may decide to promote instructions from the wait queue to the ready queue
|
||||||
|
// as the result of another issue event. This method is called every time the
|
||||||
|
// instruction might have changed in state.
|
||||||
|
void update();
|
||||||
|
|
||||||
bool isDispatched() const { return Stage == IS_AVAILABLE; }
|
bool isDispatched() const { return Stage == IS_AVAILABLE; }
|
||||||
bool isReady() const { return Stage == IS_READY; }
|
bool isReady() const { return Stage == IS_READY; }
|
||||||
bool isExecuting() const { return Stage == IS_EXECUTING; }
|
bool isExecuting() const { return Stage == IS_EXECUTING; }
|
||||||
|
|
|
@ -293,7 +293,10 @@ void Scheduler::cycleEvent(unsigned /* unused */) {
|
||||||
|
|
||||||
updateIssuedQueue();
|
updateIssuedQueue();
|
||||||
updatePendingQueue();
|
updatePendingQueue();
|
||||||
issue();
|
bool InstructionsWerePromoted = false;
|
||||||
|
do {
|
||||||
|
InstructionsWerePromoted = issue();
|
||||||
|
} while(InstructionsWerePromoted);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
|
@ -357,7 +360,40 @@ void Scheduler::issueInstruction(Instruction &IS, unsigned InstrIndex) {
|
||||||
notifyInstructionExecuted(InstrIndex);
|
notifyInstructionExecuted(InstrIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Scheduler::issue() {
|
bool Scheduler::promoteToReadyQueue() {
|
||||||
|
// Scan the set of waiting instructions and promote them to the
|
||||||
|
// ready queue if operands are all ready.
|
||||||
|
bool InstructionsWerePromoted = false;
|
||||||
|
for (auto I = WaitQueue.begin(), E = WaitQueue.end(); I != E;) {
|
||||||
|
const QueueEntryTy &Entry = *I;
|
||||||
|
|
||||||
|
// Check if this instruction is now ready. If so, force
|
||||||
|
// a transition in state using method 'update()'.
|
||||||
|
Entry.second->update();
|
||||||
|
bool IsReady = Entry.second->isReady();
|
||||||
|
|
||||||
|
const InstrDesc &Desc = Entry.second->getDesc();
|
||||||
|
bool IsMemOp = Desc.MayLoad || Desc.MayStore;
|
||||||
|
if (IsReady && IsMemOp)
|
||||||
|
IsReady &= LSU->isReady(Entry.first);
|
||||||
|
|
||||||
|
if (IsReady) {
|
||||||
|
notifyInstructionReady(Entry.first);
|
||||||
|
ReadyQueue[Entry.first] = Entry.second;
|
||||||
|
auto ToRemove = I;
|
||||||
|
++I;
|
||||||
|
WaitQueue.erase(ToRemove);
|
||||||
|
InstructionsWerePromoted = true;
|
||||||
|
} else {
|
||||||
|
++I;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return InstructionsWerePromoted;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool Scheduler::issue() {
|
||||||
std::vector<unsigned> ToRemove;
|
std::vector<unsigned> ToRemove;
|
||||||
for (const QueueEntryTy QueueEntry : ReadyQueue) {
|
for (const QueueEntryTy QueueEntry : ReadyQueue) {
|
||||||
// Give priority to older instructions in ReadyQueue. The ready queue is
|
// Give priority to older instructions in ReadyQueue. The ready queue is
|
||||||
|
@ -371,33 +407,27 @@ void Scheduler::issue() {
|
||||||
ToRemove.emplace_back(InstrIndex);
|
ToRemove.emplace_back(InstrIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ToRemove.empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
for (const unsigned InstrIndex : ToRemove)
|
for (const unsigned InstrIndex : ToRemove)
|
||||||
ReadyQueue.erase(InstrIndex);
|
ReadyQueue.erase(InstrIndex);
|
||||||
|
|
||||||
|
// Instructions that have been issued during this cycle might have unblocked
|
||||||
|
// other dependent instructions. Dependent instructions
|
||||||
|
// may be issued during this same cycle if operands have ReadAdvance entries.
|
||||||
|
// Promote those instructions to the ReadyQueue and tell the caller that
|
||||||
|
// we need another round of 'issue()'.
|
||||||
|
return promoteToReadyQueue();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Scheduler::updatePendingQueue() {
|
void Scheduler::updatePendingQueue() {
|
||||||
// Scan the set of waiting instructions and promote them to the
|
// Notify instructions in the pending queue that a new cycle just
|
||||||
// ready queue if operands are all ready.
|
// started.
|
||||||
for (auto I = WaitQueue.begin(), E = WaitQueue.end(); I != E;) {
|
for (QueueEntryTy Entry : WaitQueue)
|
||||||
const QueueEntryTy Entry = *I;
|
|
||||||
Entry.second->cycleEvent();
|
Entry.second->cycleEvent();
|
||||||
|
|
||||||
const InstrDesc &Desc = Entry.second->getDesc();
|
promoteToReadyQueue();
|
||||||
bool IsMemOp = Desc.MayLoad || Desc.MayStore;
|
|
||||||
bool IsReady = Entry.second->isReady();
|
|
||||||
if (IsReady && IsMemOp)
|
|
||||||
IsReady &= LSU->isReady(Entry.first);
|
|
||||||
|
|
||||||
if (IsReady) {
|
|
||||||
notifyInstructionReady(Entry.first);
|
|
||||||
ReadyQueue[Entry.first] = Entry.second;
|
|
||||||
auto ToRemove = I;
|
|
||||||
++I;
|
|
||||||
WaitQueue.erase(ToRemove);
|
|
||||||
} else {
|
|
||||||
++I;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Scheduler::updateIssuedQueue() {
|
void Scheduler::updateIssuedQueue() {
|
||||||
|
|
|
@ -430,9 +430,14 @@ class Scheduler {
|
||||||
// Notify the Backend that buffered resources were freed.
|
// Notify the Backend that buffered resources were freed.
|
||||||
void notifyReleasedBuffers(llvm::ArrayRef<uint64_t> Buffers);
|
void notifyReleasedBuffers(llvm::ArrayRef<uint64_t> Buffers);
|
||||||
|
|
||||||
/// Issue instructions from the ready queue by giving priority to older
|
/// Issue instructions from the ReadyQueue by giving priority to older
|
||||||
/// instructions.
|
/// instructions. This method returns true if at least one instruction has
|
||||||
void issue();
|
/// been promoted in the process from the WaitQueue to the ReadyQueue.
|
||||||
|
bool issue();
|
||||||
|
|
||||||
|
/// Scans the WaitQueue in search of instructions that can be moved to
|
||||||
|
/// the ReadyQueue.
|
||||||
|
bool promoteToReadyQueue();
|
||||||
|
|
||||||
/// Issue an instruction without updating the ready queue.
|
/// Issue an instruction without updating the ready queue.
|
||||||
void issueInstruction(Instruction &IS, unsigned InstrIndex);
|
void issueInstruction(Instruction &IS, unsigned InstrIndex);
|
||||||
|
|
Loading…
Reference in New Issue