llvm-project/llvm/lib/Target/AMDGPU/GCNILPSched.cpp

//===---------------------------- GCNILPSched.cpp - -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ScheduleDAG.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"
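
// GCNILPScheduler is a small stand-alone bottom-up list scheduler: it orders
// the SUnits of an already built ScheduleDAG using Sethi-Ullman numbers plus
// a set of ILP-oriented tie-breakers, and returns the resulting top-down
// instruction order.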

namespace {

class GCNILPScheduler {
  struct Candidate : ilist_node<Candidate> {
    SUnit *SU;

    Candidate(SUnit *SU_)
      : SU(SU_) {}
  };

  SpecificBumpPtrAllocator<Candidate> Alloc;

  typedef simple_ilist<Candidate> Queue;
  Queue PendingQueue;
  Queue AvailQueue;
  unsigned CurQueueId = 0;

  std::vector<unsigned> SUNumbers;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  unsigned getNodePriority(const SUnit *SU) const;

  const SUnit *pickBest(const SUnit *left, const SUnit *right);
  Candidate* pickCandidate();

  void releasePending();
  void advanceToCycle(unsigned NextCycle);
  void releasePredecessors(const SUnit* SU);

public:
  std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> BotRoots,
                                     const ScheduleDAG &DAG);
};
} // namespace

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue; // ignore chain preds
    SUnit *PredSU = Pred.getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;
  return SethiUllmanNumber;
}
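
// Worked example (illustrative only): two leaves A and B with no data
// predecessors each get the number 1. A node C = op A, B sees two equal
// predecessor numbers, so it ends up with 1 + 1 = 2, while a node
// D = op C that merely extends the chain keeps C's number 2. Balanced
// subtrees therefore raise the number, straight chains do not.
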
// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SUNumbers.size());
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation. Give it a large SethiUllman number so that it is
    // scheduled right before its predecessors and does not lengthen
    // their live ranges.
    return 0xffff;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;

  return SUNumbers[SU->NodeNum];
}
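
// For example, a store (it has operands but defines no value, so
// NumSuccs == 0 and NumPreds != 0) gets the sentinel 0xffff, while a def
// with no data predecessors, such as a constant materialization, gets 0;
// every other node keeps its Sethi-Ullman number. pickBest() below treats
// the smaller value as the better candidate when the two values differ.
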
/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl()) continue; // ignore chain succs
    unsigned Height = Succ.getSUnit()->getHeight();
    // If there is a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue; // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(const SUnit *left, const SUnit *right) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LHeight = (int)left->getHeight();
  int RHeight = (int)right->getHeight();

  // Sort by height first; if the heights match, fall back to depth and then
  // to the nodes' own latencies.
  if (LHeight != RHeight)
    return LHeight > RHeight ? 1 : -1;

  int LDepth = left->getDepth();
  int RDepth = right->getDepth();
  if (LDepth != RDepth) {
    LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
                      << ") depth " << LDepth << " vs SU (" << right->NodeNum
                      << ") depth " << RDepth << "\n");
    return LDepth < RDepth ? 1 : -1;
  }

  if (left->Latency != right->Latency)
    return left->Latency > right->Latency ? 1 : -1;

  return 0;
}
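
// pickBest() compares two ready SUnits through a cascade of tie-breakers:
// a depth or height difference larger than MaxReorderWindow decides
// immediately; otherwise the Sethi-Ullman priority (getNodePriority), the
// closest-successor distance, the scratch-register estimate
// (calcMaxScratches), the latency comparison in BUCompareLatency above,
// and finally NodeQueueId break the tie.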
const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right) {
  // TODO: add register pressure lowering checks
  bool const DisableSchedCriticalPath = false;
  int MaxReorderWindow = 6;
  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
                        << left->getDepth() << " != SU(" << right->NodeNum
                        << "): " << right->getDepth() << "\n");
      return left->getDepth() < right->getDepth() ? right : left;
    }
  }

  bool const DisableSchedHeight = false;
  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow)
      return left->getHeight() > right->getHeight() ? right : left;
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = getNodePriority(left);
  unsigned RPriority = getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority ? right : left;

  // Try to schedule def + use closer together when Sethi-Ullman numbers are
  // the same, e.g.
  //   t1 = op t2, c1
  //   t3 = op t4, c2
  //
  // and the following instructions are both ready.
  //   t2 = op c3
  //   t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  //   t4 = op c4
  //   t2 = op c3
  //   t1 = op t2, c1
  //   t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist ? right : left;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch ? right : left;

  bool const DisableSchedCycles = false;
  if (!DisableSchedCycles) {
    int result = BUCompareLatency(left, right);
    if (result != 0)
      return result > 0 ? right : left;
    return left;
  } else {
    if (left->getHeight() != right->getHeight())
      return (left->getHeight() > right->getHeight()) ? right : left;

    if (left->getDepth() != right->getDepth())
      return (left->getDepth() < right->getDepth()) ? right : left;
  }

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId) ? right : left;
}

GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
  if (AvailQueue.empty())
    return nullptr;
  auto Best = AvailQueue.begin();
  for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
    auto NewBestSU = pickBest(Best->SU, I->SU);
    if (NewBestSU != Best->SU) {
      assert(NewBestSU == I->SU);
      Best = I;
    }
  }
  return &*Best;
}

void GCNILPScheduler::releasePending() {
  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
    auto &C = *I++;
    if (C.SU->getHeight() <= CurCycle) {
      PendingQueue.remove(C);
      AvailQueue.push_back(C);
      C.SU->NodeQueueId = CurQueueId++;
    }
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;
  CurCycle = NextCycle;
  releasePending();
}

void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
  for (const auto &PredEdge : SU->Preds) {
    auto PredSU = PredEdge.getSUnit();
    if (PredEdge.isWeak())
      continue;
    assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
    if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
      PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
  }
}
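
// Putting the pieces together (illustrative summary): once the last
// remaining non-weak successor of a node has been scheduled, its
// NumSuccsLeft reaches zero and the node is placed on PendingQueue; when
// CurCycle advances to at least the node's height, releasePending() moves
// it to AvailQueue, where pickCandidate() may select it.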

std::vector<const SUnit*>
GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
                          const ScheduleDAG &DAG) {
  auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;

  std::vector<SUnit> SUSavedCopy;
  SUSavedCopy.resize(SUnits.size());

  // We cannot save only the fields we touch: some of them are private, so
  // save the units verbatim. This assumes SUnit has value semantics.
  for (const SUnit &SU : SUnits)
    SUSavedCopy[SU.NodeNum] = SU;

  SUNumbers.assign(SUnits.size(), 0);
  for (const SUnit &SU : SUnits)
    CalcNodeSethiUllmanNumber(&SU, SUNumbers);

  for (auto SU : BotRoots) {
    AvailQueue.push_back(
      *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
  }
  releasePredecessors(&DAG.ExitSU);

  std::vector<const SUnit*> Schedule;
  Schedule.reserve(SUnits.size());
  while (true) {
    if (AvailQueue.empty() && !PendingQueue.empty()) {
      auto EarliestSU = std::min_element(
        PendingQueue.begin(), PendingQueue.end(),
        [=](const Candidate& C1, const Candidate& C2) {
          return C1.SU->getHeight() < C2.SU->getHeight();
        })->SU;
      advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
    }
    if (AvailQueue.empty())
      break;

    LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
                         "Ready queue:";
               for (auto &C : AvailQueue)
                 dbgs() << ' ' << C.SU->NodeNum;
               dbgs() << '\n';);

    auto C = pickCandidate();
    assert(C);
    AvailQueue.remove(*C);
    auto SU = C->SU;
    LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG));

    advanceToCycle(SU->getHeight());

    releasePredecessors(SU);
    Schedule.push_back(SU);
    SU->isScheduled = true;
  }
  assert(SUnits.size() == Schedule.size());
  std::reverse(Schedule.begin(), Schedule.end());

  // Restore the units to their saved state.
  for (auto &SU : SUnits)
    SU = SUSavedCopy[SU.NodeNum];
  return Schedule;
}

namespace llvm {

std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
                                              const ScheduleDAG &DAG) {
  GCNILPScheduler S;
  return S.schedule(BotRoots, DAG);
}
} // namespace llvm
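
// Usage sketch (illustrative; names other than makeGCNILPScheduler are
// assumptions): given a ScheduleDAG that has already been built and its
// bottom roots,
//
//   std::vector<const SUnit*> Order = makeGCNILPScheduler(BotRoots, DAG);
//
// returns the SUnits in top-down order, ready to be re-emitted by the
// caller (the GCN iterative scheduler is the intended consumer).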