[NFC][Scheduler] Refactor tryCandidate to return boolean

This patch changes return type of tryCandidate from void to bool:

1. Methods in some targets already follow this convention.
2. This would help if some target wants to re-use generic code.
3. It looks more intuitive if these try-method returns the same type.

We may need to change return type of them from bool to some enum
further, to make it less confusing.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D103951
This commit is contained in:
Qiu Chaofan 2021-07-01 14:31:47 +08:00
parent b9c24257c7
commit 07f0faed11
4 changed files with 69 additions and 56 deletions

View File

@ -1012,7 +1012,7 @@ protected:
const RegPressureTracker &RPTracker,
RegPressureTracker &TempTracker);
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
SchedBoundary *Zone) const;
SUnit *pickNodeBidirectional(bool &IsTopNode);
@ -1075,7 +1075,7 @@ public:
}
protected:
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
void pickNodeFromQueue(SchedCandidate &Cand);
};

View File

@ -2818,6 +2818,8 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
namespace llvm {
/// Return true if this heuristic determines order.
/// TODO: Consider refactor return type of these functions as integer or enum,
/// as we may need to differentiate whether TryCand is better than Cand.
bool tryLess(int TryVal, int CandVal,
GenericSchedulerBase::SchedCandidate &TryCand,
GenericSchedulerBase::SchedCandidate &Cand,
@ -3176,34 +3178,35 @@ void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending, or nullptr
// if Cand is from a different zone than TryCand.
void GenericScheduler::tryCandidate(SchedCandidate &Cand,
/// if Cand is from a different zone than TryCand.
/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary *Zone) const {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
return;
return true;
}
// Bias PhysReg Defs and copies to their uses and defined respectively.
if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
return;
return TryCand.Reason != NoCand;
// Avoid exceeding the target's limit.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
Cand.RPDelta.Excess,
TryCand, Cand, RegExcess, TRI,
DAG->MF))
return;
return TryCand.Reason != NoCand;
// Avoid increasing the max critical pressure in the scheduled region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
Cand.RPDelta.CriticalMax,
TryCand, Cand, RegCritical, TRI,
DAG->MF))
return;
return TryCand.Reason != NoCand;
// We only compare a subset of features when comparing nodes between
// Top and Bottom boundary. Some properties are simply incomparable, in many
@ -3217,12 +3220,12 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// heuristics to take precedence.
if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
tryLatency(TryCand, Cand, *Zone))
return;
return TryCand.Reason != NoCand;
// Prioritize instructions that read unbuffered resources by stall cycles.
if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
return;
return TryCand.Reason != NoCand;
}
// Keep clustered nodes together to encourage downstream peephole
@ -3238,14 +3241,14 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
if (tryGreater(TryCand.SU == TryCandNextClusterSU,
Cand.SU == CandNextClusterSU,
TryCand, Cand, Cluster))
return;
return TryCand.Reason != NoCand;
if (SameBoundary) {
// Weak edges are for clustering and other constraints.
if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
getWeakLeft(Cand.SU, Cand.AtTop),
TryCand, Cand, Weak))
return;
return TryCand.Reason != NoCand;
}
// Avoid increasing the max pressure of the entire region.
@ -3253,31 +3256,34 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
Cand.RPDelta.CurrentMax,
TryCand, Cand, RegMax, TRI,
DAG->MF))
return;
return TryCand.Reason != NoCand;
if (SameBoundary) {
// Avoid critical resource consumption and balance the schedule.
TryCand.initResourceDelta(DAG, SchedModel);
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
return;
return TryCand.Reason != NoCand;
if (tryGreater(TryCand.ResDelta.DemandedResources,
Cand.ResDelta.DemandedResources,
TryCand, Cand, ResourceDemand))
return;
return TryCand.Reason != NoCand;
// Avoid serializing long latency dependence chains.
// For acyclic path limited loops, latency was already checked above.
if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
!Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
return;
return TryCand.Reason != NoCand;
// Fall through to original instruction order.
if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
|| (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
TryCand.Reason = NodeOrder;
return true;
}
}
return false;
}
/// Pick the best candidate from the queue.
@ -3299,8 +3305,7 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
tryCandidate(Cand, TryCand, ZoneArg);
if (TryCand.Reason != NoCand) {
if (tryCandidate(Cand, TryCand, ZoneArg)) {
// Initialize resource delta if needed in case future heuristics query it.
if (TryCand.ResDelta == SchedResourceDelta())
TryCand.initResourceDelta(DAG, SchedModel);
@ -3378,8 +3383,7 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
assert(TopCand.isValid());
SchedCandidate Cand = BotCand;
TopCand.Reason = NoCand;
tryCandidate(Cand, TopCand, nullptr);
if (TopCand.Reason != NoCand) {
if (tryCandidate(Cand, TopCand, nullptr)) {
Cand.setBest(TopCand);
LLVM_DEBUG(traceCandidate(Cand));
}
@ -3543,42 +3547,47 @@ void PostGenericScheduler::registerRoots() {
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand) {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
return;
return true;
}
// Prioritize instructions that read unbuffered resources by stall cycles.
if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
return;
return TryCand.Reason != NoCand;
// Keep clustered nodes together.
if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
Cand.SU == DAG->getNextClusterSucc(),
TryCand, Cand, Cluster))
return;
return TryCand.Reason != NoCand;
// Avoid critical resource consumption and balance the schedule.
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
return;
return TryCand.Reason != NoCand;
if (tryGreater(TryCand.ResDelta.DemandedResources,
Cand.ResDelta.DemandedResources,
TryCand, Cand, ResourceDemand))
return;
return TryCand.Reason != NoCand;
// Avoid serializing long latency dependence chains.
if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
return;
return TryCand.Reason != NoCand;
}
// Fall through to original instruction order.
if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
TryCand.Reason = NodeOrder;
return true;
}
return false;
}
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
@ -3588,8 +3597,7 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
TryCand.SU = SU;
TryCand.AtTop = true;
TryCand.initResourceDelta(DAG, SchedModel);
tryCandidate(Cand, TryCand);
if (TryCand.Reason != NoCand) {
if (tryCandidate(Cand, TryCand)) {
Cand.setBest(TryCand);
LLVM_DEBUG(traceCandidate(Cand));
}

View File

@ -46,7 +46,7 @@ bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
return false;
}
void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary *Zone) const {
// From GenericScheduler::tryCandidate
@ -54,25 +54,25 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
return;
return true;
}
// Bias PhysReg Defs and copies to their uses and defined respectively.
if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
return;
return TryCand.Reason != NoCand;
// Avoid exceeding the target's limit.
if (DAG->isTrackingPressure() &&
tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
RegExcess, TRI, DAG->MF))
return;
return TryCand.Reason != NoCand;
// Avoid increasing the max critical pressure in the scheduled region.
if (DAG->isTrackingPressure() &&
tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
TryCand, Cand, RegCritical, TRI, DAG->MF))
return;
return TryCand.Reason != NoCand;
// We only compare a subset of features when comparing nodes between
// Top and Bottom boundary. Some properties are simply incomparable, in many
@ -86,12 +86,12 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
// heuristics to take precedence.
if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
tryLatency(TryCand, Cand, *Zone))
return;
return TryCand.Reason != NoCand;
// Prioritize instructions that read unbuffered resources by stall cycles.
if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
return;
return TryCand.Reason != NoCand;
}
// Keep clustered nodes together to encourage downstream peephole
@ -106,37 +106,37 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
if (tryGreater(TryCand.SU == TryCandNextClusterSU,
Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
return;
return TryCand.Reason != NoCand;
if (SameBoundary) {
// Weak edges are for clustering and other constraints.
if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
return;
return TryCand.Reason != NoCand;
}
// Avoid increasing the max pressure of the entire region.
if (DAG->isTrackingPressure() &&
tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
Cand, RegMax, TRI, DAG->MF))
return;
return TryCand.Reason != NoCand;
if (SameBoundary) {
// Avoid critical resource consumption and balance the schedule.
TryCand.initResourceDelta(DAG, SchedModel);
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
return;
return TryCand.Reason != NoCand;
if (tryGreater(TryCand.ResDelta.DemandedResources,
Cand.ResDelta.DemandedResources, TryCand, Cand,
ResourceDemand))
return;
return TryCand.Reason != NoCand;
// Avoid serializing long latency dependence chains.
// For acyclic path limited loops, latency was already checked above.
if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
!Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
return;
return TryCand.Reason != NoCand;
// Fall through to original instruction order.
if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
@ -150,14 +150,16 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
// Add powerpc specific heuristic only when TryCand isn't selected or
// selected as node order.
if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
return;
return true;
// There are some benefits to schedule the ADDI before the load to hide the
// latency, as RA may create a true dependency between the load and addi.
if (SameBoundary) {
if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
return;
return TryCand.Reason != NoCand;
}
return TryCand.Reason != NoCand;
}
bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
@ -172,38 +174,38 @@ bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
return false;
}
void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand) {
// From PostGenericScheduler::tryCandidate
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
return;
return true;
}
// Prioritize instructions that read unbuffered resources by stall cycles.
if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
return;
return TryCand.Reason != NoCand;
// Keep clustered nodes together.
if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster))
return;
return TryCand.Reason != NoCand;
// Avoid critical resource consumption and balance the schedule.
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
return;
return TryCand.Reason != NoCand;
if (tryGreater(TryCand.ResDelta.DemandedResources,
Cand.ResDelta.DemandedResources, TryCand, Cand,
ResourceDemand))
return;
return TryCand.Reason != NoCand;
// Avoid serializing long latency dependence chains.
if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
return;
return TryCand.Reason != NoCand;
}
// Fall through to original instruction order.
@ -215,14 +217,16 @@ void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
// Add powerpc post ra specific heuristic only when TryCand isn't selected or
// selected as node order.
if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
return;
return true;
// There are some benefits to schedule the ADDI as early as possible post ra
// to avoid stalled by vector instructions which take up all the hw units.
// And ADDI is usually used to post inc the loop indvar, which matters the
// performance.
if (biasAddiCandidate(Cand, TryCand))
return;
return TryCand.Reason != NoCand;
return TryCand.Reason != NoCand;
}
void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {

View File

@ -23,8 +23,9 @@ public:
PPCPreRASchedStrategy(const MachineSchedContext *C) :
GenericScheduler(C) {}
protected:
void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
SchedBoundary *Zone) const override;
private:
bool biasAddiLoadCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
@ -43,7 +44,7 @@ protected:
void enterMBB(MachineBasicBlock *MBB) override;
void leaveMBB() override;
void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override;
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override;
bool biasAddiCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) const;
};