forked from OSchip/llvm-project
MachineScheduler: Refactor setPolicy() to limit computing remaining latency
Summary: Computing the remaining latency can be very expensive especially on graphs of N nodes where the number of edges approaches N^2. This reduces the compile time of a pathological case with the AMDGPU backend from ~7.5 seconds to ~3 seconds. This test case has a basic block with 2655 stores, each with somewhere between 500 and 1500 successors and predecessors. Reviewers: atrick, MatzeB, airlied, mareko Reviewed By: mareko Subscribers: tpr, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D50486 llvm-svn: 340346
This commit is contained in:
parent
6a2a5c99c7
commit
ecd6aa5be2
|
@ -895,6 +895,10 @@ protected:
|
|||
#ifndef NDEBUG
|
||||
void traceCandidate(const SchedCandidate &Cand);
|
||||
#endif
|
||||
|
||||
private:
|
||||
bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
|
||||
bool ComputeRemLatency, unsigned &RemLatency) const;
|
||||
};
|
||||
|
||||
// Utility functions used by heuristics in tryCandidate().
|
||||
|
|
|
@ -2397,6 +2397,52 @@ initResourceDelta(const ScheduleDAGMI *DAG,
|
|||
}
|
||||
}
|
||||
|
||||
/// Compute remaining latency. We need this both to determine whether the
|
||||
/// overall schedule has become latency-limited and whether the instructions
|
||||
/// outside this zone are resource or latency limited.
|
||||
///
|
||||
/// The "dependent" latency is updated incrementally during scheduling as the
|
||||
/// max height/depth of scheduled nodes minus the cycles since it was
|
||||
/// scheduled:
|
||||
/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
|
||||
///
|
||||
/// The "independent" latency is the max ready queue depth:
|
||||
/// ILat = max N.depth for N in Available|Pending
|
||||
///
|
||||
/// RemainingLatency is the greater of independent and dependent latency.
|
||||
///
|
||||
/// These computations are expensive, especially in DAGs with many edges, so
|
||||
/// only do them if necessary.
|
||||
static unsigned computeRemLatency(SchedBoundary &CurrZone) {
|
||||
unsigned RemLatency = CurrZone.getDependentLatency();
|
||||
RemLatency = std::max(RemLatency,
|
||||
CurrZone.findMaxLatency(CurrZone.Available.elements()));
|
||||
RemLatency = std::max(RemLatency,
|
||||
CurrZone.findMaxLatency(CurrZone.Pending.elements()));
|
||||
return RemLatency;
|
||||
}
|
||||
|
||||
/// Returns true if the current cycle plus remaning latency is greater than
|
||||
/// the cirtical path in the scheduling region.
|
||||
bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
|
||||
SchedBoundary &CurrZone,
|
||||
bool ComputeRemLatency,
|
||||
unsigned &RemLatency) const {
|
||||
// The current cycle is already greater than the critical path, so we are
|
||||
// already latnecy limited and don't need to compute the remaining latency.
|
||||
if (CurrZone.getCurrCycle() > Rem.CriticalPath)
|
||||
return true;
|
||||
|
||||
// If we haven't scheduled anything yet, then we aren't latency limited.
|
||||
if (CurrZone.getCurrCycle() == 0)
|
||||
return false;
|
||||
|
||||
if (ComputeRemLatency)
|
||||
RemLatency = computeRemLatency(CurrZone);
|
||||
|
||||
return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
|
||||
}
|
||||
|
||||
/// Set the CandPolicy given a scheduling zone given the current resources and
|
||||
/// latencies inside and outside the zone.
|
||||
void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
|
||||
|
@ -2406,46 +2452,32 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
|
|||
// inside and outside this zone. Potential stalls should be considered before
|
||||
// following this policy.
|
||||
|
||||
// Compute remaining latency. We need this both to determine whether the
|
||||
// overall schedule has become latency-limited and whether the instructions
|
||||
// outside this zone are resource or latency limited.
|
||||
//
|
||||
// The "dependent" latency is updated incrementally during scheduling as the
|
||||
// max height/depth of scheduled nodes minus the cycles since it was
|
||||
// scheduled:
|
||||
// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
|
||||
//
|
||||
// The "independent" latency is the max ready queue depth:
|
||||
// ILat = max N.depth for N in Available|Pending
|
||||
//
|
||||
// RemainingLatency is the greater of independent and dependent latency.
|
||||
unsigned RemLatency = CurrZone.getDependentLatency();
|
||||
RemLatency = std::max(RemLatency,
|
||||
CurrZone.findMaxLatency(CurrZone.Available.elements()));
|
||||
RemLatency = std::max(RemLatency,
|
||||
CurrZone.findMaxLatency(CurrZone.Pending.elements()));
|
||||
|
||||
// Compute the critical resource outside the zone.
|
||||
unsigned OtherCritIdx = 0;
|
||||
unsigned OtherCount =
|
||||
OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
|
||||
|
||||
bool OtherResLimited = false;
|
||||
if (SchedModel->hasInstrSchedModel())
|
||||
unsigned RemLatency = 0;
|
||||
bool RemLatencyComputed = false;
|
||||
if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
|
||||
RemLatency = computeRemLatency(CurrZone);
|
||||
RemLatencyComputed = true;
|
||||
OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
|
||||
OtherCount, RemLatency);
|
||||
}
|
||||
|
||||
// Schedule aggressively for latency in PostRA mode. We don't check for
|
||||
// acyclic latency during PostRA, and highly out-of-order processors will
|
||||
// skip PostRA scheduling.
|
||||
if (!OtherResLimited) {
|
||||
if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
|
||||
Policy.ReduceLatency |= true;
|
||||
LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
|
||||
<< " RemainingLatency " << RemLatency << " + "
|
||||
<< CurrZone.getCurrCycle() << "c > CritPath "
|
||||
<< Rem.CriticalPath << "\n");
|
||||
}
|
||||
if (!OtherResLimited &&
|
||||
(IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
|
||||
RemLatency))) {
|
||||
Policy.ReduceLatency |= true;
|
||||
LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
|
||||
<< " RemainingLatency " << RemLatency << " + "
|
||||
<< CurrZone.getCurrCycle() << "c > CritPath "
|
||||
<< Rem.CriticalPath << "\n");
|
||||
}
|
||||
// If the same resource is limiting inside and outside the zone, do nothing.
|
||||
if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
|
||||
|
|
Loading…
Reference in New Issue