forked from OSchip/llvm-project
[AMDGPU] Avoid second rescheduling for some regions
If a region was not constrained by high register pressure and was not rescheduled without clustering, we can skip rescheduling it in the ClusteredLowOccupancyReschedule stage. This improves scheduling speed by 25% on some kernels. Differential Revision: https://reviews.llvm.org/D97506
This commit is contained in:
parent
635993f07b
commit
799c50fe93
|
@ -21,7 +21,7 @@ using namespace llvm;
|
||||||
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
|
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
|
||||||
const MachineSchedContext *C) :
|
const MachineSchedContext *C) :
|
||||||
GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
|
GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
|
||||||
MF(nullptr) { }
|
HasExcessPressure(false), MF(nullptr) { }
|
||||||
|
|
||||||
void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
|
void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
|
||||||
GenericScheduler::initialize(DAG);
|
GenericScheduler::initialize(DAG);
|
||||||
|
@ -104,11 +104,13 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
|
||||||
// marked as RegExcess in tryCandidate() when they are compared with
|
// marked as RegExcess in tryCandidate() when they are compared with
|
||||||
// instructions that increase the register pressure.
|
// instructions that increase the register pressure.
|
||||||
if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
|
if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
|
||||||
|
HasExcessPressure = true;
|
||||||
Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
|
Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
|
||||||
Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
|
Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
|
if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
|
||||||
|
HasExcessPressure = true;
|
||||||
Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
|
Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
|
||||||
Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
|
Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
|
||||||
}
|
}
|
||||||
|
@ -122,6 +124,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
|
||||||
int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
|
int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
|
||||||
|
|
||||||
if (SGPRDelta >= 0 || VGPRDelta >= 0) {
|
if (SGPRDelta >= 0 || VGPRDelta >= 0) {
|
||||||
|
HasExcessPressure = true;
|
||||||
if (SGPRDelta > VGPRDelta) {
|
if (SGPRDelta > VGPRDelta) {
|
||||||
Cand.RPDelta.CriticalMax =
|
Cand.RPDelta.CriticalMax =
|
||||||
PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
|
PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
|
||||||
|
@ -331,12 +334,17 @@ void GCNScheduleDAGMILive::schedule() {
|
||||||
}
|
}
|
||||||
|
|
||||||
GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
|
GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
|
||||||
// Set HasClusteredNodes to true for late stages where we are not interested
|
// Set HasClusteredNodes to true for late stages where we have already
|
||||||
// in it anymore. That way pickNode() will not scan SDep's when not needed.
|
// collected it. That way pickNode() will not scan SDep's when not needed.
|
||||||
S.HasClusteredNodes = Stage >= UnclusteredReschedule;
|
S.HasClusteredNodes = Stage > InitialSchedule;
|
||||||
|
S.HasExcessPressure = false;
|
||||||
ScheduleDAGMILive::schedule();
|
ScheduleDAGMILive::schedule();
|
||||||
Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
|
Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
|
||||||
RescheduleRegions[RegionIdx] = false;
|
RescheduleRegions[RegionIdx] = false;
|
||||||
|
if (Stage == InitialSchedule && S.HasClusteredNodes)
|
||||||
|
RegionsWithClusters[RegionIdx] = true;
|
||||||
|
if (S.HasExcessPressure)
|
||||||
|
RegionsWithHighRP[RegionIdx] = true;
|
||||||
|
|
||||||
if (!LIS)
|
if (!LIS)
|
||||||
return;
|
return;
|
||||||
|
@ -381,8 +389,10 @@ void GCNScheduleDAGMILive::schedule() {
|
||||||
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
|
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
|
||||||
if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
|
if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
|
||||||
PressureAfter.getAGPRNum() > MaxVGPRs ||
|
PressureAfter.getAGPRNum() > MaxVGPRs ||
|
||||||
PressureAfter.getSGPRNum() > MaxSGPRs)
|
PressureAfter.getSGPRNum() > MaxSGPRs) {
|
||||||
RescheduleRegions[RegionIdx] = true;
|
RescheduleRegions[RegionIdx] = true;
|
||||||
|
RegionsWithHighRP[RegionIdx] = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (WavesAfter >= MinOccupancy) {
|
if (WavesAfter >= MinOccupancy) {
|
||||||
if (Stage == UnclusteredReschedule &&
|
if (Stage == UnclusteredReschedule &&
|
||||||
|
@ -392,7 +402,8 @@ void GCNScheduleDAGMILive::schedule() {
|
||||||
PressureAfter.less(ST, PressureBefore) ||
|
PressureAfter.less(ST, PressureBefore) ||
|
||||||
!RescheduleRegions[RegionIdx]) {
|
!RescheduleRegions[RegionIdx]) {
|
||||||
Pressure[RegionIdx] = PressureAfter;
|
Pressure[RegionIdx] = PressureAfter;
|
||||||
if (!S.HasClusteredNodes && (Stage + 1) == UnclusteredReschedule)
|
if (!RegionsWithClusters[RegionIdx] &&
|
||||||
|
(Stage + 1) == UnclusteredReschedule)
|
||||||
RescheduleRegions[RegionIdx] = false;
|
RescheduleRegions[RegionIdx] = false;
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
|
@ -401,7 +412,7 @@ void GCNScheduleDAGMILive::schedule() {
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
|
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
|
||||||
RescheduleRegions[RegionIdx] = S.HasClusteredNodes ||
|
RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
|
||||||
(Stage + 1) != UnclusteredReschedule;
|
(Stage + 1) != UnclusteredReschedule;
|
||||||
RegionEnd = RegionBegin;
|
RegionEnd = RegionBegin;
|
||||||
for (MachineInstr *MI : Unsched) {
|
for (MachineInstr *MI : Unsched) {
|
||||||
|
@ -535,7 +546,11 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
|
||||||
LiveIns.resize(Regions.size());
|
LiveIns.resize(Regions.size());
|
||||||
Pressure.resize(Regions.size());
|
Pressure.resize(Regions.size());
|
||||||
RescheduleRegions.resize(Regions.size());
|
RescheduleRegions.resize(Regions.size());
|
||||||
|
RegionsWithClusters.resize(Regions.size());
|
||||||
|
RegionsWithHighRP.resize(Regions.size());
|
||||||
RescheduleRegions.set();
|
RescheduleRegions.set();
|
||||||
|
RegionsWithClusters.reset();
|
||||||
|
RegionsWithHighRP.reset();
|
||||||
|
|
||||||
if (!Regions.empty())
|
if (!Regions.empty())
|
||||||
BBLiveInMap = getBBLiveInMap();
|
BBLiveInMap = getBBLiveInMap();
|
||||||
|
@ -580,7 +595,10 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
|
||||||
SavedMutations.swap(Mutations);
|
SavedMutations.swap(Mutations);
|
||||||
|
|
||||||
for (auto Region : Regions) {
|
for (auto Region : Regions) {
|
||||||
if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) {
|
if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) ||
|
||||||
|
(Stage == ClusteredLowOccupancyReschedule &&
|
||||||
|
!RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {
|
||||||
|
|
||||||
++RegionIdx;
|
++RegionIdx;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,6 +54,10 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
|
||||||
// before a region scheduling to know if the region had such clusters.
|
// before a region scheduling to know if the region had such clusters.
|
||||||
bool HasClusteredNodes;
|
bool HasClusteredNodes;
|
||||||
|
|
||||||
|
// schedule() has seen an excess register pressure and had to track
|
||||||
|
// register pressure for actual scheduling heuristics.
|
||||||
|
bool HasExcessPressure;
|
||||||
|
|
||||||
MachineFunction *MF;
|
MachineFunction *MF;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -100,6 +104,12 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
|
||||||
// or we generally desire to reschedule it.
|
// or we generally desire to reschedule it.
|
||||||
BitVector RescheduleRegions;
|
BitVector RescheduleRegions;
|
||||||
|
|
||||||
|
// Record regions which use clustered loads/stores.
|
||||||
|
BitVector RegionsWithClusters;
|
||||||
|
|
||||||
|
// Record regions with high register pressure.
|
||||||
|
BitVector RegionsWithHighRP;
|
||||||
|
|
||||||
// Region live-in cache.
|
// Region live-in cache.
|
||||||
SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
|
SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue