[AMDGPU] Skip unclustered rescheduling w/o ld/st

We attempt rescheduling without load/store clustering
if occupancy limits were not met with clustering. Skip this
for regions which do not have any loads or stores at all.

In a set of kernels I am experimenting with, this improves
scheduling time by ~30%.

Differential Revision: https://reviews.llvm.org/D97342
This commit is contained in:
Stanislav Mekhanoshin 2021-02-23 15:26:12 -08:00
parent bcc1aba6c4
commit 635993f07b
2 changed files with 23 additions and 3 deletions

View File

@ -20,7 +20,8 @@ using namespace llvm;
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
const MachineSchedContext *C) :
GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { }
GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
MF(nullptr) { }
void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
GenericScheduler::initialize(DAG);
@ -279,6 +280,15 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
if (SU->isBottomReady())
Bot.removeReady(SU);
if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) {
for (SDep &Dep : SU->Preds) {
if (Dep.isCluster()) {
HasClusteredNodes = true;
break;
}
}
}
LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
<< *SU->getInstr());
return SU;
@ -320,6 +330,10 @@ void GCNScheduleDAGMILive::schedule() {
PressureBefore.print(dbgs()));
}
GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
// Set HasClusteredNodes to true for late stages where we are not interested
// in it anymore. That way pickNode() will not scan SDep's when not needed.
S.HasClusteredNodes = Stage >= UnclusteredReschedule;
ScheduleDAGMILive::schedule();
Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
RescheduleRegions[RegionIdx] = false;
@ -328,7 +342,6 @@ void GCNScheduleDAGMILive::schedule() {
return;
// Check the results of scheduling.
GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
auto PressureAfter = getRealRegPressure();
LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
@ -379,6 +392,8 @@ void GCNScheduleDAGMILive::schedule() {
PressureAfter.less(ST, PressureBefore) ||
!RescheduleRegions[RegionIdx]) {
Pressure[RegionIdx] = PressureAfter;
if (!S.HasClusteredNodes && (Stage + 1) == UnclusteredReschedule)
RescheduleRegions[RegionIdx] = false;
return;
} else {
LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
@ -386,7 +401,8 @@ void GCNScheduleDAGMILive::schedule() {
}
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
RescheduleRegions[RegionIdx] = true;
RescheduleRegions[RegionIdx] = S.HasClusteredNodes ||
(Stage + 1) != UnclusteredReschedule;
RegionEnd = RegionBegin;
for (MachineInstr *MI : Unsched) {
if (MI->isDebugInstr())

View File

@ -50,6 +50,10 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
unsigned TargetOccupancy;
// schedule() has seen a clustered memory operation. Set it to false
// before scheduling a region to know if the region had such clusters.
bool HasClusteredNodes;
MachineFunction *MF;
public: