forked from OSchip/llvm-project
[AMDGPU] Skip unclustered rescheduling w/o ld/st
We attempt rescheduling without load/store clustering if occupancy limits were not met with clustering. Skip this for regions which do not have any loads or stores at all. In a set of kernels I am experimenting with, this improves scheduling time by ~30%. Differential Revision: https://reviews.llvm.org/D97342
This commit is contained in:
parent
bcc1aba6c4
commit
635993f07b
|
@ -20,7 +20,8 @@ using namespace llvm;
|
|||
|
||||
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
|
||||
const MachineSchedContext *C) :
|
||||
GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { }
|
||||
GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
|
||||
MF(nullptr) { }
|
||||
|
||||
void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
|
||||
GenericScheduler::initialize(DAG);
|
||||
|
@ -279,6 +280,15 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
|
|||
if (SU->isBottomReady())
|
||||
Bot.removeReady(SU);
|
||||
|
||||
if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) {
|
||||
for (SDep &Dep : SU->Preds) {
|
||||
if (Dep.isCluster()) {
|
||||
HasClusteredNodes = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
|
||||
<< *SU->getInstr());
|
||||
return SU;
|
||||
|
@ -320,6 +330,10 @@ void GCNScheduleDAGMILive::schedule() {
|
|||
PressureBefore.print(dbgs()));
|
||||
}
|
||||
|
||||
GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
|
||||
// Set HasClusteredNodes to true for late stages where we are not interested
|
||||
// in it anymore. That way pickNode() will not scan SDep's when not needed.
|
||||
S.HasClusteredNodes = Stage >= UnclusteredReschedule;
|
||||
ScheduleDAGMILive::schedule();
|
||||
Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
|
||||
RescheduleRegions[RegionIdx] = false;
|
||||
|
@ -328,7 +342,6 @@ void GCNScheduleDAGMILive::schedule() {
|
|||
return;
|
||||
|
||||
// Check the results of scheduling.
|
||||
GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
|
||||
auto PressureAfter = getRealRegPressure();
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
|
||||
|
@ -379,6 +392,8 @@ void GCNScheduleDAGMILive::schedule() {
|
|||
PressureAfter.less(ST, PressureBefore) ||
|
||||
!RescheduleRegions[RegionIdx]) {
|
||||
Pressure[RegionIdx] = PressureAfter;
|
||||
if (!S.HasClusteredNodes && (Stage + 1) == UnclusteredReschedule)
|
||||
RescheduleRegions[RegionIdx] = false;
|
||||
return;
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
|
||||
|
@ -386,7 +401,8 @@ void GCNScheduleDAGMILive::schedule() {
|
|||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
|
||||
RescheduleRegions[RegionIdx] = true;
|
||||
RescheduleRegions[RegionIdx] = S.HasClusteredNodes ||
|
||||
(Stage + 1) != UnclusteredReschedule;
|
||||
RegionEnd = RegionBegin;
|
||||
for (MachineInstr *MI : Unsched) {
|
||||
if (MI->isDebugInstr())
|
||||
|
|
|
@ -50,6 +50,10 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
|
|||
|
||||
unsigned TargetOccupancy;
|
||||
|
||||
// schedule() has seen a clustered memory operation. Set it to false
|
||||
// before a region scheduling to know if the region had such clusters.
|
||||
bool HasClusteredNodes;
|
||||
|
||||
MachineFunction *MF;
|
||||
|
||||
public:
|
||||
|
|
Loading…
Reference in New Issue