forked from OSchip/llvm-project
[AMDGPU] Attempt to reschedule without clustering
We want to have more load/store clustering, but we also want to maintain low register pressure — these are opposing goals. Allow the scheduler to reschedule regions without mutations applied if we hit a register limit. Differential Revision: https://reviews.llvm.org/D73386
This commit is contained in:
parent
97711228fd
commit
53eb0f8c07
|
@ -316,13 +316,13 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
|
|||
ST(MF.getSubtarget<GCNSubtarget>()),
|
||||
MFI(*MF.getInfo<SIMachineFunctionInfo>()),
|
||||
StartingOccupancy(MFI.getOccupancy()),
|
||||
MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {
|
||||
MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
|
||||
}
|
||||
|
||||
void GCNScheduleDAGMILive::schedule() {
|
||||
if (Stage == 0) {
|
||||
if (Stage == Collect) {
|
||||
// Just record regions at the first pass.
|
||||
Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
|
||||
return;
|
||||
|
@ -348,6 +348,7 @@ void GCNScheduleDAGMILive::schedule() {
|
|||
|
||||
ScheduleDAGMILive::schedule();
|
||||
Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
|
||||
RescheduleRegions[RegionIdx] = false;
|
||||
|
||||
if (!LIS)
|
||||
return;
|
||||
|
@ -389,20 +390,28 @@ void GCNScheduleDAGMILive::schedule() {
|
|||
<< MinOccupancy << ".\n");
|
||||
}
|
||||
|
||||
unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
|
||||
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
|
||||
if (PressureAfter.getVGPRNum() > MaxVGPRs ||
|
||||
PressureAfter.getSGPRNum() > MaxSGPRs)
|
||||
RescheduleRegions[RegionIdx] = true;
|
||||
|
||||
if (WavesAfter >= MinOccupancy) {
|
||||
unsigned TotalVGPRs = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
|
||||
unsigned TotalSGPRs = AMDGPU::IsaInfo::getAddressableNumSGPRs(&ST);
|
||||
if (WavesAfter > MFI.getMinWavesPerEU() ||
|
||||
if (Stage == UnclusteredReschedule &&
|
||||
!PressureAfter.less(ST, PressureBefore)) {
|
||||
LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
|
||||
} else if (WavesAfter > MFI.getMinWavesPerEU() ||
|
||||
PressureAfter.less(ST, PressureBefore) ||
|
||||
(TotalVGPRs >= PressureAfter.getVGPRNum() &&
|
||||
TotalSGPRs >= PressureAfter.getSGPRNum())) {
|
||||
!RescheduleRegions[RegionIdx]) {
|
||||
Pressure[RegionIdx] = PressureAfter;
|
||||
return;
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
|
||||
RescheduleRegions[RegionIdx] = true;
|
||||
RegionEnd = RegionBegin;
|
||||
for (MachineInstr *MI : Unsched) {
|
||||
if (MI->isDebugInstr())
|
||||
|
@ -532,33 +541,55 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
|
|||
|
||||
LiveIns.resize(Regions.size());
|
||||
Pressure.resize(Regions.size());
|
||||
RescheduleRegions.resize(Regions.size());
|
||||
RescheduleRegions.set();
|
||||
|
||||
if (!Regions.empty())
|
||||
BBLiveInMap = getBBLiveInMap();
|
||||
|
||||
std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
|
||||
|
||||
do {
|
||||
Stage++;
|
||||
RegionIdx = 0;
|
||||
MachineBasicBlock *MBB = nullptr;
|
||||
|
||||
if (Stage > 1) {
|
||||
if (Stage > InitialSchedule) {
|
||||
if (!LIS)
|
||||
break;
|
||||
|
||||
// Retry function scheduling if we found resulting occupancy and it is
|
||||
// lower than used for first pass scheduling. This will give more freedom
|
||||
// to schedule low register pressure blocks.
|
||||
// Code is partially copied from MachineSchedulerBase::scheduleRegions().
|
||||
|
||||
if (!LIS || StartingOccupancy <= MinOccupancy)
|
||||
break;
|
||||
if (Stage == UnclusteredReschedule) {
|
||||
if (RescheduleRegions.none())
|
||||
continue;
|
||||
LLVM_DEBUG(dbgs() <<
|
||||
"Retrying function scheduling without clustering.\n");
|
||||
}
|
||||
|
||||
LLVM_DEBUG(
|
||||
dbgs()
|
||||
<< "Retrying function scheduling with lowest recorded occupancy "
|
||||
<< MinOccupancy << ".\n");
|
||||
if (Stage == ClusteredLowOccupancyReschedule) {
|
||||
if (StartingOccupancy <= MinOccupancy)
|
||||
break;
|
||||
|
||||
S.setTargetOccupancy(MinOccupancy);
|
||||
LLVM_DEBUG(
|
||||
dbgs()
|
||||
<< "Retrying function scheduling with lowest recorded occupancy "
|
||||
<< MinOccupancy << ".\n");
|
||||
|
||||
S.setTargetOccupancy(MinOccupancy);
|
||||
}
|
||||
}
|
||||
|
||||
if (Stage == UnclusteredReschedule)
|
||||
SavedMutations.swap(Mutations);
|
||||
|
||||
for (auto Region : Regions) {
|
||||
if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx])
|
||||
continue;
|
||||
|
||||
RegionBegin = Region.first;
|
||||
RegionEnd = Region.second;
|
||||
|
||||
|
@ -566,7 +597,7 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
|
|||
if (MBB) finishBlock();
|
||||
MBB = RegionBegin->getParent();
|
||||
startBlock(MBB);
|
||||
if (Stage == 1)
|
||||
if (Stage == InitialSchedule)
|
||||
computeBlockPressure(MBB);
|
||||
}
|
||||
|
||||
|
@ -594,5 +625,7 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
|
|||
}
|
||||
finishBlock();
|
||||
|
||||
} while (Stage < 2);
|
||||
if (Stage == UnclusteredReschedule)
|
||||
SavedMutations.swap(Mutations);
|
||||
} while (Stage != LastStage);
|
||||
}
|
||||
|
|
|
@ -64,6 +64,14 @@ public:
|
|||
|
||||
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
|
||||
|
||||
enum : unsigned {
|
||||
Collect,
|
||||
InitialSchedule,
|
||||
UnclusteredReschedule,
|
||||
ClusteredLowOccupancyReschedule,
|
||||
LastStage = ClusteredLowOccupancyReschedule
|
||||
};
|
||||
|
||||
const GCNSubtarget &ST;
|
||||
|
||||
SIMachineFunctionInfo &MFI;
|
||||
|
@ -84,6 +92,10 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
|
|||
SmallVector<std::pair<MachineBasicBlock::iterator,
|
||||
MachineBasicBlock::iterator>, 32> Regions;
|
||||
|
||||
// Records if a region is not yet scheduled, or schedule has been reverted,
|
||||
// or we generally desire to reschedule it.
|
||||
BitVector RescheduleRegions;
|
||||
|
||||
// Region live-in cache.
|
||||
SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
|
||||
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; Interleave loads and stores to fit into 9 VGPR limit.
|
||||
; This requires avoiding load/store clustering.
|
||||
|
||||
; GCN: global_load_dwordx4
|
||||
; GCN: global_store_dwordx4
|
||||
; GCN: global_load_dwordx4
|
||||
; GCN: global_store_dwordx4
|
||||
; GCN: global_load_dwordx4
|
||||
; GCN: global_store_dwordx4
|
||||
; GCN: NumVgprs: {{[0-9]$}}
|
||||
; GCN: ScratchSize: 0{{$}}
|
||||
|
||||
; Kernel deliberately capped at 9 VGPRs (attribute #1) so the scheduler must
; interleave the three 4-dword loads with the three 4-dword stores instead of
; clustering all loads first, which would need more live registers at once.
define amdgpu_kernel void @load_store_max_9vgprs(<4 x i32> addrspace(1)* nocapture noalias readonly %arg, <4 x i32> addrspace(1)* nocapture noalias %arg1) #1 {
bb:
  ; Per-lane index selecting this work-item's slice of the input array.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %in.base = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i32 %tid
  ; Load three <4 x i32> values from elements 1, 3 and 5 past the base.
  %in.ptr1 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %in.base, i32 1
  %val1 = load <4 x i32>, <4 x i32> addrspace(1)* %in.ptr1, align 4
  %in.ptr3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %in.base, i32 3
  %val3 = load <4 x i32>, <4 x i32> addrspace(1)* %in.ptr3, align 4
  %in.ptr5 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %in.base, i32 5
  %val5 = load <4 x i32>, <4 x i32> addrspace(1)* %in.ptr5, align 4
  ; Store them to output elements 0, 3 and 5.
  store <4 x i32> %val1, <4 x i32> addrspace(1)* %arg1, align 4
  %out.ptr3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 3
  store <4 x i32> %val3, <4 x i32> addrspace(1)* %out.ptr3, align 4
  %out.ptr5 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 5
  store <4 x i32> %val5, <4 x i32> addrspace(1)* %out.ptr5, align 4
  ret void
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { "amdgpu-num-vgpr"="9" }
|
Loading…
Reference in New Issue