forked from OSchip/llvm-project
[AMDGPU] Fix missed SI_RETURN_TO_EPILOG in pre-emit peephole
SIPreEmitPeephole does not process all terminators, which means it can fail to handle SI_RETURN_TO_EPILOG if immediately preceded by a branch to the early exit block. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D85872
This commit is contained in:
parent
c58f1fe2ae
commit
d538c5837a
|
@ -266,16 +266,24 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
MachineBasicBlock::iterator MBBE = MBB.getFirstTerminator();
|
||||
if (MBBE != MBB.end()) {
|
||||
MachineInstr &MI = *MBBE;
|
||||
MachineBasicBlock::iterator TermI = MBBE;
|
||||
// Check first terminator for VCC branches to optimize
|
||||
if (TermI != MBB.end()) {
|
||||
MachineInstr &MI = *TermI;
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::S_CBRANCH_VCCZ:
|
||||
case AMDGPU::S_CBRANCH_VCCNZ:
|
||||
Changed |= optimizeVccBranch(MI);
|
||||
continue;
|
||||
case AMDGPU::SI_RETURN_TO_EPILOG:
|
||||
// FIXME: This is not an optimization and should be
|
||||
// moved somewhere else.
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Check all terminators for SI_RETURN_TO_EPILOG
|
||||
// FIXME: This is not an optimization and should be moved somewhere else.
|
||||
while (TermI != MBB.end()) {
|
||||
MachineInstr &MI = *TermI;
|
||||
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
|
||||
assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
|
||||
|
||||
// Graphics shaders returning non-void shouldn't contain S_ENDPGM,
|
||||
|
@ -293,11 +301,11 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
|
|||
.addMBB(EmptyMBBAtEnd);
|
||||
MI.eraseFromParent();
|
||||
MBBE = MBB.getFirstTerminator();
|
||||
TermI = MBBE;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
TermI++;
|
||||
}
|
||||
|
||||
if (!ST.hasVGPRIndexMode())
|
||||
|
|
|
@ -115,14 +115,15 @@ define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(floa
|
|||
; GCN: liveins: $sgpr0_sgpr1
|
||||
; GCN: $exec = S_MOV_B64 0
|
||||
; GCN: bb.6.end:
|
||||
; GCN: successors: %bb.7(0x80000000)
|
||||
; GCN: successors: %bb.7(0x40000000), %bb.8(0x40000000)
|
||||
; GCN: liveins: $sgpr0_sgpr1
|
||||
; GCN: $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
|
||||
; GCN: S_CBRANCH_EXECZ %bb.7, implicit $exec
|
||||
; GCN: SI_RETURN_TO_EPILOG undef $vgpr0, undef $vgpr1, undef $vgpr2, undef $vgpr3
|
||||
; GCN: S_BRANCH %bb.8
|
||||
; GCN: bb.7:
|
||||
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; GCN: S_ENDPGM 0
|
||||
; GCN: bb.8:
|
||||
entry:
|
||||
%.i0 = fdiv reassoc nnan nsz arcp contract afn float 1.000000e+00, %val
|
||||
%cmp0 = fcmp olt float %.i0, 0.000000e+00
|
||||
|
|
Loading…
Reference in New Issue