[MachineSink] Check block prologue interference

Sinking must check for interference between the block prologue
and the instruction being sunk.
Specifically, check for clobbering of the sunk instruction's uses by the
prologue, and for overwrites of prologue-defined registers by the sunk
instruction.

Reviewed By: rampitec, ruiling

Differential Revision: https://reviews.llvm.org/D121277
This commit is contained in:
Carl Ritson 2022-03-22 11:13:13 +09:00
parent 6a7f055117
commit 8e64d84995
2 changed files with 196 additions and 4 deletions

View File

@ -1294,6 +1294,45 @@ bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
return true;
}
/// Report whether placing the sink candidate \p MI at \p End inside \p BB
/// would conflict with a target defined block prologue instruction.
///
/// Every instruction from the first non-PHI of \p BB up to (not including)
/// \p End that the target classifies as block prologue is compared against
/// each register operand of \p MI:
///  - for a use in \p MI, the prologue must not modify the register, unless
///    the register is physical and either ignorable for the target or
///    constant according to \p MRI;
///  - for a def in \p MI, the prologue must neither read the register nor
///    carry a non-dead def operand found for it.
///
/// \p MRI may be null, in which case the constant physical register
/// exemption is never applied.
static bool blockPrologueInterferes(MachineBasicBlock *BB,
                                    MachineBasicBlock::iterator End,
                                    MachineInstr &MI,
                                    const TargetRegisterInfo *TRI,
                                    const TargetInstrInfo *TII,
                                    const MachineRegisterInfo *MRI) {
  // Inserting at the very start of the block: nothing precedes the insertion
  // point, so there is no prologue to conflict with.
  if (End == BB->begin())
    return false;

  // Decides whether one operand of the sink candidate conflicts with one
  // prologue instruction.
  auto Conflicts = [&](MachineBasicBlock::iterator Prologue, const auto &Op) {
    if (!Op.isReg())
      return false;
    Register R = Op.getReg();
    if (!R)
      return false;

    if (!Op.isUse()) {
      // The candidate defines R: the prologue must not read it ...
      if (Prologue->readsRegister(R, TRI))
        return true;
      // ... and must not hold a def for it that is still live (non-dead).
      auto *PrologueDef = Prologue->findRegisterDefOperand(R, false, true, TRI);
      return PrologueDef && !PrologueDef->isDead();
    }

    // The candidate reads R: physical registers that are ignorable or
    // constant cannot be meaningfully clobbered, everything else must not be
    // modified by the prologue.
    bool Exempt =
        Register::isPhysicalRegister(R) &&
        (TII->isIgnorableUse(Op) || (MRI && MRI->isConstantPhysReg(R)));
    return !Exempt && Prologue->modifiesRegister(R, TRI);
  };

  for (MachineBasicBlock::iterator Cur = BB->getFirstNonPHI(); Cur != End;
       ++Cur) {
    // Instructions the target does not treat as block prologue are ignored.
    if (!TII->isBasicBlockPrologue(*Cur))
      continue;
    for (auto &Op : MI.operands())
      if (Conflicts(Cur, Op))
        return true;
  }

  return false;
}
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
@ -1407,6 +1446,10 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// Determine where to insert into. Skip phi nodes.
MachineBasicBlock::iterator InsertPos =
SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
return false;
}
// Collect debug users of any vreg that this inst defines.
SmallVector<MIRegs, 4> DbgUsersToSink;
@ -1805,11 +1848,19 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
}
auto DbgValsToSink = DbgValsToSinkMap.takeVector();
LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);
MachineBasicBlock::iterator InsertPos =
SuccBB->SkipPHIsAndLabels(SuccBB->begin());
if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
LLVM_DEBUG(
dbgs() << " *** Not sinking: prologue interference\n");
continue;
}
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
MachineBasicBlock::iterator InsertPos =
SuccBB->SkipPHIsAndLabels(SuccBB->begin());
performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);

View File

@ -5,6 +5,7 @@
# past block prologues which would overwrite their uses.
---
# Make sure COPY to $sgpr9 is not sunk after S_AND_SAVEEXEC_B64.
name: _amdgpu_ps_main
alignment: 1
tracksRegLiveness: true
@ -17,16 +18,16 @@ body: |
; GFX10-NEXT: successors: %bb.1(0x80000000)
; GFX10-NEXT: liveins: $sgpr4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4
; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF
; GFX10-NEXT: renamable $sgpr0_sgpr1 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, $vgpr5, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_BRANCH %bb.1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: liveins: $sgpr4:0x0000000000000003, $sgpr6, $sgpr0_sgpr1
; GFX10-NEXT: liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4
; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GFX10-NEXT: S_BRANCH %bb.2
@ -70,3 +71,143 @@ body: |
S_ENDPGM 0
...
---
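# Make sure COPY to $sgpr0_sgpr1 is not sunk after S_AND_SAVEEXEC_B64.
# The S_AND_SAVEEXEC_B64 at the start of bb.1 reads $sgpr0_sgpr1, so the COPY
# that defines it has to stay in bb.0.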
name: _amdgpu_ps_main2
alignment: 1
tracksRegLiveness: true
registers: []
liveins:
- { reg: '$sgpr4', virtual-reg: '' }
- { reg: '$sgpr6_sgpr7', virtual-reg: '' }
body: |
; GFX10-LABEL: name: _amdgpu_ps_main2
; GFX10: bb.0:
; GFX10-NEXT: successors: %bb.1(0x80000000)
; GFX10-NEXT: liveins: $sgpr4, $sgpr6_sgpr7
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4
; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF
; GFX10-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr6_sgpr7
; GFX10-NEXT: S_BRANCH %bb.1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GFX10-NEXT: S_BRANCH %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.3(0x80000000)
; GFX10-NEXT: liveins: $sgpr6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: $m0 = COPY killed renamable $sgpr6
; GFX10-NEXT: S_BRANCH %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.3:
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
liveins: $sgpr4, $sgpr6_sgpr7
renamable $sgpr9 = COPY $sgpr4
renamable $vgpr5 = IMPLICIT_DEF
renamable $sgpr0_sgpr1 = COPY $sgpr6_sgpr7
S_BRANCH %bb.1
bb.1:
successors: %bb.2(0x40000000), %bb.8(0x40000000)
liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
$sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
S_CBRANCH_EXECZ %bb.8, implicit $exec
S_BRANCH %bb.2
bb.2:
successors: %bb.8(0x40000000)
liveins: $sgpr6
$m0 = COPY killed renamable $sgpr6
S_BRANCH %bb.8
bb.8:
S_ENDPGM 0
...
---
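# Make sure COPY to $sgpr2_sgpr3 is not sunk after S_AND_SAVEEXEC_B32.
# The S_AND_SAVEEXEC_B32 at the start of bb.1 defines $sgpr2, and
# $sgpr2_sgpr3 is read later in bb.1 by S_NOP, so the COPY has to stay in bb.0.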
name: _amdgpu_ps_main3
alignment: 1
tracksRegLiveness: true
registers: []
liveins:
- { reg: '$sgpr6_sgpr7', virtual-reg: '' }
- { reg: '$sgpr8', virtual-reg: '' }
body: |
; GFX10-LABEL: name: _amdgpu_ps_main3
; GFX10: bb.0:
; GFX10-NEXT: successors: %bb.1(0x80000000)
; GFX10-NEXT: liveins: $sgpr6_sgpr7, $sgpr8
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF
; GFX10-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
; GFX10-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr6_sgpr7
; GFX10-NEXT: S_BRANCH %bb.1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: liveins: $sgpr6, $sgpr8, $sgpr0_sgpr1, $sgpr2_sgpr3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: $sgpr2 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX10-NEXT: S_NOP 0, implicit $sgpr2_sgpr3
; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GFX10-NEXT: S_BRANCH %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.3(0x80000000)
; GFX10-NEXT: liveins: $sgpr6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: $m0 = COPY killed renamable $sgpr6
; GFX10-NEXT: S_BRANCH %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.3:
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
liveins: $sgpr6_sgpr7, $sgpr8
renamable $vgpr5 = IMPLICIT_DEF
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
renamable $sgpr2_sgpr3 = COPY $sgpr6_sgpr7
S_BRANCH %bb.1
bb.1:
successors: %bb.2(0x40000000), %bb.8(0x40000000)
liveins: $sgpr6, $sgpr8, $sgpr0_sgpr1, $sgpr2_sgpr3
$sgpr2 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
$sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
S_NOP 0, implicit $sgpr2_sgpr3
S_CBRANCH_EXECZ %bb.8, implicit $exec
S_BRANCH %bb.2
bb.2:
successors: %bb.8(0x40000000)
liveins: $sgpr6
$m0 = COPY killed renamable $sgpr6
S_BRANCH %bb.8
bb.8:
S_ENDPGM 0
...