AMDGPU: Fix SI_IF lowering when the save exec reg has terminator uses

Reverts part of 6524a7a2b9. Since that
commit, the expansion was ignoring the actual save exec register
produced by the instruction, and looking at other instructions. I do
not understand why it was looking at other instructions, but relying
on this scan was wrong.

Fixes verifier errors after SI_IF is tail duplicated, which should be
correct to do. The results were fed into a phi, which was lowered to
the S_MOV_B64_term instructions.
This commit is contained in:
Matt Arsenault 2019-12-27 13:11:06 -05:00 committed by Matt Arsenault
parent 29e646fe65
commit 6e1770821f
2 changed files with 78 additions and 23 deletions

View File

@ -98,8 +98,6 @@ private:
void emitLoop(MachineInstr &MI);
void emitEndCf(MachineInstr &MI);
Register getSaveExec(MachineInstr* MI);
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
SmallVectorImpl<MachineOperand> &Src) const;
@ -177,29 +175,11 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
return true;
}
Register SILowerControlFlow::getSaveExec(MachineInstr *MI) {
MachineBasicBlock *MBB = MI->getParent();
MachineOperand &SaveExec = MI->getOperand(0);
assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister);
Register SaveExecReg = SaveExec.getReg();
unsigned FalseTermOpc =
TII->isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
MachineBasicBlock::iterator I = (MI);
MachineBasicBlock::iterator J = std::next(I);
if (J != MBB->end() && J->getOpcode() == FalseTermOpc &&
J->getOperand(1).isReg() && J->getOperand(1).getReg() == SaveExecReg) {
SaveExecReg = J->getOperand(0).getReg();
J->eraseFromParent();
}
return SaveExecReg;
}
void SILowerControlFlow::emitIf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
Register SaveExecReg = getSaveExec(&MI);
Register SaveExecReg = MI.getOperand(0).getReg();
MachineOperand& Cond = MI.getOperand(1);
assert(Cond.getSubReg() == AMDGPU::NoSubRegister);
@ -282,7 +262,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
Register DstReg = getSaveExec(&MI);
Register DstReg = MI.getOperand(0).getReg();
bool ExecModified = MI.getOperand(3).getImm() != 0;
MachineBasicBlock::iterator Start = MBB.begin();
@ -354,7 +334,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
auto Dst = getSaveExec(&MI);
auto Dst = MI.getOperand(0).getReg();
// Skip ANDing with exec if the break condition is already masked by exec
// because it is a V_CMP in the same basic block. (We know the break

View File

@ -0,0 +1,75 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-lower-control-flow -verify-machineinstrs -o - %s | FileCheck %s
# The save exec result register of SI_IF is used by other terminators
# inserted to behave as a lowered phi. The output register of SI_IF
# was ignored, and the def was removed, so the S_MOV_B64_term uses
# would fail the verifier.
---
name: si_if_use
alignment: 1
legalized: true
regBankSelected: true
selected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: si_if_use
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[COPY]], killed [[COPY1]], implicit $exec
; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
; CHECK: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
; CHECK: S_BRANCH %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]]
; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
; CHECK: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]]
; CHECK: bb.2:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]]
; CHECK: $exec = S_OR_B64 $exec, killed [[COPY5]], implicit-def $scc
; CHECK: S_SLEEP 1
; CHECK: [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
; CHECK: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY6]], implicit-def dead $scc
; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_1]]
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec
; CHECK: [[S_MOV_B64_term2:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec
; CHECK: S_BRANCH %bb.2
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
%0:vgpr_32 = COPY killed $vgpr0
%1:vgpr_32 = COPY killed $vgpr1
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
%10:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
%14:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec
%13:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec
S_BRANCH %bb.2
bb.1:
%11:sreg_64_xexec = COPY %13
dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
%14:sreg_64_xexec = COPY %11
bb.2:
%12:sreg_64_xexec = COPY %14
SI_END_CF killed %12, implicit-def $exec, implicit-def dead $scc, implicit $exec
S_SLEEP 1
%9:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
%14:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec
%13:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec
S_BRANCH %bb.2
...