forked from OSchip/llvm-project
AMDGPU: Fix SI_IF lowering when the save exec reg has terminator uses
Reverts part of 6524a7a2b9
. Since that
commit, the expansion was ignoring the actual save exec register
produced by the instruction, and looking at other instructions. I do
not understand why it was looking at other instructions, but relying
on this scan was wrong.
Fixes verifier errors after SI_IF is tail duplicated, which should be
correct to do. The results were fed into a phi, which was lowered to
the S_MOV_B64_term instructions.
This commit is contained in:
parent
29e646fe65
commit
6e1770821f
|
@ -98,8 +98,6 @@ private:
|
|||
void emitLoop(MachineInstr &MI);
|
||||
void emitEndCf(MachineInstr &MI);
|
||||
|
||||
Register getSaveExec(MachineInstr* MI);
|
||||
|
||||
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MachineOperand> &Src) const;
|
||||
|
||||
|
@ -177,29 +175,11 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
|
|||
return true;
|
||||
}
|
||||
|
||||
Register SILowerControlFlow::getSaveExec(MachineInstr *MI) {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
MachineOperand &SaveExec = MI->getOperand(0);
|
||||
assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister);
|
||||
|
||||
Register SaveExecReg = SaveExec.getReg();
|
||||
unsigned FalseTermOpc =
|
||||
TII->isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
|
||||
MachineBasicBlock::iterator I = (MI);
|
||||
MachineBasicBlock::iterator J = std::next(I);
|
||||
if (J != MBB->end() && J->getOpcode() == FalseTermOpc &&
|
||||
J->getOperand(1).isReg() && J->getOperand(1).getReg() == SaveExecReg) {
|
||||
SaveExecReg = J->getOperand(0).getReg();
|
||||
J->eraseFromParent();
|
||||
}
|
||||
return SaveExecReg;
|
||||
}
|
||||
|
||||
void SILowerControlFlow::emitIf(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
MachineBasicBlock::iterator I(&MI);
|
||||
Register SaveExecReg = getSaveExec(&MI);
|
||||
Register SaveExecReg = MI.getOperand(0).getReg();
|
||||
MachineOperand& Cond = MI.getOperand(1);
|
||||
assert(Cond.getSubReg() == AMDGPU::NoSubRegister);
|
||||
|
||||
|
@ -282,7 +262,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
|
|||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
|
||||
Register DstReg = getSaveExec(&MI);
|
||||
Register DstReg = MI.getOperand(0).getReg();
|
||||
|
||||
bool ExecModified = MI.getOperand(3).getImm() != 0;
|
||||
MachineBasicBlock::iterator Start = MBB.begin();
|
||||
|
@ -354,7 +334,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
|
|||
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
auto Dst = getSaveExec(&MI);
|
||||
auto Dst = MI.getOperand(0).getReg();
|
||||
|
||||
// Skip ANDing with exec if the break condition is already masked by exec
|
||||
// because it is a V_CMP in the same basic block. (We know the break
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-lower-control-flow -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
# The save exec result register of SI_IF is used by other terminators
|
||||
# inserted to behave as a lowered phi. The output register of SI_IF
|
||||
# was ignored, and the def was removed, so the S_MOV_B64_term uses
|
||||
# would fail the verifier.
|
||||
|
||||
---
|
||||
name: si_if_use
|
||||
alignment: 1
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
selected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: si_if_use
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[COPY]], killed [[COPY1]], implicit $exec
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
|
||||
; CHECK: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
|
||||
; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
|
||||
; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
|
||||
; CHECK: S_BRANCH %bb.2
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]]
|
||||
; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]]
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]]
|
||||
; CHECK: $exec = S_OR_B64 $exec, killed [[COPY5]], implicit-def $scc
|
||||
; CHECK: S_SLEEP 1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
|
||||
; CHECK: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY6]], implicit-def dead $scc
|
||||
; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_1]]
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec
|
||||
; CHECK: [[S_MOV_B64_term2:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec
|
||||
; CHECK: S_BRANCH %bb.2
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
|
||||
%0:vgpr_32 = COPY killed $vgpr0
|
||||
%1:vgpr_32 = COPY killed $vgpr1
|
||||
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
|
||||
%10:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
%14:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec
|
||||
%13:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
%11:sreg_64_xexec = COPY %13
|
||||
dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
|
||||
%14:sreg_64_xexec = COPY %11
|
||||
|
||||
bb.2:
|
||||
%12:sreg_64_xexec = COPY %14
|
||||
SI_END_CF killed %12, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
S_SLEEP 1
|
||||
%9:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
%14:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec
|
||||
%13:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
...
|
Loading…
Reference in New Issue