From 6e1770821fbd05bd5180530aca17e1455d1c29d8 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 27 Dec 2019 13:11:06 -0500 Subject: [PATCH] AMDGPU: Fix SI_IF lowering when the save exec reg has terminator uses Reverts part of 6524a7a2b9ca072bd7f7b4355d1230e70c679d2f. Since that commit, the expansion was ignoring the actual save exec register produced by the instruction, and looking at other instructions. I do not understand why it was looking at other instructions, but relying on this scan was wrong. Fixes verifier errors after SI_IF is tail duplicated, which should be correct to do. The results were fed into a phi, which was lowered to the S_MOV_B64_term instructions. --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 26 +------ .../AMDGPU/si-if-lower-user-terminators.mir | 75 +++++++++++++++++++ 2 files changed, 78 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 61d2719a3aad..06173551e92e 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -98,8 +98,6 @@ private: void emitLoop(MachineInstr &MI); void emitEndCf(MachineInstr &MI); - Register getSaveExec(MachineInstr* MI); - void findMaskOperands(MachineInstr &MI, unsigned OpNo, SmallVectorImpl &Src) const; @@ -177,29 +175,11 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI, return true; } -Register SILowerControlFlow::getSaveExec(MachineInstr *MI) { - MachineBasicBlock *MBB = MI->getParent(); - MachineOperand &SaveExec = MI->getOperand(0); - assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister); - - Register SaveExecReg = SaveExec.getReg(); - unsigned FalseTermOpc = - TII->isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term; - MachineBasicBlock::iterator I = (MI); - MachineBasicBlock::iterator J = std::next(I); - if (J != MBB->end() && J->getOpcode() == FalseTermOpc && - J->getOperand(1).isReg() && J->getOperand(1).getReg() == SaveExecReg) { - SaveExecReg = J->getOperand(0).getReg(); - J->eraseFromParent(); - } - return SaveExecReg; -} - void SILowerControlFlow::emitIf(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock::iterator I(&MI); - Register SaveExecReg = getSaveExec(&MI); + Register SaveExecReg = MI.getOperand(0).getReg(); MachineOperand& Cond = MI.getOperand(1); assert(Cond.getSubReg() == AMDGPU::NoSubRegister); @@ -282,7 +262,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - Register DstReg = getSaveExec(&MI); + Register DstReg = MI.getOperand(0).getReg(); bool ExecModified = MI.getOperand(3).getImm() != 0; MachineBasicBlock::iterator Start = MBB.begin(); @@ -354,7 +334,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { void SILowerControlFlow::emitIfBreak(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - auto Dst = getSaveExec(&MI); + auto Dst = MI.getOperand(0).getReg(); // Skip ANDing with exec if the break condition is already masked by exec // because it is a V_CMP in the same basic block. (We know the break diff --git a/llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir b/llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir new file mode 100644 index 000000000000..5850a3b27bce --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir @@ -0,0 +1,75 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-lower-control-flow -verify-machineinstrs -o - %s | FileCheck %s + +# The save exec result register of SI_IF is used by other terminators +# inserted to behave as a lowered phi. The output register of SI_IF +# was ignored, and the def was removed, so the S_MOV_B64_term uses +# would fail the verifier. + +--- +name: si_if_use +alignment: 1 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: si_if_use + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[COPY]], killed [[COPY1]], implicit $exec + ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]] + ; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1) + ; CHECK: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]] + ; CHECK: bb.2: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]] + ; CHECK: $exec = S_OR_B64 $exec, killed [[COPY5]], implicit-def $scc + ; CHECK: S_SLEEP 1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY6]], implicit-def dead $scc + ; CHECK: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec + ; CHECK: [[S_MOV_B64_term2:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec + ; CHECK: S_BRANCH %bb.2 + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + + %0:vgpr_32 = COPY killed $vgpr0 + %1:vgpr_32 = COPY killed $vgpr1 + %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec + %10:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %14:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec + %13:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec + S_BRANCH %bb.2 + + bb.1: + %11:sreg_64_xexec = COPY %13 + dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1) + %14:sreg_64_xexec = COPY %11 + + bb.2: + %12:sreg_64_xexec = COPY %14 + SI_END_CF killed %12, implicit-def $exec, implicit-def dead $scc, implicit $exec + S_SLEEP 1 + %9:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec + %14:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec + %13:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec + S_BRANCH %bb.2 + +...