AMDGPU: Insert skip branches over return blocks

SIInsertSkips really doesn't understand the control flow, and makes
very stupid assumptions about the block layout. This was able to get
away with not skipping return blocks, since usually after
structurization there is only one placed at the end of the
function. Tail duplication can break this assumption.

llvm-svn: 362754
This commit is contained in:
Matt Arsenault 2019-06-06 22:51:51 +00:00
parent b82ea52b78
commit b6cfa129cc
3 changed files with 198 additions and 3 deletions

View File

@ -109,9 +109,6 @@ static bool opcodeEmitsNoInsts(unsigned Opc) {
bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const {
if (From.succ_empty())
return false;
unsigned NumInstr = 0;
const MachineFunction *MF = From.getParent();

View File

@ -2479,6 +2479,10 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
if (MI.mayStore() && isSMRD(MI))
return true; // scalar store or atomic
// This will terminate the function when other lanes may need to continue.
if (MI.isReturn())
return true;
// These instructions cause shader I/O that may cause hardware lockups
// when executed with an empty EXEC mask.
//

View File

@ -0,0 +1,194 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold=1000000 -o - %s | FileCheck %s
---
name: skip_branch_taildup_endpgm
machineFunctionInfo:
isEntryFunction: true
body: |
; CHECK-LABEL: name: skip_branch_taildup_endpgm
; CHECK: bb.0:
; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
; CHECK: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
; CHECK: S_WAITCNT 127
; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
; CHECK: renamable $vgpr0 = V_ADD_I32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK: renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec
; CHECK: renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
; CHECK: S_WAITCNT 112
; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
; CHECK: S_BRANCH %bb.3
; CHECK: bb.1:
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: S_BRANCH %bb.4
; CHECK: bb.2:
; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
; CHECK: S_ENDPGM 0
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; CHECK: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
; CHECK: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.4:
; CHECK: renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
; CHECK: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr7
renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
S_WAITCNT 127
$vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
renamable $vgpr0 = V_ADD_I32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec
renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
S_WAITCNT 112
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
SI_MASK_BRANCH %bb.2, implicit $exec
S_BRANCH %bb.1
bb.2:
successors: %bb.3, %bb.4
liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
SI_MASK_BRANCH %bb.4, implicit $exec
S_BRANCH %bb.3
bb.4:
liveins: $sgpr2_sgpr3
$exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
S_ENDPGM 0
bb.1:
successors: %bb.3, %bb.4
liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
SI_MASK_BRANCH %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:
liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
$exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
S_ENDPGM 0
...
---
name: skip_branch_taildup_ret
body: |
; CHECK-LABEL: name: skip_branch_taildup_ret
; CHECK: bb.0:
; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK: S_WAITCNT 0
; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
; CHECK: S_BRANCH %bb.3
; CHECK: bb.1:
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: S_BRANCH %bb.4
; CHECK: bb.2:
; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
; CHECK: S_SETPC_B64_return $sgpr30_sgpr31
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.4:
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
; CHECK: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
; CHECK: S_SETPC_B64_return $sgpr30_sgpr31
bb.0:
successors: %bb.1, %bb.2
liveins: $vgpr0, $sgpr30_sgpr31, $vgpr1_vgpr2
S_WAITCNT 0
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
SI_MASK_BRANCH %bb.2, implicit $exec
S_BRANCH %bb.1
bb.2:
successors: %bb.3, %bb.4
liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
SI_MASK_BRANCH %bb.4, implicit $exec
S_BRANCH %bb.3
bb.4:
liveins: $sgpr6_sgpr7, $sgpr30_sgpr31
$exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31
bb.1:
successors: %bb.3, %bb.4
liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
SI_MASK_BRANCH %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:
liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
$exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31
...