RegAllocFast: Make self loop live-out heuristic more aggressive

This currently has no impact on code, but prevents sizeable code size
regressions after D52010. This prevents spilling and reloading all
values inside blocks that loop back. Add a baseline test which would
regress without this patch.
This commit is contained in:
Matt Arsenault 2020-08-31 15:09:50 -04:00
parent e47d2927de
commit 738c73a454
2 changed files with 218 additions and 4 deletions

View File

@@ -263,6 +263,20 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) {
return FrameIdx;
}
/// Return true if instruction \p A appears before \p B within \p MBB.
/// An iterator equal to the block end is treated as coming after every
/// instruction, so \p B == end() always yields true. Assumes \p A (and
/// \p B, when not the end iterator) point into \p MBB — otherwise the
/// scan below would run off the end of the block.
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  // Nothing can come after the end of the block.
  if (B == MBB.end())
    return true;

  // Walk forward from the top of the block; whichever of the two
  // instructions is reached first is the dominating one.
  MachineBasicBlock::const_iterator It = MBB.begin();
  while (&*It != &*A && &*It != &*B)
    ++It;

  return &*It == &*A;
}
/// Returns false if \p VirtReg is known to not live out of the current block.
bool RegAllocFast::mayLiveOut(Register VirtReg) {
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
@@ -270,11 +284,16 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
return !MBB->succ_empty();
}
// If this block loops back to itself, it would be necessary to check whether
// the use comes after the def.
const MachineInstr *SelfLoopDef = nullptr;
// If this block loops back to itself, it is necessary to check whether the
// use comes after the def.
if (MBB->isSuccessor(MBB)) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
SelfLoopDef = MRI->getUniqueVRegDef(VirtReg);
if (!SelfLoopDef) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
}
}
// See if the first \p Limit uses of the register are all in the current
@@ -287,6 +306,16 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
// Cannot be live-out if there are no successors.
return !MBB->succ_empty();
}
if (SelfLoopDef) {
// Try to handle some simple cases to avoid spilling and reloading every
// value inside a self looping block.
if (SelfLoopDef == &UseInst ||
!dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
}
}
}
return false;

View File

@@ -0,0 +1,185 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck -check-prefix=GCN %s
---
# %0 is defined once in bb.0 and its only uses (a load and a store) are in
# the self-looping bb.1. The checks show %0 spilled in bb.0 and reloaded at
# the top of bb.1, while the loop-local value %1 (vgpr2) stays in a register
# with no spill/restore of its own.
name: self_loop_single_def_use
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
body: |
; GCN-LABEL: name: self_loop_single_def_use
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0_vgpr1
; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
bb.0:
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
bb.1:
%1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
bb.2:
S_ENDPGM 0
...
---
# %1 is fully redefined twice inside the self-looping bb.1, each def followed
# by its use. The checks show neither def of %1 (vgpr2) is spilled across the
# back edge; only %0 is reloaded from %stack.0 at the top of the loop.
name: self_loop_multi_def
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
body: |
; GCN-LABEL: name: self_loop_multi_def
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0_vgpr1
; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
bb.0:
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
bb.1:
%1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
%1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
bb.2:
S_ENDPGM 0
...
# There's a single def inside the self loop, but it's also a use.
---
# The single defining instruction of %1 (V_ADD_U32_e32) also reads %1 as an
# undef operand. The checks show %1 (vgpr0) is kept in a register across the
# loop with no spill of its own; only %0 is restored from %stack.0.
name: self_loop_def_use_same_inst
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
body: |
; GCN-LABEL: name: self_loop_def_use_same_inst
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0_vgpr1
; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
; GCN: $vgpr1_vgpr2 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
bb.0:
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
bb.1:
%1:vgpr_32 = V_ADD_U32_e32 1, undef %1, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
bb.2:
S_ENDPGM 0
...
---
# Inside the self-looping bb.1, the (undef) use of %1 in GLOBAL_STORE_DWORD
# comes before %1's def. The checks show %1 (vgpr2) is spilled to %stack.1
# right after its def, i.e. it is treated as live across the back edge.
name: self_loop_def_after_use
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
body: |
; GCN-LABEL: name: self_loop_def_after_use
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0_vgpr1
; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec
; GCN: renamable $vgpr2 = V_ADD_U32_e64 1, 1, 0, implicit $exec
; GCN: SI_SPILL_V32_SAVE killed $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
bb.0:
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
bb.1:
GLOBAL_STORE_DWORD %0, undef %1, 0, 0, 0, 0, implicit $exec
%1:vgpr_32 = V_ADD_U32_e64 1, 1, 0, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
bb.2:
S_ENDPGM 0
...
---
# Only a subregister of %1 (sub1) is defined and then used inside the loop.
# The checks show the full register %1 (vgpr2_vgpr3) is still spilled to
# %stack.1 — the subreg def case is not kept in a register here.
name: self_loop_single_subreg_def_use
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
body: |
; GCN-LABEL: name: self_loop_single_subreg_def_use
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0_vgpr1
; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr2_vgpr3
; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr3, 0, 0, 0, 0, implicit $exec
; GCN: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.1, align 4, addrspace 5)
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
bb.0:
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
bb.1:
undef %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, undef %1.sub1, 0, 0, 0, 0, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
bb.2:
S_ENDPGM 0
...