forked from OSchip/llvm-project
RegAllocFast: Make the self-loop live-out heuristic more aggressive
This currently has no impact on generated code, but it prevents sizeable code-size regressions after D52010, which would otherwise spill and reload every value inside blocks that loop back to themselves. Also add a baseline test that would regress without this patch.
This commit is contained in:
parent
e47d2927de
commit
738c73a454
|
@ -263,6 +263,20 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) {
|
|||
return FrameIdx;
|
||||
}
|
||||
|
||||
/// Return true if position \p A occurs at or before position \p B within
/// \p MBB. The end iterator is ordered after every instruction in the block.
///
/// NOTE(review): this is a linear scan from the top of the block, so each
/// query is O(block size); callers should bound how often it is invoked.
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  // Every instruction in the block precedes the end iterator.
  if (B == MBB.end())
    return true;

  // Walk forward from the beginning; whichever of the two positions is
  // encountered first is the earlier one.
  MachineBasicBlock::const_iterator Cur = MBB.begin();
  while (&*Cur != A && &*Cur != B)
    ++Cur;

  return &*Cur == A;
}
|
||||
|
||||
/// Returns false if \p VirtReg is known to not live out of the current block.
|
||||
bool RegAllocFast::mayLiveOut(Register VirtReg) {
|
||||
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
|
||||
|
@ -270,11 +284,16 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
|
|||
return !MBB->succ_empty();
|
||||
}
|
||||
|
||||
// If this block loops back to itself, it would be necessary to check whether
|
||||
// the use comes after the def.
|
||||
const MachineInstr *SelfLoopDef = nullptr;
|
||||
|
||||
// If this block loops back to itself, it is necessary to check whether the
|
||||
// use comes after the def.
|
||||
if (MBB->isSuccessor(MBB)) {
|
||||
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
|
||||
return true;
|
||||
SelfLoopDef = MRI->getUniqueVRegDef(VirtReg);
|
||||
if (!SelfLoopDef) {
|
||||
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// See if the first \p Limit uses of the register are all in the current
|
||||
|
@ -287,6 +306,16 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
|
|||
// Cannot be live-out if there are no successors.
|
||||
return !MBB->succ_empty();
|
||||
}
|
||||
|
||||
if (SelfLoopDef) {
|
||||
// Try to handle some simple cases to avoid spilling and reloading every
|
||||
// value inside a self looping block.
|
||||
if (SelfLoopDef == &UseInst ||
|
||||
!dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) {
|
||||
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
@ -0,0 +1,185 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
# %0 is live across the self loop (defined in bb.0, used in bb.1), so the
# CHECK lines show it spilled in bb.0 and reloaded at the top of bb.1.
# %1 has a single def in bb.1 that precedes its only use, so it is never
# spilled — this is the case the self-loop heuristic is meant to keep cheap.
name: self_loop_single_def_use
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  ; GCN-LABEL: name: self_loop_single_def_use
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x80000000)
  ; GCN: liveins: $vgpr0_vgpr1
  ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
  ; GCN: bb.1:
  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
  ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
  ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
  ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; GCN: bb.2:
  ; GCN: S_ENDPGM 0
  bb.0:
    liveins: $vgpr0_vgpr1
    %0:vreg_64 = COPY $vgpr0_vgpr1

  bb.1:
    %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
    S_CBRANCH_EXECZ %bb.1, implicit $exec

  bb.2:
    S_ENDPGM 0

...
|
||||
|
||||
---
# Two def/use pairs of %1 inside the self-looping bb.1, each def before its
# use. The CHECK lines show %1 kept in $vgpr2 with no spill or reload across
# the back-edge; only %0 (live into the loop from bb.0) goes through a stack
# slot.
name: self_loop_multi_def
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  ; GCN-LABEL: name: self_loop_multi_def
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x80000000)
  ; GCN: liveins: $vgpr0_vgpr1
  ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
  ; GCN: bb.1:
  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
  ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
  ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
  ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
  ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
  ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; GCN: bb.2:
  ; GCN: S_ENDPGM 0
  bb.0:
    liveins: $vgpr0_vgpr1
    %0:vreg_64 = COPY $vgpr0_vgpr1

  bb.1:
    %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
    %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
    S_CBRANCH_EXECZ %bb.1, implicit $exec

  bb.2:
    S_ENDPGM 0

...
|
||||
|
||||
# There's a single def inside the self loop, but it's also a use.
|
||||
|
||||
---
# The single def of %1 in the loop also reads %1 (as an undef use) on the
# same V_ADD instruction. The CHECK lines show %1 living entirely in $vgpr0
# with no spill/reload; only %0 is restored from its bb.0 spill slot.
name: self_loop_def_use_same_inst
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  ; GCN-LABEL: name: self_loop_def_use_same_inst
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x80000000)
  ; GCN: liveins: $vgpr0_vgpr1
  ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
  ; GCN: bb.1:
  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
  ; GCN: $vgpr1_vgpr2 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
  ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec
  ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; GCN: bb.2:
  ; GCN: S_ENDPGM 0
  bb.0:
    liveins: $vgpr0_vgpr1
    %0:vreg_64 = COPY $vgpr0_vgpr1

  bb.1:
    %1:vgpr_32 = V_ADD_U32_e32 1, undef %1, implicit $exec
    GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec
    S_CBRANCH_EXECZ %bb.1, implicit $exec

  bb.2:
    S_ENDPGM 0

...
|
||||
|
||||
---
# The def of %1 comes AFTER its (undef) use inside the self loop, so the
# value must be assumed live around the back-edge. The CHECK lines show %1
# spilled to %stack.1 right after its def.
name: self_loop_def_after_use
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  ; GCN-LABEL: name: self_loop_def_after_use
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x80000000)
  ; GCN: liveins: $vgpr0_vgpr1
  ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
  ; GCN: bb.1:
  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
  ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec
  ; GCN: renamable $vgpr2 = V_ADD_U32_e64 1, 1, 0, implicit $exec
  ; GCN: SI_SPILL_V32_SAVE killed $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
  ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; GCN: bb.2:
  ; GCN: S_ENDPGM 0
  bb.0:
    liveins: $vgpr0_vgpr1
    %0:vreg_64 = COPY $vgpr0_vgpr1

  bb.1:
    GLOBAL_STORE_DWORD %0, undef %1, 0, 0, 0, 0, implicit $exec
    %1:vgpr_32 = V_ADD_U32_e64 1, 1, 0, implicit $exec
    S_CBRANCH_EXECZ %bb.1, implicit $exec

  bb.2:
    S_ENDPGM 0

...
|
||||
|
||||
---
# Like self_loop_single_def_use, but the loop only defines a subregister
# (%1.sub1) of the 64-bit value. The CHECK lines show the full $vgpr2_vgpr3
# pair still being spilled to %stack.1 — the partial def does not get the
# no-spill treatment.
name: self_loop_single_subreg_def_use
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  ; GCN-LABEL: name: self_loop_single_subreg_def_use
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x80000000)
  ; GCN: liveins: $vgpr0_vgpr1
  ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
  ; GCN: bb.1:
  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
  ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr2_vgpr3
  ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr3, 0, 0, 0, 0, implicit $exec
  ; GCN: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.1, align 4, addrspace 5)
  ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; GCN: bb.2:
  ; GCN: S_ENDPGM 0
  bb.0:
    liveins: $vgpr0_vgpr1
    %0:vreg_64 = COPY $vgpr0_vgpr1

  bb.1:
    undef %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD %0, undef %1.sub1, 0, 0, 0, 0, implicit $exec
    S_CBRANCH_EXECZ %bb.1, implicit $exec

  bb.2:
    S_ENDPGM 0

...
|
Loading…
Reference in New Issue