AMDGPU: Fix incorrectly deleting copies after spilling SGPR tuples

The implicit def of the super register would appear to kill any live
uses of components before the spill, and would be deleted by
MachineCopyPropagation. We need to add implicit uses of the super
register, similarly to what copyPhysReg does. VGPR tuples appear to be
correctly handled already. I need to double check the SGPR->memory
path.
This commit is contained in:
Matt Arsenault 2020-08-28 14:58:33 -04:00
parent 7128e64706
commit 9145d75226
3 changed files with 132 additions and 8 deletions

View File

@ -999,7 +999,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MBB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
DenseSet<Register> SGPRSpillVGPRDefinedSet;
DenseSet<Register> SGPRSpillVGPRDefinedSet; // FIXME: This should be removed
ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
= MFI->getSGPRToVGPRSpills(Index);
@ -1032,6 +1032,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
bool UseKill = IsKill && i == NumSubRegs - 1;
// During SGPR spilling to VGPR, determine if the VGPR is defined. The
// only circumstance in which we say it is undefined is when it is the
// first spill to this VGPR in the first basic block.
@ -1044,7 +1046,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
auto MIB = BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill.VGPR)
.addReg(SubReg, getKillRegState(IsKill))
.addReg(SubReg, getKillRegState(UseKill))
.addImm(Spill.Lane)
.addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
@ -1054,6 +1056,9 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
MIB.addReg(SuperReg, RegState::ImplicitDefine);
}
if (NumSubRegs > 1)
MIB.addReg(SuperReg, getKillRegState(UseKill) | RegState::Implicit);
// FIXME: Since this spills to another register instead of an actual
// frame index, we should delete the frame index when all references to
// it are fixed.

View File

@ -0,0 +1,119 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-lower-sgpr-spills,prologepilog,machine-cp -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# Make sure the initial first $sgpr1 = COPY $sgpr2 copy is not deleted
# by the copy propagation after lowering the spill.
---
name: spill_sgpr128_use_subreg
tracksRegLiveness: true
machineFunctionInfo:
hasSpilledSGPRs: true
scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103
stackPtrOffsetReg: $sgpr32
stack:
- { id: 0, stack-id: sgpr-spill, type: spill-slot, size: 16, alignment: 4 }
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN-LABEL: name: spill_sgpr128_use_subreg
; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN: renamable $sgpr1 = COPY $sgpr2
; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: renamable $sgpr8 = COPY killed renamable $sgpr1
; GCN: S_ENDPGM 0, implicit $sgpr8
renamable $sgpr1 = COPY $sgpr2
SI_SPILL_S128_SAVE renamable $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr100_sgpr101_sgpr102_sgpr103, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5)
renamable $sgpr8 = COPY killed renamable $sgpr1
S_ENDPGM 0, implicit $sgpr8
...
---
name: spill_sgpr128_use_kill
tracksRegLiveness: true
machineFunctionInfo:
hasSpilledSGPRs: true
scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103
stackPtrOffsetReg: $sgpr32
stack:
- { id: 0, stack-id: sgpr-spill, type: spill-slot, size: 16, alignment: 4 }
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN-LABEL: name: spill_sgpr128_use_kill
; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN: renamable $sgpr1 = COPY $sgpr2
; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: S_ENDPGM 0
renamable $sgpr1 = COPY $sgpr2
SI_SPILL_S128_SAVE renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr100_sgpr101_sgpr102_sgpr103, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
---
name: spill_vgpr128_use_subreg
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103
stackPtrOffsetReg: $sgpr32
stack:
- { id: 0, type: spill-slot, size: 16, alignment: 4 }
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; GCN-LABEL: name: spill_vgpr128_use_subreg
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; GCN: renamable $vgpr1 = COPY $vgpr2
; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
; GCN: renamable $vgpr8 = COPY $vgpr2
; GCN: S_ENDPGM 0, implicit $vgpr8
renamable $vgpr1 = COPY $vgpr2
SI_SPILL_V128_SAVE renamable $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5)
renamable $vgpr8 = COPY killed renamable $vgpr1
S_ENDPGM 0, implicit $vgpr8
...
---
name: spill_vgpr128_use_kill
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103
stackPtrOffsetReg: $sgpr32
stack:
- { id: 0, type: spill-slot, size: 16, alignment: 4 }
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; GCN-LABEL: name: spill_vgpr128_use_kill
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; GCN: renamable $vgpr1 = COPY $vgpr2
; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
; GCN: S_ENDPGM 0
renamable $vgpr1 = COPY $vgpr2
SI_SPILL_V128_SAVE renamable killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...

View File

@ -30,12 +30,12 @@ body: |
; EXPANDED: successors: %bb.1(0x80000000)
; EXPANDED: liveins: $vgpr0
; EXPANDED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr4, 0, undef $vgpr0
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr5, 1, $vgpr0
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr6, 2, $vgpr0
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr7, 3, $vgpr0
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr8, 4, $vgpr0
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr9, 5, $vgpr0
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr9, 5, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; EXPANDED: bb.1:
; EXPANDED: successors: %bb.2(0x80000000)