[AMDGPU] Fix the dead frame indices during custom spill lowering.

AMDGPU target tries to handle the SGPR and VGPR spills in a
custom pass before the actual frame lowering pass. Once they
are handled and the respective frames are eliminated in the
custom pass, certain uses of them still remain. For instance,
the DBG_VALUE instructions inserted by the allocator alongside
the spill instruction will use the corresponding frame index.
They become dead later during PEI and causes a crash while trying to
replace the frame indices. We should possibly avoid this custom pass.
For now, replacing such dead references with null register value.

Reviewed By: arsenm, scott.linder

Differential Revision: https://reviews.llvm.org/D98038
This commit is contained in:
Christudasan Devadasan 2021-03-05 13:29:14 +05:30 committed by cdevadas
parent 55ae279ba7
commit 24c0ad7143
3 changed files with 129 additions and 0 deletions

View File

@ -338,6 +338,9 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
lowerShiftReservedVGPR(MF, ST);
// To track the spill frame indices handled in this pass.
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator Next;
for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
@ -361,6 +364,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
// FIXME: change to enterBasicBlockEnd()
RS->enterBasicBlock(MBB);
TRI->eliminateFrameIndex(MI, 0, FIOp, RS.get());
SpillFIs.set(FI);
continue;
}
}
@ -375,6 +379,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
(void)Spilled;
assert(Spilled && "failed to spill SGPR to VGPR when allocated");
SpillFIs.set(FI);
}
}
}
@ -390,6 +395,18 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
MBB.addLiveIn(Reg);
MBB.sortUniqueLiveIns();
// FIXME: The dead frame indices are replaced with a null register from
// the debug value instructions. We should instead, update it with the
// correct register value. But not sure the register value alone is
// adequate to lower the DIExpression. It should be worked out later.
for (MachineInstr &MI : MBB) {
if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
SpillFIs[MI.getOperand(0).getIndex()]) {
MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
MI.getOperand(0).setIsDebug();
}
}
}
MadeChange = true;

View File

@ -0,0 +1,56 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s
# After handling the SGPR spill to VGPR in SILowerSGPRSpills pass, replace the dead frame index in the DBG_VALUE instruction with reg 0.
# Otherwise, the test would crash during PEI while trying to replace the dead frame index.
--- |
define amdgpu_kernel void @test() { ret void }
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !4, producer: "llvm", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4)
!1 = !DILocalVariable(name: "a", scope: !2, file: !4, line: 126, type: !6)
!2 = distinct !DISubprogram(name: "test", scope: !4, file: !4, line: 1, type: !3, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !5)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !{!1}
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64, align: 32)
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !DIExpression()
!9 = !DILocation(line: 10, column: 9, scope: !2)
...
---
name: test
tracksRegLiveness: true
frameInfo:
maxAlignment: 4
stack:
- { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
maxKernArgAlign: 4
isEntryFunction: true
waveLimiter: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
hasSpilledSGPRs: true
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
dispatchPtr: { reg: '$sgpr4_sgpr5' }
kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
workGroupIDX: { reg: '$sgpr8' }
privateSegmentWaveByteOffset: { reg: '$sgpr9' }
body: |
; CHECK-LABEL: name: test
; CHECK: bb.0:
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 0, $vgpr0
; CHECK: DBG_VALUE $noreg, 0
; CHECK: bb.1:
; CHECK: $sgpr10 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_ENDPGM 0
bb.0:
renamable $sgpr10 = IMPLICIT_DEF
SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
DBG_VALUE %stack.0, 0, !1, !8, debug-location !9
bb.1:
renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
S_ENDPGM 0

View File

@ -0,0 +1,56 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-vgpr-to-agpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s
# After handling the VGPR spill to AGPR copy in SILowerSGPRSpills pass, replace the dead frame index in the DBG_VALUE instruction with reg 0.
# Otherwise, the test would crash during PEI while trying to replace the dead frame index.
--- |
define amdgpu_kernel void @test() { ret void }
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !4, producer: "llvm", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4)
!1 = !DILocalVariable(name: "a", scope: !2, file: !4, line: 126, type: !6)
!2 = distinct !DISubprogram(name: "test", scope: !4, file: !4, line: 1, type: !3, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !5)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !{!1}
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64, align: 32)
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !DIExpression()
!9 = !DILocation(line: 10, column: 9, scope: !2)
...
---
name: test
tracksRegLiveness: true
frameInfo:
maxAlignment: 4
stack:
- { id: 0, type: spill-slot, size: 4, alignment: 4 }
machineFunctionInfo:
maxKernArgAlign: 4
isEntryFunction: true
waveLimiter: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
hasSpilledVGPRs: true
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
dispatchPtr: { reg: '$sgpr4_sgpr5' }
kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
workGroupIDX: { reg: '$sgpr8' }
privateSegmentWaveByteOffset: { reg: '$sgpr9' }
body: |
; CHECK-LABEL: name: test
; CHECK: bb.0:
; CHECK: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec
; CHECK: DBG_VALUE $noreg, 0
; CHECK: bb.1:
; CHECK: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
; CHECK: S_ENDPGM 0
bb.0:
$vgpr2 = IMPLICIT_DEF
SI_SPILL_V32_SAVE $vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, align 4, addrspace 5)
DBG_VALUE %stack.0, 0, !1, !8, debug-location !9
bb.1:
renamable $vgpr2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
S_ENDPGM 0