AMDGPU: Remove SGPRSpillVGPRDefinedSet hack

These VGPRs should be reserved and therefore do not need "correct"
liveness. They should not have undef uses, which can still cause
issues.
This commit is contained in:
Matt Arsenault 2020-10-15 18:40:02 -04:00
parent c1d6d461aa
commit f333736757
8 changed files with 57 additions and 34 deletions

View File

@ -310,10 +310,13 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
std::unique_ptr<RegScavenger> RS;
bool NewReservedRegs = false;
// TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
// handled as SpilledToReg in regular PrologEpilogInserter.
if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
SpillVGPRToAGPR) {
const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
(HasCSRs || FuncInfo->hasSpilledSGPRs());
if (HasSGPRSpillToVGPR || SpillVGPRToAGPR) {
// Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
// are spilled to VGPRs, in which case we can eliminate the stack usage.
//
@ -338,6 +341,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
TRI->isAGPR(MRI, VReg))) {
NewReservedRegs = true;
if (!RS)
RS.reset(new RegScavenger());
@ -354,6 +358,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
NewReservedRegs = true;
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
(void)Spilled;
assert(Spilled && "failed to spill SGPR to VGPR when allocated");
@ -382,5 +387,9 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
SaveBlocks.clear();
RestoreBlocks.clear();
// Updated the reserved registers with any VGPRs added for SGPR spills.
if (NewReservedRegs)
MRI.freezeReservedRegs(MF);
return MadeChange;
}

View File

@ -1150,7 +1150,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MBB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
DenseSet<Register> SGPRSpillVGPRDefinedSet; // FIXME: This should be removed
ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
= MFI->getSGPRToVGPRSpills(Index);
@ -1186,20 +1185,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
bool UseKill = IsKill && i == NumSubRegs - 1;
// During SGPR spilling to VGPR, determine if the VGPR is defined. The
// only circumstance in which we say it is undefined is when it is the
// first spill to this VGPR in the first basic block.
bool VGPRDefined = true;
if (MBB == &MF->front())
VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
// Mark the "old value of vgpr" input undef only if this is the first sgpr
// spill to this specific vgpr in the first basic block.
auto MIB =
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
.addReg(SubReg, getKillRegState(UseKill))
.addImm(Spill.Lane)
.addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
.addReg(Spill.VGPR);
if (i == 0 && NumSubRegs > 1) {
// We may be spilling a super-register which is only partially defined,

View File

@ -14,10 +14,10 @@ body: |
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def $exec_lo
; CHECK: $sgpr0 = S_MOV_B32 $exec_lo
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $exec_lo = S_MOV_B32 killed $sgpr0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def $exec_lo
@ -38,10 +38,10 @@ body: |
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def $exec_hi
; CHECK: $sgpr0 = S_MOV_B32 $exec_hi
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $exec_hi = S_MOV_B32 killed $sgpr0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def $exec_hi
@ -62,13 +62,13 @@ body: |
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def $exec
; CHECK: $sgpr0_sgpr1 = S_MOV_B64 $exec
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1
; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def $exec
@ -91,10 +91,10 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $exec_lo = S_MOV_B32 killed $sgpr0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_lo
@ -113,10 +113,10 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $exec_hi = S_MOV_B32 killed $sgpr0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_hi
@ -135,13 +135,13 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1
; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def %0:sreg_64, implicit-def %1:sreg_64, implicit-def $exec

View File

@ -15,10 +15,10 @@ body: |
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def $m0
; CHECK: $sgpr0 = S_MOV_B32 $m0
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $m0 = S_MOV_B32 killed $sgpr0
; CHECK: S_NOP 0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
@ -43,10 +43,10 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $m0 = S_MOV_B32 killed $sgpr0
; CHECK: S_NOP 0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec

View File

@ -21,7 +21,7 @@ body: |
; CHECK-LABEL: name: sgpr_spill_s64_undef_high32
; CHECK: liveins: $sgpr4, $vgpr0
; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5
SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5)
@ -45,7 +45,7 @@ body: |
; CHECK-LABEL: name: sgpr_spill_s64_undef_low32
; CHECK: liveins: $sgpr5, $vgpr0
; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5
SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5)

View File

@ -22,7 +22,7 @@ body: |
; GCN-LABEL: name: spill_sgpr128_use_subreg
; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN: renamable $sgpr1 = COPY $sgpr2
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@ -52,7 +52,7 @@ body: |
; GCN-LABEL: name: spill_sgpr128_use_kill
; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN: renamable $sgpr1 = COPY $sgpr2
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3

View File

@ -30,7 +30,7 @@ body: |
; EXPANDED: successors: %bb.1(0x80000000)
; EXPANDED: liveins: $vgpr0
; EXPANDED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9

View File

@ -124,3 +124,25 @@ body: |
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
...
---
name: spill_v128_kill
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 16, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-LABEL: name: spill_v128_kill
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, addrspace 5)
...