AMDGPU: Disallow spill folding with m0 copies

readlane and writelane instructions are not allowed to use m0 as the
data operand, so spilling m0 is tricky and would require copying
through an intermediate SGPR. Constrain the virtual register class in
this case to prevent the inline spiller from folding the m0 operand
directly into the spill instruction.

I copied this hack from AArch64, which has the same problem for $sp.
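As a sketch of the fold being prevented (the register numbers, stack
slot, and trailing operand ellipses here are illustrative, not taken
from the patch): the inline spiller sees

  %0:sreg_32 = COPY $m0
  SI_SPILL_S32_SAVE %0, %stack.0, ...

and, since COPY is a full copy, would fold it away by spilling the
source directly:

  SI_SPILL_S32_SAVE $m0, %stack.0, ...

which cannot be lowered, because the SGPR spill pseudo expands to
V_WRITELANE_B32 and writelane cannot use m0 as its data operand.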
Matt Arsenault 2019-10-30 12:56:24 -07:00
parent 204a529cb0
commit d9e0a2942a
3 changed files with 100 additions and 0 deletions

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

@@ -1062,6 +1062,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   if (RI.isSGPRClass(RC)) {
     MFI->setHasSpilledSGPRs();
+    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
 
     // We are only allowed to create one new instruction when spilling
     // registers, so we need to use pseudo instruction for spilling SGPRs.
@@ -1190,6 +1191,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   if (RI.isSGPRClass(RC)) {
     MFI->setHasSpilledSGPRs();
+    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
 
     // FIXME: Maybe this should not include a memoperand because it will be
     // lowered to non-memory instructions.
@@ -6558,3 +6560,36 @@ MachineInstr *SIInstrInfo::createPHISourceCopy(
 }
 
 bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); }
+
+MachineInstr *SIInstrInfo::foldMemoryOperandImpl(
+    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+    MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
+    VirtRegMap *VRM) const {
+  // This is a bit of a hack (copied from AArch64). Consider this instruction:
+  //
+  //   %0:sreg_32 = COPY $m0
+  //
+  // We explicitly chose SReg_32 for the virtual register so such a copy might
+  // be eliminated by RegisterCoalescer. However, that may not be possible, and
+  // %0 may even spill. We can't spill $m0 normally (it would require copying to
+  // a numbered SGPR anyway), and since it is in the SReg_32 register class,
+  // TargetInstrInfo::foldMemoryOperand() is going to try.
+  //
+  // To prevent that, constrain the %0 register class here.
+  if (MI.isFullCopy()) {
+    Register DstReg = MI.getOperand(0).getReg();
+    Register SrcReg = MI.getOperand(1).getReg();
+    if (DstReg == AMDGPU::M0 && SrcReg.isVirtual()) {
+      MF.getRegInfo().constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
+      return nullptr;
+    }
+    if (SrcReg == AMDGPU::M0 && DstReg.isVirtual()) {
+      MF.getRegInfo().constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
+      return nullptr;
+    }
+  }
+
+  return nullptr;
+}
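With the copy's register class constrained to SReg_32_XM0, the fold no
longer applies and the spiller instead copies m0 through a numbered
SGPR before the writelane, as the CHECK lines in the new MIR test
below show:

  $sgpr0 = S_MOV_B32 $m0
  $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0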

llvm/lib/Target/AMDGPU/SIInstrInfo.h

@@ -1027,6 +1027,13 @@ public:
   }
 
   void fixImplicitOperands(MachineInstr &MI) const;
+
+  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
+                                      ArrayRef<unsigned> Ops,
+                                      MachineBasicBlock::iterator InsertPt,
+                                      int FrameIndex,
+                                      LiveIntervals *LIS = nullptr,
+                                      VirtRegMap *VRM = nullptr) const override;
 };
 
 /// \brief Returns true if a reg:subreg pair P has a TRC class


@@ -0,0 +1,58 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -stress-regalloc=2 -start-before=greedy -stop-after=virtregmap -o - %s | FileCheck %s
# Test that a spill of a copy of m0 is not folded to be a spill of m0 directly.
---
name: merge_sgpr_spill_into_copy_from_m0
tracksRegLiveness: true
body: |
  bb.0:
    ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_m0
    ; CHECK: liveins: $vgpr0
    ; CHECK: S_WAITCNT 0
    ; CHECK: S_NOP 0, implicit-def $m0
    ; CHECK: $sgpr0 = S_MOV_B32 $m0
    ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
    ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0
    ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
    ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0
    ; CHECK: $m0 = S_MOV_B32 killed $sgpr0
    ; CHECK: S_NOP 0
    ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
    S_NOP 0, implicit-def $m0
    %0:sreg_32 = COPY $m0
    S_NOP 0, implicit-def %1:sreg_32, implicit-def %2:sreg_32, implicit %0
    $m0 = COPY %0
    S_SENDMSG 0, implicit $m0, implicit $exec
...
# Test that a reload feeding a copy to m0 is not folded into a direct reload of m0.
---
name: reload_sgpr_spill_into_copy_to_m0
tracksRegLiveness: true
body: |
  bb.0:
    ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_m0
    ; CHECK: liveins: $vgpr0
    ; CHECK: S_WAITCNT 0
    ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0
    ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
    ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0
    ; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
    ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0
    ; CHECK: $m0 = S_MOV_B32 killed $sgpr0
    ; CHECK: S_NOP 0
    ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
    S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $m0
    S_NOP 0, implicit %0, implicit-def %3:sreg_32, implicit-def %4:sreg_32
    $m0 = COPY %0
    S_SENDMSG 0, implicit $m0, implicit $exec
...