AMDGPU: Disallow spill folding with m0 copies
The readlane and writelane instructions are not allowed to use m0 as the data operand, so spilling m0 is tricky: it would require copying through an intermediate SGPR. Constrain the virtual register class in this case to disallow the inline spiller from folding the m0 operand directly into the spill instruction. I copied this hack from AArch64, which has the same problem for $sp.
parent 204a529cb0
commit d9e0a2942a
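The heart of the fix is a call to MachineRegisterInfo::constrainRegClass, which narrows a virtual register's class to the common subclass of its current class and the requested one, returning null if the two are incompatible. Below is a minimal standalone sketch of that idea (not LLVM's API), modeling register classes as bitmasks of allowed physical registers:

#include <cstdint>
#include <cstdio>

// Model a register class as a bitmask of allowed physical registers.
// Bit 3 stands in for m0; the other bits for numbered SGPRs.
using RegClassMask = uint32_t;

constexpr RegClassMask SReg_32     = 0b1111; // includes m0
constexpr RegClassMask SReg_32_XM0 = 0b0111; // SReg_32 without m0

// Narrow Current to its common subclass with Requested. A zero result plays
// the role of constrainRegClass returning nullptr: the constraint failed.
RegClassMask constrain(RegClassMask Current, RegClassMask Requested) {
  return Current & Requested;
}

int main() {
  // For "%0:sreg_32 = COPY $m0": after the hook runs, %0 is narrowed to
  // SReg_32_XM0, so the spiller can no longer substitute $m0 for %0.
  RegClassMask VReg0 = constrain(SReg_32, SReg_32_XM0);
  std::printf("constrained mask: 0x%x\n", VReg0); // prints 0x7
}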
@@ -1062,6 +1062,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   if (RI.isSGPRClass(RC)) {
     MFI->setHasSpilledSGPRs();
+    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
 
     // We are only allowed to create one new instruction when spilling
     // registers, so we need to use pseudo instruction for spilling SGPRs.
@@ -1190,6 +1191,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   if (RI.isSGPRClass(RC)) {
     MFI->setHasSpilledSGPRs();
+    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
 
     // FIXME: Maybe this should not include a memoperand because it will be
     // lowered to non-memory instructions.
@@ -6558,3 +6560,36 @@ MachineInstr *SIInstrInfo::createPHISourceCopy(
 }
 
 bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); }
+
+MachineInstr *SIInstrInfo::foldMemoryOperandImpl(
+    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+    MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
+    VirtRegMap *VRM) const {
+  // This is a bit of a hack (copied from AArch64). Consider this instruction:
+  //
+  // %0:sreg_32 = COPY $m0
+  //
+  // We explicitly chose SReg_32 for the virtual register so such a copy might
+  // be eliminated by RegisterCoalescer. However, that may not be possible, and
+  // %0 may even spill. We can't spill $m0 normally (it would require copying to
+  // a numbered SGPR anyway), and since it is in the SReg_32 register class,
+  // TargetInstrInfo::foldMemoryOperand() is going to try.
+  //
+  // To prevent that, constrain the %0 register class here.
+  if (MI.isFullCopy()) {
+    Register DstReg = MI.getOperand(0).getReg();
+    Register SrcReg = MI.getOperand(1).getReg();
+
+    if (DstReg == AMDGPU::M0 && SrcReg.isVirtual()) {
+      MF.getRegInfo().constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
+      return nullptr;
+    }
+
+    if (SrcReg == AMDGPU::M0 && DstReg.isVirtual()) {
+      MF.getRegInfo().constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
+      return nullptr;
+    }
+  }
+
+  return nullptr;
+}
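Note that the override returns nullptr on every path: it never produces a folded instruction itself. Its useful work is the side effect of constraining the virtual register's class, which stops the generic TargetInstrInfo::foldMemoryOperand() path from treating the copy as foldable. A rough standalone sketch of that decline-by-side-effect pattern follows; the types here are invented stand-ins for the LLVM machinery, not real API:

#include <cstdio>

struct VirtReg { bool allowsM0; };
struct CopyInstr {
  bool fullCopy;
  bool otherSideIsM0; // true when the physical side of the copy is $m0
  VirtReg *virtSide;  // the virtual side of the copy
};

// Shape of the hook: return a folded instruction on success, or nullptr to
// decline. This target never folds here; on m0 copies it narrows the
// register class so later generic folding attempts are ruled out too.
CopyInstr *foldMemoryOperandImpl(CopyInstr &MI) {
  if (MI.fullCopy && MI.otherSideIsM0)
    MI.virtSide->allowsM0 = false; // ~ constrainRegClass(..., SReg_32_XM0)
  return nullptr; // decline the fold in every case
}

int main() {
  VirtReg R0{true};
  CopyInstr Copy{true, true, &R0};
  foldMemoryOperandImpl(Copy);
  std::printf("%%0 may use m0 after the hook: %s\n",
              R0.allowsM0 ? "yes" : "no"); // prints "no"
}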
@@ -1027,6 +1027,13 @@ public:
   }
 
   void fixImplicitOperands(MachineInstr &MI) const;
+
+  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
+                                      ArrayRef<unsigned> Ops,
+                                      MachineBasicBlock::iterator InsertPt,
+                                      int FrameIndex,
+                                      LiveIntervals *LIS = nullptr,
+                                      VirtRegMap *VRM = nullptr) const override;
 };
 
 /// \brief Returns true if a reg:subreg pair P has a TRC class
@@ -0,0 +1,58 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -stress-regalloc=2 -start-before=greedy -stop-after=virtregmap -o - %s | FileCheck %s
+
+# Test that a spill of a copy of m0 is not folded to be a spill of m0 directly.
+
+---
+name: merge_sgpr_spill_into_copy_from_m0
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_m0
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: S_WAITCNT 0
+    ; CHECK: S_NOP 0, implicit-def $m0
+    ; CHECK: $sgpr0 = S_MOV_B32 $m0
+    ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
+    ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0
+    ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
+    ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0
+    ; CHECK: $m0 = S_MOV_B32 killed $sgpr0
+    ; CHECK: S_NOP 0
+    ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
+    S_NOP 0, implicit-def $m0
+    %0:sreg_32 = COPY $m0
+    S_NOP 0, implicit-def %1:sreg_32, implicit-def %2:sreg_32, implicit %0
+    $m0 = COPY %0
+    S_SENDMSG 0, implicit $m0, implicit $exec
+
+...
+
+# Test that a reload into a copy of m0 is not folded to be a reload of m0 directly.
+
+---
+name: reload_sgpr_spill_into_copy_to_m0
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_m0
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: S_WAITCNT 0
+    ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0
+    ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
+    ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0
+    ; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
+    ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0
+    ; CHECK: $m0 = S_MOV_B32 killed $sgpr0
+    ; CHECK: S_NOP 0
+    ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
+    S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $m0
+    S_NOP 0, implicit %0, implicit-def %3:sreg_32, implicit-def %4:sreg_32
+    $m0 = COPY %0
+    S_SENDMSG 0, implicit $m0, implicit $exec
+
+...
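In both tests the autogenerated CHECK lines confirm the constraint works: the spill and reload go through a numbered SGPR ($sgpr0 here), with a separate S_MOV_B32 between $sgpr0 and $m0, rather than V_WRITELANE_B32/V_READLANE_B32 ever using m0 as the data operand.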