forked from OSchip/llvm-project
R600/SI: Fix bug in VGPR spilling
AMDGPU::SI_SPILL_V96_RESTORE was missing from a switch statement, which caused the srsrc and soffset register to not be set correctly. This commit replaces the switch statement with a SITargetInfo query to make sure all spill instructions are covered. Differential Revision: http://reviews.llvm.org/D9582 llvm-svn: 237164
This commit is contained in:
parent
9690fcf12e
commit
a77c3f7010
|
@ -36,7 +36,8 @@ enum {
|
|||
DS = 1 << 17,
|
||||
MIMG = 1 << 18,
|
||||
FLAT = 1 << 19,
|
||||
WQM = 1 << 20
|
||||
WQM = 1 << 20,
|
||||
VGPRSpill = 1 << 21
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
|
|||
field bits<1> MIMG = 0;
|
||||
field bits<1> FLAT = 0;
|
||||
field bits<1> WQM = 0;
|
||||
field bits<1> VGPRSpill = 0;
|
||||
|
||||
// These need to be kept in sync with the enum in SIInstrFlags.
|
||||
let TSFlags{0} = VM_CNT;
|
||||
|
@ -66,6 +67,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
|
|||
let TSFlags{18} = MIMG;
|
||||
let TSFlags{19} = FLAT;
|
||||
let TSFlags{20} = WQM;
|
||||
let TSFlags{21} = VGPRSpill;
|
||||
|
||||
// Most instructions require adjustments after selection to satisfy
|
||||
// operand requirements.
|
||||
|
|
|
@ -216,6 +216,10 @@ public:
|
|||
return get(Opcode).TSFlags & SIInstrFlags::WQM;
|
||||
}
|
||||
|
||||
bool isVGPRSpill(uint16_t Opcode) const {
|
||||
return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
|
||||
}
|
||||
|
||||
bool isInlineConstant(const APInt &Imm) const;
|
||||
bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const;
|
||||
bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const;
|
||||
|
|
|
@ -2057,7 +2057,7 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
|
|||
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
|
||||
|
||||
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
|
||||
let UseNamedOperandTable = 1 in {
|
||||
let UseNamedOperandTable = 1, VGPRSpill = 1 in {
|
||||
def _SAVE : InstSI <
|
||||
(outs),
|
||||
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
|
||||
|
@ -2070,7 +2070,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
|
|||
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
|
||||
"", []
|
||||
>;
|
||||
} // End UseNamedOperandTable = 1
|
||||
} // End UseNamedOperandTable = 1, VGPRSpill = 1
|
||||
}
|
||||
|
||||
defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
|
||||
|
|
|
@ -128,80 +128,66 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
|
|||
MachineInstr &MI = *I;
|
||||
RS.forward(I);
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
switch(MI.getOpcode()) {
|
||||
default: break;
|
||||
case AMDGPU::SI_SPILL_V512_SAVE:
|
||||
case AMDGPU::SI_SPILL_V256_SAVE:
|
||||
case AMDGPU::SI_SPILL_V128_SAVE:
|
||||
case AMDGPU::SI_SPILL_V96_SAVE:
|
||||
case AMDGPU::SI_SPILL_V64_SAVE:
|
||||
case AMDGPU::SI_SPILL_V32_SAVE:
|
||||
case AMDGPU::SI_SPILL_V32_RESTORE:
|
||||
case AMDGPU::SI_SPILL_V64_RESTORE:
|
||||
case AMDGPU::SI_SPILL_V128_RESTORE:
|
||||
case AMDGPU::SI_SPILL_V256_RESTORE:
|
||||
case AMDGPU::SI_SPILL_V512_RESTORE:
|
||||
if (!TII->isVGPRSpill(MI.getOpcode()))
|
||||
continue;
|
||||
|
||||
// Scratch resource
|
||||
unsigned ScratchRsrcReg =
|
||||
RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
|
||||
// Scratch resource
|
||||
unsigned ScratchRsrcReg =
|
||||
RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
|
||||
|
||||
uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
|
||||
0xffffffff; // Size
|
||||
uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
|
||||
0xffffffff; // Size
|
||||
|
||||
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
|
||||
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
|
||||
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
|
||||
unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
|
||||
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
|
||||
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
|
||||
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
|
||||
unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
|
||||
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
|
||||
.addExternalSymbol("SCRATCH_RSRC_DWORD0")
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
|
||||
.addExternalSymbol("SCRATCH_RSRC_DWORD0")
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
|
||||
.addExternalSymbol("SCRATCH_RSRC_DWORD1")
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
|
||||
.addExternalSymbol("SCRATCH_RSRC_DWORD1")
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
|
||||
.addImm(Rsrc & 0xffffffff)
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
|
||||
.addImm(Rsrc & 0xffffffff)
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
|
||||
.addImm(Rsrc >> 32)
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
|
||||
.addImm(Rsrc >> 32)
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
// Scratch Offset
|
||||
if (ScratchOffsetReg == AMDGPU::NoRegister) {
|
||||
ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
|
||||
ScratchOffsetReg)
|
||||
.addFrameIndex(ScratchOffsetFI)
|
||||
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
|
||||
.addReg(AMDGPU::SGPR0, RegState::Undef);
|
||||
} else if (!MBB.isLiveIn(ScratchOffsetReg)) {
|
||||
MBB.addLiveIn(ScratchOffsetReg);
|
||||
}
|
||||
|
||||
if (ScratchRsrcReg == AMDGPU::NoRegister ||
|
||||
ScratchOffsetReg == AMDGPU::NoRegister) {
|
||||
LLVMContext &Ctx = MF.getFunction()->getContext();
|
||||
Ctx.emitError("ran out of SGPRs for spilling VGPRs");
|
||||
ScratchRsrcReg = AMDGPU::SGPR0;
|
||||
ScratchOffsetReg = AMDGPU::SGPR0;
|
||||
}
|
||||
MI.getOperand(2).setReg(ScratchRsrcReg);
|
||||
MI.getOperand(2).setIsKill(true);
|
||||
MI.getOperand(2).setIsUndef(false);
|
||||
MI.getOperand(3).setReg(ScratchOffsetReg);
|
||||
MI.getOperand(3).setIsUndef(false);
|
||||
MI.getOperand(3).setIsKill(false);
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
|
||||
|
||||
break;
|
||||
// Scratch Offset
|
||||
if (ScratchOffsetReg == AMDGPU::NoRegister) {
|
||||
ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
|
||||
ScratchOffsetReg)
|
||||
.addFrameIndex(ScratchOffsetFI)
|
||||
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
|
||||
.addReg(AMDGPU::SGPR0, RegState::Undef);
|
||||
} else if (!MBB.isLiveIn(ScratchOffsetReg)) {
|
||||
MBB.addLiveIn(ScratchOffsetReg);
|
||||
}
|
||||
|
||||
if (ScratchRsrcReg == AMDGPU::NoRegister ||
|
||||
ScratchOffsetReg == AMDGPU::NoRegister) {
|
||||
LLVMContext &Ctx = MF.getFunction()->getContext();
|
||||
Ctx.emitError("ran out of SGPRs for spilling VGPRs");
|
||||
ScratchRsrcReg = AMDGPU::SGPR0;
|
||||
ScratchOffsetReg = AMDGPU::SGPR0;
|
||||
}
|
||||
MI.getOperand(2).setReg(ScratchRsrcReg);
|
||||
MI.getOperand(2).setIsKill(true);
|
||||
MI.getOperand(2).setIsUndef(false);
|
||||
MI.getOperand(3).setReg(ScratchOffsetReg);
|
||||
MI.getOperand(3).setIsUndef(false);
|
||||
MI.getOperand(3).setIsKill(false);
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
|
Loading…
Reference in New Issue