[AMDGPU] Allow spilling FP to memory
If there are no available lanes in a reserved VGPR, no free SGPR, and no unused CSR VGPR when trying to save the FP, it must be spilled to memory as a last resort. This can be done in the prolog/epilog if we manually add the spill and manage exec.

Differential Revision: https://reviews.llvm.org/D79610
This commit is contained in:
parent e1ed4d9eb5
commit 09253b608a
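For orientation, the save-location cascade the message describes can be sketched in plain C++. This is an illustrative model only; the names (`FPSaveKind`, `SaveOptions`, `chooseFPSaveLocation`) are invented for the example, and the real decision is made in `SIFrameLowering::determineCalleeSaves` further down in the diff.

```cpp
// Illustrative sketch of the FP save-location cascade this patch completes.
// Names are invented for the example; the real logic lives in
// SIFrameLowering::determineCalleeSaves.
enum class FPSaveKind {
  FreeVGPRLane, // 1: v_writelane into a VGPR that already has spare lanes
  UnusedSGPR,   // 2: plain SGPR-to-SGPR copy, no memory traffic
  NewVGPRSpill, // 3: reserve another VGPR and use one of its lanes
  Memory,       // 4: last resort, added by this patch
};

struct SaveOptions {
  bool HaveFreeVGPRLane;
  bool HaveUnusedSGPR;
  bool CanReserveVGPR;
};

FPSaveKind chooseFPSaveLocation(const SaveOptions &O) {
  if (O.HaveFreeVGPRLane)
    return FPSaveKind::FreeVGPRLane;
  if (O.HaveUnusedSGPR)
    return FPSaveKind::UnusedSGPR;
  if (O.CanReserveVGPR)
    return FPSaveKind::NewVGPRSpill;
  return FPSaveKind::Memory; // spill to scratch via a temporary VGPR
}
```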
@@ -593,6 +593,47 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
   llvm_unreachable("Invalid TargetStackID::Value");
 }
 
+// Activate all lanes, returns saved exec.
+static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
+                                     MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     bool IsProlog) {
+  Register ScratchExecCopy;
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  DebugLoc DL;
+
+  if (LiveRegs.empty()) {
+    if (IsProlog) {
+      LiveRegs.init(TRI);
+      LiveRegs.addLiveIns(MBB);
+      if (FuncInfo->SGPRForFPSaveRestoreCopy)
+        LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+    } else {
+      // In epilog.
+      LiveRegs.init(*ST.getRegisterInfo());
+      LiveRegs.addLiveOuts(MBB);
+      LiveRegs.stepBackward(*MBBI);
+    }
+  }
+
+  ScratchExecCopy = findScratchNonCalleeSaveRegister(
+      MRI, LiveRegs, *TRI.getWaveMaskRegClass());
+
+  if (!IsProlog)
+    LiveRegs.removeReg(ScratchExecCopy);
+
+  const unsigned OrSaveExec =
+      ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
+  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
+
+  return ScratchExecCopy;
+}
+
 void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
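A note on the instruction this helper pivots on: s_or_saveexec writes the current exec mask into its destination SGPR(s) and then ORs the source operand into exec, so the -1 immediate saves the old mask and enables all lanes in a single instruction. A scalar model of the wave64 form (a paraphrase of the ISA semantics, not compiler code):

```cpp
#include <cstdint>

// Scalar model of S_OR_SAVEEXEC_B64. With Src == ~0ull the old mask is
// captured and all 64 lanes become active, which the prolog/epilog spill
// code needs so that every lane's slot in the scratch buffer is written.
uint64_t s_or_saveexec_b64(uint64_t &Exec, uint64_t Src) {
  uint64_t Saved = Exec; // destination SGPR pair receives the old exec mask
  Exec = Src | Exec;     // exec is then OR'd with the source operand
  return Saved;
}
```

The epilogue counterpart in the diff is the plain s_mov that restores exec from the saved copy once the spills are done.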
@@ -621,6 +662,15 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   // turn on all lanes before doing the spill to memory.
   Register ScratchExecCopy;
 
+  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
+  bool SpillFPToMemory = false;
+  // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
+  // Otherwise we are spilling the FP to memory.
+  if (HasFPSaveIndex) {
+    SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
+                      TargetStackID::SGPRSpill;
+  }
+
   // Emit the copy if we need an FP, and are using a free SGPR to save it.
   if (FuncInfo->SGPRForFPSaveRestoreCopy) {
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
@@ -636,25 +686,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
     if (!Reg.FI.hasValue())
       continue;
 
-    if (!ScratchExecCopy) {
-      if (LiveRegs.empty()) {
-        LiveRegs.init(TRI);
-        LiveRegs.addLiveIns(MBB);
-        if (FuncInfo->SGPRForFPSaveRestoreCopy)
-          LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
-      }
-
-      ScratchExecCopy
-        = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
-                                           *TRI.getWaveMaskRegClass());
-      assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);
-
-      const unsigned OrSaveExec = ST.isWave32() ?
-        AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
-      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
-              ScratchExecCopy)
-        .addImm(-1);
-    }
+    if (!ScratchExecCopy)
+      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
 
     buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                      FuncInfo->getScratchRSrcReg(),
@@ -662,30 +695,50 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
                      Reg.FI.getValue());
   }
 
+  if (HasFPSaveIndex && SpillFPToMemory) {
+    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+    assert(!MFI.isDeadObjectIndex(FI));
+
+    if (!ScratchExecCopy)
+      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
+
+    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
+        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
+        .addReg(FramePtrReg);
+
+    buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
+                     FuncInfo->getScratchRSrcReg(), StackPtrReg,
+                     FuncInfo->FramePointerSaveIndex.getValue());
+  }
+
   if (ScratchExecCopy) {
     // FIXME: Split block and make terminator.
     unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
     MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
     BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
         .addReg(ScratchExecCopy, RegState::Kill);
     LiveRegs.addReg(ScratchExecCopy);
   }
 
-  if (FuncInfo->FramePointerSaveIndex) {
+  // In this case, spill the FP to a reserved VGPR.
+  if (HasFPSaveIndex && !SpillFPToMemory) {
     const int FI = FuncInfo->FramePointerSaveIndex.getValue();
-    assert(!MFI.isDeadObjectIndex(FI) &&
-           MFI.getStackID(FI) == TargetStackID::SGPRSpill);
-    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
-      = FuncInfo->getSGPRToVGPRSpills(FI);
+    assert(!MFI.isDeadObjectIndex(FI));
+
+    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+        FuncInfo->getSGPRToVGPRSpills(FI);
     assert(Spill.size() == 1);
 
     // Save FP before setting it up.
     // FIXME: This should respect spillSGPRToVGPR;
     BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
             Spill[0].VGPR)
         .addReg(FramePtrReg)
         .addImm(Spill[0].Lane)
         .addReg(Spill[0].VGPR, RegState::Undef);
   }
 
   if (TRI.needsStackRealignment(MF)) {
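The prologue now has two FP-save shapes: with a free lane, V_WRITELANE_B32 parks the SGPR frame pointer in one lane of a reserved VGPR; with none, the new path broadcasts the FP into a temporary VGPR (V_MOV_B32) and stores that to the scratch slot. A toy wave64 model of the lane instructions, which to my understanding operate on their named lane regardless of exec — which is why this path needs no exec manipulation while the memory path does:

```cpp
#include <array>
#include <cstdint>

// Toy wave64 model: a VGPR is one 32-bit value per lane, so a single VGPR
// can hold up to 64 spilled SGPRs, one per lane.
using VGPR = std::array<uint32_t, 64>;

// v_writelane_b32: store an SGPR value into exactly one lane of a VGPR.
void v_writelane_b32(VGPR &V, uint32_t SVal, unsigned Lane) { V[Lane] = SVal; }

// v_readlane_b32: read one lane of a VGPR back into an SGPR.
uint32_t v_readlane_b32(const VGPR &V, unsigned Lane) { return V[Lane]; }
```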
@@ -706,13 +759,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
     // s_add_u32 tmp_reg, s32, NumBytes
     // s_and_b32 s32, tmp_reg, 0b111...0000
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
       .addReg(StackPtrReg)
       .addImm((Alignment - 1) * ST.getWavefrontSize())
       .setMIFlag(MachineInstr::FrameSetup);
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
       .addReg(ScratchSPReg, RegState::Kill)
       .addImm(-Alignment * ST.getWavefrontSize())
       .setMIFlag(MachineInstr::FrameSetup);
     FuncInfo->setIsStackRealigned(true);
   } else if ((HasFP = hasFP(MF))) {
     // If we need a base pointer, set it up here. It's whatever the value of
@@ -720,15 +773,15 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
     // allocated after this, so we can still use the base pointer to reference
     // locals.
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
       .addReg(StackPtrReg)
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
   if (HasFP && RoundedSize != 0) {
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
       .addReg(StackPtrReg)
       .addImm(RoundedSize * ST.getWavefrontSize())
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
   assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
@@ -758,9 +811,17 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
   uint32_t RoundedSize = FuncInfo->isStackRealigned()
                              ? NumBytes + MFI.getMaxAlign().value()
                              : NumBytes;
+  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
+  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
+
+  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
+  bool SpillFPToMemory = false;
+  if (HasFPSaveIndex) {
+    SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
+                      TargetStackID::SGPRSpill;
+  }
 
   if (RoundedSize != 0 && hasFP(MF)) {
-    const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
-
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
         .addReg(StackPtrReg)
         .addImm(RoundedSize * ST.getWavefrontSize())
@@ -768,55 +829,49 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
   }
 
   if (FuncInfo->SGPRForFPSaveRestoreCopy) {
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
         .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
         .setMIFlag(MachineInstr::FrameSetup);
-  }
-
-  if (FuncInfo->FramePointerSaveIndex) {
-    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
-
-    assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
-           MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);
-
-    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
-      = FuncInfo->getSGPRToVGPRSpills(FI);
-    assert(Spill.size() == 1);
-    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
-            FuncInfo->getFrameOffsetReg())
-      .addReg(Spill[0].VGPR)
-      .addImm(Spill[0].Lane);
   }
 
   Register ScratchExecCopy;
-  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-         : FuncInfo->getSGPRSpillVGPRs()) {
+  if (HasFPSaveIndex) {
+    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+    assert(!MFI.isDeadObjectIndex(FI));
+    if (SpillFPToMemory) {
+      if (!ScratchExecCopy)
+        ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
+
+      MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
+          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+      buildEpilogReload(LiveRegs, MBB, MBBI, TII, TempVGPR,
+                        FuncInfo->getScratchRSrcReg(), StackPtrReg, FI);
+      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
+          .addReg(TempVGPR, RegState::Kill);
+    } else {
+      // Reload from VGPR spill.
+      assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+          FuncInfo->getSGPRToVGPRSpills(FI);
+      assert(Spill.size() == 1);
+      BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+              FramePtrReg)
+          .addReg(Spill[0].VGPR)
+          .addImm(Spill[0].Lane);
+    }
+  }
+
+  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg :
+       FuncInfo->getSGPRSpillVGPRs()) {
     if (!Reg.FI.hasValue())
       continue;
 
-    const SIRegisterInfo &TRI = TII->getRegisterInfo();
-    if (!ScratchExecCopy) {
-      // See emitPrologue
-      if (LiveRegs.empty()) {
-        LiveRegs.init(*ST.getRegisterInfo());
-        LiveRegs.addLiveOuts(MBB);
-        LiveRegs.stepBackward(*MBBI);
-      }
-
-      ScratchExecCopy = findScratchNonCalleeSaveRegister(
-          MRI, LiveRegs, *TRI.getWaveMaskRegClass());
-      LiveRegs.removeReg(ScratchExecCopy);
-
-      const unsigned OrSaveExec =
-          ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
-
-      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
-        .addImm(-1);
-    }
+    if (!ScratchExecCopy)
+      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
 
     buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
-                      FuncInfo->getScratchRSrcReg(),
-                      FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
+                      FuncInfo->getScratchRSrcReg(), StackPtrReg,
+                      Reg.FI.getValue());
   }
 
   if (ScratchExecCopy) {
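On the restore side of the memory path, the reload lands in a temporary VGPR and V_READFIRSTLANE_B32 moves it back into the SGPR frame pointer. That is sound here because the prologue broadcast the FP to every lane before the store, so every lane's slot holds the same value. A toy model in the same style as the earlier sketch:

```cpp
#include <array>
#include <cstdint>

using VGPR = std::array<uint32_t, 64>;

// Toy model of v_readfirstlane_b32: copy the value of the lowest active
// lane into an SGPR. Exec has just been set to all ones by
// buildScratchExecCopy, so lane 0 is always active at this point.
uint32_t v_readfirstlane_b32(const VGPR &V, uint64_t Exec) {
  for (unsigned Lane = 0; Lane < 64; ++Lane)
    if (Exec & (1ull << Lane))
      return V[Lane];
  return V[0]; // hardware falls back to lane 0 when no lane is active
}
```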
@@ -824,7 +879,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
     unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
     MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
     BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
         .addReg(ScratchExecCopy, RegState::Kill);
   }
 }
 
@@ -906,7 +961,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (MFI->isEntryFunction())
     return;
 
-  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
@@ -934,12 +989,14 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (!HasFP)
     return;
 
+  // We need to save and restore the current FP.
+
+  // 1: If there is already a VGPR with free lanes, use it. We
+  // may already have to pay the penalty for spilling a CSR VGPR.
   if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
     int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                     TargetStackID::SGPRSpill);
 
-    // If there is already a VGPR with free lanes, use it. We may already have
-    // to pay the penalty for spilling a CSR VGPR.
     if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
       llvm_unreachable("allocate SGPR spill should have worked");
@@ -952,16 +1009,22 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
     return;
   }
 
+  // 2: Next, try to save the FP in an unused SGPR.
   MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());
 
   if (!MFI->SGPRForFPSaveRestoreCopy) {
-    // There's no free lane to spill, and no free register to save FP, so we're
-    // forced to spill another VGPR to use for the spill.
     int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                     TargetStackID::SGPRSpill);
-    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
-      llvm_unreachable("allocate SGPR spill should have worked");
-    MFI->FramePointerSaveIndex = NewFI;
+    if (MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
+      // 3: There's no free lane to spill, and no free register to save FP, so
+      // we're forced to spill another VGPR to use for the spill.
+      MFI->FramePointerSaveIndex = NewFI;
+    } else {
+      // 4: If all else fails, spill the FP to memory.
+      MFI->FramePointerSaveIndex =
+          FrameInfo.CreateSpillStackObject(4, Align(4));
+    }
 
     LLVM_DEBUG(
       auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
@@ -459,6 +459,115 @@ define void @ipra_call_with_stack() #0 {
   ret void
 }
 
+; With no free registers, we must spill the FP to memory.
+; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory:
+; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33
+; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:[[OFF:[0-9]+]]
+; GCN: s_mov_b64 exec, [[COPY_EXEC1]]
+; GCN: s_mov_b32 s33, s32
+; GCN: s_or_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:[[OFF]]
+; GCN: s_waitcnt vmcnt(0)
+; GCN: v_readfirstlane_b32 s33, [[TMP_VGPR2]]
+; GCN: s_mov_b64 exec, [[COPY_EXEC2]]
+; GCN: s_setpc_b64
+define void @callee_need_to_spill_fp_to_memory() #1 {
+  call void asm sideeffect "; clobber nonpreserved SGPRs",
+    "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
+    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
+    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
+    ,~{vcc}"()
+
+  call void asm sideeffect "; clobber all VGPRs",
+    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
+    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
+    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
+    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
+    ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
+    ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
+    ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
+    ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
+    ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
+    ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
+    ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
+    ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
+    ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
+    ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
+    ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
+    ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
+    ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
+    ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
+    ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
+    ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
+    ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
+    ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
+    ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
+    ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
+    ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
+    ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}"()
+  ret void
+}
+
+; If we have a reserved VGPR that can be used for SGPR spills, we may still
+; need to spill the FP to memory if there are no free lanes in the reserved
+; VGPR.
+; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory_full_reserved_vgpr:
+; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33
+; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:[[OFF:[0-9]+]]
+; GCN: s_mov_b64 exec, [[COPY_EXEC1]]
+; GCN-NOT: v_writelane_b32 v40, s33
+; GCN: s_mov_b32 s33, s32
+; GCN-NOT: v_readlane_b32 s33, v40
+; GCN: s_or_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:[[OFF]]
+; GCN: v_readfirstlane_b32 s33, [[TMP_VGPR2]]
+; GCN: s_mov_b64 exec, [[COPY_EXEC2]]
+; GCN: s_setpc_b64
+define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #1 {
+  call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs",
+    "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
+    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
+    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
+    ,~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
+    ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
+    ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
+    ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
+    ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
+    ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
+    ,~{s100},~{s101},~{s102},~{s39},~{vcc}"()
+
+  call void asm sideeffect "; clobber all VGPRs except CSR v40",
+    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
+    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
+    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
+    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
+    ,~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
+    ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
+    ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
+    ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
+    ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
+    ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
+    ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
+    ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
+    ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
+    ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
+    ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
+    ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
+    ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
+    ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
+    ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
+    ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
+    ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
+    ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
+    ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
+    ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
+    ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
+    ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}"()
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind "frame-pointer"="all" }
 attributes #2 = { nounwind "frame-pointer"="non-leaf" }