//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
                                         const MachineFunction &MF) {
  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
                      ST.getMaxNumSGPRs(MF) / 4);
}

static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST,
                                       const MachineFunction &MF) {
  return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
                      ST.getMaxNumSGPRs(MF));
}
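
// Illustration (assumed values, not from the source): if ST.getMaxNumSGPRs(MF)
// returns 104, getAllSGPRs yields s0..s103 and getAllSGPR128 yields the first
// 104 / 4 = 26 four-register tuples. Both rely on the register classes
// enumerating registers in ascending order starting from s0.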

void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
                                          MachineFunction &MF,
                                          MachineBasicBlock &MBB) const {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  unsigned FlatScratchInitReg
    = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);

  MachineRegisterInfo &MRI = MF.getRegInfo();
  MRI.addLiveIn(FlatScratchInitReg);
  MBB.addLiveIn(FlatScratchInitReg);

  unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
  unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
    .addReg(FlatScrInitLo)
    .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
  const GCNSubtarget &ST,
  const SIInstrInfo *TII,
  const SIRegisterInfo *TRI,
  SIMachineFunctionInfo *MFI,
  MachineFunction &MF) const {
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // We need to insert initialization of the scratch resource descriptor.
  unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
  if (ScratchRsrcReg == AMDGPU::NoRegister ||
      !MRI.isPhysRegUsed(ScratchRsrcReg))
    return AMDGPU::NoRegister;

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  // We find the resource first because it has an alignment requirement.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
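
  // NumPreloaded rounds the preloaded SGPR count up to whole 4-register
  // tuples; e.g. (illustrative) 6 preloaded SGPRs occupy (6 + 3) / 4 = 2
  // SGPR128 slots, so the search below starts at the third tuple.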
  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

// Shift down registers reserved for the scratch wave offset.
unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
  const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
  SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();

  assert(MFI->isEntryFunction());

  // No replacement necessary.
  if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
      (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
    return AMDGPU::NoRegister;
  }

  if (ST.hasSGPRInitBug())
    return ScratchWaveOffsetReg;

  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();

  ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
  if (NumPreloaded > AllSGPRs.size())
    return ScratchWaveOffsetReg;

  AllSGPRs = AllSGPRs.slice(NumPreloaded);

  // We need to drop registers from the end of the list that we cannot use
  // for the scratch wave offset.
  // + 2 because s102 and s103 do not exist on VI.
  // + 2 for vcc
  // + 2 for xnack_mask
  // + 2 for flat_scratch
  // + 4 for registers reserved for scratch resource register
  // + 1 for register reserved for scratch wave offset. (By excluding this
  //     register from the list to consider, it means that when this
  //     register is being used for the scratch wave offset and there
  //     are no other free SGPRs, then the value will stay in this register.)
  // + 1 if stack pointer is used.
  // ----
  // 13 (+1)
  unsigned ReservedRegCount = 13;

  if (AllSGPRs.size() < ReservedRegCount)
    return ScratchWaveOffsetReg;

  bool HandledScratchWaveOffsetReg =
    ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);

  for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
    // Pick the first unallocated SGPR. Be careful not to pick an alias of the
    // scratch descriptor, since we haven't added its uses yet.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
      if (!HandledScratchWaveOffsetReg) {
        HandledScratchWaveOffsetReg = true;

        MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
        if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) {
          assert(!hasFP(MF));
          MFI->setStackPtrOffsetReg(Reg);
        }

        MFI->setScratchWaveOffsetReg(Reg);
        MFI->setFrameOffsetReg(Reg);
        ScratchWaveOffsetReg = Reg;
        break;
      }
    }
  }

  return ScratchWaveOffsetReg;
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // If we only have SGPR spills, we won't actually be using scratch memory
  // since these spill to VGPRs.
  //
  // FIXME: We should be cleaning up these unused SGPR spill frame indices
  // somewhere.

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();

  // We need to do the replacement of the private segment buffer and wave
  // offset register even if there are no stack objects. There could be stores
  // to undef or a constant without an associated object.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  if (MFI->hasFlatScratchInit())
    emitFlatScratchInit(ST, MF, MBB);

  unsigned ScratchRsrcReg
    = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);

  unsigned ScratchWaveOffsetReg =
    getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);

  // We need to insert initialization of the scratch resource descriptor.
  unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
    AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedPrivateBufferReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
  }

  bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister &&
                       MRI.isPhysRegUsed(ScratchWaveOffsetReg);
  bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
                         MRI.isPhysRegUsed(ScratchRsrcReg);

  // FIXME: Hack to not crash in situations which emitted an error.
  if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister)
    return;

  // We added live-ins during argument lowering, but since they were not used
  // they were deleted. We're adding the uses now, so add them back.
  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);

  if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
    assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
    MRI.addLiveIn(PreloadedPrivateBufferReg);
    MBB.addLiveIn(PreloadedPrivateBufferReg);
  }

  // Make the selected registers live throughout the function.
  for (MachineBasicBlock &OtherBB : MF) {
    if (&OtherBB == &MBB)
      continue;

    if (OffsetRegUsed)
      OtherBB.addLiveIn(ScratchWaveOffsetReg);

    if (ResourceRegUsed)
      OtherBB.addLiveIn(ScratchRsrcReg);
  }

  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // If we reserved the original input registers, we don't need to copy to the
  // reserved registers.
  bool CopyBuffer = ResourceRegUsed &&
    PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
    ST.isAmdHsaOrMesa(F) &&
    ScratchRsrcReg != PreloadedPrivateBufferReg;

  // This needs to be careful of the copying order to avoid overwriting one of
  // the input registers before it's been copied to its final
  // destination. Usually the offset should be copied first.
  bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
                                              ScratchWaveOffsetReg);
  if (CopyBuffer && CopyBufferFirst) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
      .addReg(PreloadedPrivateBufferReg, RegState::Kill);
  }

  unsigned SPReg = MFI->getStackPtrOffsetReg();
  assert(SPReg != AMDGPU::SP_REG);

  // FIXME: Remove the isPhysRegUsed checks
  const bool HasFP = hasFP(MF);

  if (HasFP || OffsetRegUsed) {
    assert(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
      .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);
  }

  if (CopyBuffer && !CopyBufferFirst) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
      .addReg(PreloadedPrivateBufferReg, RegState::Kill);
  }

  if (ResourceRegUsed) {
    emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
                                  PreloadedPrivateBufferReg, ScratchRsrcReg);
  }

  if (HasFP) {
    DebugLoc DL;
    const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
    int64_t StackSize = FrameInfo.getStackSize();
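
    // Scratch memory is swizzled per wave: the wave-level stack pointer moves
    // in units of (bytes per lane) * (wavefront size), which is why StackSize
    // is scaled by ST.getWavefrontSize() below. Illustrative numbers: a
    // 16-byte per-lane frame on a wave64 target advances SP by 16 * 64 = 1024.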

    // On kernel entry, the private scratch wave offset is the SP value.
    if (StackSize == 0) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg)
        .addReg(MFI->getScratchWaveOffsetReg());
    } else {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
        .addReg(MFI->getScratchWaveOffsetReg())
        .addImm(StackSize * ST.getWavefrontSize());
    }
  }
}

// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
    MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
    MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
    unsigned ScratchRsrcReg) const {

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const Function &Fn = MF.getFunction();
  DebugLoc DL;

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC.
    unsigned RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
    unsigned RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
    unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    if (MFI->getGITPtrHigh() != 0xffffffff) {
      BuildMI(MBB, I, DL, SMovB32, RsrcHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    } else {
      const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
      BuildMI(MBB, I, DL, GetPC64, Rsrc01);
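      // s_getpc_b64 materializes the address of the instruction following it;
      // only the high half of Rsrc01 is kept, standing in for the GIT
      // pointer's high 32 bits when no amdgpu-git-ptr-high attribute is set
      // (the low half is overwritten from GitPtrLo below).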
    }
    auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
    if (ST.hasMergedShaders()) {
      switch (MF.getFunction().getCallingConv()) {
      case CallingConv::AMDGPU_HS:
      case CallingConv::AMDGPU_GS:
        // Low GIT address is passed in s8 rather than s0 for an LS+HS or
        // ES+GS merged shader on gfx9+.
        GitPtrLo = AMDGPU::SGPR8;
        break;
      default:
        break;
      }
    }
    MF.getRegInfo().addLiveIn(GitPtrLo);
    MBB.addLiveIn(GitPtrLo);
    BuildMI(MBB, I, DL, SMovB32, RsrcLo)
      .addReg(GitPtrLo)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    PointerType *PtrTy =
      PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
                       AMDGPUAS::CONSTANT_ADDRESS);
    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                       MachineMemOperand::MOInvariant |
                                       MachineMemOperand::MODereferenceable,
                                       16, 4);
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // glc
      .addImm(0) // dlc
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);
    return;
  }
  if (ST.isMesaGfxShader(Fn)
      || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();
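
    // In a buffer resource descriptor, words 0-1 hold the base pointer (set
    // up below) while words 2-3 hold the size and format/flag fields; for
    // scratch those are compile-time constants, which is what
    // getScratchRsrcWords23() supplies.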
    if (MFI->hasImplicitBufferPtr()) {
      unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        PointerType *PtrTy =
          PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
                           AMDGPUAS::CONSTANT_ADDRESS);
        MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
        auto MMO = MF.getMachineMemOperand(PtrInfo,
                                           MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                           8, 4);
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // glc
          .addImm(0) // dlc
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  }
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack
// pointer, but we would then have to make sure that we were in fact saving at
// least one callee-save register in the prologue, which is additional
// complexity that doesn't seem worth the benefit.
static unsigned findScratchNonCalleeSaveRegister(MachineFunction &MF,
                                                 LivePhysRegs &LiveRegs,
                                                 const TargetRegisterClass &RC) {
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo &TRI = *Subtarget.getRegisterInfo();

  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned Reg : RC) {
    if (LiveRegs.available(MRI, Reg))
      return Reg;
  }

  return AMDGPU::NoRegister;
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;

  if (TRI.needsStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlignment();

    RoundedSize += Alignment;

    LiveRegs.init(TRI);
    LiveRegs.addLiveIns(MBB);

    unsigned ScratchSPReg
      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
                                         AMDGPU::SReg_32_XM0RegClass);
    assert(ScratchSPReg != AMDGPU::NoRegister);

    // s_add_u32 tmp_reg, s32, NumBytes
    // s_and_b32 s32, tmp_reg, 0b111...0000
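    //
    // Illustrative arithmetic (assumed values): with MaxAlignment = 8 bytes
    // per lane on a wave64 target, this adds (8 - 1) * 64 = 448 and then masks
    // with -(8 * 64) = -512, rounding the wave-level frame pointer up to the
    // next 512-byte boundary, i.e. 8-byte alignment for each lane.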
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
      .addReg(StackPtrReg)
      .addImm((Alignment - 1) * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
      .addReg(ScratchSPReg, RegState::Kill)
      .addImm(-Alignment * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // If we need a base pointer, set it up here. It's whatever the value of
    // the stack pointer is at this point. Any variable size objects will be
    // allocated after this, so we can still use the base pointer to reference
    // locals.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
      .addReg(StackPtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
  }

  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
  unsigned ScratchExecCopy = AMDGPU::NoRegister;

  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    if (ScratchExecCopy == AMDGPU::NoRegister) {
      if (LiveRegs.empty()) {
        LiveRegs.init(TRI);
        LiveRegs.addLiveIns(MBB);
      }

      ScratchExecCopy
        = findScratchNonCalleeSaveRegister(MF, LiveRegs,
                                           AMDGPU::SReg_64_XEXECRegClass);

      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
              ScratchExecCopy)
        .addImm(-1);
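
      // s_or_saveexec_b64 copies the old exec mask into ScratchExecCopy and
      // sets exec to (old exec | -1), i.e. all lanes enabled, so the store
      // below also writes lanes that were inactive at function entry.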
    }

    TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
                             Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
                             &TII->getRegisterInfo());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
      .addReg(ScratchExecCopy);
  }
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc DL;

  unsigned ScratchExecCopy = AMDGPU::NoRegister;
  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    if (ScratchExecCopy == AMDGPU::NoRegister) {
      // See emitPrologue.
      LivePhysRegs LiveRegs(*ST.getRegisterInfo());
      LiveRegs.addLiveIns(MBB);

      ScratchExecCopy
        = findScratchNonCalleeSaveRegister(MF, LiveRegs,
                                           AMDGPU::SReg_64_XEXECRegClass);

      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
              ScratchExecCopy)
        .addImm(-1);
    }

    TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
                              Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
                              &TII->getRegisterInfo());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
      .addReg(ScratchExecCopy);
  }

  if (hasFP(MF)) {
    const MachineFrameInfo &MFI = MF.getFrameInfo();
    uint32_t NumBytes = MFI.getStackSize();
    uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
      NumBytes + MFI.getMaxAlignment() : NumBytes;

    const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
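
    // Undo the prologue's SP increment; RoundedSize includes the extra
    // alignment padding when the stack was realigned.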
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameDestroy);
  }
}

static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                            unsigned &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return MF.getFrameInfo().getObjectOffset(FI);
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!MFI.hasStackObjects())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  bool AllSGPRSpilledToVGPRs = false;

  if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
    AllSGPRSpilledToVGPRs = true;

    // Process all SGPR spills before frame offsets are finalized. Ideally
    // SGPRs are spilled to VGPRs, in which case we can eliminate the stack
    // usage.
    //
    // XXX - This operates under the assumption that only other SGPR spills are
    // users of the frame index. I'm not 100% sure this is correct. The
    // StackColoring pass has a comment saying a future improvement would be to
    // merge allocas with spill slots, but for now according to
    // MachineFrameInfo isSpillSlot can't alias any other object.
    for (MachineBasicBlock &MBB : MF) {
      MachineBasicBlock::iterator Next;
      for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
        MachineInstr &MI = *I;
        Next = std::next(I);

        if (TII->isSGPRSpill(MI)) {
          int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
          assert(MFI.getStackID(FI) == SIStackID::SGPR_SPILL);
          if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
            bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
            (void)Spilled;
            assert(Spilled && "failed to spill SGPR to VGPR when allocated");
          } else
            AllSGPRSpilledToVGPRs = false;
        }
      }
    }
  }

  FuncInfo->removeSGPRToVGPRFrameIndices(MFI);

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
      !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");
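
    // Create an emergency spill slot so the scavenger can always free an SGPR
    // when frame indices are replaced later. Entry functions can place it at
    // a fixed offset (0); other functions use an ordinary stack object.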
    if (FuncInfo->isEntryFunction()) {
      int ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
      RS->addScavengingFrameIndex(ScavengeFI);
    } else {
      int ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlignment(AMDGPU::SGPR_32RegClass),
        false);
      RS->addScavengingFrameIndex(ScavengeFI);
    }
  }
}

void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedRegs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
  MachineFunction &MF,
  MachineBasicBlock &MBB,
  MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  if (!TFI->hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    Amount = alignTo(Amount, Align);
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    unsigned SPReg = MFI->getStackPtrOffsetReg();
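
    // As elsewhere in this file, the adjustment is a per-lane byte count, so
    // the wave-level SP moves by Amount * wavefront size.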
    unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
    BuildMI(MBB, I, DL, TII->get(Op), SPReg)
      .addReg(SPReg)
      .addImm(Amount * ST.getWavefrontSize());
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasCalls()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.
    if (MFI.getStackSize() != 0)
      return true;

    // For the entry point, the input wave scratch offset must be copied to the
    // API SP if there are calls.
    if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
      return true;

    // Retain behavior of always omitting the FP for leaf functions when
    // possible.
    if (MF.getTarget().Options.DisableFramePointerElim(MF))
      return true;
  }

  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
         MFI.hasStackMap() || MFI.hasPatchPoint() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()
           ->needsStackRealignment(MF);
}