forked from OSchip/llvm-project
AMDGPU: Refactor frame lowering
This will make future changes easier. llvm-svn: 280296
This commit is contained in:
parent
1b54baef15
commit
57bc4324f8
|
@ -37,6 +37,145 @@ static ArrayRef<MCPhysReg> getAllSGPRs() {
|
|||
AMDGPU::SGPR_32RegClass.getNumRegs());
|
||||
}
|
||||
|
||||
void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
|
||||
const SIRegisterInfo* TRI,
|
||||
MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
// We don't need this if we only have spills since there is no user facing
|
||||
// scratch.
|
||||
|
||||
// TODO: If we know we don't have flat instructions earlier, we can omit
|
||||
// this from the input registers.
|
||||
//
|
||||
// TODO: We only need to know if we access scratch space through a flat
|
||||
// pointer. Because we only detect if flat instructions are used at all,
|
||||
// this will be used more often than necessary on VI.
|
||||
|
||||
// Debug location must be unknown since the first debug location is used to
|
||||
// determine the end of the prologue.
|
||||
DebugLoc DL;
|
||||
MachineBasicBlock::iterator I = MBB.begin();
|
||||
|
||||
unsigned FlatScratchInitReg
|
||||
= TRI->getPreloadedValue(MF, SIRegisterInfo::FLAT_SCRATCH_INIT);
|
||||
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
MRI.addLiveIn(FlatScratchInitReg);
|
||||
MBB.addLiveIn(FlatScratchInitReg);
|
||||
|
||||
// Copy the size in bytes.
|
||||
unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::FLAT_SCR_LO)
|
||||
.addReg(FlatScrInitHi, RegState::Kill);
|
||||
|
||||
unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
|
||||
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
|
||||
|
||||
|
||||
// Add wave offset in bytes to private base offset.
|
||||
// See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
|
||||
.addReg(FlatScrInitLo)
|
||||
.addReg(ScratchWaveOffsetReg);
|
||||
|
||||
// Convert offset to 256-byte units.
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
|
||||
.addReg(FlatScrInitLo, RegState::Kill)
|
||||
.addImm(8);
|
||||
}
|
||||
|
||||
unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
|
||||
const SISubtarget &ST,
|
||||
const SIInstrInfo *TII,
|
||||
const SIRegisterInfo *TRI,
|
||||
SIMachineFunctionInfo *MFI,
|
||||
MachineFunction &MF) const {
|
||||
|
||||
// We need to insert initialization of the scratch resource descriptor.
|
||||
unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
|
||||
assert(ScratchRsrcReg != AMDGPU::NoRegister);
|
||||
|
||||
if (ST.hasSGPRInitBug() ||
|
||||
ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
|
||||
return ScratchRsrcReg;
|
||||
|
||||
// We reserved the last registers for this. Shift it down to the end of those
|
||||
// which were actually used.
|
||||
//
|
||||
// FIXME: It might be safer to use a pseudoregister before replacement.
|
||||
|
||||
// FIXME: We should be able to eliminate unused input registers. We only
|
||||
// cannot do this for the resources required for scratch access. For now we
|
||||
// skip over user SGPRs and may leave unused holes.
|
||||
|
||||
// We find the resource first because it has an alignment requirement.
|
||||
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
||||
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
|
||||
// Skip the last 2 elements because the last one is reserved for VCC, and
|
||||
// this is the 2nd to last element already.
|
||||
for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
|
||||
// Pick the first unallocated one. Make sure we don't clobber the other
|
||||
// reserved input we needed.
|
||||
if (!MRI.isPhysRegUsed(Reg)) {
|
||||
assert(MRI.isAllocatable(Reg));
|
||||
MRI.replaceRegWith(ScratchRsrcReg, Reg);
|
||||
MFI->setScratchRSrcReg(Reg);
|
||||
return Reg;
|
||||
}
|
||||
}
|
||||
|
||||
return ScratchRsrcReg;
|
||||
}
|
||||
|
||||
unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
|
||||
const SISubtarget &ST,
|
||||
const SIInstrInfo *TII,
|
||||
const SIRegisterInfo *TRI,
|
||||
SIMachineFunctionInfo *MFI,
|
||||
MachineFunction &MF) const {
|
||||
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
|
||||
if (ST.hasSGPRInitBug() ||
|
||||
ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF))
|
||||
return ScratchWaveOffsetReg;
|
||||
|
||||
unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
|
||||
|
||||
// We need to drop register from the end of the list that we cannot use
|
||||
// for the scratch wave offset.
|
||||
// + 2 s102 and s103 do not exist on VI.
|
||||
// + 2 for vcc
|
||||
// + 2 for xnack_mask
|
||||
// + 2 for flat_scratch
|
||||
// + 4 for registers reserved for scratch resource register
|
||||
// + 1 for register reserved for scratch wave offset. (By exluding this
|
||||
// register from the list to consider, it means that when this
|
||||
// register is being used for the scratch wave offset and there
|
||||
// are no other free SGPRs, then the value will stay in this register.
|
||||
// ----
|
||||
// 13
|
||||
for (MCPhysReg Reg : getAllSGPRs().drop_back(13).slice(NumPreloaded)) {
|
||||
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
|
||||
// scratch descriptor, since we haven’t added its uses yet.
|
||||
if (!MRI.isPhysRegUsed(Reg)) {
|
||||
if (!MRI.isAllocatable(Reg) ||
|
||||
TRI->isSubRegisterEq(ScratchRsrcReg, Reg))
|
||||
continue;
|
||||
|
||||
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
|
||||
MFI->setScratchWaveOffsetReg(Reg);
|
||||
return Reg;
|
||||
}
|
||||
}
|
||||
|
||||
return ScratchWaveOffsetReg;
|
||||
}
|
||||
|
||||
void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
// Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
|
||||
|
@ -63,15 +202,19 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
MachineBasicBlock::iterator I = MBB.begin();
|
||||
|
||||
unsigned ScratchRsrcReg
|
||||
= getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
|
||||
unsigned ScratchWaveOffsetReg
|
||||
= getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
|
||||
assert(ScratchRsrcReg != AMDGPU::NoRegister);
|
||||
assert(ScratchWaveOffsetReg != AMDGPU::NoRegister);
|
||||
assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
|
||||
|
||||
if (MFI->hasFlatScratchInit())
|
||||
emitFlatScratchInit(TII, TRI, MF, MBB);
|
||||
|
||||
// We need to insert initialization of the scratch resource descriptor.
|
||||
unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
|
||||
assert(ScratchRsrcReg != AMDGPU::NoRegister);
|
||||
|
||||
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
|
||||
assert(ScratchWaveOffsetReg != AMDGPU::NoRegister);
|
||||
|
||||
unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
|
||||
MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
|
||||
|
||||
|
@ -81,46 +224,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
|
||||
}
|
||||
|
||||
if (MFI->hasFlatScratchInit()) {
|
||||
// We don't need this if we only have spills since there is no user facing
|
||||
// scratch.
|
||||
|
||||
// TODO: If we know we don't have flat instructions earlier, we can omit
|
||||
// this from the input registers.
|
||||
//
|
||||
// TODO: We only need to know if we access scratch space through a flat
|
||||
// pointer. Because we only detect if flat instructions are used at all,
|
||||
// this will be used more often than necessary on VI.
|
||||
|
||||
// Debug location must be unknown since the first debug location is used to
|
||||
// determine the end of the prologue.
|
||||
DebugLoc DL;
|
||||
|
||||
unsigned FlatScratchInitReg
|
||||
= TRI->getPreloadedValue(MF, SIRegisterInfo::FLAT_SCRATCH_INIT);
|
||||
|
||||
MRI.addLiveIn(FlatScratchInitReg);
|
||||
MBB.addLiveIn(FlatScratchInitReg);
|
||||
|
||||
// Copy the size in bytes.
|
||||
unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::FLAT_SCR_LO)
|
||||
.addReg(FlatScrInitHi, RegState::Kill);
|
||||
|
||||
unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
|
||||
|
||||
// Add wave offset in bytes to private base offset.
|
||||
// See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
|
||||
.addReg(FlatScrInitLo)
|
||||
.addReg(ScratchWaveOffsetReg);
|
||||
|
||||
// Convert offset to 256-byte units.
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
|
||||
.addReg(FlatScrInitLo, RegState::Kill)
|
||||
.addImm(8);
|
||||
}
|
||||
|
||||
// If we reserved the original input registers, we don't need to copy to the
|
||||
// reserved registers.
|
||||
if (ScratchRsrcReg == PreloadedPrivateBufferReg) {
|
||||
|
@ -130,7 +233,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
return;
|
||||
}
|
||||
|
||||
|
||||
// We added live-ins during argument lowering, but since they were not used
|
||||
// they were deleted. We're adding the uses now, so add them back.
|
||||
MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
|
||||
|
@ -141,79 +243,23 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
MBB.addLiveIn(PreloadedPrivateBufferReg);
|
||||
}
|
||||
|
||||
if (!ST.hasSGPRInitBug()) {
|
||||
// We reserved the last registers for this. Shift it down to the end of those
|
||||
// which were actually used.
|
||||
//
|
||||
// FIXME: It might be safer to use a pseudoregister before replacement.
|
||||
// Make the register selected live throughout the function.
|
||||
for (MachineBasicBlock &OtherBB : MF) {
|
||||
if (&OtherBB == &MBB)
|
||||
continue;
|
||||
|
||||
// FIXME: We should be able to eliminate unused input registers. We only
|
||||
// cannot do this for the resources required for scratch access. For now we
|
||||
// skip over user SGPRs and may leave unused holes.
|
||||
|
||||
// We find the resource first because it has an alignment requirement.
|
||||
if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
||||
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
|
||||
// Skip the last 2 elements because the last one is reserved for VCC, and
|
||||
// this is the 2nd to last element already.
|
||||
for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
|
||||
// Pick the first unallocated one. Make sure we don't clobber the other
|
||||
// reserved input we needed.
|
||||
if (!MRI.isPhysRegUsed(Reg)) {
|
||||
assert(MRI.isAllocatable(Reg));
|
||||
MRI.replaceRegWith(ScratchRsrcReg, Reg);
|
||||
ScratchRsrcReg = Reg;
|
||||
MFI->setScratchRSrcReg(ScratchRsrcReg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
|
||||
|
||||
// We need to drop register from the end of the list that we cannot use
|
||||
// for the scratch wave offset.
|
||||
// + 2 s102 and s103 do not exist on VI.
|
||||
// + 2 for vcc
|
||||
// + 2 for xnack_mask
|
||||
// + 2 for flat_scratch
|
||||
// + 4 for registers reserved for scratch resource register
|
||||
// + 1 for register reserved for scratch wave offset. (By exluding this
|
||||
// register from the list to consider, it means that when this
|
||||
// register is being used for the scratch wave offset and there
|
||||
// are no other free SGPRs, then the value will stay in this register.
|
||||
// ----
|
||||
// 13
|
||||
for (MCPhysReg Reg : getAllSGPRs().drop_back(13).slice(NumPreloaded)) {
|
||||
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
|
||||
// scratch descriptor, since we haven’t added its uses yet.
|
||||
if (!MRI.isPhysRegUsed(Reg)) {
|
||||
if (!MRI.isAllocatable(Reg) ||
|
||||
TRI->isSubRegisterEq(ScratchRsrcReg, Reg))
|
||||
continue;
|
||||
|
||||
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
|
||||
ScratchWaveOffsetReg = Reg;
|
||||
MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
OtherBB.addLiveIn(ScratchRsrcReg);
|
||||
OtherBB.addLiveIn(ScratchWaveOffsetReg);
|
||||
}
|
||||
|
||||
|
||||
assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
|
||||
|
||||
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
|
||||
DebugLoc DL;
|
||||
MachineBasicBlock::iterator I = MBB.begin();
|
||||
|
||||
if (PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
|
||||
// Make sure we emit the copy for the offset first. We may have chosen to copy
|
||||
// the buffer resource into a register that aliases the input offset register.
|
||||
// Make sure we emit the copy for the offset first. We may have chosen to
|
||||
// copy the buffer resource into a register that aliases the input offset
|
||||
// register.
|
||||
BuildMI(MBB, I, DL, SMovB32, ScratchWaveOffsetReg)
|
||||
.addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
|
||||
}
|
||||
|
@ -260,15 +306,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
.addImm(Rsrc23 >> 32)
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
}
|
||||
|
||||
// Make the register selected live throughout the function.
|
||||
for (MachineBasicBlock &OtherBB : MF) {
|
||||
if (&OtherBB == &MBB)
|
||||
continue;
|
||||
|
||||
OtherBB.addLiveIn(ScratchRsrcReg);
|
||||
OtherBB.addLiveIn(ScratchWaveOffsetReg);
|
||||
}
|
||||
}
|
||||
|
||||
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
|
|
|
@ -13,6 +13,10 @@
|
|||
#include "AMDGPUFrameLowering.h"
|
||||
|
||||
namespace llvm {
|
||||
class SIInstrInfo;
|
||||
class SIMachineFunctionInfo;
|
||||
class SIRegisterInfo;
|
||||
class SISubtarget;
|
||||
|
||||
class SIFrameLowering final : public AMDGPUFrameLowering {
|
||||
public:
|
||||
|
@ -31,6 +35,25 @@ public:
|
|||
RegScavenger *RS = nullptr) const override;
|
||||
|
||||
private:
|
||||
void emitFlatScratchInit(const SIInstrInfo *TII,
|
||||
const SIRegisterInfo* TRI,
|
||||
MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const;
|
||||
|
||||
unsigned getReservedPrivateSegmentBufferReg(
|
||||
const SISubtarget &ST,
|
||||
const SIInstrInfo *TII,
|
||||
const SIRegisterInfo *TRI,
|
||||
SIMachineFunctionInfo *MFI,
|
||||
MachineFunction &MF) const;
|
||||
|
||||
unsigned getReservedPrivateSegmentWaveByteOffsetReg(
|
||||
const SISubtarget &ST,
|
||||
const SIInstrInfo *TII,
|
||||
const SIRegisterInfo *TRI,
|
||||
SIMachineFunctionInfo *MFI,
|
||||
MachineFunction &MF) const;
|
||||
|
||||
/// \brief Emits debugger prologue.
|
||||
void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue