forked from OSchip/llvm-project
[AMDGPU] Move register related queries to subtarget class
Differential Revision: https://reviews.llvm.org/D29318 llvm-svn: 294440
This commit is contained in:
parent
fd0dda765e
commit
e03b1d7b6a
|
@ -473,26 +473,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
|||
ExtraSGPRs = 6;
|
||||
}
|
||||
|
||||
// Record first reserved register and reserved register count fields, and
|
||||
// update max register counts if "amdgpu-debugger-reserve-regs" attribute was
|
||||
// requested.
|
||||
ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
|
||||
ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM);
|
||||
|
||||
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
|
||||
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
|
||||
// attribute was requested.
|
||||
if (STM.debuggerEmitPrologue()) {
|
||||
ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
|
||||
RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
|
||||
ProgInfo.DebuggerPrivateSegmentBufferSGPR =
|
||||
RI->getHWRegIndex(MFI->getScratchRSrcReg());
|
||||
}
|
||||
unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF);
|
||||
|
||||
// Check the addressable register limit before we add ExtraSGPRs.
|
||||
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
|
||||
!STM.hasSGPRInitBug()) {
|
||||
unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs();
|
||||
unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
|
||||
if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
|
||||
// This can happen due to a compiler bug or when using inline asm.
|
||||
LLVMContext &Ctx = MF.getFunction()->getContext();
|
||||
|
@ -507,23 +493,23 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
|||
|
||||
// Account for extra SGPRs and VGPRs reserved for debugger use.
|
||||
MaxSGPR += ExtraSGPRs;
|
||||
MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM);
|
||||
MaxVGPR += ExtraVGPRs;
|
||||
|
||||
// We found the maximum register index. They start at 0, so add one to get the
|
||||
// number of registers.
|
||||
ProgInfo.NumVGPR = MaxVGPR + 1;
|
||||
ProgInfo.NumSGPR = MaxSGPR + 1;
|
||||
ProgInfo.NumVGPR = MaxVGPR + 1;
|
||||
|
||||
// Adjust number of registers used to meet default/requested minimum/maximum
|
||||
// number of waves per execution unit request.
|
||||
ProgInfo.NumSGPRsForWavesPerEU = std::max(
|
||||
ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU()));
|
||||
ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
|
||||
ProgInfo.NumVGPRsForWavesPerEU = std::max(
|
||||
ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU()));
|
||||
ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
|
||||
|
||||
if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
|
||||
STM.hasSGPRInitBug()) {
|
||||
unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();
|
||||
unsigned MaxNumSGPRs = STM.getAddressableNumSGPRs();
|
||||
if (ProgInfo.NumSGPR > MaxNumSGPRs) {
|
||||
// This can happen due to a compiler bug or when using inline asm to use the
|
||||
// registers which are usually reserved for vcc etc.
|
||||
|
@ -560,13 +546,29 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
|||
|
||||
// SGPRBlocks is actual number of SGPR blocks minus 1.
|
||||
ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,
|
||||
RI->getSGPRAllocGranule());
|
||||
ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1;
|
||||
STM.getSGPRAllocGranule());
|
||||
ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPRAllocGranule() - 1;
|
||||
|
||||
// VGPRBlocks is actual number of VGPR blocks minus 1.
|
||||
ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,
|
||||
RI->getVGPRAllocGranule());
|
||||
ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1;
|
||||
STM.getVGPRAllocGranule());
|
||||
ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPRAllocGranule() - 1;
|
||||
|
||||
// Record first reserved register and reserved register count fields, and
|
||||
// update max register counts if "amdgpu-debugger-reserve-regs" attribute was
|
||||
// requested.
|
||||
ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
|
||||
ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
|
||||
|
||||
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
|
||||
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
|
||||
// attribute was requested.
|
||||
if (STM.debuggerEmitPrologue()) {
|
||||
ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
|
||||
RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
|
||||
ProgInfo.DebuggerPrivateSegmentBufferSGPR =
|
||||
RI->getHWRegIndex(MFI->getScratchRSrcReg());
|
||||
}
|
||||
|
||||
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
|
||||
// register.
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/CodeGen/MachineScheduler.h"
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
|
@ -322,12 +323,179 @@ unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
|
|||
return 1;
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getMaxNumSGPRs() const {
|
||||
if (hasSGPRInitBug())
|
||||
return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
|
||||
|
||||
if (getGeneration() >= VOLCANIC_ISLANDS)
|
||||
return 102;
|
||||
|
||||
return 104;
|
||||
unsigned SISubtarget::getMinNumSGPRs(unsigned WavesPerEU) const {
|
||||
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
switch (WavesPerEU) {
|
||||
case 0: return 0;
|
||||
case 10: return 0;
|
||||
case 9: return 0;
|
||||
case 8: return 81;
|
||||
default: return 97;
|
||||
}
|
||||
} else {
|
||||
switch (WavesPerEU) {
|
||||
case 0: return 0;
|
||||
case 10: return 0;
|
||||
case 9: return 49;
|
||||
case 8: return 57;
|
||||
case 7: return 65;
|
||||
case 6: return 73;
|
||||
case 5: return 81;
|
||||
default: return 97;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getMaxNumSGPRs(unsigned WavesPerEU,
|
||||
bool Addressable) const {
|
||||
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
switch (WavesPerEU) {
|
||||
case 0: return 80;
|
||||
case 10: return 80;
|
||||
case 9: return 80;
|
||||
case 8: return 96;
|
||||
default: return Addressable ? getAddressableNumSGPRs() : 112;
|
||||
}
|
||||
} else {
|
||||
switch (WavesPerEU) {
|
||||
case 0: return 48;
|
||||
case 10: return 48;
|
||||
case 9: return 56;
|
||||
case 8: return 64;
|
||||
case 7: return 72;
|
||||
case 6: return 80;
|
||||
case 5: return 96;
|
||||
default: return getAddressableNumSGPRs();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
if (MFI.hasFlatScratchInit()) {
|
||||
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
|
||||
if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
|
||||
return 4; // FLAT_SCRATCH, VCC (in that order).
|
||||
}
|
||||
|
||||
if (isXNACKEnabled())
|
||||
return 4; // XNACK, VCC (in that order).
|
||||
return 2; // VCC.
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
|
||||
const Function &F = *MF.getFunction();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
// Compute maximum number of SGPRs function can use using default/requested
|
||||
// minimum number of waves per execution unit.
|
||||
std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
|
||||
unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
|
||||
unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);
|
||||
|
||||
// Check if maximum number of SGPRs was explicitly requested using
|
||||
// "amdgpu-num-sgpr" attribute.
|
||||
if (F.hasFnAttribute("amdgpu-num-sgpr")) {
|
||||
unsigned Requested = AMDGPU::getIntegerAttribute(
|
||||
F, "amdgpu-num-sgpr", MaxNumSGPRs);
|
||||
|
||||
// Make sure requested value does not violate subtarget's specifications.
|
||||
if (Requested && (Requested <= getReservedNumSGPRs(MF)))
|
||||
Requested = 0;
|
||||
|
||||
// If more SGPRs are required to support the input user/system SGPRs,
|
||||
// increase to accommodate them.
|
||||
//
|
||||
// FIXME: This really ends up using the requested number of SGPRs + number
|
||||
// of reserved special registers in total. Theoretically you could re-use
|
||||
// the last input registers for these special registers, but this would
|
||||
// require a lot of complexity to deal with the weird aliasing.
|
||||
unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
|
||||
if (Requested && Requested < InputNumSGPRs)
|
||||
Requested = InputNumSGPRs;
|
||||
|
||||
// Make sure requested value is compatible with values implied by
|
||||
// default/requested minimum/maximum number of waves per execution unit.
|
||||
if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
|
||||
Requested = 0;
|
||||
if (WavesPerEU.second &&
|
||||
Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
|
||||
Requested = 0;
|
||||
|
||||
if (Requested)
|
||||
MaxNumSGPRs = Requested;
|
||||
}
|
||||
|
||||
if (hasSGPRInitBug())
|
||||
MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
|
||||
|
||||
return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
|
||||
MaxAddressableNumSGPRs);
|
||||
}
|
||||
|
||||
/// \returns Minimum number of VGPRs associated with \p WavesPerEU waves per
/// execution unit.
unsigned SISubtarget::getMinNumVGPRs(unsigned WavesPerEU) const {
  // Indexed directly by WavesPerEU (0..10). One wave has no dedicated entry
  // and therefore carries the fallback value.
  static const unsigned MinVGPRs[] = {0,  129, 85, 65, 49, 41,
                                      37, 33,  29, 25, 0};
  const unsigned NumEntries = sizeof(MinVGPRs) / sizeof(MinVGPRs[0]);

  if (WavesPerEU < NumEntries)
    return MinVGPRs[WavesPerEU];
  return 129;
}
|
||||
|
||||
unsigned SISubtarget::getMaxNumVGPRs(unsigned WavesPerEU) const {
|
||||
switch (WavesPerEU) {
|
||||
case 0: return 24;
|
||||
case 10: return 24;
|
||||
case 9: return 28;
|
||||
case 8: return 32;
|
||||
case 7: return 36;
|
||||
case 6: return 40;
|
||||
case 5: return 48;
|
||||
case 4: return 64;
|
||||
case 3: return 84;
|
||||
case 2: return 128;
|
||||
default: return getTotalNumVGPRs();
|
||||
}
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
|
||||
const Function &F = *MF.getFunction();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
// Compute maximum number of VGPRs function can use using default/requested
|
||||
// minimum number of waves per execution unit.
|
||||
std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
|
||||
unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
|
||||
|
||||
// Check if maximum number of VGPRs was explicitly requested using
|
||||
// "amdgpu-num-vgpr" attribute.
|
||||
if (F.hasFnAttribute("amdgpu-num-vgpr")) {
|
||||
unsigned Requested = AMDGPU::getIntegerAttribute(
|
||||
F, "amdgpu-num-vgpr", MaxNumVGPRs);
|
||||
|
||||
// Make sure requested value does not violate subtarget's specifications.
|
||||
if (Requested && Requested <= getReservedNumVGPRs(MF))
|
||||
Requested = 0;
|
||||
|
||||
// Make sure requested value is compatible with values implied by
|
||||
// default/requested minimum/maximum number of waves per execution unit.
|
||||
if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
|
||||
Requested = 0;
|
||||
if (WavesPerEU.second &&
|
||||
Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
|
||||
Requested = 0;
|
||||
|
||||
if (Requested)
|
||||
MaxNumVGPRs = Requested;
|
||||
}
|
||||
|
||||
return MaxNumVGPRs - getReservedNumVGPRs(MF);
|
||||
}
|
||||
|
|
|
@ -642,7 +642,85 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
unsigned getMaxNumSGPRs() const;
|
||||
/// \returns SGPR allocation granularity supported by the subtarget.
|
||||
unsigned getSGPRAllocGranule() const {
|
||||
return 8;
|
||||
}
|
||||
|
||||
/// \returns Total number of SGPRs supported by the subtarget.
|
||||
unsigned getTotalNumSGPRs() const {
|
||||
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return 800;
|
||||
return 512;
|
||||
}
|
||||
|
||||
/// \returns Addressable number of SGPRs supported by the subtarget.
|
||||
unsigned getAddressableNumSGPRs() const {
|
||||
if (hasSGPRInitBug())
|
||||
return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
|
||||
if (getGeneration() >= VOLCANIC_ISLANDS)
|
||||
return 102;
|
||||
return 104;
|
||||
}
|
||||
|
||||
/// \returns Minimum number of SGPRs that meets the given number of waves per
|
||||
/// execution unit requirement supported by the subtarget.
|
||||
unsigned getMinNumSGPRs(unsigned WavesPerEU) const;
|
||||
|
||||
/// \returns Maximum number of SGPRs that meets the given number of waves per
|
||||
/// execution unit requirement supported by the subtarget.
|
||||
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const;
|
||||
|
||||
/// \returns Reserved number of SGPRs for given function \p MF.
|
||||
unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
|
||||
|
||||
/// \returns Maximum number of SGPRs that meets number of waves per execution
|
||||
/// unit requirement for function \p MF, or number of SGPRs explicitly
|
||||
/// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
|
||||
///
|
||||
/// \returns Value that meets number of waves per execution unit requirement
|
||||
/// if explicitly requested value cannot be converted to integer, violates
|
||||
/// subtarget's specifications, or does not meet number of waves per execution
|
||||
/// unit requirement.
|
||||
unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
|
||||
|
||||
/// \returns VGPR allocation granularity supported by the subtarget.
|
||||
unsigned getVGPRAllocGranule() const {
|
||||
return 4;
|
||||
}
|
||||
|
||||
/// \returns Total number of VGPRs supported by the subtarget.
|
||||
unsigned getTotalNumVGPRs() const {
|
||||
return 256;
|
||||
}
|
||||
|
||||
/// \returns Addressable number of VGPRs supported by the subtarget.
|
||||
unsigned getAddressableNumVGPRs() const {
|
||||
return getTotalNumVGPRs();
|
||||
}
|
||||
|
||||
/// \returns Minimum number of VGPRs that meets given number of waves per
|
||||
/// execution unit requirement supported by the subtarget.
|
||||
unsigned getMinNumVGPRs(unsigned WavesPerEU) const;
|
||||
|
||||
/// \returns Maximum number of VGPRs that meets given number of waves per
|
||||
/// execution unit requirement supported by the subtarget.
|
||||
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;
|
||||
|
||||
/// \returns Reserved number of VGPRs for given function \p MF.
|
||||
unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
|
||||
return debuggerReserveRegs() ? 4 : 0;
|
||||
}
|
||||
|
||||
/// \returns Maximum number of VGPRs that meets number of waves per execution
|
||||
/// unit requirement for function \p MF, or number of VGPRs explicitly
|
||||
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
|
||||
///
|
||||
/// \returns Value that meets number of waves per execution unit requirement
|
||||
/// if explicitly requested value cannot be converted to integer, violates
|
||||
/// subtarget's specifications, or does not meet number of waves per execution
|
||||
/// unit requirement.
|
||||
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
|
|
@ -145,8 +145,8 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
|
|||
unsigned VGPRExcessLimit =
|
||||
Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
|
||||
unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
|
||||
unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true);
|
||||
unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves);
|
||||
unsigned SGPRCriticalLimit = ST.getMaxNumSGPRs(MaxWaves, true);
|
||||
unsigned VGPRCriticalLimit = ST.getMaxNumVGPRs(MaxWaves);
|
||||
|
||||
ReadyQueue &Q = Zone.Available;
|
||||
for (SUnit *SU : Q) {
|
||||
|
|
|
@ -21,16 +21,16 @@
|
|||
using namespace llvm;
|
||||
|
||||
|
||||
static ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF,
|
||||
const SIRegisterInfo *TRI) {
|
||||
static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST,
|
||||
const MachineFunction &MF) {
|
||||
return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
|
||||
TRI->getMaxNumSGPRs(MF) / 4);
|
||||
ST.getMaxNumSGPRs(MF) / 4);
|
||||
}
|
||||
|
||||
static ArrayRef<MCPhysReg> getAllSGPRs(const MachineFunction &MF,
|
||||
const SIRegisterInfo *TRI) {
|
||||
static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
|
||||
const MachineFunction &MF) {
|
||||
return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
|
||||
TRI->getMaxNumSGPRs(MF));
|
||||
ST.getMaxNumSGPRs(MF));
|
||||
}
|
||||
|
||||
void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
|
||||
|
@ -111,7 +111,7 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
|
|||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
||||
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
|
||||
ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(MF, TRI);
|
||||
ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
|
||||
AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
|
||||
|
||||
// Skip the last 2 elements because the last one is reserved for VCC, and
|
||||
|
@ -146,7 +146,7 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
|
|||
|
||||
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
|
||||
|
||||
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(MF, TRI);
|
||||
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
|
||||
if (NumPreloaded > AllSGPRs.size())
|
||||
return ScratchWaveOffsetReg;
|
||||
|
||||
|
|
|
@ -97,14 +97,18 @@ void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) co
|
|||
|
||||
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
|
||||
const MachineFunction &MF) const {
|
||||
unsigned BaseIdx = alignDown(getMaxNumSGPRs(MF), 4) - 4;
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
|
||||
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
|
||||
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
|
||||
const MachineFunction &MF) const {
|
||||
unsigned RegCount = getMaxNumSGPRs(MF);
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
unsigned RegCount = ST.getMaxNumSGPRs(MF);
|
||||
unsigned Reg;
|
||||
|
||||
// Try to place it in a hole after PrivateSegmentBufferReg.
|
||||
|
@ -139,14 +143,16 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
|||
reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
|
||||
reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
|
||||
|
||||
unsigned MaxNumSGPRs = getMaxNumSGPRs(MF);
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
|
||||
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
|
||||
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
|
||||
for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
|
||||
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
|
||||
reserveRegisterTuples(Reserved, Reg);
|
||||
}
|
||||
|
||||
unsigned MaxNumVGPRs = getMaxNumVGPRs(MF);
|
||||
unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
|
||||
unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
|
||||
for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
|
||||
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
|
||||
|
@ -1156,210 +1162,6 @@ SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
|
|||
return AMDGPU::NoRegister;
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return 800;
|
||||
return 512;
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return 102;
|
||||
return 104;
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST,
|
||||
const SIMachineFunctionInfo &MFI) const {
|
||||
if (MFI.hasFlatScratchInit()) {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order)
|
||||
|
||||
if (ST.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
|
||||
return 4; // FLAT_SCRATCH, VCC (in that order)
|
||||
}
|
||||
|
||||
if (ST.isXNACKEnabled())
|
||||
return 4; // XNACK, VCC (in that order)
|
||||
|
||||
return 2; // VCC.
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST,
|
||||
unsigned WavesPerEU) const {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
switch (WavesPerEU) {
|
||||
case 0: return 0;
|
||||
case 10: return 0;
|
||||
case 9: return 0;
|
||||
case 8: return 81;
|
||||
default: return 97;
|
||||
}
|
||||
} else {
|
||||
switch (WavesPerEU) {
|
||||
case 0: return 0;
|
||||
case 10: return 0;
|
||||
case 9: return 49;
|
||||
case 8: return 57;
|
||||
case 7: return 65;
|
||||
case 6: return 73;
|
||||
case 5: return 81;
|
||||
default: return 97;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST,
|
||||
unsigned WavesPerEU,
|
||||
bool Addressable) const {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
switch (WavesPerEU) {
|
||||
case 0: return 80;
|
||||
case 10: return 80;
|
||||
case 9: return 80;
|
||||
case 8: return 96;
|
||||
default: return Addressable ? getNumAddressableSGPRs(ST) : 112;
|
||||
}
|
||||
} else {
|
||||
switch (WavesPerEU) {
|
||||
case 0: return 48;
|
||||
case 10: return 48;
|
||||
case 9: return 56;
|
||||
case 8: return 64;
|
||||
case 7: return 72;
|
||||
case 6: return 80;
|
||||
case 5: return 96;
|
||||
default: return getNumAddressableSGPRs(ST);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
|
||||
const Function &F = *MF.getFunction();
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
// Compute maximum number of SGPRs function can use using default/requested
|
||||
// minimum number of waves per execution unit.
|
||||
std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
|
||||
unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false);
|
||||
unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true);
|
||||
|
||||
// Check if maximum number of SGPRs was explicitly requested using
|
||||
// "amdgpu-num-sgpr" attribute.
|
||||
if (F.hasFnAttribute("amdgpu-num-sgpr")) {
|
||||
unsigned Requested = AMDGPU::getIntegerAttribute(
|
||||
F, "amdgpu-num-sgpr", MaxNumSGPRs);
|
||||
|
||||
// Make sure requested value does not violate subtarget's specifications.
|
||||
if (Requested && (Requested <= getNumReservedSGPRs(ST, MFI)))
|
||||
Requested = 0;
|
||||
|
||||
// If more SGPRs are required to support the input user/system SGPRs,
|
||||
// increase to accommodate them.
|
||||
//
|
||||
// FIXME: This really ends up using the requested number of SGPRs + number
|
||||
// of reserved special registers in total. Theoretically you could re-use
|
||||
// the last input registers for these special registers, but this would
|
||||
// require a lot of complexity to deal with the weird aliasing.
|
||||
unsigned NumInputSGPRs = MFI.getNumPreloadedSGPRs();
|
||||
if (Requested && Requested < NumInputSGPRs)
|
||||
Requested = NumInputSGPRs;
|
||||
|
||||
// Make sure requested value is compatible with values implied by
|
||||
// default/requested minimum/maximum number of waves per execution unit.
|
||||
if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false))
|
||||
Requested = 0;
|
||||
if (WavesPerEU.second &&
|
||||
Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second))
|
||||
Requested = 0;
|
||||
|
||||
if (Requested)
|
||||
MaxNumSGPRs = Requested;
|
||||
}
|
||||
|
||||
if (ST.hasSGPRInitBug())
|
||||
MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
|
||||
|
||||
return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST, MFI),
|
||||
MaxNumAddressableSGPRs);
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs(
|
||||
const SISubtarget &ST) const {
|
||||
if (ST.debuggerReserveRegs())
|
||||
return 4;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// \returns Minimum number of VGPRs associated with \p WavesPerEU waves per
/// execution unit.
unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const {
  // Indexed directly by WavesPerEU (0..10). One wave has no dedicated entry
  // and therefore carries the fallback value.
  static const unsigned MinVGPRs[] = {0,  129, 85, 65, 49, 41,
                                      37, 33,  29, 25, 0};
  const unsigned NumEntries = sizeof(MinVGPRs) / sizeof(MinVGPRs[0]);

  if (WavesPerEU < NumEntries)
    return MinVGPRs[WavesPerEU];
  return 129;
}
|
||||
|
||||
unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const {
|
||||
switch (WavesPerEU) {
|
||||
case 0: return 24;
|
||||
case 10: return 24;
|
||||
case 9: return 28;
|
||||
case 8: return 32;
|
||||
case 7: return 36;
|
||||
case 6: return 40;
|
||||
case 5: return 48;
|
||||
case 4: return 64;
|
||||
case 3: return 84;
|
||||
case 2: return 128;
|
||||
default: return getTotalNumVGPRs();
|
||||
}
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const {
|
||||
const Function &F = *MF.getFunction();
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
// Compute maximum number of VGPRs function can use using default/requested
|
||||
// minimum number of waves per execution unit.
|
||||
std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
|
||||
unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
|
||||
|
||||
// Check if maximum number of VGPRs was explicitly requested using
|
||||
// "amdgpu-num-vgpr" attribute.
|
||||
if (F.hasFnAttribute("amdgpu-num-vgpr")) {
|
||||
unsigned Requested = AMDGPU::getIntegerAttribute(
|
||||
F, "amdgpu-num-vgpr", MaxNumVGPRs);
|
||||
|
||||
// Make sure requested value does not violate subtarget's specifications.
|
||||
if (Requested && Requested <= getNumDebuggerReservedVGPRs(ST))
|
||||
Requested = 0;
|
||||
|
||||
// Make sure requested value is compatible with values implied by
|
||||
// default/requested minimum/maximum number of waves per execution unit.
|
||||
if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
|
||||
Requested = 0;
|
||||
if (WavesPerEU.second &&
|
||||
Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
|
||||
Requested = 0;
|
||||
|
||||
if (Requested)
|
||||
MaxNumVGPRs = Requested;
|
||||
}
|
||||
|
||||
return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST);
|
||||
}
|
||||
|
||||
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
|
||||
unsigned EltSize) const {
|
||||
if (EltSize == 4) {
|
||||
|
|
|
@ -195,72 +195,6 @@ public:
|
|||
return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID);
|
||||
}
|
||||
|
||||
/// \returns SGPR allocation granularity supported by the subtarget.
|
||||
unsigned getSGPRAllocGranule() const {
|
||||
return 8;
|
||||
}
|
||||
|
||||
/// \returns Total number of SGPRs supported by the subtarget.
|
||||
unsigned getTotalNumSGPRs(const SISubtarget &ST) const;
|
||||
|
||||
/// \returns Number of addressable SGPRs supported by the subtarget.
|
||||
unsigned getNumAddressableSGPRs(const SISubtarget &ST) const;
|
||||
|
||||
/// \returns Number of reserved SGPRs supported by the subtarget.
|
||||
unsigned getNumReservedSGPRs(const SISubtarget &ST,
|
||||
const SIMachineFunctionInfo &MFI) const;
|
||||
|
||||
/// \returns Minimum number of SGPRs that meets given number of waves per
|
||||
/// execution unit requirement for given subtarget.
|
||||
unsigned getMinNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const;
|
||||
|
||||
/// \returns Maximum number of SGPRs that meets given number of waves per
|
||||
/// execution unit requirement for given subtarget.
|
||||
unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU,
|
||||
bool Addressable) const;
|
||||
|
||||
/// \returns Maximum number of SGPRs that meets number of waves per execution
|
||||
/// unit requirement for function \p MF, or number of SGPRs explicitly
|
||||
/// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
|
||||
///
|
||||
/// \returns Value that meets number of waves per execution unit requirement
|
||||
/// if explicitly requested value cannot be converted to integer, violates
|
||||
/// subtarget's specifications, or does not meet number of waves per execution
|
||||
/// unit requirement.
|
||||
unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
|
||||
|
||||
/// \returns VGPR allocation granularity supported by the subtarget.
|
||||
unsigned getVGPRAllocGranule() const {
|
||||
return 4;
|
||||
}
|
||||
|
||||
/// \returns Total number of VGPRs supported by the subtarget.
|
||||
unsigned getTotalNumVGPRs() const {
|
||||
return 256;
|
||||
}
|
||||
|
||||
/// \returns Number of reserved VGPRs for debugger use supported by the
|
||||
/// subtarget.
|
||||
unsigned getNumDebuggerReservedVGPRs(const SISubtarget &ST) const;
|
||||
|
||||
/// \returns Minimum number of VGPRs that meets given number of waves per
|
||||
/// execution unit requirement.
|
||||
unsigned getMinNumVGPRs(unsigned WavesPerEU) const;
|
||||
|
||||
/// \returns Maximum number of VGPRs that meets given number of waves per
|
||||
/// execution unit requirement.
|
||||
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;
|
||||
|
||||
/// \returns Maximum number of VGPRs that meets number of waves per execution
|
||||
/// unit requirement for function \p MF, or number of VGPRs explicitly
|
||||
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
|
||||
///
|
||||
/// \returns Value that meets number of waves per execution unit requirement
|
||||
/// if explicitly requested value cannot be converted to integer, violates
|
||||
/// subtarget's specifications, or does not meet number of waves per execution
|
||||
/// unit requirement.
|
||||
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
|
||||
|
||||
ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
|
||||
unsigned EltSize) const;
|
||||
|
||||
|
|
Loading…
Reference in New Issue