[AMDGPU] Move register related queries to subtarget class

Differential Revision: https://reviews.llvm.org/D29318

llvm-svn: 294440
This commit is contained in:
Konstantin Zhuravlyov 2017-02-08 13:02:33 +00:00
parent fd0dda765e
commit e03b1d7b6a
7 changed files with 302 additions and 318 deletions

View File

@ -473,26 +473,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ExtraSGPRs = 6;
}
// Record first reserved register and reserved register count fields, and
// update max register counts if "amdgpu-debugger-reserve-regs" attribute was
// requested.
ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM);
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
// attribute was requested.
if (STM.debuggerEmitPrologue()) {
ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
ProgInfo.DebuggerPrivateSegmentBufferSGPR =
RI->getHWRegIndex(MFI->getScratchRSrcReg());
}
unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF);
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
!STM.hasSGPRInitBug()) {
unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs();
unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
// This can happen due to a compiler bug or when using inline asm.
LLVMContext &Ctx = MF.getFunction()->getContext();
@ -507,23 +493,23 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// Account for extra SGPRs and VGPRs reserved for debugger use.
MaxSGPR += ExtraSGPRs;
MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM);
MaxVGPR += ExtraVGPRs;
// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
ProgInfo.NumVGPR = MaxVGPR + 1;
ProgInfo.NumSGPR = MaxSGPR + 1;
ProgInfo.NumVGPR = MaxVGPR + 1;
// Adjust number of registers used to meet default/requested minimum/maximum
// number of waves per execution unit request.
ProgInfo.NumSGPRsForWavesPerEU = std::max(
ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU()));
ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
ProgInfo.NumVGPRsForWavesPerEU = std::max(
ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU()));
ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
STM.hasSGPRInitBug()) {
unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();
unsigned MaxNumSGPRs = STM.getAddressableNumSGPRs();
if (ProgInfo.NumSGPR > MaxNumSGPRs) {
// This can happen due to a compiler bug or when using inline asm to use the
// registers which are usually reserved for vcc etc.
@ -560,13 +546,29 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// SGPRBlocks is actual number of SGPR blocks minus 1.
ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,
RI->getSGPRAllocGranule());
ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1;
STM.getSGPRAllocGranule());
ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPRAllocGranule() - 1;
// VGPRBlocks is actual number of VGPR blocks minus 1.
ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,
RI->getVGPRAllocGranule());
ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1;
STM.getVGPRAllocGranule());
ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPRAllocGranule() - 1;
// Record first reserved register and reserved register count fields, and
// update max register counts if "amdgpu-debugger-reserve-regs" attribute was
// requested.
ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
// attribute was requested.
if (STM.debuggerEmitPrologue()) {
ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
ProgInfo.DebuggerPrivateSegmentBufferSGPR =
RI->getHWRegIndex(MFI->getScratchRSrcReg());
}
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
// register.

View File

@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/Target/TargetFrameLowering.h"
@ -322,12 +323,179 @@ unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
return 1;
}
unsigned SISubtarget::getMaxNumSGPRs() const {
if (hasSGPRInitBug())
return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
if (getGeneration() >= VOLCANIC_ISLANDS)
return 102;
return 104;
unsigned SISubtarget::getMinNumSGPRs(unsigned WavesPerEU) const {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
switch (WavesPerEU) {
case 0: return 0;
case 10: return 0;
case 9: return 0;
case 8: return 81;
default: return 97;
}
} else {
switch (WavesPerEU) {
case 0: return 0;
case 10: return 0;
case 9: return 49;
case 8: return 57;
case 7: return 65;
case 6: return 73;
case 5: return 81;
default: return 97;
}
}
}
unsigned SISubtarget::getMaxNumSGPRs(unsigned WavesPerEU,
bool Addressable) const {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
switch (WavesPerEU) {
case 0: return 80;
case 10: return 80;
case 9: return 80;
case 8: return 96;
default: return Addressable ? getAddressableNumSGPRs() : 112;
}
} else {
switch (WavesPerEU) {
case 0: return 48;
case 10: return 48;
case 9: return 56;
case 8: return 64;
case 7: return 72;
case 6: return 80;
case 5: return 96;
default: return getAddressableNumSGPRs();
}
}
}
unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
if (MFI.hasFlatScratchInit()) {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
return 4; // FLAT_SCRATCH, VCC (in that order).
}
if (isXNACKEnabled())
return 4; // XNACK, VCC (in that order).
return 2; // VCC.
}
unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
const Function &F = *MF.getFunction();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
// Compute maximum number of SGPRs function can use using default/requested
// minimum number of waves per execution unit.
std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);
// Check if maximum number of SGPRs was explicitly requested using
// "amdgpu-num-sgpr" attribute.
if (F.hasFnAttribute("amdgpu-num-sgpr")) {
unsigned Requested = AMDGPU::getIntegerAttribute(
F, "amdgpu-num-sgpr", MaxNumSGPRs);
// Make sure requested value does not violate subtarget's specifications.
if (Requested && (Requested <= getReservedNumSGPRs(MF)))
Requested = 0;
// If more SGPRs are required to support the input user/system SGPRs,
// increase to accommodate them.
//
// FIXME: This really ends up using the requested number of SGPRs + number
// of reserved special registers in total. Theoretically you could re-use
// the last input registers for these special registers, but this would
// require a lot of complexity to deal with the weird aliasing.
unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
if (Requested && Requested < InputNumSGPRs)
Requested = InputNumSGPRs;
// Make sure requested value is compatible with values implied by
// default/requested minimum/maximum number of waves per execution unit.
if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
Requested = 0;
if (WavesPerEU.second &&
Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
Requested = 0;
if (Requested)
MaxNumSGPRs = Requested;
}
if (hasSGPRInitBug())
MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
MaxAddressableNumSGPRs);
}
unsigned SISubtarget::getMinNumVGPRs(unsigned WavesPerEU) const {
switch (WavesPerEU) {
case 0: return 0;
case 10: return 0;
case 9: return 25;
case 8: return 29;
case 7: return 33;
case 6: return 37;
case 5: return 41;
case 4: return 49;
case 3: return 65;
case 2: return 85;
default: return 129;
}
}
unsigned SISubtarget::getMaxNumVGPRs(unsigned WavesPerEU) const {
switch (WavesPerEU) {
case 0: return 24;
case 10: return 24;
case 9: return 28;
case 8: return 32;
case 7: return 36;
case 6: return 40;
case 5: return 48;
case 4: return 64;
case 3: return 84;
case 2: return 128;
default: return getTotalNumVGPRs();
}
}
unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
const Function &F = *MF.getFunction();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
// Compute maximum number of VGPRs function can use using default/requested
// minimum number of waves per execution unit.
std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
// Check if maximum number of VGPRs was explicitly requested using
// "amdgpu-num-vgpr" attribute.
if (F.hasFnAttribute("amdgpu-num-vgpr")) {
unsigned Requested = AMDGPU::getIntegerAttribute(
F, "amdgpu-num-vgpr", MaxNumVGPRs);
// Make sure requested value does not violate subtarget's specifications.
if (Requested && Requested <= getReservedNumVGPRs(MF))
Requested = 0;
// Make sure requested value is compatible with values implied by
// default/requested minimum/maximum number of waves per execution unit.
if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
Requested = 0;
if (WavesPerEU.second &&
Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
Requested = 0;
if (Requested)
MaxNumVGPRs = Requested;
}
return MaxNumVGPRs - getReservedNumVGPRs(MF);
}

View File

@ -642,7 +642,85 @@ public:
return true;
}
unsigned getMaxNumSGPRs() const;
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
return 8;
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs() const {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 800;
return 512;
}
/// \returns Addressable number of SGPRs supported by the subtarget.
unsigned getAddressableNumSGPRs() const {
if (hasSGPRInitBug())
return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
if (getGeneration() >= VOLCANIC_ISLANDS)
return 102;
return 104;
}
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const;
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const;
/// \returns Reserved number of SGPRs for given function \p MF.
unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
/// \returns Maximum number of SGPRs that meets number of waves per execution
/// unit requirement for function \p MF, or number of SGPRs explicitly
/// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
///
/// \returns Value that meets number of waves per execution unit requirement
/// if explicitly requested value cannot be converted to integer, violates
/// subtarget's specifications, or does not meet number of waves per execution
/// unit requirement.
unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
return 4;
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
return 256;
}
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
return getTotalNumVGPRs();
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const;
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;
/// \returns Reserved number of VGPRs for given function \p MF.
unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
return debuggerReserveRegs() ? 4 : 0;
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p MF, or number of VGPRs explicitly
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
///
/// \returns Value that meets number of waves per execution unit requirement
/// if explicitly requested value cannot be converted to integer, violates
/// subtarget's specifications, or does not meet number of waves per execution
/// unit requirement.
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
};
} // end namespace llvm

View File

@ -145,8 +145,8 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
unsigned VGPRExcessLimit =
Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true);
unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves);
unsigned SGPRCriticalLimit = ST.getMaxNumSGPRs(MaxWaves, true);
unsigned VGPRCriticalLimit = ST.getMaxNumVGPRs(MaxWaves);
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {

View File

@ -21,16 +21,16 @@
using namespace llvm;
static ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF,
const SIRegisterInfo *TRI) {
static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST,
const MachineFunction &MF) {
return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
TRI->getMaxNumSGPRs(MF) / 4);
ST.getMaxNumSGPRs(MF) / 4);
}
static ArrayRef<MCPhysReg> getAllSGPRs(const MachineFunction &MF,
const SIRegisterInfo *TRI) {
static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
const MachineFunction &MF) {
return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
TRI->getMaxNumSGPRs(MF));
ST.getMaxNumSGPRs(MF));
}
void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
@ -111,7 +111,7 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(MF, TRI);
ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
// Skip the last 2 elements because the last one is reserved for VCC, and
@ -146,7 +146,7 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(MF, TRI);
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
if (NumPreloaded > AllSGPRs.size())
return ScratchWaveOffsetReg;

View File

@ -97,14 +97,18 @@ void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) co
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
const MachineFunction &MF) const {
unsigned BaseIdx = alignDown(getMaxNumSGPRs(MF), 4) - 4;
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const {
unsigned RegCount = getMaxNumSGPRs(MF);
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
unsigned RegCount = ST.getMaxNumSGPRs(MF);
unsigned Reg;
// Try to place it in a hole after PrivateSegmentbufferReg.
@ -139,14 +143,16 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
unsigned MaxNumSGPRs = getMaxNumSGPRs(MF);
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
}
unsigned MaxNumVGPRs = getMaxNumVGPRs(MF);
unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
@ -1156,210 +1162,6 @@ SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
return AMDGPU::NoRegister;
}
unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const {
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 800;
return 512;
}
unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const {
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 102;
return 104;
}
unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST,
const SIMachineFunctionInfo &MFI) const {
if (MFI.hasFlatScratchInit()) {
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order)
if (ST.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
return 4; // FLAT_SCRATCH, VCC (in that order)
}
if (ST.isXNACKEnabled())
return 4; // XNACK, VCC (in that order)
return 2; // VCC.
}
unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST,
unsigned WavesPerEU) const {
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
switch (WavesPerEU) {
case 0: return 0;
case 10: return 0;
case 9: return 0;
case 8: return 81;
default: return 97;
}
} else {
switch (WavesPerEU) {
case 0: return 0;
case 10: return 0;
case 9: return 49;
case 8: return 57;
case 7: return 65;
case 6: return 73;
case 5: return 81;
default: return 97;
}
}
}
unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST,
unsigned WavesPerEU,
bool Addressable) const {
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
switch (WavesPerEU) {
case 0: return 80;
case 10: return 80;
case 9: return 80;
case 8: return 96;
default: return Addressable ? getNumAddressableSGPRs(ST) : 112;
}
} else {
switch (WavesPerEU) {
case 0: return 48;
case 10: return 48;
case 9: return 56;
case 8: return 64;
case 7: return 72;
case 6: return 80;
case 5: return 96;
default: return getNumAddressableSGPRs(ST);
}
}
}
unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
const Function &F = *MF.getFunction();
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
// Compute maximum number of SGPRs function can use using default/requested
// minimum number of waves per execution unit.
std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false);
unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true);
// Check if maximum number of SGPRs was explicitly requested using
// "amdgpu-num-sgpr" attribute.
if (F.hasFnAttribute("amdgpu-num-sgpr")) {
unsigned Requested = AMDGPU::getIntegerAttribute(
F, "amdgpu-num-sgpr", MaxNumSGPRs);
// Make sure requested value does not violate subtarget's specifications.
if (Requested && (Requested <= getNumReservedSGPRs(ST, MFI)))
Requested = 0;
// If more SGPRs are required to support the input user/system SGPRs,
// increase to accommodate them.
//
// FIXME: This really ends up using the requested number of SGPRs + number
// of reserved special registers in total. Theoretically you could re-use
// the last input registers for these special registers, but this would
// require a lot of complexity to deal with the weird aliasing.
unsigned NumInputSGPRs = MFI.getNumPreloadedSGPRs();
if (Requested && Requested < NumInputSGPRs)
Requested = NumInputSGPRs;
// Make sure requested value is compatible with values implied by
// default/requested minimum/maximum number of waves per execution unit.
if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false))
Requested = 0;
if (WavesPerEU.second &&
Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second))
Requested = 0;
if (Requested)
MaxNumSGPRs = Requested;
}
if (ST.hasSGPRInitBug())
MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST, MFI),
MaxNumAddressableSGPRs);
}
unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs(
const SISubtarget &ST) const {
if (ST.debuggerReserveRegs())
return 4;
return 0;
}
unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const {
switch (WavesPerEU) {
case 0: return 0;
case 10: return 0;
case 9: return 25;
case 8: return 29;
case 7: return 33;
case 6: return 37;
case 5: return 41;
case 4: return 49;
case 3: return 65;
case 2: return 85;
default: return 129;
}
}
unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const {
switch (WavesPerEU) {
case 0: return 24;
case 10: return 24;
case 9: return 28;
case 8: return 32;
case 7: return 36;
case 6: return 40;
case 5: return 48;
case 4: return 64;
case 3: return 84;
case 2: return 128;
default: return getTotalNumVGPRs();
}
}
unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const {
const Function &F = *MF.getFunction();
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
// Compute maximum number of VGPRs function can use using default/requested
// minimum number of waves per execution unit.
std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
// Check if maximum number of VGPRs was explicitly requested using
// "amdgpu-num-vgpr" attribute.
if (F.hasFnAttribute("amdgpu-num-vgpr")) {
unsigned Requested = AMDGPU::getIntegerAttribute(
F, "amdgpu-num-vgpr", MaxNumVGPRs);
// Make sure requested value does not violate subtarget's specifications.
if (Requested && Requested <= getNumDebuggerReservedVGPRs(ST))
Requested = 0;
// Make sure requested value is compatible with values implied by
// default/requested minimum/maximum number of waves per execution unit.
if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
Requested = 0;
if (WavesPerEU.second &&
Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
Requested = 0;
if (Requested)
MaxNumVGPRs = Requested;
}
return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST);
}
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const {
if (EltSize == 4) {

View File

@ -195,72 +195,6 @@ public:
return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID);
}
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
return 8;
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs(const SISubtarget &ST) const;
/// \returns Number of addressable SGPRs supported by the subtarget.
unsigned getNumAddressableSGPRs(const SISubtarget &ST) const;
/// \returns Number of reserved SGPRs supported by the subtarget.
unsigned getNumReservedSGPRs(const SISubtarget &ST,
const SIMachineFunctionInfo &MFI) const;
/// \returns Minimum number of SGPRs that meets given number of waves per
/// execution unit requirement for given subtarget.
unsigned getMinNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const;
/// \returns Maximum number of SGPRs that meets given number of waves per
/// execution unit requirement for given subtarget.
unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU,
bool Addressable) const;
/// \returns Maximum number of SGPRs that meets number of waves per execution
/// unit requirement for function \p MF, or number of SGPRs explicitly
/// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
///
/// \returns Value that meets number of waves per execution unit requirement
/// if explicitly requested value cannot be converted to integer, violates
/// subtarget's specifications, or does not meet number of waves per execution
/// unit requirement.
unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
return 4;
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
return 256;
}
/// \returns Number of reserved VGPRs for debugger use supported by the
/// subtarget.
unsigned getNumDebuggerReservedVGPRs(const SISubtarget &ST) const;
/// \returns Minimum number of SGPRs that meets given number of waves per
/// execution unit requirement.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const;
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;
/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p MF, or number of VGPRs explicitly
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
///
/// \returns Value that meets number of waves per execution unit requirement
/// if explicitly requested value cannot be converted to integer, violates
/// subtarget's specifications, or does not meet number of waves per execution
/// unit requirement.
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const;