forked from OSchip/llvm-project
AMDGPU/SI: Allow using SGPRs 96-101 on VI
Summary:
There is no point in setting SGPRS=104, because VI allocates SGPRs in multiples of 16, so a request for 104 is rounded up to 112 anyway. That enables us to use all 102 SGPRs for general purposes.

Reviewers: tstellarAMD

Subscribers: qcolombet, arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye

Differential Revision: https://reviews.llvm.org/D27149

llvm-svn: 289260
This commit is contained in:
parent
1a4ab7e772
commit
91f22fbf4f
|
@ -489,6 +489,22 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
|||
RI->getHWRegIndex(MFI->getScratchRSrcReg());
|
||||
}
|
||||
|
||||
// Check the addressable register limit before we add ExtraSGPRs.
|
||||
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
|
||||
!STM.hasSGPRInitBug()) {
|
||||
unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs();
|
||||
if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
|
||||
// This can happen due to a compiler bug or when using inline asm.
|
||||
LLVMContext &Ctx = MF.getFunction()->getContext();
|
||||
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
|
||||
"addressable scalar registers",
|
||||
MaxSGPR + 1, DS_Error,
|
||||
DK_ResourceLimit, MaxAddressableNumSGPRs);
|
||||
Ctx.diagnose(Diag);
|
||||
MaxSGPR = MaxAddressableNumSGPRs - 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Account for extra SGPRs and VGPRs reserved for debugger use.
|
||||
MaxSGPR += ExtraSGPRs;
|
||||
MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM);
|
||||
|
@ -505,19 +521,22 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
|||
ProgInfo.NumVGPRsForWavesPerEU = std::max(
|
||||
ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU()));
|
||||
|
||||
unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();
|
||||
if (ProgInfo.NumSGPR > MaxNumSGPRs) {
|
||||
// This can happen due to a compiler bug or when using inline asm to use the
|
||||
// registers which are usually reserved for vcc etc.
|
||||
if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
|
||||
STM.hasSGPRInitBug()) {
|
||||
unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();
|
||||
if (ProgInfo.NumSGPR > MaxNumSGPRs) {
|
||||
// This can happen due to a compiler bug or when using inline asm to use the
|
||||
// registers which are usually reserved for vcc etc.
|
||||
|
||||
LLVMContext &Ctx = MF.getFunction()->getContext();
|
||||
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
|
||||
"scalar registers",
|
||||
ProgInfo.NumSGPR, DS_Error,
|
||||
DK_ResourceLimit, MaxNumSGPRs);
|
||||
Ctx.diagnose(Diag);
|
||||
ProgInfo.NumSGPR = MaxNumSGPRs;
|
||||
ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
|
||||
LLVMContext &Ctx = MF.getFunction()->getContext();
|
||||
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
|
||||
"scalar registers",
|
||||
ProgInfo.NumSGPR, DS_Error,
|
||||
DK_ResourceLimit, MaxNumSGPRs);
|
||||
Ctx.diagnose(Diag);
|
||||
ProgInfo.NumSGPR = MaxNumSGPRs;
|
||||
ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
|
||||
}
|
||||
}
|
||||
|
||||
if (STM.hasSGPRInitBug()) {
|
||||
|
|
|
@ -144,7 +144,7 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
|
|||
unsigned VGPRExcessLimit =
|
||||
Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
|
||||
unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
|
||||
unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves);
|
||||
unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true);
|
||||
unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves);
|
||||
|
||||
ReadyQueue &Q = Zone.Available;
|
||||
|
|
|
@ -1211,14 +1211,15 @@ unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST,
|
|||
}
|
||||
|
||||
unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST,
|
||||
unsigned WavesPerEU) const {
|
||||
unsigned WavesPerEU,
|
||||
bool Addressable) const {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
switch (WavesPerEU) {
|
||||
case 0: return 80;
|
||||
case 10: return 80;
|
||||
case 9: return 80;
|
||||
case 8: return 96;
|
||||
default: return getNumAddressableSGPRs(ST);
|
||||
default: return Addressable ? getNumAddressableSGPRs(ST) : 112;
|
||||
}
|
||||
} else {
|
||||
switch (WavesPerEU) {
|
||||
|
@ -1243,7 +1244,8 @@ unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
|
|||
// Compute maximum number of SGPRs function can use using default/requested
|
||||
// minimum number of waves per execution unit.
|
||||
std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
|
||||
unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first);
|
||||
unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false);
|
||||
unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true);
|
||||
|
||||
// Check if maximum number of SGPRs was explicitly requested using
|
||||
// "amdgpu-num-sgpr" attribute.
|
||||
|
@ -1268,7 +1270,7 @@ unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
|
|||
|
||||
// Make sure requested value is compatible with values implied by
|
||||
// default/requested minimum/maximum number of waves per execution unit.
|
||||
if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first))
|
||||
if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false))
|
||||
Requested = 0;
|
||||
if (WavesPerEU.second &&
|
||||
Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second))
|
||||
|
@ -1281,7 +1283,7 @@ unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
|
|||
if (ST.hasSGPRInitBug())
|
||||
MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
|
||||
|
||||
return MaxNumSGPRs - getNumReservedSGPRs(ST);
|
||||
return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST), MaxNumAddressableSGPRs);
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs(
|
||||
|
|
|
@ -206,7 +206,8 @@ public:
|
|||
|
||||
/// \returns Maximum number of SGPRs that meets given number of waves per
|
||||
/// execution unit requirement for given subtarget.
|
||||
unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const;
|
||||
unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU,
|
||||
bool Addressable) const;
|
||||
|
||||
/// \returns Maximum number of SGPRs that meets number of waves per execution
|
||||
/// unit requirement for function \p MF, or number of SGPRs explicitly
|
||||
|
|
|
@ -76,7 +76,7 @@ define void @use_too_many_sgprs_iceland() #2 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; ERROR: error: scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji
|
||||
; ERROR: error: addressable scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji
|
||||
define void @use_too_many_sgprs_fiji() #3 {
|
||||
call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
|
||||
call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
|
||||
|
|
|
@ -3,8 +3,9 @@
|
|||
|
||||
; Make sure this doesn't crash.
|
||||
; ALL-LABEL: {{^}}test:
|
||||
; ALL: s_mov_b32 s92, SCRATCH_RSRC_DWORD0
|
||||
; ALL: s_mov_b32 s91, s3
|
||||
; ALL: s_mov_b32 s[[LO:[0-9]+]], SCRATCH_RSRC_DWORD0
|
||||
; ALL: s_mov_b32 s[[OFF:[0-9]+]], s3
|
||||
; ALL: s_mov_b32 s[[HI:[0-9]+]], 0xe80000
|
||||
|
||||
; Make sure we are handling hazards correctly.
|
||||
; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:12
|
||||
|
@ -15,11 +16,11 @@
|
|||
|
||||
; Make sure scratch wave offset register is correctly incremented and
|
||||
; then restored.
|
||||
; SMEM: s_mov_b32 m0, s91{{$}}
|
||||
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[92:95], m0 ; 16-byte Folded Spill
|
||||
; SMEM: s_mov_b32 m0, s[[OFF]]{{$}}
|
||||
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 16-byte Folded Spill
|
||||
|
||||
; SMEM: s_mov_b32 m0, s91{{$}}
|
||||
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[92:95], m0 ; 16-byte Folded Reload
|
||||
; SMEM: s_mov_b32 m0, s[[OFF]]{{$}}
|
||||
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 16-byte Folded Reload
|
||||
|
||||
; SMEM: s_dcache_wb
|
||||
; ALL: s_endpgm
|
||||
|
|
Loading…
Reference in New Issue