forked from OSchip/llvm-project
[AMDGPU] Do not reserve any VGPR for SGPR spills
After the split register allocation changes in eebe841a47, it is no
longer necessary to reserve a VGPR before RA. Reserving one can also
create bugs when IPRA is enabled, since we cannot predict that a called
function may not reserve any register if it does not have any SGPR
spills. If that happens, those functions may overwrite reserved registers
that are normally callee saved. Added a test to show this.
Fixes: SWDEV-309900
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D115551
This commit is contained in:
parent
bbced74199
commit
8470bf2b08
|
@ -1320,16 +1320,14 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
|
|||
const BitVector AllSavedRegs = SavedRegs;
|
||||
SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
|
||||
|
||||
// If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
|
||||
const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;
|
||||
|
||||
// We have to anticipate introducing CSR VGPR spills or spill of caller
|
||||
// save VGPR reserved for SGPR spills as we now always create stack entry
|
||||
// for it, if we don't have any stack objects already, since we require
|
||||
// an FP if there is a call and stack.
|
||||
// for it, if we don't have any stack objects already, since we require a FP
|
||||
// if there is a call and stack. We will allocate a VGPR for SGPR spills if
|
||||
// there are any SGPR spills. Whether they are CSR spills or otherwise.
|
||||
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
||||
const bool WillHaveFP =
|
||||
FrameInfo.hasCalls() && (HaveAnyCSRVGPR || MFI->VGPRReservedForSGPRSpill);
|
||||
FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
|
||||
|
||||
// FP will be specially managed like SP.
|
||||
if (WillHaveFP || hasFP(MF))
|
||||
|
|
|
@ -45,10 +45,6 @@ static cl::opt<bool> DisableLoopAlignment(
|
|||
cl::desc("Do not align and prefetch loops"),
|
||||
cl::init(false));
|
||||
|
||||
static cl::opt<bool> VGPRReserveforSGPRSpill(
|
||||
"amdgpu-reserve-vgpr-for-sgpr-spill",
|
||||
cl::desc("Allocates one VGPR for future SGPR Spill"), cl::init(true));
|
||||
|
||||
static cl::opt<bool> UseDivergentRegisterIndexing(
|
||||
"amdgpu-use-divergent-register-indexing",
|
||||
cl::Hidden,
|
||||
|
@ -11990,13 +11986,6 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
|
|||
}
|
||||
|
||||
TargetLoweringBase::finalizeLowering(MF);
|
||||
|
||||
// Allocate a VGPR for future SGPR Spill if
|
||||
// "amdgpu-reserve-vgpr-for-sgpr-spill" option is used
|
||||
// FIXME: We won't need this hack if we split SGPR allocation from VGPR
|
||||
if (VGPRReserveforSGPRSpill && TRI->spillSGPRToVGPR() &&
|
||||
!Info->VGPRReservedForSGPRSpill && !Info->isEntryFunction())
|
||||
Info->reserveVGPRforSGPRSpills(MF);
|
||||
}
|
||||
|
||||
void SITargetLowering::computeKnownBitsForFrameIndex(
|
||||
|
|
|
@ -239,50 +239,6 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Find lowest available VGPR and use it as VGPR reserved for SGPR spills.
|
||||
static bool lowerShiftReservedVGPR(MachineFunction &MF,
|
||||
const GCNSubtarget &ST) {
|
||||
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
||||
const Register PreReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill;
|
||||
// Early out if pre-reservation of a VGPR for SGPR spilling is disabled.
|
||||
if (!PreReservedVGPR)
|
||||
return false;
|
||||
|
||||
// If there are no free lower VGPRs available, default to using the
|
||||
// pre-reserved register instead.
|
||||
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
||||
Register LowestAvailableVGPR =
|
||||
TRI->findUnusedRegister(MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF);
|
||||
if (!LowestAvailableVGPR)
|
||||
LowestAvailableVGPR = PreReservedVGPR;
|
||||
|
||||
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
||||
// Create a stack object for a possible spill in the function prologue.
|
||||
// Note Non-CSR VGPR also need this as we may overwrite inactive lanes.
|
||||
Optional<int> FI = FrameInfo.CreateSpillStackObject(4, Align(4));
|
||||
|
||||
// Find saved info about the pre-reserved register.
|
||||
const auto *ReservedVGPRInfoItr =
|
||||
llvm::find_if(FuncInfo->getSGPRSpillVGPRs(),
|
||||
[PreReservedVGPR](const auto &SpillRegInfo) {
|
||||
return SpillRegInfo.VGPR == PreReservedVGPR;
|
||||
});
|
||||
|
||||
assert(ReservedVGPRInfoItr != FuncInfo->getSGPRSpillVGPRs().end());
|
||||
auto Index =
|
||||
std::distance(FuncInfo->getSGPRSpillVGPRs().begin(), ReservedVGPRInfoItr);
|
||||
|
||||
FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, Index);
|
||||
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
assert(LowestAvailableVGPR.isValid() && "Did not find an available VGPR");
|
||||
MBB.addLiveIn(LowestAvailableVGPR);
|
||||
MBB.sortUniqueLiveIns();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
TII = ST.getInstrInfo();
|
||||
|
@ -304,11 +260,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
|
|||
if (!MFI.hasStackObjects() && !HasCSRs) {
|
||||
SaveBlocks.clear();
|
||||
RestoreBlocks.clear();
|
||||
if (FuncInfo->VGPRReservedForSGPRSpill) {
|
||||
// Free the reserved VGPR for later possible use by frame lowering.
|
||||
FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
|
||||
MRI.freezeReservedRegs(MF);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -326,8 +277,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
|
|||
// This operates under the assumption that only other SGPR spills are users
|
||||
// of the frame index.
|
||||
|
||||
lowerShiftReservedVGPR(MF, ST);
|
||||
|
||||
// To track the spill frame indices handled in this pass.
|
||||
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
|
||||
|
||||
|
@ -375,8 +324,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
|
|||
FuncInfo->removeDeadFrameIndices(MFI);
|
||||
|
||||
MadeChange = true;
|
||||
} else if (FuncInfo->VGPRReservedForSGPRSpill) {
|
||||
FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
|
||||
}
|
||||
|
||||
SaveBlocks.clear();
|
||||
|
|
|
@ -274,7 +274,6 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
|
|||
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
unsigned WaveSize = ST.getWavefrontSize();
|
||||
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
unsigned Size = FrameInfo.getObjectSize(FI);
|
||||
unsigned NumLanes = Size / 4;
|
||||
|
@ -291,16 +290,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
|
|||
Register LaneVGPR;
|
||||
unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
|
||||
|
||||
// Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..) and
|
||||
// when one of the two conditions is true:
|
||||
// 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
|
||||
// reserved.
|
||||
// 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
|
||||
// required.
|
||||
if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
|
||||
assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
|
||||
LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
|
||||
} else if (VGPRIndex == 0) {
|
||||
if (VGPRIndex == 0) {
|
||||
LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
|
||||
if (LaneVGPR == AMDGPU::NoRegister) {
|
||||
// We have no VGPRs left for spilling SGPRs. Reset because we will not
|
||||
|
@ -308,6 +298,8 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
|
|||
SGPRToVGPRSpills.erase(FI);
|
||||
NumVGPRSpillLanes -= I;
|
||||
|
||||
// FIXME: We can run out of free registers with split allocation if
|
||||
// IPRA is enabled and a called function already uses every VGPR.
|
||||
#if 0
|
||||
DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
|
||||
"VGPRs for SGPR spilling",
|
||||
|
@ -340,21 +332,6 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Reserve a VGPR for spilling of SGPRs
|
||||
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
||||
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
Register LaneVGPR = TRI->findUnusedRegister(
|
||||
MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
|
||||
if (LaneVGPR == Register())
|
||||
return false;
|
||||
SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, None));
|
||||
FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
|
||||
/// Either AGPR is spilled to VGPR or vice versa.
|
||||
/// Returns true if a \p FI can be eliminated completely.
|
||||
|
@ -616,24 +593,6 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
|
|||
return false;
|
||||
}
|
||||
|
||||
// Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
|
||||
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
|
||||
MachineFunction &MF) {
|
||||
for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
|
||||
if (i->VGPR == ReservedVGPR) {
|
||||
SpillVGPRs.erase(i);
|
||||
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
MBB.removeLiveIn(ReservedVGPR);
|
||||
MBB.sortUniqueLiveIns();
|
||||
}
|
||||
this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
|
||||
if (UsesAGPRs)
|
||||
return *UsesAGPRs;
|
||||
|
|
|
@ -502,7 +502,6 @@ public: // FIXME
|
|||
Register SGPRForBPSaveRestoreCopy;
|
||||
Optional<int> BasePointerSaveIndex;
|
||||
|
||||
Register VGPRReservedForSGPRSpill;
|
||||
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);
|
||||
|
||||
public:
|
||||
|
@ -528,7 +527,6 @@ public:
|
|||
void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
|
||||
SpillVGPRs[Index].VGPR = NewVGPR;
|
||||
SpillVGPRs[Index].FI = newFI;
|
||||
VGPRReservedForSGPRSpill = NewVGPR;
|
||||
}
|
||||
|
||||
bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);
|
||||
|
@ -556,7 +554,6 @@ public:
|
|||
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
|
||||
unsigned NumLane) const;
|
||||
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
|
||||
bool reserveVGPRforSGPRSpills(MachineFunction &MF);
|
||||
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
|
||||
void removeDeadFrameIndices(MachineFrameInfo &MFI);
|
||||
|
||||
|
|
|
@ -520,58 +520,58 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
|
|||
; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: global_load_dwordx4 v[3:6], v[0:1], off
|
||||
; GCN-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
|
||||
; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32
|
||||
; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48
|
||||
; GCN-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:64
|
||||
; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:80
|
||||
; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:96
|
||||
; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:112
|
||||
; GCN-NEXT: global_load_dwordx4 v[44:47], v[0:1], off offset:128
|
||||
; GCN-NEXT: global_load_dwordx4 v[48:51], v[0:1], off offset:144
|
||||
; GCN-NEXT: global_load_dwordx4 v[52:55], v[0:1], off offset:160
|
||||
; GCN-NEXT: global_load_dwordx4 v[56:59], v[0:1], off offset:176
|
||||
; GCN-NEXT: global_load_dwordx4 v[7:10], v[0:1], off offset:192
|
||||
; GCN-NEXT: global_load_dwordx4 v[7:10], v[0:1], off offset:16
|
||||
; GCN-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32
|
||||
; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48
|
||||
; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:64
|
||||
; GCN-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:80
|
||||
; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:96
|
||||
; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:112
|
||||
; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:128
|
||||
; GCN-NEXT: global_load_dwordx4 v[44:47], v[0:1], off offset:144
|
||||
; GCN-NEXT: global_load_dwordx4 v[48:51], v[0:1], off offset:160
|
||||
; GCN-NEXT: global_load_dwordx4 v[52:55], v[0:1], off offset:176
|
||||
; GCN-NEXT: global_load_dwordx4 v[11:14], v[0:1], off offset:192
|
||||
; GCN-NEXT: s_add_i32 s32, s32, 0x10000
|
||||
; GCN-NEXT: s_add_i32 s32, s32, 0xffff0000
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: global_load_dwordx4 v[7:10], v[0:1], off offset:208
|
||||
; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: global_load_dwordx4 v[11:14], v[0:1], off offset:208
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:224
|
||||
; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: global_load_dwordx4 v[56:59], v[0:1], off offset:224
|
||||
; GCN-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:240
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 31, v2
|
||||
; GCN-NEXT: v_lshrrev_b32_e64 v2, 6, s33
|
||||
|
@ -582,50 +582,50 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
|
|||
; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:260
|
||||
; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:264
|
||||
; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:268
|
||||
; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:272
|
||||
; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:276
|
||||
; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:280
|
||||
; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:284
|
||||
; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:288
|
||||
; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:292
|
||||
; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:296
|
||||
; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:300
|
||||
; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:304
|
||||
; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:308
|
||||
; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:312
|
||||
; GCN-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:316
|
||||
; GCN-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:320
|
||||
; GCN-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:324
|
||||
; GCN-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:328
|
||||
; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:332
|
||||
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:336
|
||||
; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:340
|
||||
; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s33 offset:344
|
||||
; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:348
|
||||
; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:352
|
||||
; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:356
|
||||
; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:360
|
||||
; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:364
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:368
|
||||
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:372
|
||||
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:376
|
||||
; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:380
|
||||
; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:384
|
||||
; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:388
|
||||
; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:392
|
||||
; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:396
|
||||
; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:400
|
||||
; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:404
|
||||
; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:408
|
||||
; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:412
|
||||
; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:416
|
||||
; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:420
|
||||
; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:424
|
||||
; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:428
|
||||
; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:432
|
||||
; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:436
|
||||
; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:440
|
||||
; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:444
|
||||
; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:272
|
||||
; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:276
|
||||
; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:280
|
||||
; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:284
|
||||
; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:288
|
||||
; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:292
|
||||
; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:296
|
||||
; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:300
|
||||
; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:304
|
||||
; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:308
|
||||
; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:312
|
||||
; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:316
|
||||
; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:320
|
||||
; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:324
|
||||
; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:328
|
||||
; GCN-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:332
|
||||
; GCN-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:336
|
||||
; GCN-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:340
|
||||
; GCN-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:344
|
||||
; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:348
|
||||
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:352
|
||||
; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:356
|
||||
; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s33 offset:360
|
||||
; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:364
|
||||
; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:368
|
||||
; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:372
|
||||
; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:376
|
||||
; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:380
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:384
|
||||
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:388
|
||||
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:392
|
||||
; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:396
|
||||
; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:400
|
||||
; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:404
|
||||
; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:408
|
||||
; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:412
|
||||
; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:416
|
||||
; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:420
|
||||
; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:424
|
||||
; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:428
|
||||
; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:432
|
||||
; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:436
|
||||
; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:440
|
||||
; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:444
|
||||
; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload
|
||||
|
@ -676,10 +676,10 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
|
|||
; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:468
|
||||
; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:472
|
||||
; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:476
|
||||
; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:480
|
||||
; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:484
|
||||
; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:488
|
||||
; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:492
|
||||
; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:480
|
||||
; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:484
|
||||
; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:488
|
||||
; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:492
|
||||
; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:496
|
||||
; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:500
|
||||
; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:504
|
||||
|
|
|
@ -5,7 +5,7 @@ define void @child_function() #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}reserve_vgpr_with_no_lower_vgpr_available:
|
||||
; GCN-LABEL: {{^}}spill_sgpr_with_no_lower_vgpr_available:
|
||||
; GCN: buffer_store_dword v255, off, s[0:3], s32
|
||||
; GCN: v_writelane_b32 v255, s33, 2
|
||||
; GCN: v_writelane_b32 v255, s30, 0
|
||||
|
@ -16,7 +16,7 @@ define void @child_function() #0 {
|
|||
; GCN: v_readlane_b32 s33, v255, 2
|
||||
; GCN: ; NumVgprs: 256
|
||||
|
||||
define void @reserve_vgpr_with_no_lower_vgpr_available() #0 {
|
||||
define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
|
||||
%alloca = alloca i32, align 4, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
|
||||
|
@ -51,7 +51,7 @@ define void @reserve_vgpr_with_no_lower_vgpr_available() #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}reserve_lowest_available_vgpr:
|
||||
; GCN-LABEL: {{^}}spill_to_lowest_available_vgpr:
|
||||
; GCN: buffer_store_dword v254, off, s[0:3], s32
|
||||
; GCN: v_writelane_b32 v254, s33, 2
|
||||
; GCN: v_writelane_b32 v254, s30, 0
|
||||
|
@ -61,7 +61,7 @@ define void @reserve_vgpr_with_no_lower_vgpr_available() #0 {
|
|||
; GCN: v_readlane_b32 s31, v254, 1
|
||||
; GCN: v_readlane_b32 s33, v254, 2
|
||||
|
||||
define void @reserve_lowest_available_vgpr() #0 {
|
||||
define void @spill_to_lowest_available_vgpr() #0 {
|
||||
%alloca = alloca i32, align 4, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
|
||||
|
@ -96,14 +96,14 @@ define void @reserve_lowest_available_vgpr() #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}reserve_vgpr_with_sgpr_spills:
|
||||
; GCN-LABEL: {{^}}spill_sgpr_with_sgpr_uses:
|
||||
; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32
|
||||
; GCN: ; def s4
|
||||
; GCN: v_writelane_b32 v254, s4, 2
|
||||
; GCN: v_readlane_b32 s4, v254, 2
|
||||
; GCN: ; use s4
|
||||
|
||||
define void @reserve_vgpr_with_sgpr_spills() #0 {
|
||||
define void @spill_sgpr_with_sgpr_uses() #0 {
|
||||
%alloca = alloca i32, align 4, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
|
||||
|
@ -147,12 +147,12 @@ ret:
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}reserve_vgpr_with_tail_call
|
||||
; GCN-LABEL: {{^}}spill_sgpr_with_tail_call
|
||||
; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32
|
||||
; GCN-NOT: v_writelane
|
||||
; GCN: s_setpc_b64 s[4:5]
|
||||
|
||||
define void @reserve_vgpr_with_tail_call() #0 {
|
||||
define void @spill_sgpr_with_tail_call() #0 {
|
||||
%alloca = alloca i32, align 4, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
|
||||
|
@ -187,17 +187,29 @@ define void @reserve_vgpr_with_tail_call() #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}reserve_vgpr_for_sgpr_spills_no_alloca:
|
||||
; GCN: v_writelane_b32 v5, s34, 0
|
||||
; GCN: v_writelane_b32 v5, s35, 1
|
||||
; GCN: v_writelane_b32 v5, s36, 2
|
||||
; GCN: v_writelane_b32 v5, s37, 3
|
||||
; GCN: v_readlane_b32 s37, v5, 3
|
||||
; GCN: v_readlane_b32 s36, v5, 2
|
||||
; GCN: v_readlane_b32 s35, v5, 1
|
||||
; GCN: v_readlane_b32 s34, v5, 0
|
||||
; Special case where all registers are explicitly clobbered in the function and
|
||||
; we have no VGPR to allocate for SGPR spills. We are forced to spill to memory.
|
||||
|
||||
define void @reserve_vgpr_for_sgpr_spills_no_alloca(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
|
||||
; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr:
|
||||
; GCN: v_writelane_b32 [[A:v[0-9]+]], s34, 0
|
||||
; GCN: buffer_store_dword [[A]], off, s[0:3], s32
|
||||
; GCN: v_writelane_b32 [[B:v[0-9]+]], s35, 0
|
||||
; GCN: buffer_store_dword [[B]], off, s[0:3], s32
|
||||
; GCN: v_writelane_b32 [[C:v[0-9]+]], s36, 0
|
||||
; GCN: buffer_store_dword [[C]], off, s[0:3], s32
|
||||
; GCN: v_writelane_b32 [[D:v[0-9]+]], s37, 0
|
||||
; GCN: buffer_store_dword [[D]], off, s[0:3], s32
|
||||
; GCN: #ASMEND
|
||||
; GCN: buffer_load_dword [[E:v[0-9]+]]
|
||||
; GCN: v_readlane_b32 s37, [[E]], 0
|
||||
; GCN: buffer_load_dword [[F:v[0-9]+]]
|
||||
; GCN: v_readlane_b32 s36, [[F]], 0
|
||||
; GCN: buffer_load_dword [[G:v[0-9]+]]
|
||||
; GCN: v_readlane_b32 s35, [[G]], 0
|
||||
; GCN: buffer_load_dword [[H:v[0-9]+]]
|
||||
; GCN: v_readlane_b32 s34, [[H]], 0
|
||||
|
||||
define void @spill_sgpr_no_free_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
|
||||
%a = load <4 x i32>, <4 x i32> addrspace(1)* %in
|
||||
call void asm sideeffect "",
|
||||
"~{v6},~{v7},~{v8},~{v9}
|
||||
|
@ -234,4 +246,96 @@ define void @reserve_vgpr_for_sgpr_spills_no_alloca(<4 x i32> addrspace(1)* %out
|
|||
ret void
|
||||
}
|
||||
|
||||
; If IPRA no-CSR optimization is enabled, we will not be able to allocate a
|
||||
; VGPR for SGPR spills in the parent function since this child function uses all
|
||||
; VGPRs.
|
||||
|
||||
; Internal helper whose only work is an empty side-effecting inline asm that
; lists every VGPR from v0 through v255 as clobbered, then returns.
; It is called (non-tail) by @spill_sgpr_no_free_vgpr_ipra.
define internal void @child_function_ipra() #0 {
|
||||
call void asm sideeffect "",
|
||||
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
|
||||
,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
|
||||
,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
|
||||
,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
|
||||
,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
|
||||
,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
|
||||
,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
|
||||
,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
|
||||
,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
|
||||
,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
|
||||
,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
|
||||
,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
|
||||
,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
|
||||
,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
|
||||
,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
|
||||
,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
|
||||
,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
|
||||
,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
|
||||
,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
|
||||
,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
|
||||
,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
|
||||
,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
|
||||
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
|
||||
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
|
||||
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
|
||||
,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr_ipra:
|
||||
; GCN: v_writelane_b32 v0, s30, 0
|
||||
; GCN: v_writelane_b32 v0, s31, 1
|
||||
; GCN: buffer_store_dword v0, off
|
||||
; GCN: swappc
|
||||
; GCN: buffer_load_dword v0, off
|
||||
; GCN: v_readlane_b32 s30, v0, 0
|
||||
; GCN: v_readlane_b32 s31, v0, 1
|
||||
; Parent function that makes a regular (non-tail) call to @child_function_ipra.
; The check lines preceding this definition expect s30/s31 to be written into
; lanes 0 and 1 of v0, v0 to be stored to memory before the call, and v0 to be
; reloaded and the lanes read back after the call.
define void @spill_sgpr_no_free_vgpr_ipra() #0 {
|
||||
call void @child_function_ipra()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Internal helper with the same body as @child_function_ipra: an empty
; side-effecting inline asm that lists every VGPR from v0 through v255 as
; clobbered, then returns. It is the tail-call target of
; @spill_sgpr_no_free_vgpr_ipra_tail_call.
define internal void @child_function_ipra_tail_call() #0 {
|
||||
call void asm sideeffect "",
|
||||
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
|
||||
,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
|
||||
,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
|
||||
,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
|
||||
,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
|
||||
,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
|
||||
,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
|
||||
,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
|
||||
,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
|
||||
,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
|
||||
,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
|
||||
,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
|
||||
,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
|
||||
,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
|
||||
,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
|
||||
,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
|
||||
,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
|
||||
,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
|
||||
,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
|
||||
,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
|
||||
,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
|
||||
,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
|
||||
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
|
||||
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
|
||||
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
|
||||
,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr_ipra_tail_call:
|
||||
; GCN-NOT: v_writelane_b32
|
||||
; GCN-NOT: buffer_store_dword
|
||||
; GCN-NOT: swappc
|
||||
; GCN-NOT: buffer_load_dword v0, off
|
||||
; GCN-NOT: v_readlane_b32
|
||||
; GCN: setpc
|
||||
; Parent function that reaches @child_function_ipra_tail_call through a tail
; call. The check lines preceding this definition expect no lane writes or
; reads, no buffer store, no reload of v0 and no swappc before the final setpc.
define void @spill_sgpr_no_free_vgpr_ipra_tail_call() #0 {
|
||||
tail call void @child_function_ipra_tail_call()
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" }
|
Loading…
Reference in New Issue