2016-06-24 04:00:34 +08:00
|
|
|
//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
|
2012-12-12 05:25:42 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "SIMachineFunctionInfo.h"
|
2014-09-24 09:33:17 +08:00
|
|
|
#include "AMDGPUSubtarget.h"
|
2014-05-02 23:41:42 +08:00
|
|
|
#include "SIInstrInfo.h"
|
2014-08-22 04:40:54 +08:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
2016-06-27 18:26:36 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
2013-11-28 05:23:35 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
2014-05-02 23:41:42 +08:00
|
|
|
#include "llvm/IR/Function.h"
|
|
|
|
#include "llvm/IR/LLVMContext.h"
|
2013-11-28 05:23:35 +08:00
|
|
|
|
|
|
|
#define MAX_LANES 64
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
2013-04-02 05:47:53 +08:00
|
|
|
: AMDGPUMachineFunction(MF),
|
2014-09-24 09:33:17 +08:00
|
|
|
TIDReg(AMDGPU::NoRegister),
|
2017-07-19 00:44:56 +08:00
|
|
|
ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG),
|
|
|
|
ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
|
|
|
|
FrameOffsetReg(AMDGPU::FP_REG),
|
|
|
|
StackPtrOffsetReg(AMDGPU::SP_REG),
|
2015-12-01 05:16:03 +08:00
|
|
|
PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
|
|
|
|
DispatchPtrUserSGPR(AMDGPU::NoRegister),
|
|
|
|
QueuePtrUserSGPR(AMDGPU::NoRegister),
|
|
|
|
KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
|
|
|
|
DispatchIDUserSGPR(AMDGPU::NoRegister),
|
|
|
|
FlatScratchInitUserSGPR(AMDGPU::NoRegister),
|
|
|
|
PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
|
|
|
|
GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
|
|
|
|
GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
|
|
|
|
GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
|
|
|
|
WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
|
|
|
|
WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
|
|
|
|
WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
|
|
|
|
WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
|
|
|
|
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
|
2017-07-18 06:35:50 +08:00
|
|
|
WorkItemIDXVGPR(AMDGPU::NoRegister),
|
|
|
|
WorkItemIDYVGPR(AMDGPU::NoRegister),
|
|
|
|
WorkItemIDZVGPR(AMDGPU::NoRegister),
|
2013-11-28 05:23:35 +08:00
|
|
|
PSInputAddr(0),
|
2017-04-12 06:29:24 +08:00
|
|
|
PSInputEnable(0),
|
2016-01-14 01:23:09 +08:00
|
|
|
ReturnsVoid(true),
|
2016-09-07 04:22:28 +08:00
|
|
|
FlatWorkGroupSizes(0, 0),
|
|
|
|
WavesPerEU(0, 0),
|
2016-06-27 18:26:43 +08:00
|
|
|
DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
|
|
|
|
DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
|
2016-01-13 19:45:36 +08:00
|
|
|
LDSWaveSpillSize(0),
|
2014-09-24 09:33:17 +08:00
|
|
|
NumUserSGPRs(0),
|
2015-12-01 05:16:03 +08:00
|
|
|
NumSystemSGPRs(0),
|
2015-11-26 04:55:12 +08:00
|
|
|
HasSpilledSGPRs(false),
|
|
|
|
HasSpilledVGPRs(false),
|
2016-02-12 14:31:30 +08:00
|
|
|
HasNonSpillStackObjects(false),
|
2016-07-14 01:35:15 +08:00
|
|
|
NumSpilledSGPRs(0),
|
|
|
|
NumSpilledVGPRs(0),
|
2015-12-01 05:16:03 +08:00
|
|
|
PrivateSegmentBuffer(false),
|
2015-11-26 04:55:12 +08:00
|
|
|
DispatchPtr(false),
|
|
|
|
QueuePtr(false),
|
2015-12-01 05:16:03 +08:00
|
|
|
KernargSegmentPtr(false),
|
2016-07-23 01:01:30 +08:00
|
|
|
DispatchID(false),
|
2015-11-26 04:55:12 +08:00
|
|
|
FlatScratchInit(false),
|
|
|
|
GridWorkgroupCountX(false),
|
|
|
|
GridWorkgroupCountY(false),
|
|
|
|
GridWorkgroupCountZ(false),
|
2016-04-15 00:27:03 +08:00
|
|
|
WorkGroupIDX(false),
|
2015-11-26 04:55:12 +08:00
|
|
|
WorkGroupIDY(false),
|
|
|
|
WorkGroupIDZ(false),
|
|
|
|
WorkGroupInfo(false),
|
2015-12-01 05:16:03 +08:00
|
|
|
PrivateSegmentWaveByteOffset(false),
|
2016-04-15 00:27:03 +08:00
|
|
|
WorkItemIDX(false),
|
2015-11-26 04:55:12 +08:00
|
|
|
WorkItemIDY(false),
|
2017-01-25 09:25:13 +08:00
|
|
|
WorkItemIDZ(false),
|
2017-06-26 11:01:31 +08:00
|
|
|
ImplicitBufferPtr(false) {
|
2016-06-24 14:30:11 +08:00
|
|
|
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
2015-11-26 04:55:12 +08:00
|
|
|
const Function *F = MF.getFunction();
|
2017-04-12 06:29:28 +08:00
|
|
|
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
|
|
|
|
WavesPerEU = ST.getWavesPerEU(*F);
|
2015-11-26 04:55:12 +08:00
|
|
|
|
2017-05-18 05:56:25 +08:00
|
|
|
if (!isEntryFunction()) {
|
|
|
|
// Non-entry functions have no special inputs for now, other registers
|
|
|
|
// required for scratch access.
|
|
|
|
ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
|
|
|
|
ScratchWaveOffsetReg = AMDGPU::SGPR4;
|
|
|
|
FrameOffsetReg = AMDGPU::SGPR5;
|
2017-06-27 01:53:59 +08:00
|
|
|
StackPtrOffsetReg = AMDGPU::SGPR32;
|
2017-07-19 00:44:56 +08:00
|
|
|
|
|
|
|
// FIXME: Not really a system SGPR.
|
|
|
|
PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
|
2017-07-28 23:52:08 +08:00
|
|
|
if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
|
|
|
|
ImplicitArgPtr = true;
|
|
|
|
} else {
|
|
|
|
if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
|
|
|
|
KernargSegmentPtr = true;
|
2017-05-18 05:56:25 +08:00
|
|
|
}
|
2016-01-13 19:45:36 +08:00
|
|
|
|
2017-04-12 06:29:28 +08:00
|
|
|
CallingConv::ID CC = F->getCallingConv();
|
2017-05-18 05:56:25 +08:00
|
|
|
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
|
2017-07-28 23:52:08 +08:00
|
|
|
if (!F->arg_empty())
|
|
|
|
KernargSegmentPtr = true;
|
2016-04-15 00:27:03 +08:00
|
|
|
WorkGroupIDX = true;
|
|
|
|
WorkItemIDX = true;
|
2017-05-18 05:56:25 +08:00
|
|
|
} else if (CC == CallingConv::AMDGPU_PS) {
|
|
|
|
PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
|
2016-04-15 00:27:03 +08:00
|
|
|
}
|
2015-11-26 04:55:12 +08:00
|
|
|
|
2017-04-12 06:29:28 +08:00
|
|
|
if (ST.debuggerEmitPrologue()) {
|
|
|
|
// Enable everything.
|
2017-07-18 06:35:50 +08:00
|
|
|
WorkGroupIDX = true;
|
2015-11-26 04:55:12 +08:00
|
|
|
WorkGroupIDY = true;
|
|
|
|
WorkGroupIDZ = true;
|
2017-07-18 06:35:50 +08:00
|
|
|
WorkItemIDX = true;
|
2015-11-26 04:55:12 +08:00
|
|
|
WorkItemIDY = true;
|
|
|
|
WorkItemIDZ = true;
|
2017-04-12 06:29:28 +08:00
|
|
|
} else {
|
2017-07-18 06:35:50 +08:00
|
|
|
if (F->hasFnAttribute("amdgpu-work-group-id-x"))
|
|
|
|
WorkGroupIDX = true;
|
|
|
|
|
2017-04-12 06:29:28 +08:00
|
|
|
if (F->hasFnAttribute("amdgpu-work-group-id-y"))
|
|
|
|
WorkGroupIDY = true;
|
|
|
|
|
|
|
|
if (F->hasFnAttribute("amdgpu-work-group-id-z"))
|
|
|
|
WorkGroupIDZ = true;
|
|
|
|
|
2017-07-18 06:35:50 +08:00
|
|
|
if (F->hasFnAttribute("amdgpu-work-item-id-x"))
|
|
|
|
WorkItemIDX = true;
|
|
|
|
|
2017-04-12 06:29:28 +08:00
|
|
|
if (F->hasFnAttribute("amdgpu-work-item-id-y"))
|
|
|
|
WorkItemIDY = true;
|
|
|
|
|
|
|
|
if (F->hasFnAttribute("amdgpu-work-item-id-z"))
|
|
|
|
WorkItemIDZ = true;
|
|
|
|
}
|
2015-12-01 05:16:03 +08:00
|
|
|
|
2017-04-12 06:29:28 +08:00
|
|
|
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
2016-04-07 03:40:20 +08:00
|
|
|
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
|
2017-07-19 00:44:56 +08:00
|
|
|
bool HasStackObjects = FrameInfo.hasStackObjects();
|
2015-12-01 05:16:03 +08:00
|
|
|
|
2017-07-18 06:35:50 +08:00
|
|
|
if (isEntryFunction()) {
|
|
|
|
// X, XY, and XYZ are the only supported combinations, so make sure Y is
|
|
|
|
// enabled if Z is.
|
|
|
|
if (WorkItemIDZ)
|
|
|
|
WorkItemIDY = true;
|
|
|
|
|
|
|
|
if (HasStackObjects || MaySpill) {
|
|
|
|
PrivateSegmentWaveByteOffset = true;
|
2015-12-01 05:16:03 +08:00
|
|
|
|
2017-07-18 06:35:50 +08:00
|
|
|
// HS and GS always have the scratch wave offset in SGPR5 on GFX9.
|
|
|
|
if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
|
|
|
|
(CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
|
|
|
|
PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
|
|
|
|
}
|
2017-05-05 06:25:20 +08:00
|
|
|
}
|
|
|
|
|
2017-07-19 00:44:58 +08:00
|
|
|
bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
|
|
|
|
if (IsCOV2) {
|
2015-12-01 05:16:03 +08:00
|
|
|
if (HasStackObjects || MaySpill)
|
|
|
|
PrivateSegmentBuffer = true;
|
|
|
|
|
|
|
|
if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
|
|
|
|
DispatchPtr = true;
|
2016-04-26 03:27:18 +08:00
|
|
|
|
|
|
|
if (F->hasFnAttribute("amdgpu-queue-ptr"))
|
|
|
|
QueuePtr = true;
|
2016-07-23 01:01:30 +08:00
|
|
|
|
|
|
|
if (F->hasFnAttribute("amdgpu-dispatch-id"))
|
|
|
|
DispatchID = true;
|
2017-01-25 09:25:13 +08:00
|
|
|
} else if (ST.isMesaGfxShader(MF)) {
|
|
|
|
if (HasStackObjects || MaySpill)
|
2017-06-26 11:01:31 +08:00
|
|
|
ImplicitBufferPtr = true;
|
2015-12-01 05:16:03 +08:00
|
|
|
}
|
|
|
|
|
2017-07-14 08:11:13 +08:00
|
|
|
if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
|
|
|
|
KernargSegmentPtr = true;
|
|
|
|
|
2017-07-19 00:44:58 +08:00
|
|
|
if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
|
|
|
|
// TODO: This could be refined a lot. The attribute is a poor way of
|
|
|
|
// detecting calls that may require it before argument lowering.
|
|
|
|
if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
|
|
|
|
FlatScratchInit = true;
|
|
|
|
}
|
2015-12-01 05:16:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
|
|
|
|
const SIRegisterInfo &TRI) {
|
|
|
|
PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
|
|
|
|
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
|
|
|
|
NumUserSGPRs += 4;
|
|
|
|
return PrivateSegmentBufferUserSGPR;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Reserve two user SGPRs for the dispatch pointer.
///
/// Looks up the SReg_64 super-register whose sub0 is the register returned by
/// getNextUserSGPR(), stores it in DispatchPtrUserSGPR, and advances
/// NumUserSGPRs by 2.
///
/// \returns the register stored in DispatchPtrUserSGPR.
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  const auto FirstSGPR = getNextUserSGPR();
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
      FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}
|
|
|
|
|
|
|
|
/// Reserve two user SGPRs for the queue pointer.
///
/// Looks up the SReg_64 super-register whose sub0 is the register returned by
/// getNextUserSGPR(), stores it in QueuePtrUserSGPR, and advances
/// NumUserSGPRs by 2.
///
/// \returns the register stored in QueuePtrUserSGPR.
unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  const auto FirstSGPR = getNextUserSGPR();
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
      FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}
|
2014-08-22 04:40:54 +08:00
|
|
|
|
2015-12-01 05:16:03 +08:00
|
|
|
/// Reserve two user SGPRs for the kernarg segment pointer.
///
/// Looks up the SReg_64 super-register whose sub0 is the register returned by
/// getNextUserSGPR(), stores it in KernargSegmentPtrUserSGPR, and advances
/// NumUserSGPRs by 2.
///
/// \returns the register stored in KernargSegmentPtrUserSGPR.
unsigned
SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  const auto FirstSGPR = getNextUserSGPR();
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
      FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}
|
|
|
|
|
2016-07-23 01:01:30 +08:00
|
|
|
/// Reserve two user SGPRs for the dispatch ID.
///
/// Looks up the SReg_64 super-register whose sub0 is the register returned by
/// getNextUserSGPR(), stores it in DispatchIDUserSGPR, and advances
/// NumUserSGPRs by 2.
///
/// \returns the register stored in DispatchIDUserSGPR.
unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  const auto FirstSGPR = getNextUserSGPR();
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
      FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}
|
|
|
|
|
2016-02-12 14:31:30 +08:00
|
|
|
/// Reserve two user SGPRs for the flat scratch init value.
///
/// Looks up the SReg_64 super-register whose sub0 is the register returned by
/// getNextUserSGPR(), stores it in FlatScratchInitUserSGPR, and advances
/// NumUserSGPRs by 2.
///
/// \returns the register stored in FlatScratchInitUserSGPR.
unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  const auto FirstSGPR = getNextUserSGPR();
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
      FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}
|
|
|
|
|
2017-06-26 11:01:31 +08:00
|
|
|
unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
|
|
|
|
ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
|
2017-01-25 09:25:13 +08:00
|
|
|
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
|
|
|
|
NumUserSGPRs += 2;
|
2017-06-26 11:01:31 +08:00
|
|
|
return ImplicitBufferPtrUserSGPR;
|
2017-01-25 09:25:13 +08:00
|
|
|
}
|
|
|
|
|
2017-02-22 03:12:08 +08:00
|
|
|
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
|
|
|
|
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
|
|
|
|
int FI) {
|
|
|
|
std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
|
2016-01-04 23:50:01 +08:00
|
|
|
|
2017-02-22 03:12:08 +08:00
|
|
|
// This has already been allocated.
|
|
|
|
if (!SpillLanes.empty())
|
|
|
|
return true;
|
2016-01-04 23:50:01 +08:00
|
|
|
|
2017-02-22 03:12:08 +08:00
|
|
|
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
|
|
|
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
|
|
|
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
|
|
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
|
|
unsigned WaveSize = ST.getWavefrontSize();
|
|
|
|
|
|
|
|
unsigned Size = FrameInfo.getObjectSize(FI);
|
|
|
|
assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
|
|
|
|
assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
|
|
|
|
|
|
|
|
int NumLanes = Size / 4;
|
|
|
|
|
|
|
|
// Make sure to handle the case where a wide SGPR spill may span between two
|
|
|
|
// VGPRs.
|
|
|
|
for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
|
|
|
|
unsigned LaneVGPR;
|
|
|
|
unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
|
|
|
|
|
|
|
|
if (VGPRIndex == 0) {
|
|
|
|
LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
|
|
|
|
if (LaneVGPR == AMDGPU::NoRegister) {
|
|
|
|
// We have no VGPRs left for spilling SGPRs. Reset because we won't
|
|
|
|
// partially spill the SGPR to VGPRs.
|
|
|
|
SGPRToVGPRSpills.erase(FI);
|
|
|
|
NumVGPRSpillLanes -= I;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
SpillVGPRs.push_back(LaneVGPR);
|
|
|
|
|
|
|
|
// Add this register as live-in to all blocks to avoid machine verifer
|
|
|
|
// complaining about use of an undefined physical register.
|
|
|
|
for (MachineBasicBlock &BB : MF)
|
|
|
|
BB.addLiveIn(LaneVGPR);
|
|
|
|
} else {
|
|
|
|
LaneVGPR = SpillVGPRs.back();
|
2014-08-22 04:40:54 +08:00
|
|
|
}
|
2017-02-22 03:12:08 +08:00
|
|
|
|
|
|
|
SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
|
2014-08-22 04:40:54 +08:00
|
|
|
}
|
|
|
|
|
2017-02-22 03:12:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Remove from \p MFI the stack object of every frame index that has an entry
/// in SGPRToVGPRSpills.
void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(
    MachineFrameInfo &MFI) {
  for (const auto &Entry : SGPRToVGPRSpills) {
    const auto FrameIndex = Entry.first;
    MFI.RemoveStackObject(FrameIndex);
  }
}
|