forked from OSchip/llvm-project
591 lines
16 KiB
C++
591 lines
16 KiB
C++
//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
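/// \file
/// Declares SIMachineFunctionInfo, the SI-specific per-function state derived
/// from AMDGPUMachineFunction, together with the pseudo source value classes
/// for image and buffer memory that it owns.
//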
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
|
|
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
|
|
|
|
#include "AMDGPUMachineFunction.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "SIRegisterInfo.h"
|
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
|
#include "llvm/MC/MCRegisterInfo.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
#include <array>
|
|
#include <cassert>
|
|
#include <map>
|
|
#include <utility>
|
|
|
|
namespace llvm {
|
|
|
|
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
|
|
public:
|
|
explicit AMDGPUImagePseudoSourceValue() :
|
|
PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
|
|
|
|
bool isConstant(const MachineFrameInfo *) const override {
|
|
// This should probably be true for most images, but we will start by being
|
|
// conservative.
|
|
return false;
|
|
}
|
|
|
|
bool isAliased(const MachineFrameInfo *) const override {
|
|
// FIXME: If we ever change image intrinsics to accept fat pointers, then
|
|
// this could be true for some cases.
|
|
return false;
|
|
}
|
|
|
|
bool mayAlias(const MachineFrameInfo*) const override {
|
|
// FIXME: If we ever change image intrinsics to accept fat pointers, then
|
|
// this could be true for some cases.
|
|
return false;
|
|
}
|
|
};
|
|
|
|
class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
|
|
public:
|
|
explicit AMDGPUBufferPseudoSourceValue() :
|
|
PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
|
|
|
|
bool isConstant(const MachineFrameInfo *) const override {
|
|
// This should probably be true for most images, but we will start by being
|
|
// conservative.
|
|
return false;
|
|
}
|
|
|
|
bool isAliased(const MachineFrameInfo *) const override {
|
|
// FIXME: If we ever change image intrinsics to accept fat pointers, then
|
|
// this could be true for some cases.
|
|
return false;
|
|
}
|
|
|
|
bool mayAlias(const MachineFrameInfo*) const override {
|
|
// FIXME: If we ever change image intrinsics to accept fat pointers, then
|
|
// this could be true for some cases.
|
|
return false;
|
|
}
|
|
};
|
|
|
|
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
|
|
/// tells the hardware which interpolation parameters to load.
|
|
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
|
|
// FIXME: This should be removed and getPreloadedValue moved here.
|
|
friend class SIRegisterInfo;
|
|
|
|
unsigned TIDReg;
|
|
|
|
// Registers that may be reserved for spilling purposes. These may be the same
|
|
// as the input registers.
|
|
unsigned ScratchRSrcReg;
|
|
unsigned ScratchWaveOffsetReg;
|
|
|
|
// This is the current function's incremented size from the kernel's scratch
|
|
// wave offset register. For an entry function, this is exactly the same as
|
|
// the ScratchWaveOffsetReg.
|
|
unsigned FrameOffsetReg;
|
|
|
|
// Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
|
|
unsigned StackPtrOffsetReg;
|
|
|
|
// Input registers for non-HSA ABI
|
|
unsigned ImplicitBufferPtrUserSGPR;
|
|
|
|
// Input registers setup for the HSA ABI.
|
|
// User SGPRs in allocation order.
|
|
unsigned PrivateSegmentBufferUserSGPR;
|
|
unsigned DispatchPtrUserSGPR;
|
|
unsigned QueuePtrUserSGPR;
|
|
unsigned KernargSegmentPtrUserSGPR;
|
|
unsigned DispatchIDUserSGPR;
|
|
unsigned FlatScratchInitUserSGPR;
|
|
unsigned PrivateSegmentSizeUserSGPR;
|
|
unsigned GridWorkGroupCountXUserSGPR;
|
|
unsigned GridWorkGroupCountYUserSGPR;
|
|
unsigned GridWorkGroupCountZUserSGPR;
|
|
|
|
// System SGPRs in allocation order.
|
|
unsigned WorkGroupIDXSystemSGPR;
|
|
unsigned WorkGroupIDYSystemSGPR;
|
|
unsigned WorkGroupIDZSystemSGPR;
|
|
unsigned WorkGroupInfoSystemSGPR;
|
|
unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
|
|
|
|
// VGPR inputs. These are always v0, v1 and v2 for entry functions.
|
|
unsigned WorkItemIDXVGPR;
|
|
unsigned WorkItemIDYVGPR;
|
|
unsigned WorkItemIDZVGPR;
|
|
|
|
// Graphics info.
|
|
unsigned PSInputAddr;
|
|
unsigned PSInputEnable;
|
|
|
|
bool ReturnsVoid;
|
|
|
|
// A pair of default/requested minimum/maximum flat work group sizes.
|
|
// Minimum - first, maximum - second.
|
|
std::pair<unsigned, unsigned> FlatWorkGroupSizes;
|
|
|
|
// A pair of default/requested minimum/maximum number of waves per execution
|
|
// unit. Minimum - first, maximum - second.
|
|
std::pair<unsigned, unsigned> WavesPerEU;
|
|
|
|
// Stack object indices for work group IDs.
|
|
std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
|
|
// Stack object indices for work item IDs.
|
|
std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
|
|
|
|
AMDGPUBufferPseudoSourceValue BufferPSV;
|
|
AMDGPUImagePseudoSourceValue ImagePSV;
|
|
|
|
private:
|
|
unsigned LDSWaveSpillSize;
|
|
unsigned ScratchOffsetReg;
|
|
unsigned NumUserSGPRs;
|
|
unsigned NumSystemSGPRs;
|
|
|
|
bool HasSpilledSGPRs;
|
|
bool HasSpilledVGPRs;
|
|
bool HasNonSpillStackObjects;
|
|
|
|
unsigned NumSpilledSGPRs;
|
|
unsigned NumSpilledVGPRs;
|
|
|
|
// Feature bits required for inputs passed in user SGPRs.
|
|
bool PrivateSegmentBuffer : 1;
|
|
bool DispatchPtr : 1;
|
|
bool QueuePtr : 1;
|
|
bool KernargSegmentPtr : 1;
|
|
bool DispatchID : 1;
|
|
bool FlatScratchInit : 1;
|
|
bool GridWorkgroupCountX : 1;
|
|
bool GridWorkgroupCountY : 1;
|
|
bool GridWorkgroupCountZ : 1;
|
|
|
|
// Feature bits required for inputs passed in system SGPRs.
|
|
bool WorkGroupIDX : 1; // Always initialized.
|
|
bool WorkGroupIDY : 1;
|
|
bool WorkGroupIDZ : 1;
|
|
bool WorkGroupInfo : 1;
|
|
bool PrivateSegmentWaveByteOffset : 1;
|
|
|
|
bool WorkItemIDX : 1; // Always initialized.
|
|
bool WorkItemIDY : 1;
|
|
bool WorkItemIDZ : 1;
|
|
|
|
// Private memory buffer
|
|
// Compute directly in sgpr[0:1]
|
|
// Other shaders indirect 64-bits at sgpr[0:1]
|
|
bool ImplicitBufferPtr : 1;
|
|
|
|
// Pointer to where the ABI inserts special kernel arguments separate from the
|
|
// user arguments. This is an offset from the KernargSegmentPtr.
|
|
bool ImplicitArgPtr : 1;
|
|
|
|
MCPhysReg getNextUserSGPR() const {
|
|
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
|
|
return AMDGPU::SGPR0 + NumUserSGPRs;
|
|
}
|
|
|
|
MCPhysReg getNextSystemSGPR() const {
|
|
return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
|
|
}
|
|
|
|
public:
|
|
struct SpilledReg {
|
|
unsigned VGPR = AMDGPU::NoRegister;
|
|
int Lane = -1;
|
|
|
|
SpilledReg() = default;
|
|
SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
|
|
|
|
bool hasLane() { return Lane != -1;}
|
|
bool hasReg() { return VGPR != AMDGPU::NoRegister;}
|
|
};
|
|
|
|
private:
|
|
// SGPR->VGPR spilling support.
|
|
typedef std::pair<unsigned, unsigned> SpillRegMask;
|
|
|
|
// Track VGPR + wave index for each subregister of the SGPR spilled to
|
|
// frameindex key.
|
|
DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
|
|
unsigned NumVGPRSpillLanes = 0;
|
|
SmallVector<unsigned, 2> SpillVGPRs;
|
|
|
|
public:
|
|
|
|
SIMachineFunctionInfo(const MachineFunction &MF);
|
|
|
|
ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
|
|
auto I = SGPRToVGPRSpills.find(FrameIndex);
|
|
return (I == SGPRToVGPRSpills.end()) ?
|
|
ArrayRef<SpilledReg>() : makeArrayRef(I->second);
|
|
}
|
|
|
|
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
|
|
void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
|
|
|
|
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
|
|
unsigned getTIDReg() const { return TIDReg; };
|
|
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
|
|
|
|
// Add user SGPRs.
|
|
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
|
|
unsigned addDispatchPtr(const SIRegisterInfo &TRI);
|
|
unsigned addQueuePtr(const SIRegisterInfo &TRI);
|
|
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
|
|
unsigned addDispatchID(const SIRegisterInfo &TRI);
|
|
unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
|
|
unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
|
|
|
|
// Add system SGPRs.
|
|
unsigned addWorkGroupIDX() {
|
|
WorkGroupIDXSystemSGPR = getNextSystemSGPR();
|
|
NumSystemSGPRs += 1;
|
|
return WorkGroupIDXSystemSGPR;
|
|
}
|
|
|
|
unsigned addWorkGroupIDY() {
|
|
WorkGroupIDYSystemSGPR = getNextSystemSGPR();
|
|
NumSystemSGPRs += 1;
|
|
return WorkGroupIDYSystemSGPR;
|
|
}
|
|
|
|
unsigned addWorkGroupIDZ() {
|
|
WorkGroupIDZSystemSGPR = getNextSystemSGPR();
|
|
NumSystemSGPRs += 1;
|
|
return WorkGroupIDZSystemSGPR;
|
|
}
|
|
|
|
unsigned addWorkGroupInfo() {
|
|
WorkGroupInfoSystemSGPR = getNextSystemSGPR();
|
|
NumSystemSGPRs += 1;
|
|
return WorkGroupInfoSystemSGPR;
|
|
}
|
|
|
|
unsigned addPrivateSegmentWaveByteOffset() {
|
|
PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
|
|
NumSystemSGPRs += 1;
|
|
return PrivateSegmentWaveByteOffsetSystemSGPR;
|
|
}
|
|
|
|
void setPrivateSegmentWaveByteOffset(unsigned Reg) {
|
|
PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
|
|
}
|
|
|
|
bool hasPrivateSegmentBuffer() const {
|
|
return PrivateSegmentBuffer;
|
|
}
|
|
|
|
bool hasDispatchPtr() const {
|
|
return DispatchPtr;
|
|
}
|
|
|
|
bool hasQueuePtr() const {
|
|
return QueuePtr;
|
|
}
|
|
|
|
bool hasKernargSegmentPtr() const {
|
|
return KernargSegmentPtr;
|
|
}
|
|
|
|
bool hasDispatchID() const {
|
|
return DispatchID;
|
|
}
|
|
|
|
bool hasFlatScratchInit() const {
|
|
return FlatScratchInit;
|
|
}
|
|
|
|
bool hasGridWorkgroupCountX() const {
|
|
return GridWorkgroupCountX;
|
|
}
|
|
|
|
bool hasGridWorkgroupCountY() const {
|
|
return GridWorkgroupCountY;
|
|
}
|
|
|
|
bool hasGridWorkgroupCountZ() const {
|
|
return GridWorkgroupCountZ;
|
|
}
|
|
|
|
bool hasWorkGroupIDX() const {
|
|
return WorkGroupIDX;
|
|
}
|
|
|
|
bool hasWorkGroupIDY() const {
|
|
return WorkGroupIDY;
|
|
}
|
|
|
|
bool hasWorkGroupIDZ() const {
|
|
return WorkGroupIDZ;
|
|
}
|
|
|
|
bool hasWorkGroupInfo() const {
|
|
return WorkGroupInfo;
|
|
}
|
|
|
|
bool hasPrivateSegmentWaveByteOffset() const {
|
|
return PrivateSegmentWaveByteOffset;
|
|
}
|
|
|
|
bool hasWorkItemIDX() const {
|
|
return WorkItemIDX;
|
|
}
|
|
|
|
bool hasWorkItemIDY() const {
|
|
return WorkItemIDY;
|
|
}
|
|
|
|
bool hasWorkItemIDZ() const {
|
|
return WorkItemIDZ;
|
|
}
|
|
|
|
bool hasImplicitArgPtr() const {
|
|
return ImplicitArgPtr;
|
|
}
|
|
|
|
bool hasImplicitBufferPtr() const {
|
|
return ImplicitBufferPtr;
|
|
}
|
|
|
|
unsigned getNumUserSGPRs() const {
|
|
return NumUserSGPRs;
|
|
}
|
|
|
|
unsigned getNumPreloadedSGPRs() const {
|
|
return NumUserSGPRs + NumSystemSGPRs;
|
|
}
|
|
|
|
unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
|
|
return PrivateSegmentWaveByteOffsetSystemSGPR;
|
|
}
|
|
|
|
/// \brief Returns the physical register reserved for use as the resource
|
|
/// descriptor for scratch accesses.
|
|
unsigned getScratchRSrcReg() const {
|
|
return ScratchRSrcReg;
|
|
}
|
|
|
|
void setScratchRSrcReg(unsigned Reg) {
|
|
assert(Reg != AMDGPU::NoRegister && "Should never be unset");
|
|
ScratchRSrcReg = Reg;
|
|
}
|
|
|
|
unsigned getScratchWaveOffsetReg() const {
|
|
return ScratchWaveOffsetReg;
|
|
}
|
|
|
|
unsigned getFrameOffsetReg() const {
|
|
return FrameOffsetReg;
|
|
}
|
|
|
|
void setStackPtrOffsetReg(unsigned Reg) {
|
|
StackPtrOffsetReg = Reg;
|
|
}
|
|
|
|
// Note the unset value for this is AMDGPU::SP_REG rather than
|
|
// NoRegister. This is mostly a workaround for MIR tests where state that
|
|
// can't be directly computed from the function is not preserved in serialized
|
|
// MIR.
|
|
unsigned getStackPtrOffsetReg() const {
|
|
return StackPtrOffsetReg;
|
|
}
|
|
|
|
void setScratchWaveOffsetReg(unsigned Reg) {
|
|
assert(Reg != AMDGPU::NoRegister && "Should never be unset");
|
|
ScratchWaveOffsetReg = Reg;
|
|
if (isEntryFunction())
|
|
FrameOffsetReg = ScratchWaveOffsetReg;
|
|
}
|
|
|
|
unsigned getQueuePtrUserSGPR() const {
|
|
return QueuePtrUserSGPR;
|
|
}
|
|
|
|
unsigned getImplicitBufferPtrUserSGPR() const {
|
|
return ImplicitBufferPtrUserSGPR;
|
|
}
|
|
|
|
bool hasSpilledSGPRs() const {
|
|
return HasSpilledSGPRs;
|
|
}
|
|
|
|
void setHasSpilledSGPRs(bool Spill = true) {
|
|
HasSpilledSGPRs = Spill;
|
|
}
|
|
|
|
bool hasSpilledVGPRs() const {
|
|
return HasSpilledVGPRs;
|
|
}
|
|
|
|
void setHasSpilledVGPRs(bool Spill = true) {
|
|
HasSpilledVGPRs = Spill;
|
|
}
|
|
|
|
bool hasNonSpillStackObjects() const {
|
|
return HasNonSpillStackObjects;
|
|
}
|
|
|
|
void setHasNonSpillStackObjects(bool StackObject = true) {
|
|
HasNonSpillStackObjects = StackObject;
|
|
}
|
|
|
|
unsigned getNumSpilledSGPRs() const {
|
|
return NumSpilledSGPRs;
|
|
}
|
|
|
|
unsigned getNumSpilledVGPRs() const {
|
|
return NumSpilledVGPRs;
|
|
}
|
|
|
|
void addToSpilledSGPRs(unsigned num) {
|
|
NumSpilledSGPRs += num;
|
|
}
|
|
|
|
void addToSpilledVGPRs(unsigned num) {
|
|
NumSpilledVGPRs += num;
|
|
}
|
|
|
|
unsigned getPSInputAddr() const {
|
|
return PSInputAddr;
|
|
}
|
|
|
|
unsigned getPSInputEnable() const {
|
|
return PSInputEnable;
|
|
}
|
|
|
|
bool isPSInputAllocated(unsigned Index) const {
|
|
return PSInputAddr & (1 << Index);
|
|
}
|
|
|
|
void markPSInputAllocated(unsigned Index) {
|
|
PSInputAddr |= 1 << Index;
|
|
}
|
|
|
|
void markPSInputEnabled(unsigned Index) {
|
|
PSInputEnable |= 1 << Index;
|
|
}
|
|
|
|
bool returnsVoid() const {
|
|
return ReturnsVoid;
|
|
}
|
|
|
|
void setIfReturnsVoid(bool Value) {
|
|
ReturnsVoid = Value;
|
|
}
|
|
|
|
/// \returns A pair of default/requested minimum/maximum flat work group sizes
|
|
/// for this function.
|
|
std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
|
|
return FlatWorkGroupSizes;
|
|
}
|
|
|
|
/// \returns Default/requested minimum flat work group size for this function.
|
|
unsigned getMinFlatWorkGroupSize() const {
|
|
return FlatWorkGroupSizes.first;
|
|
}
|
|
|
|
/// \returns Default/requested maximum flat work group size for this function.
|
|
unsigned getMaxFlatWorkGroupSize() const {
|
|
return FlatWorkGroupSizes.second;
|
|
}
|
|
|
|
/// \returns A pair of default/requested minimum/maximum number of waves per
|
|
/// execution unit.
|
|
std::pair<unsigned, unsigned> getWavesPerEU() const {
|
|
return WavesPerEU;
|
|
}
|
|
|
|
/// \returns Default/requested minimum number of waves per execution unit.
|
|
unsigned getMinWavesPerEU() const {
|
|
return WavesPerEU.first;
|
|
}
|
|
|
|
/// \returns Default/requested maximum number of waves per execution unit.
|
|
unsigned getMaxWavesPerEU() const {
|
|
return WavesPerEU.second;
|
|
}
|
|
|
|
/// \returns Stack object index for \p Dim's work group ID.
|
|
int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
|
|
assert(Dim < 3);
|
|
return DebuggerWorkGroupIDStackObjectIndices[Dim];
|
|
}
|
|
|
|
/// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
|
|
void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
|
|
assert(Dim < 3);
|
|
DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
|
|
}
|
|
|
|
/// \returns Stack object index for \p Dim's work item ID.
|
|
int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
|
|
assert(Dim < 3);
|
|
return DebuggerWorkItemIDStackObjectIndices[Dim];
|
|
}
|
|
|
|
/// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
|
|
void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
|
|
assert(Dim < 3);
|
|
DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
|
|
}
|
|
|
|
/// \returns SGPR used for \p Dim's work group ID.
|
|
unsigned getWorkGroupIDSGPR(unsigned Dim) const {
|
|
switch (Dim) {
|
|
case 0:
|
|
assert(hasWorkGroupIDX());
|
|
return WorkGroupIDXSystemSGPR;
|
|
case 1:
|
|
assert(hasWorkGroupIDY());
|
|
return WorkGroupIDYSystemSGPR;
|
|
case 2:
|
|
assert(hasWorkGroupIDZ());
|
|
return WorkGroupIDZSystemSGPR;
|
|
}
|
|
llvm_unreachable("unexpected dimension");
|
|
}
|
|
|
|
/// \returns VGPR used for \p Dim' work item ID.
|
|
unsigned getWorkItemIDVGPR(unsigned Dim) const {
|
|
switch (Dim) {
|
|
case 0:
|
|
assert(hasWorkItemIDX());
|
|
return AMDGPU::VGPR0;
|
|
case 1:
|
|
assert(hasWorkItemIDY());
|
|
return AMDGPU::VGPR1;
|
|
case 2:
|
|
assert(hasWorkItemIDZ());
|
|
return AMDGPU::VGPR2;
|
|
}
|
|
llvm_unreachable("unexpected dimension");
|
|
}
|
|
|
|
unsigned getLDSWaveSpillSize() const {
|
|
return LDSWaveSpillSize;
|
|
}
|
|
|
|
const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
|
|
return &BufferPSV;
|
|
}
|
|
|
|
const AMDGPUImagePseudoSourceValue *getImagePSV() const {
|
|
return &ImagePSV;
|
|
}
|
|
};
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
|