forked from OSchip/llvm-project
693 lines
19 KiB
C++
693 lines
19 KiB
C++
//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
/// \file
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
|
|
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
|
|
|
|
#include "AMDGPUArgumentUsageInfo.h"
|
|
#include "AMDGPUMachineFunction.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "SIInstrInfo.h"
|
|
#include "SIRegisterInfo.h"
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/ADT/Optional.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/SparseBitVector.h"
|
|
#include "llvm/CodeGen/MIRYamlMapping.h"
|
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
|
#include "llvm/CodeGen/TargetInstrInfo.h"
|
|
#include "llvm/MC/MCRegisterInfo.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
#include <array>
|
|
#include <cassert>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
namespace llvm {
|
|
|
|
class MachineFrameInfo;
|
|
class MachineFunction;
|
|
class TargetRegisterClass;
|
|
|
|
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
|
|
public:
|
|
// TODO: Is the img rsrc useful?
|
|
explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
|
|
PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
|
|
|
|
bool isConstant(const MachineFrameInfo *) const override {
|
|
// This should probably be true for most images, but we will start by being
|
|
// conservative.
|
|
return false;
|
|
}
|
|
|
|
bool isAliased(const MachineFrameInfo *) const override {
|
|
return true;
|
|
}
|
|
|
|
bool mayAlias(const MachineFrameInfo *) const override {
|
|
return true;
|
|
}
|
|
};
|
|
|
|
class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
|
|
public:
|
|
explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
|
|
PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
|
|
|
|
bool isConstant(const MachineFrameInfo *) const override {
|
|
// This should probably be true for most images, but we will start by being
|
|
// conservative.
|
|
return false;
|
|
}
|
|
|
|
bool isAliased(const MachineFrameInfo *) const override {
|
|
return true;
|
|
}
|
|
|
|
bool mayAlias(const MachineFrameInfo *) const override {
|
|
return true;
|
|
}
|
|
};
|
|
|
|
namespace yaml {
|
|
|
|
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
|
|
uint64_t ExplicitKernArgSize = 0;
|
|
unsigned MaxKernArgAlign = 0;
|
|
unsigned LDSSize = 0;
|
|
bool IsEntryFunction = false;
|
|
bool NoSignedZerosFPMath = false;
|
|
bool MemoryBound = false;
|
|
bool WaveLimiter = false;
|
|
|
|
StringValue ScratchRSrcReg = "$private_rsrc_reg";
|
|
StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
|
|
StringValue FrameOffsetReg = "$fp_reg";
|
|
StringValue StackPtrOffsetReg = "$sp_reg";
|
|
|
|
SIMachineFunctionInfo() = default;
|
|
SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
|
|
const TargetRegisterInfo &TRI);
|
|
|
|
void mappingImpl(yaml::IO &YamlIO) override;
|
|
~SIMachineFunctionInfo() = default;
|
|
};
|
|
|
|
template <> struct MappingTraits<SIMachineFunctionInfo> {
|
|
static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
|
|
YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
|
|
UINT64_C(0));
|
|
YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
|
|
YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
|
|
YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
|
|
YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
|
|
YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
|
|
YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
|
|
YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
|
|
StringValue("$private_rsrc_reg"));
|
|
YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
|
|
StringValue("$scratch_wave_offset_reg"));
|
|
YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
|
|
StringValue("$fp_reg"));
|
|
YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
|
|
StringValue("$sp_reg"));
|
|
}
|
|
};
|
|
|
|
} // end namespace yaml
|
|
|
|
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
|
|
/// tells the hardware which interpolation parameters to load.
|
|
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
|
|
friend class GCNTargetMachine;
|
|
|
|
unsigned TIDReg = AMDGPU::NoRegister;
|
|
|
|
// Registers that may be reserved for spilling purposes. These may be the same
|
|
// as the input registers.
|
|
unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
|
|
unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
|
|
|
|
// This is the current function's incremented size from the kernel's scratch
|
|
// wave offset register. For an entry function, this is exactly the same as
|
|
// the ScratchWaveOffsetReg.
|
|
unsigned FrameOffsetReg = AMDGPU::FP_REG;
|
|
|
|
// Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
|
|
unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
|
|
|
|
AMDGPUFunctionArgInfo ArgInfo;
|
|
|
|
// State of MODE register, assumed FP mode.
|
|
AMDGPU::SIModeRegisterDefaults Mode;
|
|
|
|
// Graphics info.
|
|
unsigned PSInputAddr = 0;
|
|
unsigned PSInputEnable = 0;
|
|
|
|
/// Number of bytes of arguments this function has on the stack. If the callee
|
|
/// is expected to restore the argument stack this should be a multiple of 16,
|
|
/// all usable during a tail call.
|
|
///
|
|
/// The alternative would forbid tail call optimisation in some cases: if we
|
|
/// want to transfer control from a function with 8-bytes of stack-argument
|
|
/// space to a function with 16-bytes then misalignment of this value would
|
|
/// make a stack adjustment necessary, which could not be undone by the
|
|
/// callee.
|
|
unsigned BytesInStackArgArea = 0;
|
|
|
|
bool ReturnsVoid = true;
|
|
|
|
// A pair of default/requested minimum/maximum flat work group sizes.
|
|
// Minimum - first, maximum - second.
|
|
std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
|
|
|
|
// A pair of default/requested minimum/maximum number of waves per execution
|
|
// unit. Minimum - first, maximum - second.
|
|
std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
|
|
|
|
DenseMap<const Value *,
|
|
std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
|
|
DenseMap<const Value *,
|
|
std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
|
|
|
|
private:
|
|
unsigned LDSWaveSpillSize = 0;
|
|
unsigned NumUserSGPRs = 0;
|
|
unsigned NumSystemSGPRs = 0;
|
|
|
|
bool HasSpilledSGPRs = false;
|
|
bool HasSpilledVGPRs = false;
|
|
bool HasNonSpillStackObjects = false;
|
|
bool IsStackRealigned = false;
|
|
|
|
unsigned NumSpilledSGPRs = 0;
|
|
unsigned NumSpilledVGPRs = 0;
|
|
|
|
// Feature bits required for inputs passed in user SGPRs.
|
|
bool PrivateSegmentBuffer : 1;
|
|
bool DispatchPtr : 1;
|
|
bool QueuePtr : 1;
|
|
bool KernargSegmentPtr : 1;
|
|
bool DispatchID : 1;
|
|
bool FlatScratchInit : 1;
|
|
|
|
// Feature bits required for inputs passed in system SGPRs.
|
|
bool WorkGroupIDX : 1; // Always initialized.
|
|
bool WorkGroupIDY : 1;
|
|
bool WorkGroupIDZ : 1;
|
|
bool WorkGroupInfo : 1;
|
|
bool PrivateSegmentWaveByteOffset : 1;
|
|
|
|
bool WorkItemIDX : 1; // Always initialized.
|
|
bool WorkItemIDY : 1;
|
|
bool WorkItemIDZ : 1;
|
|
|
|
// Private memory buffer
|
|
// Compute directly in sgpr[0:1]
|
|
// Other shaders indirect 64-bits at sgpr[0:1]
|
|
bool ImplicitBufferPtr : 1;
|
|
|
|
// Pointer to where the ABI inserts special kernel arguments separate from the
|
|
// user arguments. This is an offset from the KernargSegmentPtr.
|
|
bool ImplicitArgPtr : 1;
|
|
|
|
// The hard-wired high half of the address of the global information table
|
|
// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
|
|
// current hardware only allows a 16 bit value.
|
|
unsigned GITPtrHigh;
|
|
|
|
unsigned HighBitsOf32BitAddress;
|
|
|
|
// Current recorded maximum possible occupancy.
|
|
unsigned Occupancy;
|
|
|
|
MCPhysReg getNextUserSGPR() const;
|
|
|
|
MCPhysReg getNextSystemSGPR() const;
|
|
|
|
public:
|
|
struct SpilledReg {
|
|
unsigned VGPR = 0;
|
|
int Lane = -1;
|
|
|
|
SpilledReg() = default;
|
|
SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
|
|
|
|
bool hasLane() { return Lane != -1;}
|
|
bool hasReg() { return VGPR != 0;}
|
|
};
|
|
|
|
struct SGPRSpillVGPRCSR {
|
|
// VGPR used for SGPR spills
|
|
unsigned VGPR;
|
|
|
|
// If the VGPR is a CSR, the stack slot used to save/restore it in the
|
|
// prolog/epilog.
|
|
Optional<int> FI;
|
|
|
|
SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
|
|
};
|
|
|
|
SparseBitVector<> WWMReservedRegs;
|
|
|
|
void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
|
|
|
|
private:
|
|
// SGPR->VGPR spilling support.
|
|
using SpillRegMask = std::pair<unsigned, unsigned>;
|
|
|
|
// Track VGPR + wave index for each subregister of the SGPR spilled to
|
|
// frameindex key.
|
|
DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
|
|
unsigned NumVGPRSpillLanes = 0;
|
|
SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
|
|
|
|
public:
|
|
SIMachineFunctionInfo(const MachineFunction &MF);
|
|
|
|
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
|
|
|
|
ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
|
|
auto I = SGPRToVGPRSpills.find(FrameIndex);
|
|
return (I == SGPRToVGPRSpills.end()) ?
|
|
ArrayRef<SpilledReg>() : makeArrayRef(I->second);
|
|
}
|
|
|
|
ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
|
|
return SpillVGPRs;
|
|
}
|
|
|
|
AMDGPU::SIModeRegisterDefaults getMode() const {
|
|
return Mode;
|
|
}
|
|
|
|
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
|
|
void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
|
|
|
|
bool hasCalculatedTID() const { return TIDReg != 0; };
|
|
unsigned getTIDReg() const { return TIDReg; };
|
|
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
|
|
|
|
unsigned getBytesInStackArgArea() const {
|
|
return BytesInStackArgArea;
|
|
}
|
|
|
|
void setBytesInStackArgArea(unsigned Bytes) {
|
|
BytesInStackArgArea = Bytes;
|
|
}
|
|
|
|
// Add user SGPRs.
|
|
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
|
|
unsigned addDispatchPtr(const SIRegisterInfo &TRI);
|
|
unsigned addQueuePtr(const SIRegisterInfo &TRI);
|
|
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
|
|
unsigned addDispatchID(const SIRegisterInfo &TRI);
|
|
unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
|
|
unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
|
|
|
|
// Add system SGPRs.
|
|
unsigned addWorkGroupIDX() {
|
|
ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
|
|
NumSystemSGPRs += 1;
|
|
return ArgInfo.WorkGroupIDX.getRegister();
|
|
}
|
|
|
|
unsigned addWorkGroupIDY() {
|
|
ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
|
|
NumSystemSGPRs += 1;
|
|
return ArgInfo.WorkGroupIDY.getRegister();
|
|
}
|
|
|
|
unsigned addWorkGroupIDZ() {
|
|
ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
|
|
NumSystemSGPRs += 1;
|
|
return ArgInfo.WorkGroupIDZ.getRegister();
|
|
}
|
|
|
|
unsigned addWorkGroupInfo() {
|
|
ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
|
|
NumSystemSGPRs += 1;
|
|
return ArgInfo.WorkGroupInfo.getRegister();
|
|
}
|
|
|
|
// Add special VGPR inputs
|
|
void setWorkItemIDX(ArgDescriptor Arg) {
|
|
ArgInfo.WorkItemIDX = Arg;
|
|
}
|
|
|
|
void setWorkItemIDY(ArgDescriptor Arg) {
|
|
ArgInfo.WorkItemIDY = Arg;
|
|
}
|
|
|
|
void setWorkItemIDZ(ArgDescriptor Arg) {
|
|
ArgInfo.WorkItemIDZ = Arg;
|
|
}
|
|
|
|
unsigned addPrivateSegmentWaveByteOffset() {
|
|
ArgInfo.PrivateSegmentWaveByteOffset
|
|
= ArgDescriptor::createRegister(getNextSystemSGPR());
|
|
NumSystemSGPRs += 1;
|
|
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
|
|
}
|
|
|
|
void setPrivateSegmentWaveByteOffset(unsigned Reg) {
|
|
ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
|
|
}
|
|
|
|
bool hasPrivateSegmentBuffer() const {
|
|
return PrivateSegmentBuffer;
|
|
}
|
|
|
|
bool hasDispatchPtr() const {
|
|
return DispatchPtr;
|
|
}
|
|
|
|
bool hasQueuePtr() const {
|
|
return QueuePtr;
|
|
}
|
|
|
|
bool hasKernargSegmentPtr() const {
|
|
return KernargSegmentPtr;
|
|
}
|
|
|
|
bool hasDispatchID() const {
|
|
return DispatchID;
|
|
}
|
|
|
|
bool hasFlatScratchInit() const {
|
|
return FlatScratchInit;
|
|
}
|
|
|
|
bool hasWorkGroupIDX() const {
|
|
return WorkGroupIDX;
|
|
}
|
|
|
|
bool hasWorkGroupIDY() const {
|
|
return WorkGroupIDY;
|
|
}
|
|
|
|
bool hasWorkGroupIDZ() const {
|
|
return WorkGroupIDZ;
|
|
}
|
|
|
|
bool hasWorkGroupInfo() const {
|
|
return WorkGroupInfo;
|
|
}
|
|
|
|
bool hasPrivateSegmentWaveByteOffset() const {
|
|
return PrivateSegmentWaveByteOffset;
|
|
}
|
|
|
|
bool hasWorkItemIDX() const {
|
|
return WorkItemIDX;
|
|
}
|
|
|
|
bool hasWorkItemIDY() const {
|
|
return WorkItemIDY;
|
|
}
|
|
|
|
bool hasWorkItemIDZ() const {
|
|
return WorkItemIDZ;
|
|
}
|
|
|
|
bool hasImplicitArgPtr() const {
|
|
return ImplicitArgPtr;
|
|
}
|
|
|
|
bool hasImplicitBufferPtr() const {
|
|
return ImplicitBufferPtr;
|
|
}
|
|
|
|
AMDGPUFunctionArgInfo &getArgInfo() {
|
|
return ArgInfo;
|
|
}
|
|
|
|
const AMDGPUFunctionArgInfo &getArgInfo() const {
|
|
return ArgInfo;
|
|
}
|
|
|
|
std::pair<const ArgDescriptor *, const TargetRegisterClass *>
|
|
getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
|
|
return ArgInfo.getPreloadedValue(Value);
|
|
}
|
|
|
|
unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
|
|
return ArgInfo.getPreloadedValue(Value).first->getRegister();
|
|
}
|
|
|
|
unsigned getGITPtrHigh() const {
|
|
return GITPtrHigh;
|
|
}
|
|
|
|
unsigned get32BitAddressHighBits() const {
|
|
return HighBitsOf32BitAddress;
|
|
}
|
|
|
|
unsigned getNumUserSGPRs() const {
|
|
return NumUserSGPRs;
|
|
}
|
|
|
|
unsigned getNumPreloadedSGPRs() const {
|
|
return NumUserSGPRs + NumSystemSGPRs;
|
|
}
|
|
|
|
unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
|
|
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
|
|
}
|
|
|
|
/// Returns the physical register reserved for use as the resource
|
|
/// descriptor for scratch accesses.
|
|
unsigned getScratchRSrcReg() const {
|
|
return ScratchRSrcReg;
|
|
}
|
|
|
|
void setScratchRSrcReg(unsigned Reg) {
|
|
assert(Reg != 0 && "Should never be unset");
|
|
ScratchRSrcReg = Reg;
|
|
}
|
|
|
|
unsigned getScratchWaveOffsetReg() const {
|
|
return ScratchWaveOffsetReg;
|
|
}
|
|
|
|
unsigned getFrameOffsetReg() const {
|
|
return FrameOffsetReg;
|
|
}
|
|
|
|
void setStackPtrOffsetReg(unsigned Reg) {
|
|
assert(Reg != 0 && "Should never be unset");
|
|
StackPtrOffsetReg = Reg;
|
|
}
|
|
|
|
// Note the unset value for this is AMDGPU::SP_REG rather than
|
|
// NoRegister. This is mostly a workaround for MIR tests where state that
|
|
// can't be directly computed from the function is not preserved in serialized
|
|
// MIR.
|
|
unsigned getStackPtrOffsetReg() const {
|
|
return StackPtrOffsetReg;
|
|
}
|
|
|
|
void setScratchWaveOffsetReg(unsigned Reg) {
|
|
assert(Reg != 0 && "Should never be unset");
|
|
ScratchWaveOffsetReg = Reg;
|
|
if (isEntryFunction())
|
|
FrameOffsetReg = ScratchWaveOffsetReg;
|
|
}
|
|
|
|
unsigned getQueuePtrUserSGPR() const {
|
|
return ArgInfo.QueuePtr.getRegister();
|
|
}
|
|
|
|
unsigned getImplicitBufferPtrUserSGPR() const {
|
|
return ArgInfo.ImplicitBufferPtr.getRegister();
|
|
}
|
|
|
|
bool hasSpilledSGPRs() const {
|
|
return HasSpilledSGPRs;
|
|
}
|
|
|
|
void setHasSpilledSGPRs(bool Spill = true) {
|
|
HasSpilledSGPRs = Spill;
|
|
}
|
|
|
|
bool hasSpilledVGPRs() const {
|
|
return HasSpilledVGPRs;
|
|
}
|
|
|
|
void setHasSpilledVGPRs(bool Spill = true) {
|
|
HasSpilledVGPRs = Spill;
|
|
}
|
|
|
|
bool hasNonSpillStackObjects() const {
|
|
return HasNonSpillStackObjects;
|
|
}
|
|
|
|
void setHasNonSpillStackObjects(bool StackObject = true) {
|
|
HasNonSpillStackObjects = StackObject;
|
|
}
|
|
|
|
bool isStackRealigned() const {
|
|
return IsStackRealigned;
|
|
}
|
|
|
|
void setIsStackRealigned(bool Realigned = true) {
|
|
IsStackRealigned = Realigned;
|
|
}
|
|
|
|
unsigned getNumSpilledSGPRs() const {
|
|
return NumSpilledSGPRs;
|
|
}
|
|
|
|
unsigned getNumSpilledVGPRs() const {
|
|
return NumSpilledVGPRs;
|
|
}
|
|
|
|
void addToSpilledSGPRs(unsigned num) {
|
|
NumSpilledSGPRs += num;
|
|
}
|
|
|
|
void addToSpilledVGPRs(unsigned num) {
|
|
NumSpilledVGPRs += num;
|
|
}
|
|
|
|
unsigned getPSInputAddr() const {
|
|
return PSInputAddr;
|
|
}
|
|
|
|
unsigned getPSInputEnable() const {
|
|
return PSInputEnable;
|
|
}
|
|
|
|
bool isPSInputAllocated(unsigned Index) const {
|
|
return PSInputAddr & (1 << Index);
|
|
}
|
|
|
|
void markPSInputAllocated(unsigned Index) {
|
|
PSInputAddr |= 1 << Index;
|
|
}
|
|
|
|
void markPSInputEnabled(unsigned Index) {
|
|
PSInputEnable |= 1 << Index;
|
|
}
|
|
|
|
bool returnsVoid() const {
|
|
return ReturnsVoid;
|
|
}
|
|
|
|
void setIfReturnsVoid(bool Value) {
|
|
ReturnsVoid = Value;
|
|
}
|
|
|
|
/// \returns A pair of default/requested minimum/maximum flat work group sizes
|
|
/// for this function.
|
|
std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
|
|
return FlatWorkGroupSizes;
|
|
}
|
|
|
|
/// \returns Default/requested minimum flat work group size for this function.
|
|
unsigned getMinFlatWorkGroupSize() const {
|
|
return FlatWorkGroupSizes.first;
|
|
}
|
|
|
|
/// \returns Default/requested maximum flat work group size for this function.
|
|
unsigned getMaxFlatWorkGroupSize() const {
|
|
return FlatWorkGroupSizes.second;
|
|
}
|
|
|
|
/// \returns A pair of default/requested minimum/maximum number of waves per
|
|
/// execution unit.
|
|
std::pair<unsigned, unsigned> getWavesPerEU() const {
|
|
return WavesPerEU;
|
|
}
|
|
|
|
/// \returns Default/requested minimum number of waves per execution unit.
|
|
unsigned getMinWavesPerEU() const {
|
|
return WavesPerEU.first;
|
|
}
|
|
|
|
/// \returns Default/requested maximum number of waves per execution unit.
|
|
unsigned getMaxWavesPerEU() const {
|
|
return WavesPerEU.second;
|
|
}
|
|
|
|
/// \returns SGPR used for \p Dim's work group ID.
|
|
unsigned getWorkGroupIDSGPR(unsigned Dim) const {
|
|
switch (Dim) {
|
|
case 0:
|
|
assert(hasWorkGroupIDX());
|
|
return ArgInfo.WorkGroupIDX.getRegister();
|
|
case 1:
|
|
assert(hasWorkGroupIDY());
|
|
return ArgInfo.WorkGroupIDY.getRegister();
|
|
case 2:
|
|
assert(hasWorkGroupIDZ());
|
|
return ArgInfo.WorkGroupIDZ.getRegister();
|
|
}
|
|
llvm_unreachable("unexpected dimension");
|
|
}
|
|
|
|
/// \returns VGPR used for \p Dim' work item ID.
|
|
unsigned getWorkItemIDVGPR(unsigned Dim) const;
|
|
|
|
unsigned getLDSWaveSpillSize() const {
|
|
return LDSWaveSpillSize;
|
|
}
|
|
|
|
const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
|
|
const Value *BufferRsrc) {
|
|
assert(BufferRsrc);
|
|
auto PSV = BufferPSVs.try_emplace(
|
|
BufferRsrc,
|
|
llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
|
|
return PSV.first->second.get();
|
|
}
|
|
|
|
const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
|
|
const Value *ImgRsrc) {
|
|
assert(ImgRsrc);
|
|
auto PSV = ImagePSVs.try_emplace(
|
|
ImgRsrc,
|
|
llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
|
|
return PSV.first->second.get();
|
|
}
|
|
|
|
unsigned getOccupancy() const {
|
|
return Occupancy;
|
|
}
|
|
|
|
unsigned getMinAllowedOccupancy() const {
|
|
if (!isMemoryBound() && !needsWaveLimiter())
|
|
return Occupancy;
|
|
return (Occupancy < 4) ? Occupancy : 4;
|
|
}
|
|
|
|
void limitOccupancy(const MachineFunction &MF);
|
|
|
|
void limitOccupancy(unsigned Limit) {
|
|
if (Occupancy > Limit)
|
|
Occupancy = Limit;
|
|
}
|
|
|
|
void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
|
|
if (Occupancy < Limit)
|
|
Occupancy = Limit;
|
|
limitOccupancy(MF);
|
|
}
|
|
};
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
|