//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
|
2021-04-26 23:43:00 +08:00
|
|
|
#include "llvm/ADT/MapVector.h"
|
2019-03-15 06:54:43 +08:00
|
|
|
#include "llvm/CodeGen/MIRYamlMapping.h"
|
2017-01-21 08:53:49 +08:00
|
|
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
2020-12-25 23:52:14 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class SIMachineFunctionInfo;
class SIRegisterInfo;
class TargetRegisterClass;

class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
  enum AMDGPUPSVKind : unsigned {
    PSVBuffer = PseudoSourceValue::TargetCustom,
    PSVImage,
    GWSResource
  };

protected:
  AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
      : PseudoSourceValue(Kind, TII) {}

public:
  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
      : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == PSVBuffer;
  }

  void printCustom(raw_ostream &OS) const override { OS << "BufferResource"; }
};

class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  // TODO: Is the img rsrc useful?
  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
      : AMDGPUPseudoSourceValue(PSVImage, TII) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == PSVImage;
  }

  void printCustom(raw_ostream &OS) const override { OS << "ImageResource"; }
};

class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
      : AMDGPUPseudoSourceValue(GWSResource, TII) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == GWSResource;
  }

  // These are inaccessible memory from IR.
  bool isAliased(const MachineFrameInfo *) const override {
    return false;
  }

  // These are inaccessible memory from IR.
  bool mayAlias(const MachineFrameInfo *) const override {
    return false;
  }

  void printCustom(raw_ostream &OS) const override {
    OS << "GWSResource";
  }
};
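
// The classof() overrides above hook these values into LLVM-style RTTI, so a
// pass that holds a MachineMemOperand can ask which AMDGPU resource an access
// refers to. A minimal sketch (the MMO pointer here is a hypothetical input):
//
//   if (const PseudoSourceValue *PSV = MMO->getPseudoValue())
//     if (isa<AMDGPUGWSResourcePseudoSourceValue>(PSV))
//       ; // the operand models a GWS access rather than real memory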

namespace yaml {

struct SIArgument {
  bool IsRegister;
  union {
    StringValue RegisterName;
    unsigned StackOffset;
  };
  Optional<unsigned> Mask;

  // Default constructor, which creates a stack argument.
  SIArgument() : IsRegister(false), StackOffset(0) {}
  SIArgument(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister) {
      ::new ((void *)std::addressof(RegisterName))
          StringValue(Other.RegisterName);
    } else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
  }
  SIArgument &operator=(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister) {
      ::new ((void *)std::addressof(RegisterName))
          StringValue(Other.RegisterName);
    } else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
    return *this;
  }
  ~SIArgument() {
    if (IsRegister)
      RegisterName.~StringValue();
  }

  // Helper to create a register or stack argument.
  static inline SIArgument createArgument(bool IsReg) {
    if (IsReg)
      return SIArgument(IsReg);
    return SIArgument();
  }

private:
  // Construct a register argument.
  SIArgument(bool) : IsRegister(true), RegisterName() {}
};

template <> struct MappingTraits<SIArgument> {
  static void mapping(IO &YamlIO, SIArgument &A) {
    if (YamlIO.outputting()) {
      if (A.IsRegister)
        YamlIO.mapRequired("reg", A.RegisterName);
      else
        YamlIO.mapRequired("offset", A.StackOffset);
    } else {
      auto Keys = YamlIO.keys();
      if (is_contained(Keys, "reg")) {
        A = SIArgument::createArgument(true);
        YamlIO.mapRequired("reg", A.RegisterName);
      } else if (is_contained(Keys, "offset"))
        YamlIO.mapRequired("offset", A.StackOffset);
      else
        YamlIO.setError("missing required key 'reg' or 'offset'");
    }
    YamlIO.mapOptional("mask", A.Mask);
  }
  static const bool flow = true;
};
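
// For orientation: because MappingTraits<SIArgument>::flow is true, each
// argument serializes as an inline YAML mapping, e.g. "{ reg: '$sgpr12' }" for
// a register argument or "{ offset: 16, mask: 255 }" for a stack argument (the
// register name and numbers are illustrative values only).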

struct SIArgumentInfo {
  Optional<SIArgument> PrivateSegmentBuffer;
  Optional<SIArgument> DispatchPtr;
  Optional<SIArgument> QueuePtr;
  Optional<SIArgument> KernargSegmentPtr;
  Optional<SIArgument> DispatchID;
  Optional<SIArgument> FlatScratchInit;
  Optional<SIArgument> PrivateSegmentSize;

  Optional<SIArgument> WorkGroupIDX;
  Optional<SIArgument> WorkGroupIDY;
  Optional<SIArgument> WorkGroupIDZ;
  Optional<SIArgument> WorkGroupInfo;
  Optional<SIArgument> PrivateSegmentWaveByteOffset;

  Optional<SIArgument> ImplicitArgPtr;
  Optional<SIArgument> ImplicitBufferPtr;

  Optional<SIArgument> WorkItemIDX;
  Optional<SIArgument> WorkItemIDY;
  Optional<SIArgument> WorkItemIDZ;
};

template <> struct MappingTraits<SIArgumentInfo> {
  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
    YamlIO.mapOptional("dispatchID", AI.DispatchID);
    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);

    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
    YamlIO.mapOptional("privateSegmentWaveByteOffset",
                       AI.PrivateSegmentWaveByteOffset);

    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);

    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
  }
};

// Default to default mode for default calling convention.
struct SIMode {
  bool IEEE = true;
  bool DX10Clamp = true;
  bool FP32InputDenormals = true;
  bool FP32OutputDenormals = true;
  bool FP64FP16InputDenormals = true;
  bool FP64FP16OutputDenormals = true;

  SIMode() = default;

  SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
    IEEE = Mode.IEEE;
    DX10Clamp = Mode.DX10Clamp;
    FP32InputDenormals = Mode.FP32InputDenormals;
    FP32OutputDenormals = Mode.FP32OutputDenormals;
    FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
    FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
  }

  bool operator ==(const SIMode Other) const {
    return IEEE == Other.IEEE &&
           DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }
};

template <> struct MappingTraits<SIMode> {
  static void mapping(IO &YamlIO, SIMode &Mode) {
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
    YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
    YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
  }
};

struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  unsigned MaxKernArgAlign = 0;
  unsigned LDSSize = 0;
  Align DynLDSAlign;
  bool IsEntryFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;
  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  uint32_t HighBitsOf32BitAddress = 0;

  // TODO: 10 may be a better default since it's the maximum.
  unsigned Occupancy = 0;

  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  Optional<SIArgumentInfo> ArgInfo;
  SIMode Mode;
  Optional<FrameIndex> ScavengeFI;

  SIMachineFunctionInfo() = default;
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI,
                        const llvm::MachineFunction &MF);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};

template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
    YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
    YamlIO.mapOptional("highBitsOf32BitAddress",
                       MFI.HighBitsOf32BitAddress, 0u);
    YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
    YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
  }
};
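
// A sketch of the MIR block this mapping reads and writes, with illustrative
// values only (the register names and sizes below are not defaults of any
// particular subtarget):
//
//   machineFunctionInfo:
//     explicitKernArgSize: 64
//     maxKernArgAlign:     8
//     isEntryFunction:     true
//     scratchRSrcReg:      '$sgpr96_sgpr97_sgpr98_sgpr99'
//     stackPtrOffsetReg:   '$sgpr32'
//     occupancy:           8
//     argumentInfo:
//       privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
//       kernargSegmentPtr:    { reg: '$sgpr4_sgpr5' }
//       workGroupIDX:         { reg: '$sgpr6' }
//       workItemIDX:          { reg: '$vgpr0' }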

} // end namespace yaml

/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  friend class GCNTargetMachine;

  Register TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the same
  // as the input registers.
  Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;

  // This is the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the current function's frame.
  Register FrameOffsetReg = AMDGPU::FP_REG;

  // This is an ABI register used in the non-entry calling convention to
  // communicate the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the new function's frame.
  Register StackPtrOffsetReg = AMDGPU::SP_REG;

  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the callee
  /// is expected to restore the argument stack this should be a multiple of 16,
  /// all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  std::unique_ptr<const AMDGPUBufferPseudoSourceValue> BufferPSV;
  std::unique_ptr<const AMDGPUImagePseudoSourceValue> ImagePSV;
  std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;

private:
  unsigned LDSWaveSpillSize = 0;
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from the
  // user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;
  unsigned GDSSize;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  mutable Optional<bool> UsesAGPRs;

  MCPhysReg getNextUserSGPR() const;
  MCPhysReg getNextSystemSGPR() const;

public:
  struct SpilledReg {
    Register VGPR;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}

    bool hasLane() { return Lane != -1;}
    bool hasReg() { return VGPR != 0;}
  };

  struct SGPRSpillVGPR {
    // VGPR used for SGPR spills
    Register VGPR;

    // If the VGPR is used for SGPR spills in a non-entrypoint function, the
    // stack slot used to save/restore it in the prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPR(Register V, Optional<int> F) : VGPR(V), FI(F) {}
  };

  struct VGPRSpillToAGPR {
    SmallVector<MCPhysReg, 32> Lanes;
    bool FullyAllocated = false;
    bool IsDead = false;
  };

  // Map WWM VGPR to a stack slot that is used to save/restore it in the
  // prolog/epilog.
  MapVector<Register, Optional<int>> WWMReservedRegs;

private:
  // Track VGPR + wave index for each subregister of the SGPR spilled to
  // frameindex key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPR, 2> SpillVGPRs;

  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;

  // AGPRs used for VGPR spills.
  SmallVector<MCPhysReg, 32> SpillAGPR;

  // VGPRs used for AGPR spills.
  SmallVector<MCPhysReg, 32> SpillVGPR;

  // Emergency stack slot. Sometimes, we create this before finalizing the stack
  // frame, so save it here and add it to the RegScavenger later.
  Optional<int> ScavengeFI;

public: // FIXME
  /// If this is set, an SGPR used for save/restore of the register used for the
  /// frame pointer.
  Register SGPRForFPSaveRestoreCopy;
  Optional<int> FramePointerSaveIndex;

  /// If this is set, an SGPR used for save/restore of the register used for the
  /// base pointer.
  Register SGPRForBPSaveRestoreCopy;
  Optional<int> BasePointerSaveIndex;

  bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
                                const MachineFunction &MF,
                                PerFunctionMIParsingState &PFS,
                                SMDiagnostic &Error, SMRange &SourceRange);

  void reserveWWMRegister(Register Reg, Optional<int> FI) {
    WWMReservedRegs.insert(std::make_pair(Reg, FI));
  }

  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }
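
  // A minimal sketch of how a frame-lowering style client might walk the
  // mapping returned above (FI and the surrounding loop are hypothetical):
  //
  //   for (const SpilledReg &Spill : MFI->getSGPRToVGPRSpills(FI))
  //     if (Spill.hasReg())
  //       ; // one 32-bit piece of the SGPR lives in lane Spill.Lane of Spill.VGPR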

  ArrayRef<SGPRSpillVGPR> getSGPRSpillVGPRs() const { return SpillVGPRs; }

  void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
    SpillVGPRs[Index].VGPR = NewVGPR;
    SpillVGPRs[Index].FI = newFI;
  }

  bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);

  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
    return SpillAGPR;
  }

  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
    return SpillVGPR;
  }

  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
                                         : I->second.Lanes[Lane];
  }

  void setVGPRToAGPRSpillDead(int FrameIndex) {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    if (I != VGPRToAGPRSpills.end())
      I->second.IsDead = true;
  }

  bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                 unsigned NumLane) const;
  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
  void removeDeadFrameIndices(MachineFrameInfo &MFI);

  int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
  Optional<int> getOptionalScavengeFI() const { return ScavengeFI; }

  bool hasCalculatedTID() const { return TIDReg != 0; };
  Register getTIDReg() const { return TIDReg; };
  void setTIDReg(Register Reg) { TIDReg = Reg; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs.
  Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  Register addDispatchPtr(const SIRegisterInfo &TRI);
  Register addQueuePtr(const SIRegisterInfo &TRI);
  Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
  Register addDispatchID(const SIRegisterInfo &TRI);
  Register addFlatScratchInit(const SIRegisterInfo &TRI);
  Register addImplicitBufferPtr(const SIRegisterInfo &TRI);

  // Add system SGPRs.
  Register addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  Register addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  Register addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  Register addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  Register addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(Register Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
    return Arg ? Arg->getRegister() : MCRegister();
  }
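
  // For illustration, call sites typically query one specific preloaded input,
  // e.g. (assuming the usual PreloadedValue enumerators):
  //
  //   MCRegister KernArgReg =
  //       MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  //   // KernArgReg is invalid (== MCRegister()) if the input was not added.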

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  Register getGITPtrLoReg(const MachineFunction &MF) const;

  uint32_t get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getGDSSize() const {
    return GDSSize;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  Register getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  Register getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setFrameOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    FrameOffsetReg = Reg;
  }

  void setStackPtrOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in serialized
  // MIR.
  Register getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  Register getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  Register getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }
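
  // PSInputAddr and PSInputEnable are plain bit masks indexed by interpolant;
  // for example, markPSInputAllocated(0) followed by markPSInputEnabled(0) sets
  // bit 0 in both masks, after which isPSInputAllocated(0) returns true.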

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group sizes
  /// for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  Register getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII) {
    if (!BufferPSV)
      BufferPSV = std::make_unique<AMDGPUBufferPseudoSourceValue>(TII);

    return BufferPSV.get();
  }

  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII) {
    if (!ImagePSV)
      ImagePSV = std::make_unique<AMDGPUImagePseudoSourceValue>(TII);

    return ImagePSV.get();
  }

  const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
    if (!GWSResourcePSV) {
      GWSResourcePSV =
          std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
    }

    return GWSResourcePSV.get();
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }
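
  // For example, with Occupancy currently 8, limitOccupancy(6) lowers it to 6,
  // while increaseOccupancy(MF, 10) raises it to 10 and then re-applies any
  // function-wide limit via limitOccupancy(MF).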

  // \returns true if a function needs or may need AGPRs.
  bool usesAGPRs(const MachineFunction &MF) const;
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H