forked from OSchip/llvm-project
AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination
into TargetParser. Also switch from target features to the CPU string when determining the ISA version. This fixes an issue where the wrong ISA version was emitted in the object code when the features of a particular CPU were altered (e.g. gfx902 without xnack used to result in gfx900). Differential Revision: https://reviews.llvm.org/D51890 llvm-svn: 341982
This commit is contained in:
parent
342c3bcf11
commit
941615e4c8
|
@ -320,6 +320,13 @@ enum GPUKind : uint32_t {
|
||||||
GK_AMDGCN_LAST = GK_GFX906,
|
GK_AMDGCN_LAST = GK_GFX906,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Instruction set architecture version.
///
/// Kept as a plain aggregate so that it can be produced from a brace list,
/// e.g. `return {9, 0, 6};`.
struct IsaVersion {
  /// First component of the version triple.
  unsigned Major;
  /// Second component of the version triple.
  unsigned Minor;
  /// Third component of the version triple.
  unsigned Stepping;
};
|
||||||
|
|
||||||
// This isn't comprehensive for now, just things that are needed from the
|
// This isn't comprehensive for now, just things that are needed from the
|
||||||
// frontend driver.
|
// frontend driver.
|
||||||
enum ArchFeatureKind : uint32_t {
|
enum ArchFeatureKind : uint32_t {
|
||||||
|
@ -335,18 +342,22 @@ enum ArchFeatureKind : uint32_t {
|
||||||
FEATURE_FAST_DENORMAL_F32 = 1 << 5
|
FEATURE_FAST_DENORMAL_F32 = 1 << 5
|
||||||
};
|
};
|
||||||
|
|
||||||
GPUKind parseArchAMDGCN(StringRef CPU);
|
|
||||||
GPUKind parseArchR600(StringRef CPU);
|
|
||||||
StringRef getArchNameAMDGCN(GPUKind AK);
|
StringRef getArchNameAMDGCN(GPUKind AK);
|
||||||
StringRef getArchNameR600(GPUKind AK);
|
StringRef getArchNameR600(GPUKind AK);
|
||||||
StringRef getCanonicalArchName(StringRef Arch);
|
StringRef getCanonicalArchName(StringRef Arch);
|
||||||
|
GPUKind parseArchAMDGCN(StringRef CPU);
|
||||||
|
GPUKind parseArchR600(StringRef CPU);
|
||||||
unsigned getArchAttrAMDGCN(GPUKind AK);
|
unsigned getArchAttrAMDGCN(GPUKind AK);
|
||||||
unsigned getArchAttrR600(GPUKind AK);
|
unsigned getArchAttrR600(GPUKind AK);
|
||||||
|
|
||||||
void fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values);
|
void fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values);
|
||||||
void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
|
void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
|
||||||
|
|
||||||
}
|
StringRef getArchNameFromElfMach(unsigned ElfMach);
|
||||||
|
unsigned getElfMach(StringRef GPU);
|
||||||
|
IsaVersion getIsaVersion(StringRef GPU);
|
||||||
|
|
||||||
|
} // namespace AMDGPU
|
||||||
|
|
||||||
} // namespace llvm
|
} // namespace llvm
|
||||||
|
|
||||||
|
|
|
@ -17,11 +17,13 @@
|
||||||
#include "llvm/ADT/ArrayRef.h"
|
#include "llvm/ADT/ArrayRef.h"
|
||||||
#include "llvm/ADT/StringSwitch.h"
|
#include "llvm/ADT/StringSwitch.h"
|
||||||
#include "llvm/ADT/Twine.h"
|
#include "llvm/ADT/Twine.h"
|
||||||
|
#include "llvm/BinaryFormat/ELF.h"
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
using namespace ARM;
|
using namespace ARM;
|
||||||
using namespace AArch64;
|
using namespace AArch64;
|
||||||
|
using namespace AMDGPU;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
@ -947,6 +949,8 @@ bool llvm::AArch64::isX18ReservedByDefault(const Triple &TT) {
|
||||||
TT.isOSWindows();
|
TT.isOSWindows();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
struct GPUInfo {
|
struct GPUInfo {
|
||||||
StringLiteral Name;
|
StringLiteral Name;
|
||||||
StringLiteral CanonicalName;
|
StringLiteral CanonicalName;
|
||||||
|
@ -954,11 +958,9 @@ struct GPUInfo {
|
||||||
unsigned Features;
|
unsigned Features;
|
||||||
};
|
};
|
||||||
|
|
||||||
using namespace AMDGPU;
|
constexpr GPUInfo R600GPUs[26] = {
|
||||||
static constexpr GPUInfo R600GPUs[26] = {
|
// Name Canonical Kind Features
|
||||||
// Name Canonical Kind Features
|
// Name
|
||||||
// Name
|
|
||||||
//
|
|
||||||
{{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
|
{{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
|
||||||
{{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE },
|
{{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE },
|
||||||
{{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE },
|
{{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE },
|
||||||
|
@ -989,9 +991,9 @@ static constexpr GPUInfo R600GPUs[26] = {
|
||||||
|
|
||||||
// This table should be sorted by the value of GPUKind
|
// This table should be sorted by the value of GPUKind
|
||||||
// Don't bother listing the implicitly true features
|
// Don't bother listing the implicitly true features
|
||||||
static constexpr GPUInfo AMDGCNGPUs[32] = {
|
constexpr GPUInfo AMDGCNGPUs[32] = {
|
||||||
// Name Canonical Kind Features
|
// Name Canonical Kind Features
|
||||||
// Name
|
// Name
|
||||||
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
|
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
|
||||||
{{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
|
{{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
|
||||||
{{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
|
{{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
|
||||||
|
@ -1026,8 +1028,7 @@ static constexpr GPUInfo AMDGCNGPUs[32] = {
|
||||||
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||||
};
|
};
|
||||||
|
|
||||||
static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK,
|
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
|
||||||
ArrayRef<GPUInfo> Table) {
|
|
||||||
GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
|
GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
|
||||||
|
|
||||||
auto I = std::lower_bound(Table.begin(), Table.end(), Search,
|
auto I = std::lower_bound(Table.begin(), Table.end(), Search,
|
||||||
|
@ -1040,6 +1041,8 @@ static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK,
|
||||||
return I;
|
return I;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
|
StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
|
||||||
if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
|
if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
|
||||||
return Entry->CanonicalName;
|
return Entry->CanonicalName;
|
||||||
|
@ -1092,3 +1095,118 @@ void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
|
||||||
for (const auto C : R600GPUs)
|
for (const auto C : R600GPUs)
|
||||||
Values.push_back(C.Name);
|
Values.push_back(C.Name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Maps an ELF::EF_AMDGPU_MACH_* machine value to a GPU architecture name.
///
/// \param ElfMach machine value to translate. Values without a case below are
/// treated like ELF::EF_AMDGPU_MACH_NONE.
/// \returns the name reported by getArchNameAMDGCN() for the matching GPUKind
/// or, when that name is empty, the result of getArchNameR600().
StringRef AMDGPU::getArchNameFromElfMach(unsigned ElfMach) {
  // Start from GK_NONE: ElfMach is an arbitrary unsigned value, so without an
  // initial value an unlisted machine would leave AK indeterminate and the
  // lookups below would read it.
  AMDGPU::GPUKind AK = GK_NONE;

  switch (ElfMach) {
  case ELF::EF_AMDGPU_MACH_R600_R600:     AK = GK_R600;    break;
  case ELF::EF_AMDGPU_MACH_R600_R630:     AK = GK_R630;    break;
  case ELF::EF_AMDGPU_MACH_R600_RS880:    AK = GK_RS880;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV670:    AK = GK_RV670;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV710:    AK = GK_RV710;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV730:    AK = GK_RV730;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV770:    AK = GK_RV770;   break;
  case ELF::EF_AMDGPU_MACH_R600_CEDAR:    AK = GK_CEDAR;   break;
  case ELF::EF_AMDGPU_MACH_R600_CYPRESS:  AK = GK_CYPRESS; break;
  case ELF::EF_AMDGPU_MACH_R600_JUNIPER:  AK = GK_JUNIPER; break;
  case ELF::EF_AMDGPU_MACH_R600_REDWOOD:  AK = GK_REDWOOD; break;
  case ELF::EF_AMDGPU_MACH_R600_SUMO:     AK = GK_SUMO;    break;
  case ELF::EF_AMDGPU_MACH_R600_BARTS:    AK = GK_BARTS;   break;
  case ELF::EF_AMDGPU_MACH_R600_CAICOS:   AK = GK_CAICOS;  break;
  case ELF::EF_AMDGPU_MACH_R600_CAYMAN:   AK = GK_CAYMAN;  break;
  case ELF::EF_AMDGPU_MACH_R600_TURKS:    AK = GK_TURKS;   break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906;  break;
  case ELF::EF_AMDGPU_MACH_NONE:          AK = GK_NONE;    break;
  // Unlisted machine values keep the GK_NONE set above.
  default:                                break;
  }

  // Prefer the AMDGCN table; consult the R600 table only when it yields
  // nothing.
  StringRef GPUName = getArchNameAMDGCN(AK);
  if (!GPUName.empty())
    return GPUName;
  return getArchNameR600(AK);
}
|
||||||
|
|
||||||
|
/// Translates a GPU name into its ELF::EF_AMDGPU_MACH_* machine value.
///
/// \param GPU name handed to parseArchAMDGCN() and, if that yields GK_NONE,
/// to parseArchR600().
/// \returns the machine value paired with the parsed GPUKind;
/// ELF::EF_AMDGPU_MACH_NONE when both parsers yield GK_NONE.
unsigned AMDGPU::getElfMach(StringRef GPU) {
  // The R600 parser is consulted only when the AMDGCN one finds nothing.
  const AMDGPU::GPUKind AMDGCNKind = parseArchAMDGCN(GPU);
  const AMDGPU::GPUKind Kind = AMDGCNKind != AMDGPU::GPUKind::GK_NONE
                                   ? AMDGCNKind
                                   : parseArchR600(GPU);

  switch (Kind) {
  // R600 family.
  case GK_R600:
    return ELF::EF_AMDGPU_MACH_R600_R600;
  case GK_R630:
    return ELF::EF_AMDGPU_MACH_R600_R630;
  case GK_RS880:
    return ELF::EF_AMDGPU_MACH_R600_RS880;
  case GK_RV670:
    return ELF::EF_AMDGPU_MACH_R600_RV670;
  case GK_RV710:
    return ELF::EF_AMDGPU_MACH_R600_RV710;
  case GK_RV730:
    return ELF::EF_AMDGPU_MACH_R600_RV730;
  case GK_RV770:
    return ELF::EF_AMDGPU_MACH_R600_RV770;
  case GK_CEDAR:
    return ELF::EF_AMDGPU_MACH_R600_CEDAR;
  case GK_CYPRESS:
    return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
  case GK_JUNIPER:
    return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
  case GK_REDWOOD:
    return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
  case GK_SUMO:
    return ELF::EF_AMDGPU_MACH_R600_SUMO;
  case GK_BARTS:
    return ELF::EF_AMDGPU_MACH_R600_BARTS;
  case GK_CAICOS:
    return ELF::EF_AMDGPU_MACH_R600_CAICOS;
  case GK_CAYMAN:
    return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
  case GK_TURKS:
    return ELF::EF_AMDGPU_MACH_R600_TURKS;

  // AMDGCN family.
  case GK_GFX600:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
  case GK_GFX601:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
  case GK_GFX700:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
  case GK_GFX701:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
  case GK_GFX702:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
  case GK_GFX703:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
  case GK_GFX704:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
  case GK_GFX801:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
  case GK_GFX802:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
  case GK_GFX803:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
  case GK_GFX810:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
  case GK_GFX900:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
  case GK_GFX902:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
  case GK_GFX904:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
  case GK_GFX906:
    return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;

  // Neither parser recognised the name.
  case GK_NONE:
    return ELF::EF_AMDGPU_MACH_NONE;
  }

  // Reached only for a GPUKind value that has no case above.
  llvm_unreachable("unknown GPU");
}
|
||||||
|
|
||||||
|
/// Determines the ISA version triple for an AMDGCN GPU name.
///
/// \param GPU name to look up; the literal "generic" is handled before any
/// parsing and yields {7, 0, 0}.
/// \returns {Major, Minor, Stepping} for the GPUKind that parseArchAMDGCN()
/// reports, or {0, 0, 0} when that kind has no entry below (GK_NONE included).
AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
  if (GPU == "generic")
    return {7, 0, 0};

  const AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);

  switch (Kind) {
  case GK_GFX600:
    return {6, 0, 0};
  case GK_GFX601:
    return {6, 0, 1};
  case GK_GFX700:
    return {7, 0, 0};
  case GK_GFX701:
    return {7, 0, 1};
  case GK_GFX702:
    return {7, 0, 2};
  case GK_GFX703:
    return {7, 0, 3};
  case GK_GFX704:
    return {7, 0, 4};
  case GK_GFX801:
    return {8, 0, 1};
  case GK_GFX802:
    return {8, 0, 2};
  case GK_GFX803:
    return {8, 0, 3};
  case GK_GFX810:
    return {8, 1, 0};
  case GK_GFX900:
    return {9, 0, 0};
  case GK_GFX902:
    return {9, 0, 2};
  case GK_GFX904:
    return {9, 0, 4};
  case GK_GFX906:
    return {9, 0, 6};
  // GK_NONE and every kind without an entry above share the zero version.
  default:
    return {0, 0, 0};
  }
}
|
||||||
|
|
|
@ -40,6 +40,7 @@
|
||||||
#include "llvm/MC/MCStreamer.h"
|
#include "llvm/MC/MCStreamer.h"
|
||||||
#include "llvm/Support/AMDGPUMetadata.h"
|
#include "llvm/Support/AMDGPUMetadata.h"
|
||||||
#include "llvm/Support/MathExtras.h"
|
#include "llvm/Support/MathExtras.h"
|
||||||
|
#include "llvm/Support/TargetParser.h"
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
#include "llvm/Target/TargetLoweringObjectFile.h"
|
#include "llvm/Target/TargetLoweringObjectFile.h"
|
||||||
|
|
||||||
|
@ -134,9 +135,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
|
||||||
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
|
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
|
||||||
|
|
||||||
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
|
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
|
||||||
IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
|
IsaVersion Version = getIsaVersion(getSTI()->getCPU());
|
||||||
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
|
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
|
||||||
ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
|
Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
|
||||||
}
|
}
|
||||||
|
|
||||||
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
|
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
|
||||||
|
@ -240,7 +241,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
|
||||||
*getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
|
*getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
|
||||||
CurrentProgramInfo.NumVGPRsForWavesPerEU,
|
CurrentProgramInfo.NumVGPRsForWavesPerEU,
|
||||||
CurrentProgramInfo.NumSGPRsForWavesPerEU -
|
CurrentProgramInfo.NumSGPRsForWavesPerEU -
|
||||||
IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
|
IsaInfo::getNumExtraSGPRs(getSTI(),
|
||||||
CurrentProgramInfo.VCCUsed,
|
CurrentProgramInfo.VCCUsed,
|
||||||
CurrentProgramInfo.FlatUsed),
|
CurrentProgramInfo.FlatUsed),
|
||||||
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
|
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
|
||||||
|
@ -561,7 +562,7 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
|
||||||
|
|
||||||
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
|
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
|
||||||
const GCNSubtarget &ST) const {
|
const GCNSubtarget &ST) const {
|
||||||
return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
|
return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
|
||||||
UsesVCC, UsesFlatScratch);
|
UsesVCC, UsesFlatScratch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -758,7 +759,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
|
||||||
|
|
||||||
// 48 SGPRs - vcc, - flat_scr, -xnack
|
// 48 SGPRs - vcc, - flat_scr, -xnack
|
||||||
int MaxSGPRGuess =
|
int MaxSGPRGuess =
|
||||||
47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
|
47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
|
||||||
ST.hasFlatAddressSpace());
|
ST.hasFlatAddressSpace());
|
||||||
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
|
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
|
||||||
MaxVGPR = std::max(MaxVGPR, 23);
|
MaxVGPR = std::max(MaxVGPR, 23);
|
||||||
|
@ -823,7 +824,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||||
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
|
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
|
||||||
// unified.
|
// unified.
|
||||||
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
|
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
|
||||||
STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
|
getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
|
||||||
|
|
||||||
// Check the addressable register limit before we add ExtraSGPRs.
|
// Check the addressable register limit before we add ExtraSGPRs.
|
||||||
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
|
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
|
||||||
|
@ -905,9 +906,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
|
ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
|
||||||
STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
|
getSTI(), ProgInfo.NumSGPRsForWavesPerEU);
|
||||||
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
|
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
|
||||||
STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
|
getSTI(), ProgInfo.NumVGPRsForWavesPerEU);
|
||||||
|
|
||||||
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
|
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
|
||||||
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
|
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
|
||||||
|
@ -1137,7 +1138,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
|
||||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||||
|
|
||||||
AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
|
AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
|
||||||
|
|
||||||
Out.compute_pgm_resource_registers =
|
Out.compute_pgm_resource_registers =
|
||||||
CurrentProgramInfo.ComputePGMRSrc1 |
|
CurrentProgramInfo.ComputePGMRSrc1 |
|
||||||
|
|
|
@ -124,10 +124,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
|
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
|
||||||
const FeatureBitset &FeatureBits) :
|
|
||||||
TargetTriple(TT),
|
TargetTriple(TT),
|
||||||
SubtargetFeatureBits(FeatureBits),
|
|
||||||
Has16BitInsts(false),
|
Has16BitInsts(false),
|
||||||
HasMadMixInsts(false),
|
HasMadMixInsts(false),
|
||||||
FP32Denormals(false),
|
FP32Denormals(false),
|
||||||
|
@ -144,9 +142,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||||
const GCNTargetMachine &TM) :
|
const GCNTargetMachine &TM) :
|
||||||
AMDGPUGenSubtargetInfo(TT, GPU, FS),
|
AMDGPUGenSubtargetInfo(TT, GPU, FS),
|
||||||
AMDGPUSubtarget(TT, getFeatureBits()),
|
AMDGPUSubtarget(TT),
|
||||||
TargetTriple(TT),
|
TargetTriple(TT),
|
||||||
Gen(SOUTHERN_ISLANDS),
|
Gen(SOUTHERN_ISLANDS),
|
||||||
IsaVersion(ISAVersion0_0_0),
|
IsaVersion(ISAVersion0_0_0),
|
||||||
|
@ -448,7 +446,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
|
||||||
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||||
const TargetMachine &TM) :
|
const TargetMachine &TM) :
|
||||||
R600GenSubtargetInfo(TT, GPU, FS),
|
R600GenSubtargetInfo(TT, GPU, FS),
|
||||||
AMDGPUSubtarget(TT, getFeatureBits()),
|
AMDGPUSubtarget(TT),
|
||||||
InstrInfo(*this),
|
InstrInfo(*this),
|
||||||
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
|
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
|
||||||
FMA(false),
|
FMA(false),
|
||||||
|
|
|
@ -63,7 +63,6 @@ private:
|
||||||
Triple TargetTriple;
|
Triple TargetTriple;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const FeatureBitset &SubtargetFeatureBits;
|
|
||||||
bool Has16BitInsts;
|
bool Has16BitInsts;
|
||||||
bool HasMadMixInsts;
|
bool HasMadMixInsts;
|
||||||
bool FP32Denormals;
|
bool FP32Denormals;
|
||||||
|
@ -79,7 +78,7 @@ protected:
|
||||||
unsigned WavefrontSize;
|
unsigned WavefrontSize;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
|
AMDGPUSubtarget(const Triple &TT);
|
||||||
|
|
||||||
static const AMDGPUSubtarget &get(const MachineFunction &MF);
|
static const AMDGPUSubtarget &get(const MachineFunction &MF);
|
||||||
static const AMDGPUSubtarget &get(const TargetMachine &TM,
|
static const AMDGPUSubtarget &get(const TargetMachine &TM,
|
||||||
|
@ -203,33 +202,21 @@ public:
|
||||||
|
|
||||||
/// \returns Maximum number of work groups per compute unit supported by the
|
/// \returns Maximum number of work groups per compute unit supported by the
|
||||||
/// subtarget and limited by given \p FlatWorkGroupSize.
|
/// subtarget and limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
|
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
|
||||||
return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
|
|
||||||
FlatWorkGroupSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Minimum flat work group size supported by the subtarget.
|
/// \returns Minimum flat work group size supported by the subtarget.
|
||||||
unsigned getMinFlatWorkGroupSize() const {
|
virtual unsigned getMinFlatWorkGroupSize() const = 0;
|
||||||
return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Maximum flat work group size supported by the subtarget.
|
/// \returns Maximum flat work group size supported by the subtarget.
|
||||||
unsigned getMaxFlatWorkGroupSize() const {
|
virtual unsigned getMaxFlatWorkGroupSize() const = 0;
|
||||||
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Maximum number of waves per execution unit supported by the
|
/// \returns Maximum number of waves per execution unit supported by the
|
||||||
/// subtarget and limited by given \p FlatWorkGroupSize.
|
/// subtarget and limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
|
virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
|
||||||
return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
|
|
||||||
FlatWorkGroupSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Minimum number of waves per execution unit supported by the
|
/// \returns Minimum number of waves per execution unit supported by the
|
||||||
/// subtarget.
|
/// subtarget.
|
||||||
unsigned getMinWavesPerEU() const {
|
virtual unsigned getMinWavesPerEU() const = 0;
|
||||||
return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned getMaxWavesPerEU() const { return 10; }
|
unsigned getMaxWavesPerEU() const { return 10; }
|
||||||
|
|
||||||
|
@ -708,20 +695,19 @@ public:
|
||||||
/// \returns Number of execution units per compute unit supported by the
|
/// \returns Number of execution units per compute unit supported by the
|
||||||
/// subtarget.
|
/// subtarget.
|
||||||
unsigned getEUsPerCU() const {
|
unsigned getEUsPerCU() const {
|
||||||
return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
|
return AMDGPU::IsaInfo::getEUsPerCU(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of waves per compute unit supported by the
|
/// \returns Maximum number of waves per compute unit supported by the
|
||||||
/// subtarget without any kind of limitation.
|
/// subtarget without any kind of limitation.
|
||||||
unsigned getMaxWavesPerCU() const {
|
unsigned getMaxWavesPerCU() const {
|
||||||
return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
|
return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of waves per compute unit supported by the
|
/// \returns Maximum number of waves per compute unit supported by the
|
||||||
/// subtarget and limited by given \p FlatWorkGroupSize.
|
/// subtarget and limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
|
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
|
||||||
return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
|
return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
|
||||||
FlatWorkGroupSize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of waves per execution unit supported by the
|
/// \returns Maximum number of waves per execution unit supported by the
|
||||||
|
@ -733,8 +719,7 @@ public:
|
||||||
/// \returns Number of waves per work group supported by the subtarget and
|
/// \returns Number of waves per work group supported by the subtarget and
|
||||||
/// limited by given \p FlatWorkGroupSize.
|
/// limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
|
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
|
||||||
return AMDGPU::IsaInfo::getWavesPerWorkGroup(
|
return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
|
||||||
MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// static wrappers
|
// static wrappers
|
||||||
|
@ -853,39 +838,34 @@ public:
|
||||||
|
|
||||||
/// \returns SGPR allocation granularity supported by the subtarget.
|
/// \returns SGPR allocation granularity supported by the subtarget.
|
||||||
unsigned getSGPRAllocGranule() const {
|
unsigned getSGPRAllocGranule() const {
|
||||||
return AMDGPU::IsaInfo::getSGPRAllocGranule(
|
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
|
||||||
MCSubtargetInfo::getFeatureBits());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns SGPR encoding granularity supported by the subtarget.
|
/// \returns SGPR encoding granularity supported by the subtarget.
|
||||||
unsigned getSGPREncodingGranule() const {
|
unsigned getSGPREncodingGranule() const {
|
||||||
return AMDGPU::IsaInfo::getSGPREncodingGranule(
|
return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
|
||||||
MCSubtargetInfo::getFeatureBits());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Total number of SGPRs supported by the subtarget.
|
/// \returns Total number of SGPRs supported by the subtarget.
|
||||||
unsigned getTotalNumSGPRs() const {
|
unsigned getTotalNumSGPRs() const {
|
||||||
return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
|
return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Addressable number of SGPRs supported by the subtarget.
|
/// \returns Addressable number of SGPRs supported by the subtarget.
|
||||||
unsigned getAddressableNumSGPRs() const {
|
unsigned getAddressableNumSGPRs() const {
|
||||||
return AMDGPU::IsaInfo::getAddressableNumSGPRs(
|
return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
|
||||||
MCSubtargetInfo::getFeatureBits());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Minimum number of SGPRs that meets the given number of waves per
|
/// \returns Minimum number of SGPRs that meets the given number of waves per
|
||||||
/// execution unit requirement supported by the subtarget.
|
/// execution unit requirement supported by the subtarget.
|
||||||
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
|
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
|
||||||
return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
|
return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
|
||||||
WavesPerEU);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of SGPRs that meets the given number of waves per
|
/// \returns Maximum number of SGPRs that meets the given number of waves per
|
||||||
/// execution unit requirement supported by the subtarget.
|
/// execution unit requirement supported by the subtarget.
|
||||||
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
|
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
|
||||||
return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
|
return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
|
||||||
WavesPerEU, Addressable);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Reserved number of SGPRs for given function \p MF.
|
/// \returns Reserved number of SGPRs for given function \p MF.
|
||||||
|
@ -903,39 +883,34 @@ public:
|
||||||
|
|
||||||
/// \returns VGPR allocation granularity supported by the subtarget.
|
/// \returns VGPR allocation granularity supported by the subtarget.
|
||||||
unsigned getVGPRAllocGranule() const {
|
unsigned getVGPRAllocGranule() const {
|
||||||
return AMDGPU::IsaInfo::getVGPRAllocGranule(
|
return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
|
||||||
MCSubtargetInfo::getFeatureBits());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns VGPR encoding granularity supported by the subtarget.
|
/// \returns VGPR encoding granularity supported by the subtarget.
|
||||||
unsigned getVGPREncodingGranule() const {
|
unsigned getVGPREncodingGranule() const {
|
||||||
return AMDGPU::IsaInfo::getVGPREncodingGranule(
|
return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
|
||||||
MCSubtargetInfo::getFeatureBits());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Total number of VGPRs supported by the subtarget.
|
/// \returns Total number of VGPRs supported by the subtarget.
|
||||||
unsigned getTotalNumVGPRs() const {
|
unsigned getTotalNumVGPRs() const {
|
||||||
return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
|
return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Addressable number of VGPRs supported by the subtarget.
|
/// \returns Addressable number of VGPRs supported by the subtarget.
|
||||||
unsigned getAddressableNumVGPRs() const {
|
unsigned getAddressableNumVGPRs() const {
|
||||||
return AMDGPU::IsaInfo::getAddressableNumVGPRs(
|
return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
|
||||||
MCSubtargetInfo::getFeatureBits());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Minimum number of VGPRs that meets given number of waves per
|
/// \returns Minimum number of VGPRs that meets given number of waves per
|
||||||
/// execution unit requirement supported by the subtarget.
|
/// execution unit requirement supported by the subtarget.
|
||||||
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
|
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
|
||||||
return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
|
return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
|
||||||
WavesPerEU);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of VGPRs that meets given number of waves per
|
/// \returns Maximum number of VGPRs that meets given number of waves per
|
||||||
/// execution unit requirement supported by the subtarget.
|
/// execution unit requirement supported by the subtarget.
|
||||||
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
|
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
|
||||||
return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
|
return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
|
||||||
WavesPerEU);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of VGPRs that meets number of waves per execution
|
/// \returns Maximum number of VGPRs that meets number of waves per execution
|
||||||
|
@ -951,6 +926,34 @@ public:
|
||||||
void getPostRAMutations(
|
void getPostRAMutations(
|
||||||
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
|
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
|
||||||
const override;
|
const override;
|
||||||
|
|
||||||
|
/// \returns Maximum number of work groups per compute unit supported by the
|
||||||
|
/// subtarget and limited by given \p FlatWorkGroupSize.
|
||||||
|
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
|
||||||
|
return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \returns Minimum flat work group size supported by the subtarget.
|
||||||
|
unsigned getMinFlatWorkGroupSize() const override {
|
||||||
|
return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \returns Maximum flat work group size supported by the subtarget.
|
||||||
|
unsigned getMaxFlatWorkGroupSize() const override {
|
||||||
|
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \returns Maximum number of waves per execution unit supported by the
|
||||||
|
/// subtarget and limited by given \p FlatWorkGroupSize.
|
||||||
|
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
|
||||||
|
return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \returns Minimum number of waves per execution unit supported by the
|
||||||
|
/// subtarget.
|
||||||
|
unsigned getMinWavesPerEU() const override {
|
||||||
|
return AMDGPU::IsaInfo::getMinWavesPerEU(this);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class R600Subtarget final : public R600GenSubtargetInfo,
|
class R600Subtarget final : public R600GenSubtargetInfo,
|
||||||
|
@ -1061,6 +1064,34 @@ public:
|
||||||
bool enableSubRegLiveness() const override {
|
bool enableSubRegLiveness() const override {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \returns Maximum number of work groups per compute unit supported by the
|
||||||
|
/// subtarget and limited by given \p FlatWorkGroupSize.
|
||||||
|
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
|
||||||
|
return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \returns Minimum flat work group size supported by the subtarget.
|
||||||
|
unsigned getMinFlatWorkGroupSize() const override {
|
||||||
|
return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \returns Maximum flat work group size supported by the subtarget.
|
||||||
|
unsigned getMaxFlatWorkGroupSize() const override {
|
||||||
|
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \returns Maximum number of waves per execution unit supported by the
|
||||||
|
/// subtarget and limited by given \p FlatWorkGroupSize.
|
||||||
|
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
|
||||||
|
return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \returns Minimum number of waves per execution unit supported by the
|
||||||
|
/// subtarget.
|
||||||
|
unsigned getMinWavesPerEU() const override {
|
||||||
|
return AMDGPU::IsaInfo::getMinWavesPerEU(this);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
|
|
@ -49,6 +49,7 @@
|
||||||
#include "llvm/Support/MachineValueType.h"
|
#include "llvm/Support/MachineValueType.h"
|
||||||
#include "llvm/Support/MathExtras.h"
|
#include "llvm/Support/MathExtras.h"
|
||||||
#include "llvm/Support/SMLoc.h"
|
#include "llvm/Support/SMLoc.h"
|
||||||
|
#include "llvm/Support/TargetParser.h"
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
@ -917,8 +918,7 @@ public:
|
||||||
// Currently there is none suitable machinery in the core llvm-mc for this.
|
// Currently there is none suitable machinery in the core llvm-mc for this.
|
||||||
// MCSymbol::isRedefinable is intended for another purpose, and
|
// MCSymbol::isRedefinable is intended for another purpose, and
|
||||||
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
|
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
|
||||||
AMDGPU::IsaInfo::IsaVersion ISA =
|
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
|
||||||
AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
|
|
||||||
MCContext &Ctx = getContext();
|
MCContext &Ctx = getContext();
|
||||||
if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
|
if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
|
||||||
MCSymbol *Sym =
|
MCSymbol *Sym =
|
||||||
|
@ -1826,7 +1826,7 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
|
||||||
unsigned DwordRegIndex,
|
unsigned DwordRegIndex,
|
||||||
unsigned RegWidth) {
|
unsigned RegWidth) {
|
||||||
// Symbols are only defined for GCN targets
|
// Symbols are only defined for GCN targets
|
||||||
if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
|
if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
auto SymbolName = getGprCountSymbolName(RegKind);
|
auto SymbolName = getGprCountSymbolName(RegKind);
|
||||||
|
@ -2637,18 +2637,18 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
|
||||||
unsigned &SGPRBlocks) {
|
unsigned &SGPRBlocks) {
|
||||||
// TODO(scott.linder): These calculations are duplicated from
|
// TODO(scott.linder): These calculations are duplicated from
|
||||||
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
|
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
|
||||||
IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
|
IsaVersion Version = getIsaVersion(getSTI().getCPU());
|
||||||
|
|
||||||
unsigned NumVGPRs = NextFreeVGPR;
|
unsigned NumVGPRs = NextFreeVGPR;
|
||||||
unsigned NumSGPRs = NextFreeSGPR;
|
unsigned NumSGPRs = NextFreeSGPR;
|
||||||
unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
|
unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
|
||||||
|
|
||||||
if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
|
if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
|
||||||
NumSGPRs > MaxAddressableNumSGPRs)
|
NumSGPRs > MaxAddressableNumSGPRs)
|
||||||
return OutOfRangeError(SGPRRange);
|
return OutOfRangeError(SGPRRange);
|
||||||
|
|
||||||
NumSGPRs +=
|
NumSGPRs +=
|
||||||
IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
|
IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
|
||||||
|
|
||||||
if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
|
if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
|
||||||
NumSGPRs > MaxAddressableNumSGPRs)
|
NumSGPRs > MaxAddressableNumSGPRs)
|
||||||
|
@ -2657,8 +2657,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
|
||||||
if (Features.test(FeatureSGPRInitBug))
|
if (Features.test(FeatureSGPRInitBug))
|
||||||
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
|
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
|
||||||
|
|
||||||
VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
|
VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
|
||||||
SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
|
SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -2678,8 +2678,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
|
||||||
|
|
||||||
StringSet<> Seen;
|
StringSet<> Seen;
|
||||||
|
|
||||||
IsaInfo::IsaVersion IVersion =
|
IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
|
||||||
IsaInfo::getIsaVersion(getSTI().getFeatureBits());
|
|
||||||
|
|
||||||
SMRange VGPRRange;
|
SMRange VGPRRange;
|
||||||
uint64_t NextFreeVGPR = 0;
|
uint64_t NextFreeVGPR = 0;
|
||||||
|
@ -2938,8 +2937,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
|
||||||
// If this directive has no arguments, then use the ISA version for the
|
// If this directive has no arguments, then use the ISA version for the
|
||||||
// targeted GPU.
|
// targeted GPU.
|
||||||
if (getLexer().is(AsmToken::EndOfStatement)) {
|
if (getLexer().is(AsmToken::EndOfStatement)) {
|
||||||
AMDGPU::IsaInfo::IsaVersion ISA =
|
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
|
||||||
AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
|
|
||||||
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
|
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
|
||||||
ISA.Stepping,
|
ISA.Stepping,
|
||||||
"AMD", "AMDGPU");
|
"AMD", "AMDGPU");
|
||||||
|
@ -3001,7 +2999,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
|
||||||
|
|
||||||
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
|
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
|
||||||
amd_kernel_code_t Header;
|
amd_kernel_code_t Header;
|
||||||
AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
|
AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
// Lex EndOfStatement. This is in a while loop, because lexing a comment
|
// Lex EndOfStatement. This is in a while loop, because lexing a comment
|
||||||
|
@ -3679,12 +3677,12 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
encodeCnt(
|
encodeCnt(
|
||||||
const AMDGPU::IsaInfo::IsaVersion ISA,
|
const AMDGPU::IsaVersion ISA,
|
||||||
int64_t &IntVal,
|
int64_t &IntVal,
|
||||||
int64_t CntVal,
|
int64_t CntVal,
|
||||||
bool Saturate,
|
bool Saturate,
|
||||||
unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
|
unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
|
||||||
unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
|
unsigned (*decode)(const IsaVersion &Version, unsigned))
|
||||||
{
|
{
|
||||||
bool Failed = false;
|
bool Failed = false;
|
||||||
|
|
||||||
|
@ -3715,8 +3713,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
|
||||||
if (getParser().parseAbsoluteExpression(CntVal))
|
if (getParser().parseAbsoluteExpression(CntVal))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
AMDGPU::IsaInfo::IsaVersion ISA =
|
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
|
||||||
AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
|
|
||||||
|
|
||||||
bool Failed = true;
|
bool Failed = true;
|
||||||
bool Sat = CntName.endswith("_sat");
|
bool Sat = CntName.endswith("_sat");
|
||||||
|
@ -3751,8 +3748,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
|
||||||
|
|
||||||
OperandMatchResultTy
|
OperandMatchResultTy
|
||||||
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
|
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
|
||||||
AMDGPU::IsaInfo::IsaVersion ISA =
|
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
|
||||||
AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
|
|
||||||
int64_t Waitcnt = getWaitcntBitMask(ISA);
|
int64_t Waitcnt = getWaitcntBitMask(ISA);
|
||||||
SMLoc S = Parser.getTok().getLoc();
|
SMLoc S = Parser.getTok().getLoc();
|
||||||
|
|
||||||
|
|
|
@ -1155,8 +1155,7 @@ void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
|
||||||
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
|
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
|
||||||
const MCSubtargetInfo &STI,
|
const MCSubtargetInfo &STI,
|
||||||
raw_ostream &O) {
|
raw_ostream &O) {
|
||||||
AMDGPU::IsaInfo::IsaVersion ISA =
|
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
|
||||||
AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
|
|
||||||
|
|
||||||
unsigned SImm16 = MI->getOperand(OpNo).getImm();
|
unsigned SImm16 = MI->getOperand(OpNo).getImm();
|
||||||
unsigned Vmcnt, Expcnt, Lgkmcnt;
|
unsigned Vmcnt, Expcnt, Lgkmcnt;
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include "llvm/MC/MCObjectFileInfo.h"
|
#include "llvm/MC/MCObjectFileInfo.h"
|
||||||
#include "llvm/MC/MCSectionELF.h"
|
#include "llvm/MC/MCSectionELF.h"
|
||||||
#include "llvm/Support/FormattedStream.h"
|
#include "llvm/Support/FormattedStream.h"
|
||||||
|
#include "llvm/Support/TargetParser.h"
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
#include "AMDGPUPTNote.h"
|
#include "AMDGPUPTNote.h"
|
||||||
|
@ -39,84 +40,6 @@ using namespace llvm::AMDGPU;
|
||||||
// AMDGPUTargetStreamer
|
// AMDGPUTargetStreamer
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
static const struct {
|
|
||||||
const char *Name;
|
|
||||||
unsigned Mach;
|
|
||||||
} MachTable[] = {
|
|
||||||
// Radeon HD 2000/3000 Series (R600).
|
|
||||||
{ "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
|
|
||||||
{ "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
|
|
||||||
{ "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
|
|
||||||
{ "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
|
|
||||||
// Radeon HD 4000 Series (R700).
|
|
||||||
{ "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
|
|
||||||
{ "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
|
|
||||||
{ "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
|
|
||||||
// Radeon HD 5000 Series (Evergreen).
|
|
||||||
{ "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
|
|
||||||
{ "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
|
|
||||||
{ "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
|
|
||||||
{ "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
|
|
||||||
{ "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
|
|
||||||
// Radeon HD 6000 Series (Northern Islands).
|
|
||||||
{ "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
|
|
||||||
{ "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
|
|
||||||
{ "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
|
|
||||||
{ "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
|
|
||||||
// AMDGCN GFX6.
|
|
||||||
{ "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
|
|
||||||
{ "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
|
|
||||||
{ "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
|
|
||||||
{ "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
|
|
||||||
{ "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
|
|
||||||
{ "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
|
|
||||||
{ "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
|
|
||||||
// AMDGCN GFX7.
|
|
||||||
{ "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
|
|
||||||
{ "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
|
|
||||||
{ "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
|
|
||||||
{ "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
|
|
||||||
{ "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
|
|
||||||
{ "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
|
|
||||||
{ "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
|
|
||||||
{ "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
|
|
||||||
{ "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
|
|
||||||
{ "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
|
|
||||||
// AMDGCN GFX8.
|
|
||||||
{ "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
|
|
||||||
{ "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
|
|
||||||
{ "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
|
|
||||||
{ "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
|
|
||||||
{ "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
|
|
||||||
{ "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
|
|
||||||
{ "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
|
|
||||||
{ "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
|
|
||||||
{ "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
|
|
||||||
{ "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
|
|
||||||
{ "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
|
|
||||||
// AMDGCN GFX9.
|
|
||||||
{ "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
|
|
||||||
{ "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
|
|
||||||
{ "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
|
|
||||||
{ "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
|
|
||||||
// Not specified processor.
|
|
||||||
{ nullptr, ELF::EF_AMDGPU_MACH_NONE }
|
|
||||||
};
|
|
||||||
|
|
||||||
unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
|
|
||||||
auto Entry = MachTable;
|
|
||||||
for (; Entry->Name && GPU != Entry->Name; ++Entry)
|
|
||||||
;
|
|
||||||
return Entry->Mach;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
|
|
||||||
auto Entry = MachTable;
|
|
||||||
for (; Entry->Name && Mach != Entry->Mach; ++Entry)
|
|
||||||
;
|
|
||||||
return Entry->Name;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
|
bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
|
||||||
HSAMD::Metadata HSAMetadata;
|
HSAMD::Metadata HSAMetadata;
|
||||||
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
|
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
|
||||||
|
@ -205,7 +128,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
|
||||||
bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
|
bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
|
||||||
amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
|
amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
|
||||||
|
|
||||||
IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
|
IsaVersion IVersion = getIsaVersion(STI.getCPU());
|
||||||
|
|
||||||
OS << "\t.amdhsa_kernel " << KernelName << '\n';
|
OS << "\t.amdhsa_kernel " << KernelName << '\n';
|
||||||
|
|
||||||
|
@ -342,7 +265,7 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
|
||||||
unsigned EFlags = MCA.getELFHeaderEFlags();
|
unsigned EFlags = MCA.getELFHeaderEFlags();
|
||||||
|
|
||||||
EFlags &= ~ELF::EF_AMDGPU_MACH;
|
EFlags &= ~ELF::EF_AMDGPU_MACH;
|
||||||
EFlags |= getMACH(STI.getCPU());
|
EFlags |= getElfMach(STI.getCPU());
|
||||||
|
|
||||||
EFlags &= ~ELF::EF_AMDGPU_XNACK;
|
EFlags &= ~ELF::EF_AMDGPU_XNACK;
|
||||||
if (AMDGPU::hasXNACK(STI))
|
if (AMDGPU::hasXNACK(STI))
|
||||||
|
|
|
@ -31,13 +31,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
|
||||||
protected:
|
protected:
|
||||||
MCContext &getContext() const { return Streamer.getContext(); }
|
MCContext &getContext() const { return Streamer.getContext(); }
|
||||||
|
|
||||||
/// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name.
|
|
||||||
unsigned getMACH(StringRef GPU) const;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value.
|
|
||||||
static const char *getMachName(unsigned Mach);
|
|
||||||
|
|
||||||
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
|
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
|
||||||
|
|
||||||
virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
|
virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
|
||||||
|
|
|
@ -369,7 +369,7 @@ private:
|
||||||
const SIRegisterInfo *TRI = nullptr;
|
const SIRegisterInfo *TRI = nullptr;
|
||||||
const MachineRegisterInfo *MRI = nullptr;
|
const MachineRegisterInfo *MRI = nullptr;
|
||||||
const MachineLoopInfo *MLI = nullptr;
|
const MachineLoopInfo *MLI = nullptr;
|
||||||
AMDGPU::IsaInfo::IsaVersion IV;
|
AMDGPU::IsaVersion IV;
|
||||||
|
|
||||||
DenseSet<MachineBasicBlock *> BlockVisitedSet;
|
DenseSet<MachineBasicBlock *> BlockVisitedSet;
|
||||||
DenseSet<MachineInstr *> TrackedWaitcntSet;
|
DenseSet<MachineInstr *> TrackedWaitcntSet;
|
||||||
|
@ -1841,7 +1841,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
|
||||||
TRI = &TII->getRegisterInfo();
|
TRI = &TII->getRegisterInfo();
|
||||||
MRI = &MF.getRegInfo();
|
MRI = &MF.getRegInfo();
|
||||||
MLI = &getAnalysis<MachineLoopInfo>();
|
MLI = &getAnalysis<MachineLoopInfo>();
|
||||||
IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
|
IV = AMDGPU::getIsaVersion(ST->getCPU());
|
||||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
|
|
||||||
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
|
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
|
||||||
|
|
|
@ -253,7 +253,7 @@ protected:
|
||||||
/// Instruction info.
|
/// Instruction info.
|
||||||
const SIInstrInfo *TII = nullptr;
|
const SIInstrInfo *TII = nullptr;
|
||||||
|
|
||||||
IsaInfo::IsaVersion IV;
|
IsaVersion IV;
|
||||||
|
|
||||||
SICacheControl(const GCNSubtarget &ST);
|
SICacheControl(const GCNSubtarget &ST);
|
||||||
|
|
||||||
|
@ -605,7 +605,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
|
||||||
|
|
||||||
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
|
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
|
||||||
TII = ST.getInstrInfo();
|
TII = ST.getInstrInfo();
|
||||||
IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
|
IV = getIsaVersion(ST.getCPU());
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
|
|
|
@ -137,68 +137,18 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
|
||||||
|
|
||||||
namespace IsaInfo {
|
namespace IsaInfo {
|
||||||
|
|
||||||
IsaVersion getIsaVersion(const FeatureBitset &Features) {
|
|
||||||
// GCN GFX6 (Southern Islands (SI)).
|
|
||||||
if (Features.test(FeatureISAVersion6_0_0))
|
|
||||||
return {6, 0, 0};
|
|
||||||
if (Features.test(FeatureISAVersion6_0_1))
|
|
||||||
return {6, 0, 1};
|
|
||||||
|
|
||||||
// GCN GFX7 (Sea Islands (CI)).
|
|
||||||
if (Features.test(FeatureISAVersion7_0_0))
|
|
||||||
return {7, 0, 0};
|
|
||||||
if (Features.test(FeatureISAVersion7_0_1))
|
|
||||||
return {7, 0, 1};
|
|
||||||
if (Features.test(FeatureISAVersion7_0_2))
|
|
||||||
return {7, 0, 2};
|
|
||||||
if (Features.test(FeatureISAVersion7_0_3))
|
|
||||||
return {7, 0, 3};
|
|
||||||
if (Features.test(FeatureISAVersion7_0_4))
|
|
||||||
return {7, 0, 4};
|
|
||||||
if (Features.test(FeatureSeaIslands))
|
|
||||||
return {7, 0, 0};
|
|
||||||
|
|
||||||
// GCN GFX8 (Volcanic Islands (VI)).
|
|
||||||
if (Features.test(FeatureISAVersion8_0_1))
|
|
||||||
return {8, 0, 1};
|
|
||||||
if (Features.test(FeatureISAVersion8_0_2))
|
|
||||||
return {8, 0, 2};
|
|
||||||
if (Features.test(FeatureISAVersion8_0_3))
|
|
||||||
return {8, 0, 3};
|
|
||||||
if (Features.test(FeatureISAVersion8_1_0))
|
|
||||||
return {8, 1, 0};
|
|
||||||
if (Features.test(FeatureVolcanicIslands))
|
|
||||||
return {8, 0, 0};
|
|
||||||
|
|
||||||
// GCN GFX9.
|
|
||||||
if (Features.test(FeatureISAVersion9_0_0))
|
|
||||||
return {9, 0, 0};
|
|
||||||
if (Features.test(FeatureISAVersion9_0_2))
|
|
||||||
return {9, 0, 2};
|
|
||||||
if (Features.test(FeatureISAVersion9_0_4))
|
|
||||||
return {9, 0, 4};
|
|
||||||
if (Features.test(FeatureISAVersion9_0_6))
|
|
||||||
return {9, 0, 6};
|
|
||||||
if (Features.test(FeatureGFX9))
|
|
||||||
return {9, 0, 0};
|
|
||||||
|
|
||||||
if (Features.test(FeatureSouthernIslands))
|
|
||||||
return {0, 0, 0};
|
|
||||||
return {7, 0, 0};
|
|
||||||
}
|
|
||||||
|
|
||||||
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
|
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
|
||||||
auto TargetTriple = STI->getTargetTriple();
|
auto TargetTriple = STI->getTargetTriple();
|
||||||
auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
|
auto Version = getIsaVersion(STI->getCPU());
|
||||||
|
|
||||||
Stream << TargetTriple.getArchName() << '-'
|
Stream << TargetTriple.getArchName() << '-'
|
||||||
<< TargetTriple.getVendorName() << '-'
|
<< TargetTriple.getVendorName() << '-'
|
||||||
<< TargetTriple.getOSName() << '-'
|
<< TargetTriple.getOSName() << '-'
|
||||||
<< TargetTriple.getEnvironmentName() << '-'
|
<< TargetTriple.getEnvironmentName() << '-'
|
||||||
<< "gfx"
|
<< "gfx"
|
||||||
<< ISAVersion.Major
|
<< Version.Major
|
||||||
<< ISAVersion.Minor
|
<< Version.Minor
|
||||||
<< ISAVersion.Stepping;
|
<< Version.Stepping;
|
||||||
|
|
||||||
if (hasXNACK(*STI))
|
if (hasXNACK(*STI))
|
||||||
Stream << "+xnack";
|
Stream << "+xnack";
|
||||||
|
@ -210,49 +160,49 @@ bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
|
||||||
return STI->getFeatureBits().test(FeatureCodeObjectV3);
|
return STI->getFeatureBits().test(FeatureCodeObjectV3);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getWavefrontSize(const FeatureBitset &Features) {
|
unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
|
||||||
if (Features.test(FeatureWavefrontSize16))
|
if (STI->getFeatureBits().test(FeatureWavefrontSize16))
|
||||||
return 16;
|
return 16;
|
||||||
if (Features.test(FeatureWavefrontSize32))
|
if (STI->getFeatureBits().test(FeatureWavefrontSize32))
|
||||||
return 32;
|
return 32;
|
||||||
|
|
||||||
return 64;
|
return 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getLocalMemorySize(const FeatureBitset &Features) {
|
unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
|
||||||
if (Features.test(FeatureLocalMemorySize32768))
|
if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
|
||||||
return 32768;
|
return 32768;
|
||||||
if (Features.test(FeatureLocalMemorySize65536))
|
if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
|
||||||
return 65536;
|
return 65536;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getEUsPerCU(const FeatureBitset &Features) {
|
unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
|
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
|
||||||
unsigned FlatWorkGroupSize) {
|
unsigned FlatWorkGroupSize) {
|
||||||
if (!Features.test(FeatureGCN))
|
if (!STI->getFeatureBits().test(FeatureGCN))
|
||||||
return 8;
|
return 8;
|
||||||
unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
|
unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
|
||||||
if (N == 1)
|
if (N == 1)
|
||||||
return 40;
|
return 40;
|
||||||
N = 40 / N;
|
N = 40 / N;
|
||||||
return std::min(N, 16u);
|
return std::min(N, 16u);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
|
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
|
||||||
return getMaxWavesPerEU() * getEUsPerCU(Features);
|
return getMaxWavesPerEU() * getEUsPerCU(STI);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
|
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
|
||||||
unsigned FlatWorkGroupSize) {
|
unsigned FlatWorkGroupSize) {
|
||||||
return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
|
return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMinWavesPerEU(const FeatureBitset &Features) {
|
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -261,89 +211,89 @@ unsigned getMaxWavesPerEU() {
|
||||||
return 10;
|
return 10;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
|
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
|
||||||
unsigned FlatWorkGroupSize) {
|
unsigned FlatWorkGroupSize) {
|
||||||
return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
|
return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
|
||||||
getEUsPerCU(Features)) / getEUsPerCU(Features);
|
getEUsPerCU(STI)) / getEUsPerCU(STI);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
|
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
|
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
|
||||||
return 2048;
|
return 2048;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
|
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
|
||||||
unsigned FlatWorkGroupSize) {
|
unsigned FlatWorkGroupSize) {
|
||||||
return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
|
return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
|
||||||
getWavefrontSize(Features);
|
getWavefrontSize(STI);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
|
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
|
||||||
IsaVersion Version = getIsaVersion(Features);
|
IsaVersion Version = getIsaVersion(STI->getCPU());
|
||||||
if (Version.Major >= 8)
|
if (Version.Major >= 8)
|
||||||
return 16;
|
return 16;
|
||||||
return 8;
|
return 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
|
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
|
||||||
return 8;
|
return 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
|
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
|
||||||
IsaVersion Version = getIsaVersion(Features);
|
IsaVersion Version = getIsaVersion(STI->getCPU());
|
||||||
if (Version.Major >= 8)
|
if (Version.Major >= 8)
|
||||||
return 800;
|
return 800;
|
||||||
return 512;
|
return 512;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
|
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
|
||||||
if (Features.test(FeatureSGPRInitBug))
|
if (STI->getFeatureBits().test(FeatureSGPRInitBug))
|
||||||
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
|
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
|
||||||
|
|
||||||
IsaVersion Version = getIsaVersion(Features);
|
IsaVersion Version = getIsaVersion(STI->getCPU());
|
||||||
if (Version.Major >= 8)
|
if (Version.Major >= 8)
|
||||||
return 102;
|
return 102;
|
||||||
return 104;
|
return 104;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
|
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
|
||||||
assert(WavesPerEU != 0);
|
assert(WavesPerEU != 0);
|
||||||
|
|
||||||
if (WavesPerEU >= getMaxWavesPerEU())
|
if (WavesPerEU >= getMaxWavesPerEU())
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
|
unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
|
||||||
if (Features.test(FeatureTrapHandler))
|
if (STI->getFeatureBits().test(FeatureTrapHandler))
|
||||||
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
|
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
|
||||||
MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
|
MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
|
||||||
return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
|
return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
|
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
|
||||||
bool Addressable) {
|
bool Addressable) {
|
||||||
assert(WavesPerEU != 0);
|
assert(WavesPerEU != 0);
|
||||||
|
|
||||||
IsaVersion Version = getIsaVersion(Features);
|
IsaVersion Version = getIsaVersion(STI->getCPU());
|
||||||
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
|
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
|
||||||
if (Version.Major >= 8 && !Addressable)
|
if (Version.Major >= 8 && !Addressable)
|
||||||
AddressableNumSGPRs = 112;
|
AddressableNumSGPRs = 112;
|
||||||
unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
|
unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
|
||||||
if (Features.test(FeatureTrapHandler))
|
if (STI->getFeatureBits().test(FeatureTrapHandler))
|
||||||
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
|
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
|
||||||
MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
|
MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
|
||||||
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
|
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
|
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
|
||||||
bool FlatScrUsed, bool XNACKUsed) {
|
bool FlatScrUsed, bool XNACKUsed) {
|
||||||
unsigned ExtraSGPRs = 0;
|
unsigned ExtraSGPRs = 0;
|
||||||
if (VCCUsed)
|
if (VCCUsed)
|
||||||
ExtraSGPRs = 2;
|
ExtraSGPRs = 2;
|
||||||
|
|
||||||
IsaVersion Version = getIsaVersion(Features);
|
IsaVersion Version = getIsaVersion(STI->getCPU());
|
||||||
if (Version.Major < 8) {
|
if (Version.Major < 8) {
|
||||||
if (FlatScrUsed)
|
if (FlatScrUsed)
|
||||||
ExtraSGPRs = 4;
|
ExtraSGPRs = 4;
|
||||||
|
@ -358,74 +308,74 @@ unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
|
||||||
return ExtraSGPRs;
|
return ExtraSGPRs;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
|
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
|
||||||
bool FlatScrUsed) {
|
bool FlatScrUsed) {
|
||||||
return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
|
return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
|
||||||
Features[AMDGPU::FeatureXNACK]);
|
STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
|
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
|
||||||
NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
|
NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
|
||||||
// SGPRBlocks is actual number of SGPR blocks minus 1.
|
// SGPRBlocks is actual number of SGPR blocks minus 1.
|
||||||
return NumSGPRs / getSGPREncodingGranule(Features) - 1;
|
return NumSGPRs / getSGPREncodingGranule(STI) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
|
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
|
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
|
||||||
return getVGPRAllocGranule(Features);
|
return getVGPRAllocGranule(STI);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
|
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
|
||||||
return 256;
|
return 256;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
|
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
|
||||||
return getTotalNumVGPRs(Features);
|
return getTotalNumVGPRs(STI);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
|
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
|
||||||
assert(WavesPerEU != 0);
|
assert(WavesPerEU != 0);
|
||||||
|
|
||||||
if (WavesPerEU >= getMaxWavesPerEU())
|
if (WavesPerEU >= getMaxWavesPerEU())
|
||||||
return 0;
|
return 0;
|
||||||
unsigned MinNumVGPRs =
|
unsigned MinNumVGPRs =
|
||||||
alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
|
alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
|
||||||
getVGPRAllocGranule(Features)) + 1;
|
getVGPRAllocGranule(STI)) + 1;
|
||||||
return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
|
return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
|
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
|
||||||
assert(WavesPerEU != 0);
|
assert(WavesPerEU != 0);
|
||||||
|
|
||||||
unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
|
unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
|
||||||
getVGPRAllocGranule(Features));
|
getVGPRAllocGranule(STI));
|
||||||
unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
|
unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
|
||||||
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
|
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
|
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
|
||||||
NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
|
NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
|
||||||
// VGPRBlocks is actual number of VGPR blocks minus 1.
|
// VGPRBlocks is actual number of VGPR blocks minus 1.
|
||||||
return NumVGPRs / getVGPREncodingGranule(Features) - 1;
|
return NumVGPRs / getVGPREncodingGranule(STI) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace IsaInfo
|
} // end namespace IsaInfo
|
||||||
|
|
||||||
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
||||||
const FeatureBitset &Features) {
|
const MCSubtargetInfo *STI) {
|
||||||
IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
|
IsaVersion Version = getIsaVersion(STI->getCPU());
|
||||||
|
|
||||||
memset(&Header, 0, sizeof(Header));
|
memset(&Header, 0, sizeof(Header));
|
||||||
|
|
||||||
Header.amd_kernel_code_version_major = 1;
|
Header.amd_kernel_code_version_major = 1;
|
||||||
Header.amd_kernel_code_version_minor = 2;
|
Header.amd_kernel_code_version_minor = 2;
|
||||||
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
|
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
|
||||||
Header.amd_machine_version_major = ISA.Major;
|
Header.amd_machine_version_major = Version.Major;
|
||||||
Header.amd_machine_version_minor = ISA.Minor;
|
Header.amd_machine_version_minor = Version.Minor;
|
||||||
Header.amd_machine_version_stepping = ISA.Stepping;
|
Header.amd_machine_version_stepping = Version.Stepping;
|
||||||
Header.kernel_code_entry_byte_offset = sizeof(Header);
|
Header.kernel_code_entry_byte_offset = sizeof(Header);
|
||||||
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
|
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
|
||||||
Header.wavefront_size = 6;
|
Header.wavefront_size = 6;
|
||||||
|
@ -513,7 +463,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
|
||||||
return Ints;
|
return Ints;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
|
unsigned getVmcntBitMask(const IsaVersion &Version) {
|
||||||
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
|
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
|
||||||
if (Version.Major < 9)
|
if (Version.Major < 9)
|
||||||
return VmcntLo;
|
return VmcntLo;
|
||||||
|
@ -522,15 +472,15 @@ unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
|
||||||
return VmcntLo | VmcntHi;
|
return VmcntLo | VmcntHi;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
|
unsigned getExpcntBitMask(const IsaVersion &Version) {
|
||||||
return (1 << getExpcntBitWidth()) - 1;
|
return (1 << getExpcntBitWidth()) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
|
unsigned getLgkmcntBitMask(const IsaVersion &Version) {
|
||||||
return (1 << getLgkmcntBitWidth()) - 1;
|
return (1 << getLgkmcntBitWidth()) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
|
unsigned getWaitcntBitMask(const IsaVersion &Version) {
|
||||||
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
||||||
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
|
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
|
||||||
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
|
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
|
||||||
|
@ -542,7 +492,7 @@ unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
|
||||||
return Waitcnt | VmcntHi;
|
return Waitcnt | VmcntHi;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
|
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
|
||||||
unsigned VmcntLo =
|
unsigned VmcntLo =
|
||||||
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
||||||
if (Version.Major < 9)
|
if (Version.Major < 9)
|
||||||
|
@ -554,22 +504,22 @@ unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
|
||||||
return VmcntLo | VmcntHi;
|
return VmcntLo | VmcntHi;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
|
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
|
||||||
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
|
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
|
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
|
||||||
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
|
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
|
||||||
}
|
}
|
||||||
|
|
||||||
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
|
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
|
||||||
Vmcnt = decodeVmcnt(Version, Waitcnt);
|
Vmcnt = decodeVmcnt(Version, Waitcnt);
|
||||||
Expcnt = decodeExpcnt(Version, Waitcnt);
|
Expcnt = decodeExpcnt(Version, Waitcnt);
|
||||||
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
|
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Vmcnt) {
|
unsigned Vmcnt) {
|
||||||
Waitcnt =
|
Waitcnt =
|
||||||
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
||||||
|
@ -580,17 +530,17 @@ unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||||
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
|
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Expcnt) {
|
unsigned Expcnt) {
|
||||||
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
|
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Lgkmcnt) {
|
unsigned Lgkmcnt) {
|
||||||
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
|
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
|
unsigned encodeWaitcnt(const IsaVersion &Version,
|
||||||
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
|
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
|
||||||
unsigned Waitcnt = getWaitcntBitMask(Version);
|
unsigned Waitcnt = getWaitcntBitMask(Version);
|
||||||
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
|
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include "llvm/Support/AMDHSAKernelDescriptor.h"
|
#include "llvm/Support/AMDHSAKernelDescriptor.h"
|
||||||
#include "llvm/Support/Compiler.h"
|
#include "llvm/Support/Compiler.h"
|
||||||
#include "llvm/Support/ErrorHandling.h"
|
#include "llvm/Support/ErrorHandling.h"
|
||||||
|
#include "llvm/Support/TargetParser.h"
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
@ -56,16 +57,6 @@ enum {
|
||||||
TRAP_NUM_SGPRS = 16
|
TRAP_NUM_SGPRS = 16
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Instruction set architecture version.
|
|
||||||
struct IsaVersion {
|
|
||||||
unsigned Major;
|
|
||||||
unsigned Minor;
|
|
||||||
unsigned Stepping;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// \returns Isa version for given subtarget \p Features.
|
|
||||||
IsaVersion getIsaVersion(const FeatureBitset &Features);
|
|
||||||
|
|
||||||
/// Streams isa version string for given subtarget \p STI into \p Stream.
|
/// Streams isa version string for given subtarget \p STI into \p Stream.
|
||||||
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
|
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
|
||||||
|
|
||||||
|
@ -73,114 +64,114 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
|
||||||
/// false otherwise.
|
/// false otherwise.
|
||||||
bool hasCodeObjectV3(const MCSubtargetInfo *STI);
|
bool hasCodeObjectV3(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Wavefront size for given subtarget \p Features.
|
/// \returns Wavefront size for given subtarget \p STI.
|
||||||
unsigned getWavefrontSize(const FeatureBitset &Features);
|
unsigned getWavefrontSize(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Local memory size in bytes for given subtarget \p Features.
|
/// \returns Local memory size in bytes for given subtarget \p STI.
|
||||||
unsigned getLocalMemorySize(const FeatureBitset &Features);
|
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Number of execution units per compute unit for given subtarget \p
|
/// \returns Number of execution units per compute unit for given subtarget \p
|
||||||
/// Features.
|
/// STI.
|
||||||
unsigned getEUsPerCU(const FeatureBitset &Features);
|
unsigned getEUsPerCU(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Maximum number of work groups per compute unit for given subtarget
|
/// \returns Maximum number of work groups per compute unit for given subtarget
|
||||||
/// \p Features and limited by given \p FlatWorkGroupSize.
|
/// \p STI and limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
|
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
|
||||||
unsigned FlatWorkGroupSize);
|
unsigned FlatWorkGroupSize);
|
||||||
|
|
||||||
/// \returns Maximum number of waves per compute unit for given subtarget \p
|
/// \returns Maximum number of waves per compute unit for given subtarget \p
|
||||||
/// Features without any kind of limitation.
|
/// STI without any kind of limitation.
|
||||||
unsigned getMaxWavesPerCU(const FeatureBitset &Features);
|
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Maximum number of waves per compute unit for given subtarget \p
|
/// \returns Maximum number of waves per compute unit for given subtarget \p
|
||||||
/// Features and limited by given \p FlatWorkGroupSize.
|
/// STI and limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
|
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
|
||||||
unsigned FlatWorkGroupSize);
|
unsigned FlatWorkGroupSize);
|
||||||
|
|
||||||
/// \returns Minimum number of waves per execution unit for given subtarget \p
|
/// \returns Minimum number of waves per execution unit for given subtarget \p
|
||||||
/// Features.
|
/// STI.
|
||||||
unsigned getMinWavesPerEU(const FeatureBitset &Features);
|
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Maximum number of waves per execution unit for given subtarget \p
|
/// \returns Maximum number of waves per execution unit for given subtarget \p
|
||||||
/// Features without any kind of limitation.
|
/// STI without any kind of limitation.
|
||||||
unsigned getMaxWavesPerEU();
|
unsigned getMaxWavesPerEU();
|
||||||
|
|
||||||
/// \returns Maximum number of waves per execution unit for given subtarget \p
|
/// \returns Maximum number of waves per execution unit for given subtarget \p
|
||||||
/// Features and limited by given \p FlatWorkGroupSize.
|
/// STI and limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
|
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
|
||||||
unsigned FlatWorkGroupSize);
|
unsigned FlatWorkGroupSize);
|
||||||
|
|
||||||
/// \returns Minimum flat work group size for given subtarget \p Features.
|
/// \returns Minimum flat work group size for given subtarget \p STI.
|
||||||
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
|
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Maximum flat work group size for given subtarget \p Features.
|
/// \returns Maximum flat work group size for given subtarget \p STI.
|
||||||
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
|
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Number of waves per work group for given subtarget \p Features and
|
/// \returns Number of waves per work group for given subtarget \p STI and
|
||||||
/// limited by given \p FlatWorkGroupSize.
|
/// limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
|
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
|
||||||
unsigned FlatWorkGroupSize);
|
unsigned FlatWorkGroupSize);
|
||||||
|
|
||||||
/// \returns SGPR allocation granularity for given subtarget \p Features.
|
/// \returns SGPR allocation granularity for given subtarget \p STI.
|
||||||
unsigned getSGPRAllocGranule(const FeatureBitset &Features);
|
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns SGPR encoding granularity for given subtarget \p Features.
|
/// \returns SGPR encoding granularity for given subtarget \p STI.
|
||||||
unsigned getSGPREncodingGranule(const FeatureBitset &Features);
|
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Total number of SGPRs for given subtarget \p Features.
|
/// \returns Total number of SGPRs for given subtarget \p STI.
|
||||||
unsigned getTotalNumSGPRs(const FeatureBitset &Features);
|
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Addressable number of SGPRs for given subtarget \p Features.
|
/// \returns Addressable number of SGPRs for given subtarget \p STI.
|
||||||
unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
|
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Minimum number of SGPRs that meets the given number of waves per
|
/// \returns Minimum number of SGPRs that meets the given number of waves per
|
||||||
/// execution unit requirement for given subtarget \p Features.
|
/// execution unit requirement for given subtarget \p STI.
|
||||||
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
|
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
|
||||||
|
|
||||||
/// \returns Maximum number of SGPRs that meets the given number of waves per
|
/// \returns Maximum number of SGPRs that meets the given number of waves per
|
||||||
/// execution unit requirement for given subtarget \p Features.
|
/// execution unit requirement for given subtarget \p STI.
|
||||||
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
|
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
|
||||||
bool Addressable);
|
bool Addressable);
|
||||||
|
|
||||||
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
|
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
|
||||||
/// Features when the given special registers are used.
|
/// STI when the given special registers are used.
|
||||||
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
|
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
|
||||||
bool FlatScrUsed, bool XNACKUsed);
|
bool FlatScrUsed, bool XNACKUsed);
|
||||||
|
|
||||||
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
|
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
|
||||||
/// Features when the given special registers are used. XNACK is inferred from
|
/// STI when the given special registers are used. XNACK is inferred from
|
||||||
/// \p Features.
|
/// \p STI.
|
||||||
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
|
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
|
||||||
bool FlatScrUsed);
|
bool FlatScrUsed);
|
||||||
|
|
||||||
/// \returns Number of SGPR blocks needed for given subtarget \p Features when
|
/// \returns Number of SGPR blocks needed for given subtarget \p STI when
|
||||||
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
|
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
|
||||||
/// register counts.
|
/// register counts.
|
||||||
unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
|
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
|
||||||
|
|
||||||
/// \returns VGPR allocation granularity for given subtarget \p Features.
|
/// \returns VGPR allocation granularity for given subtarget \p STI.
|
||||||
unsigned getVGPRAllocGranule(const FeatureBitset &Features);
|
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns VGPR encoding granularity for given subtarget \p Features.
|
/// \returns VGPR encoding granularity for given subtarget \p STI.
|
||||||
unsigned getVGPREncodingGranule(const FeatureBitset &Features);
|
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Total number of VGPRs for given subtarget \p Features.
|
/// \returns Total number of VGPRs for given subtarget \p STI.
|
||||||
unsigned getTotalNumVGPRs(const FeatureBitset &Features);
|
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Addressable number of VGPRs for given subtarget \p Features.
|
/// \returns Addressable number of VGPRs for given subtarget \p STI.
|
||||||
unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
|
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Minimum number of VGPRs that meets given number of waves per
|
/// \returns Minimum number of VGPRs that meets given number of waves per
|
||||||
/// execution unit requirement for given subtarget \p Features.
|
/// execution unit requirement for given subtarget \p STI.
|
||||||
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
|
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
|
||||||
|
|
||||||
/// \returns Maximum number of VGPRs that meets given number of waves per
|
/// \returns Maximum number of VGPRs that meets given number of waves per
|
||||||
/// execution unit requirement for given subtarget \p Features.
|
/// execution unit requirement for given subtarget \p STI.
|
||||||
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
|
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
|
||||||
|
|
||||||
/// \returns Number of VGPR blocks needed for given subtarget \p Features when
|
/// \returns Number of VGPR blocks needed for given subtarget \p STI when
|
||||||
/// \p NumVGPRs are used.
|
/// \p NumVGPRs are used.
|
||||||
unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
|
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
|
||||||
|
|
||||||
} // end namespace IsaInfo
|
} // end namespace IsaInfo
|
||||||
|
|
||||||
|
@ -233,7 +224,7 @@ LLVM_READONLY
|
||||||
int getMCOpcode(uint16_t Opcode, unsigned Gen);
|
int getMCOpcode(uint16_t Opcode, unsigned Gen);
|
||||||
|
|
||||||
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
||||||
const FeatureBitset &Features);
|
const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
|
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
|
||||||
|
|
||||||
|
@ -268,25 +259,25 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
|
||||||
bool OnlyFirstRequired = false);
|
bool OnlyFirstRequired = false);
|
||||||
|
|
||||||
/// \returns Vmcnt bit mask for given isa \p Version.
|
/// \returns Vmcnt bit mask for given isa \p Version.
|
||||||
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
|
unsigned getVmcntBitMask(const IsaVersion &Version);
|
||||||
|
|
||||||
/// \returns Expcnt bit mask for given isa \p Version.
|
/// \returns Expcnt bit mask for given isa \p Version.
|
||||||
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
|
unsigned getExpcntBitMask(const IsaVersion &Version);
|
||||||
|
|
||||||
/// \returns Lgkmcnt bit mask for given isa \p Version.
|
/// \returns Lgkmcnt bit mask for given isa \p Version.
|
||||||
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
|
unsigned getLgkmcntBitMask(const IsaVersion &Version);
|
||||||
|
|
||||||
/// \returns Waitcnt bit mask for given isa \p Version.
|
/// \returns Waitcnt bit mask for given isa \p Version.
|
||||||
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
|
unsigned getWaitcntBitMask(const IsaVersion &Version);
|
||||||
|
|
||||||
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
|
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
|
||||||
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
|
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
|
||||||
|
|
||||||
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
|
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
|
||||||
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
|
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
|
||||||
|
|
||||||
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
|
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
|
||||||
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
|
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
|
||||||
|
|
||||||
/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
|
/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
|
||||||
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
|
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
|
||||||
|
@ -297,19 +288,19 @@ unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
|
||||||
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
|
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
|
||||||
/// \p Expcnt = \p Waitcnt[6:4]
|
/// \p Expcnt = \p Waitcnt[6:4]
|
||||||
/// \p Lgkmcnt = \p Waitcnt[11:8]
|
/// \p Lgkmcnt = \p Waitcnt[11:8]
|
||||||
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
|
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
|
||||||
|
|
||||||
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
|
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
|
||||||
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Vmcnt);
|
unsigned Vmcnt);
|
||||||
|
|
||||||
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
|
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
|
||||||
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Expcnt);
|
unsigned Expcnt);
|
||||||
|
|
||||||
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
|
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
|
||||||
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Lgkmcnt);
|
unsigned Lgkmcnt);
|
||||||
|
|
||||||
/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
|
/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
|
||||||
|
@ -324,7 +315,7 @@ unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||||
///
|
///
|
||||||
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
|
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
|
||||||
/// isa \p Version.
|
/// isa \p Version.
|
||||||
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
|
unsigned encodeWaitcnt(const IsaVersion &Version,
|
||||||
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
|
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
|
||||||
|
|
||||||
unsigned getInitialPSInputAddr(const Function &F);
|
unsigned getInitialPSInputAddr(const Function &F);
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s
|
||||||
|
|
||||||
|
; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
|
||||||
|
define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
|
||||||
|
store float 0.0, float addrspace(1)* %out0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
|
@ -2,5 +2,5 @@
|
||||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
|
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
|
||||||
|
|
||||||
.hsa_code_object_isa
|
.hsa_code_object_isa
|
||||||
// GFX8: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU"
|
// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
|
||||||
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
|
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
|
||||||
|
|
Loading…
Reference in New Issue