2015-06-27 05:15:07 +08:00
|
|
|
//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AMDGPUBaseInfo.h"
|
2015-12-03 01:00:42 +08:00
|
|
|
#include "AMDGPU.h"
|
2015-12-16 00:26:16 +08:00
|
|
|
#include "llvm/IR/LLVMContext.h"
|
|
|
|
#include "llvm/IR/Function.h"
|
2015-12-03 01:00:42 +08:00
|
|
|
#include "llvm/IR/GlobalValue.h"
|
2015-09-26 05:41:28 +08:00
|
|
|
#include "llvm/MC/MCContext.h"
|
|
|
|
#include "llvm/MC/MCSectionELF.h"
|
2015-12-22 02:44:27 +08:00
|
|
|
#include "llvm/MC/MCSubtargetInfo.h"
|
2015-06-27 05:15:07 +08:00
|
|
|
#include "llvm/MC/SubtargetFeature.h"
|
|
|
|
|
|
|
|
#define GET_SUBTARGETINFO_ENUM
|
|
|
|
#include "AMDGPUGenSubtargetInfo.inc"
|
|
|
|
#undef GET_SUBTARGETINFO_ENUM
|
|
|
|
|
2015-12-22 02:44:27 +08:00
|
|
|
#define GET_REGINFO_ENUM
|
|
|
|
#include "AMDGPUGenRegisterInfo.inc"
|
|
|
|
#undef GET_REGINFO_ENUM
|
|
|
|
|
2015-06-27 05:15:07 +08:00
|
|
|
namespace llvm {
|
|
|
|
namespace AMDGPU {
|
|
|
|
|
|
|
|
/// Translate the ISA-version subtarget feature bits in \p Features into an
/// IsaVersion triple.
///
/// The known version features are probed in a fixed order and the first one
/// that is set determines the result. If none of them is set, the result is
/// {0, 0, 0}.
IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // Entries are probed top to bottom; the first set feature wins.
  static const struct {
    unsigned Feature;
    IsaVersion Version;
  } KnownVersions[] = {
      {FeatureISAVersion7_0_0, {7, 0, 0}},
      {FeatureISAVersion7_0_1, {7, 0, 1}},
      {FeatureISAVersion8_0_0, {8, 0, 0}},
      {FeatureISAVersion8_0_1, {8, 0, 1}},
      {FeatureISAVersion8_0_3, {8, 0, 3}},
  };

  for (const auto &Entry : KnownVersions) {
    if (Features.test(Entry.Feature))
      return Entry.Version;
  }

  // No recognised ISA-version feature is set.
  return {0, 0, 0};
}
|
|
|
|
|
2015-06-27 05:58:31 +08:00
|
|
|
/// Populate \p Header with default amd_kernel_code_t values for the ISA
/// version selected by \p Features.
///
/// The whole structure is zeroed first, so every field that is not assigned
/// below is left as zero.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  // Start from an all-zero header.
  memset(&Header, 0, sizeof(Header));

  // amd_kernel_code_t version 1.0.
  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 0;

  // Machine kind and the ISA version derived from the feature bits.
  const IsaVersion Version = getIsaVersion(Features);
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;

  // The entry offset is set to the size of the header itself.
  Header.kernel_code_entry_byte_offset = sizeof(Header);

  // wavefront_size is stored as a power of two: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // Segment alignments are also stored as powers of two (alignment = 2^n).
  // The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}
|
|
|
|
|
2015-09-26 05:41:28 +08:00
|
|
|
/// Return the ".hsatext" ELF section obtained from \p Ctx.
///
/// The section is requested as SHT_PROGBITS with the ALLOC, WRITE and
/// EXECINSTR flags plus the AMDGPU HSA agent and code flags.
MCSection *getHSATextSection(MCContext &Ctx) {
  const unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE |
                         ELF::SHF_EXECINSTR |
                         ELF::SHF_AMDGPU_HSA_AGENT |
                         ELF::SHF_AMDGPU_HSA_CODE;
  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS, Flags);
}
|
|
|
|
|
2015-12-03 03:47:57 +08:00
|
|
|
/// Return the ".hsadata_global_agent" ELF section obtained from \p Ctx.
///
/// The section is requested as SHT_PROGBITS with the ALLOC and WRITE flags
/// plus the AMDGPU HSA global and agent flags.
MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
  const unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE |
                         ELF::SHF_AMDGPU_HSA_GLOBAL |
                         ELF::SHF_AMDGPU_HSA_AGENT;
  return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS, Flags);
}
|
|
|
|
|
|
|
|
/// Return the ".hsadata_global_program" ELF section obtained from \p Ctx.
///
/// The section is requested as SHT_PROGBITS with the ALLOC and WRITE flags
/// plus the AMDGPU HSA global flag.
MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
  const unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE |
                         ELF::SHF_AMDGPU_HSA_GLOBAL;
  return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
                           Flags);
}
|
|
|
|
|
2015-12-03 11:34:32 +08:00
|
|
|
/// Return the ".hsarodata_readonly_agent" ELF section obtained from \p Ctx.
///
/// The section is requested as SHT_PROGBITS with the ALLOC flag plus the
/// AMDGPU HSA readonly and agent flags. SHF_WRITE is not set.
MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
  const unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
                         ELF::SHF_AMDGPU_HSA_AGENT;
  return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
                           Flags);
}
|
|
|
|
|
2015-12-03 01:00:42 +08:00
|
|
|
bool isGroupSegment(const GlobalValue *GV) {
|
|
|
|
return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
|
|
|
|
}
|
|
|
|
|
2015-12-03 03:47:57 +08:00
|
|
|
bool isGlobalSegment(const GlobalValue *GV) {
|
|
|
|
return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool isReadOnlySegment(const GlobalValue *GV) {
|
|
|
|
return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
|
|
|
|
}
|
|
|
|
|
2016-05-12 10:45:18 +08:00
|
|
|
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
|
2016-01-13 19:45:36 +08:00
|
|
|
Attribute A = F.getFnAttribute(Name);
|
2016-05-12 10:45:18 +08:00
|
|
|
int Result = Default;
|
2015-12-16 00:26:16 +08:00
|
|
|
|
|
|
|
if (A.isStringAttribute()) {
|
|
|
|
StringRef Str = A.getValueAsString();
|
2016-01-13 19:45:36 +08:00
|
|
|
if (Str.getAsInteger(0, Result)) {
|
2015-12-16 00:26:16 +08:00
|
|
|
LLVMContext &Ctx = F.getContext();
|
2016-05-12 10:45:18 +08:00
|
|
|
Ctx.emitError("can't parse integer attribute " + Name);
|
2015-12-16 00:26:16 +08:00
|
|
|
}
|
|
|
|
}
|
2016-05-12 10:45:18 +08:00
|
|
|
|
2016-01-13 19:45:36 +08:00
|
|
|
return Result;
|
|
|
|
}
|
|
|
|
|
AMDGPU: allow specifying a workgroup size that needs to fit in a compute unit
Summary:
For GL_ARB_compute_shader we need to support workgroup sizes of at least 1024. However, if we want to allow large workgroup sizes, we may need to use fewer registers, as we have to run more waves per SIMD.
This patch adds an attribute to specify the maximum work group size the compiled program needs to support. It defaults to 256, as that has no wave restrictions.
Reducing the number of registers available is done similarly to how the registers were reserved for chips with the sgpr init bug.
Reviewers: mareko, arsenm, tstellarAMD, nhaehnle
Subscribers: FireBurn, kerberizer, llvm-commits, arsenm
Differential Revision: http://reviews.llvm.org/D18340
Patch By: Bas Nieuwenhuizen
llvm-svn: 266337
2016-04-15 00:27:07 +08:00
|
|
|
unsigned getMaximumWorkGroupSize(const Function &F) {
|
|
|
|
return getIntegerAttribute(F, "amdgpu-max-work-group-size", 256);
|
|
|
|
}
|
|
|
|
|
2016-01-13 19:45:36 +08:00
|
|
|
unsigned getInitialPSInputAddr(const Function &F) {
|
|
|
|
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
|
2015-12-16 00:26:16 +08:00
|
|
|
}
|
|
|
|
|
2016-04-07 03:40:20 +08:00
|
|
|
/// Return true if \p cc is one of the AMDGPU shader calling conventions:
/// AMDGPU_VS, AMDGPU_GS, AMDGPU_PS or AMDGPU_CS. Every other calling
/// convention yields false.
bool isShader(CallingConv::ID cc) {
  return cc == CallingConv::AMDGPU_VS || cc == CallingConv::AMDGPU_GS ||
         cc == CallingConv::AMDGPU_PS || cc == CallingConv::AMDGPU_CS;
}
|
|
|
|
|
|
|
|
/// Return true if \p cc is AMDGPU_CS or is not a shader calling convention
/// at all (as decided by isShader).
bool isCompute(CallingConv::ID cc) {
  // AMDGPU_CS is a shader calling convention that still counts as compute.
  if (cc == CallingConv::AMDGPU_CS)
    return true;

  // Everything else is compute exactly when it is not a shader.
  return !isShader(cc);
}
|
|
|
|
|
2015-12-22 02:44:27 +08:00
|
|
|
bool isSI(const MCSubtargetInfo &STI) {
|
|
|
|
return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
|
|
|
|
}
|
|
|
|
|
|
|
|
bool isCI(const MCSubtargetInfo &STI) {
|
|
|
|
return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
|
|
|
|
}
|
|
|
|
|
|
|
|
bool isVI(const MCSubtargetInfo &STI) {
|
|
|
|
return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
|
|
|
|
|
|
|
|
switch(Reg) {
|
|
|
|
default: break;
|
|
|
|
case AMDGPU::FLAT_SCR:
|
|
|
|
assert(!isSI(STI));
|
|
|
|
return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
|
|
|
|
|
|
|
|
case AMDGPU::FLAT_SCR_LO:
|
|
|
|
assert(!isSI(STI));
|
|
|
|
return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
|
|
|
|
|
|
|
|
case AMDGPU::FLAT_SCR_HI:
|
|
|
|
assert(!isSI(STI));
|
|
|
|
return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
|
|
|
|
}
|
|
|
|
return Reg;
|
|
|
|
}
|
|
|
|
|
2015-06-27 05:15:07 +08:00
|
|
|
} // End namespace AMDGPU
|
|
|
|
} // End namespace llvm
|