[AMDGPU] Add gfx1010 target definitions

Differential Revision: https://reviews.llvm.org/D61041

llvm-svn: 359113
This commit was authored by Stanislav Mekhanoshin on 2019-04-24 17:03:15 +00:00.
parent c60a4099a1
commit cee607e414
22 changed files with 570 additions and 118 deletions

View File

@ -703,14 +703,17 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
// AMDGCN GFX10.
EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
// Reserved for AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x030,
EF_AMDGPU_MACH_AMDGCN_RESERVED2 = 0x032,
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1010,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.

View File

@ -88,8 +88,11 @@ enum : int32_t {
COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
COMPUTE_PGM_RSRC1(BULKY, 24, 1),
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
COMPUTE_PGM_RSRC1(RESERVED0, 27, 5),
COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
};
#undef COMPUTE_PGM_RSRC1
@ -119,6 +122,15 @@ enum : int32_t {
};
#undef COMPUTE_PGM_RSRC2
// Compute program resource register 3. Must match hardware definition.
#define COMPUTE_PGM_RSRC3(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC3(SHARED_VGPR_COUNT, 0, 4), // GFX10+
COMPUTE_PGM_RSRC3(RESERVED0, 4, 28),
};
#undef COMPUTE_PGM_RSRC3
// Kernel code properties. Must be kept backwards compatible.
#define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
@ -130,7 +142,8 @@ enum : int32_t {
KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
KERNEL_CODE_PROPERTY(RESERVED0, 7, 9),
KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
KERNEL_CODE_PROPERTY(RESERVED1, 11, 5),
};
#undef KERNEL_CODE_PROPERTY
@ -140,7 +153,8 @@ struct kernel_descriptor_t {
uint32_t private_segment_fixed_size;
uint8_t reserved0[8];
int64_t kernel_code_entry_byte_offset;
uint8_t reserved1[24];
uint8_t reserved1[20];
uint32_t compute_pgm_rsrc3; // GFX10+
uint32_t compute_pgm_rsrc1;
uint32_t compute_pgm_rsrc2;
uint16_t kernel_code_properties;
@ -165,6 +179,9 @@ static_assert(
static_assert(
offsetof(kernel_descriptor_t, reserved1) == 24,
"invalid offset for reserved1");
static_assert(
offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44,
"invalid offset for compute_pgm_rsrc3");
static_assert(
offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48,
"invalid offset for compute_pgm_rsrc1");

View File

@ -123,8 +123,10 @@ enum GPUKind : uint32_t {
GK_GFX906 = 63,
GK_GFX909 = 65,
GK_GFX1010 = 71,
GK_AMDGCN_FIRST = GK_GFX600,
GK_AMDGCN_LAST = GK_GFX909,
GK_AMDGCN_LAST = GK_GFX1010,
};
/// Instruction set architecture version.

View File

@ -411,6 +411,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX904, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
BCase(EF_AMDGPU_SRAM_ECC);
break;

View File

@ -62,7 +62,7 @@ constexpr GPUInfo R600GPUs[26] = {
// This table should be sorted by the value of GPUKind
// Don't bother listing the implicitly true features
constexpr GPUInfo AMDGCNGPUs[33] = {
constexpr GPUInfo AMDGCNGPUs[34] = {
// Name Canonical Kind Features
// Name
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
@ -98,6 +98,7 @@ constexpr GPUInfo AMDGCNGPUs[33] = {
{{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
};
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
@ -179,22 +180,23 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
}
switch (AK) {
case GK_GFX600: return {6, 0, 0};
case GK_GFX601: return {6, 0, 1};
case GK_GFX700: return {7, 0, 0};
case GK_GFX701: return {7, 0, 1};
case GK_GFX702: return {7, 0, 2};
case GK_GFX703: return {7, 0, 3};
case GK_GFX704: return {7, 0, 4};
case GK_GFX801: return {8, 0, 1};
case GK_GFX802: return {8, 0, 2};
case GK_GFX803: return {8, 0, 3};
case GK_GFX810: return {8, 1, 0};
case GK_GFX900: return {9, 0, 0};
case GK_GFX902: return {9, 0, 2};
case GK_GFX904: return {9, 0, 4};
case GK_GFX906: return {9, 0, 6};
case GK_GFX909: return {9, 0, 9};
default: return {0, 0, 0};
case GK_GFX600: return {6, 0, 0};
case GK_GFX601: return {6, 0, 1};
case GK_GFX700: return {7, 0, 0};
case GK_GFX701: return {7, 0, 1};
case GK_GFX702: return {7, 0, 2};
case GK_GFX703: return {7, 0, 3};
case GK_GFX704: return {7, 0, 4};
case GK_GFX801: return {8, 0, 1};
case GK_GFX802: return {8, 0, 2};
case GK_GFX803: return {8, 0, 3};
case GK_GFX810: return {8, 1, 0};
case GK_GFX900: return {9, 0, 0};
case GK_GFX902: return {9, 0, 2};
case GK_GFX904: return {9, 0, 4};
case GK_GFX906: return {9, 0, 6};
case GK_GFX909: return {9, 0, 9};
case GK_GFX1010: return {10, 1, 0};
default: return {0, 0, 0};
}
}

View File

@ -60,6 +60,12 @@ def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
"Have scratch_* flat memory instructions"
>;
def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts",
"ScalarFlatScratchInsts",
"true",
"Have s_scratch_* flat memory instructions"
>;
def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
"AddNoCarryInsts",
"true",
@ -115,12 +121,72 @@ def FeatureXNACK : SubtargetFeature<"xnack",
"Enable XNACK support"
>;
def FeatureCuMode : SubtargetFeature<"cumode",
"EnableCuMode",
"true",
"Enable CU wavefront execution mode"
>;
def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
"SGPRInitBug",
"true",
"VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;
def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
"LDSMisalignedBug",
"true",
"Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
>;
def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
"HasVcmpxPermlaneHazard",
"true",
"TODO: describe me"
>;
def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<"vmem-to-scalar-write-hazard",
"HasVMEMtoScalarWriteHazard",
"true",
"VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution."
>;
def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<"smem-to-vector-write-hazard",
"HasSMEMtoVectorWriteHazard",
"true",
"s_load_dword followed by v_cmp page faults"
>;
def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug",
"HasInstFwdPrefetchBug",
"true",
"S_INST_PREFETCH instruction causes shader to hang"
>;
def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
"HasVcmpxExecWARHazard",
"true",
"V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)"
>;
def FeatureLdsBranchVmemWARHazard : SubtargetFeature<"lds-branch-vmem-war-hazard",
"HasLdsBranchVmemWARHazard",
"true",
"Switching between LDS and VMEM-tex not waiting VM_VSRC=0"
>;
def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug",
"HasNSAtoVMEMBug",
"true",
"MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero"
>;
def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
"HasFlatSegmentOffsetBug",
"true",
"GFX10 bug, inst_offset ignored in flat segment"
>;
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
"ldsbankcount"#Value,
"LDSBankCount",
@ -155,6 +221,12 @@ def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
"Additional instructions for GFX9+"
>;
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
"GFX10Insts",
"true",
"Additional instructions for GFX10+"
>;
def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts",
"GFX7GFX8GFX9Insts",
"true",
@ -257,6 +329,12 @@ def FeatureR128A16 : SubtargetFeature<"r128-a16",
"Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
>;
def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
"HasNSAEncoding",
"true",
"Support NSA encoding for image instructions"
>;
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
"HasIntClamp",
"true",
@ -299,6 +377,36 @@ def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
"Enable SRAM ECC"
>;
def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx",
"HasNoSdstCMPX",
"true",
"V_CMPX does not write VCC/SGPR in addition to EXEC"
>;
def FeatureVscnt : SubtargetFeature<"vscnt",
"HasVscnt",
"true",
"Has separate store vscnt counter"
>;
def FeatureRegisterBanking : SubtargetFeature<"register-banking",
"HasRegisterBanking",
"true",
"Has register banking"
>;
def FeatureVOP3Literal : SubtargetFeature<"vop3-literal",
"HasVOP3Literal",
"true",
"Can use one literal in VOP3"
>;
def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
"HasNoDataDepHazard",
"true",
"Does not need SW waitstates"
>;
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@ -487,7 +595,24 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
FeatureScalarAtomics, FeatureR128A16
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
]
>;
def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
"gfx10",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureFlatAddressSpace,
FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P,
FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
FeatureVOP3Literal, FeatureNoDataDepHazard,
FeatureDoesNotSupportSRAMECC
]
>;
@ -601,6 +726,34 @@ def FeatureISAVersion9_0_9 : FeatureSet<
FeatureXNACK,
FeatureCodeObjectV3]>;
// TODO: Organize more features into groups.
def FeatureGroup {
// Bugs present on gfx10.1.
list<SubtargetFeature> GFX10_1_Bugs = [
FeatureVcmpxPermlaneHazard,
FeatureVMEMtoScalarWriteHazard,
FeatureSMEMtoVectorWriteHazard,
FeatureInstFwdPrefetchBug,
FeatureVcmpxExecWARHazard,
FeatureLdsBranchVmemWARHazard,
FeatureNSAtoVMEMBug,
FeatureFlatSegmentOffsetBug
];
}
def FeatureISAVersion10_1_0 : FeatureSet<
!listconcat(FeatureGroup.GFX10_1_Bugs,
[FeatureGFX10,
FeatureLDSBankCount32,
FeatureDLInsts,
FeatureNSAEncoding,
FeatureWavefrontSize64,
FeatureScalarStores,
FeatureScalarAtomics,
FeatureScalarFlatScratchInsts,
FeatureLdsMisalignedBug,
FeatureCodeObjectV3])>;
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
@ -687,10 +840,21 @@ def isGFX6 :
def isGFX6GFX7 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
AssemblerPredicate<"!FeatureGCN3Encoding,!FeatureGFX10Insts">;
def isGFX6GFX7GFX10 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
AssemblerPredicate<"!FeatureGCN3Encoding">;
def isGFX7Only :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts,!FeatureGFX10Insts">;
def isGFX7GFX10 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">;
def isGFX7GFX8GFX9 :
@ -699,6 +863,13 @@ def isGFX7GFX8GFX9 :
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">;
def isGFX6GFX7GFX8GFX9 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
AssemblerPredicate<"!FeatureGFX10Insts">;
def isGFX7Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
AssemblerPredicate<"FeatureCIInsts">;
@ -724,6 +895,10 @@ def isGFX8GFX9 :
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
AssemblerPredicate<"FeatureGFX8Insts,FeatureGCN3Encoding">;
def isGFX10Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
AssemblerPredicate<"FeatureGFX10Insts">;
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<"FeatureFlatAddressSpace">;
@ -731,6 +906,8 @@ def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
AssemblerPredicate<"FeatureFlatGlobalInsts">;
def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
AssemblerPredicate<"FeatureFlatScratchInsts">;
def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
AssemblerPredicate<"FeatureScalarFlatScratchInsts">;
def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
AssemblerPredicate<"FeatureGFX9Insts">;
@ -766,6 +943,10 @@ def HasSDWA9 :
Predicate<"Subtarget->hasSDWA()">,
AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">;
def HasSDWA10 :
Predicate<"Subtarget->hasSDWA()">,
AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureSDWA">;
def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;
@ -778,9 +959,18 @@ def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
AssemblerPredicate<"FeatureMadMixInsts">;
def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
AssemblerPredicate<"FeatureScalarStores">;
def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
AssemblerPredicate<"FeatureScalarAtomics">;
def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
AssemblerPredicate<"FeatureNoSdstCMPX">;
def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
AssemblerPredicate<"!FeatureNoSdstCMPX">;
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,

View File

@ -181,6 +181,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasApertureRegs(false),
EnableXNACK(false),
EnableCuMode(false),
TrapHandler(false),
EnableHugePrivateBuffer(false),
@ -196,6 +197,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
CIInsts(false),
GFX8Insts(false),
GFX9Insts(false),
GFX10Insts(false),
GFX7GFX8GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
@ -212,20 +214,37 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasSDWAOutModsVOPC(false),
HasDPP(false),
HasR128A16(false),
HasNSAEncoding(false),
HasDLInsts(false),
HasDot1Insts(false),
HasDot2Insts(false),
EnableSRAMECC(false),
DoesNotSupportSRAMECC(false),
HasNoSdstCMPX(false),
HasVscnt(false),
HasRegisterBanking(false),
HasVOP3Literal(false),
HasNoDataDepHazard(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
FlatGlobalInsts(false),
FlatScratchInsts(false),
ScalarFlatScratchInsts(false),
AddNoCarryInsts(false),
HasUnpackedD16VMem(false),
LDSMisalignedBug(false),
ScalarizeGlobal(false),
HasVcmpxPermlaneHazard(false),
HasVMEMtoScalarWriteHazard(false),
HasSMEMtoVectorWriteHazard(false),
HasInstFwdPrefetchBug(false),
HasVcmpxExecWARHazard(false),
HasLdsBranchVmemWARHazard(false),
HasNSAtoVMEMBug(false),
HasFlatSegmentOffsetBug(false),
FeatureDisable(false),
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
TLInfo(TM, *this),
@ -243,6 +262,8 @@ unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
return getLocalMemorySize();
unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
if (!WorkGroupsPerCu)
return 0;
unsigned MaxWaves = getMaxWavesPerEU();
return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}
@ -251,6 +272,8 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
const Function &F) const {
unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
if (!WorkGroupsPerCu)
return 0;
unsigned MaxWaves = getMaxWavesPerEU();
unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
@ -271,7 +294,8 @@ AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
return std::make_pair(getWavefrontSize() * 2,
std::max(getWavefrontSize() * 4, 256u));
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_LS:
case CallingConv::AMDGPU_HS:
@ -496,7 +520,14 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Policy.ShouldTrackLaneMasks = true;
}
bool GCNSubtarget::hasMadF16() const {
return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
}
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return 10;
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
return 10;
@ -543,6 +574,9 @@ unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
if (MFI.hasFlatScratchInit()) {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).

View File

@ -55,7 +55,8 @@ public:
SOUTHERN_ISLANDS = 4,
SEA_ISLANDS = 5,
VOLCANIC_ISLANDS = 6,
GFX9 = 7
GFX9 = 7,
GFX10 = 8
};
private:
@ -293,6 +294,7 @@ protected:
bool UnalignedBufferAccess;
bool HasApertureRegs;
bool EnableXNACK;
bool EnableCuMode;
bool TrapHandler;
// Used as options.
@ -313,6 +315,7 @@ protected:
bool CIInsts;
bool GFX8Insts;
bool GFX9Insts;
bool GFX10Insts;
bool GFX7GFX8GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
@ -329,24 +332,41 @@ protected:
bool HasSDWAOutModsVOPC;
bool HasDPP;
bool HasR128A16;
bool HasNSAEncoding;
bool HasDLInsts;
bool HasDot1Insts;
bool HasDot2Insts;
bool EnableSRAMECC;
bool DoesNotSupportSRAMECC;
bool HasNoSdstCMPX;
bool HasVscnt;
bool HasRegisterBanking;
bool HasVOP3Literal;
bool HasNoDataDepHazard;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;
bool FlatScratchInsts;
bool ScalarFlatScratchInsts;
bool AddNoCarryInsts;
bool HasUnpackedD16VMem;
bool R600ALUInst;
bool CaymanISA;
bool CFALUBug;
bool LDSMisalignedBug;
bool HasVertexCache;
short TexVTXClauseSize;
bool ScalarizeGlobal;
bool HasVcmpxPermlaneHazard;
bool HasVMEMtoScalarWriteHazard;
bool HasSMEMtoVectorWriteHazard;
bool HasInstFwdPrefetchBug;
bool HasVcmpxExecWARHazard;
bool HasLdsBranchVmemWARHazard;
bool HasNSAtoVMEMBug;
bool HasFlatSegmentOffsetBug;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable;
@ -583,6 +603,10 @@ public:
return EnableXNACK;
}
bool isCuModeEnabled() const {
return EnableCuMode;
}
bool hasFlatAddressSpace() const {
return FlatAddressSpace;
}
@ -599,6 +623,14 @@ public:
return FlatScratchInsts;
}
bool hasScalarFlatScratchInsts() const {
return ScalarFlatScratchInsts;
}
bool hasFlatSegmentOffsetBug() const {
return HasFlatSegmentOffsetBug;
}
bool hasFlatLgkmVMemCountInOrder() const {
return getGeneration() > GFX9;
}
@ -654,10 +686,6 @@ public:
return HasSDWAOutModsVOPC;
}
bool vmemWriteNeedsExpWaitcnt() const {
return getGeneration() < SEA_ISLANDS;
}
bool hasDLInsts() const {
return HasDLInsts;
}
@ -674,6 +702,30 @@ public:
return EnableSRAMECC;
}
bool hasNoSdstCMPX() const {
return HasNoSdstCMPX;
}
bool hasVscnt() const {
return HasVscnt;
}
bool hasRegisterBanking() const {
return HasRegisterBanking;
}
bool hasVOP3Literal() const {
return HasVOP3Literal;
}
bool hasNoDataDepHazard() const {
return HasNoDataDepHazard;
}
bool vmemWriteNeedsExpWaitcnt() const {
return getGeneration() < SEA_ISLANDS;
}
// Scratch is allocated in 256 dword per wave blocks for the entire
// wavefront. When viewed from the perspecive of an arbitrary workitem, this
// is 4-byte aligned.
@ -782,6 +834,12 @@ public:
return HasR128A16;
}
bool hasNSAEncoding() const {
return HasNSAEncoding;
}
bool hasMadF16() const;
bool enableSIScheduler() const {
return EnableSIScheduler;
}
@ -816,6 +874,38 @@ public:
getGeneration() <= AMDGPUSubtarget::GFX9;
}
bool hasVcmpxPermlaneHazard() const {
return HasVcmpxPermlaneHazard;
}
bool hasVMEMtoScalarWriteHazard() const {
return HasVMEMtoScalarWriteHazard;
}
bool hasSMEMtoVectorWriteHazard() const {
return HasSMEMtoVectorWriteHazard;
}
bool hasLDSMisalignedBug() const {
return LDSMisalignedBug && !EnableCuMode;
}
bool hasInstFwdPrefetchBug() const {
return HasInstFwdPrefetchBug;
}
bool hasVcmpxExecWARHazard() const {
return HasVcmpxExecWARHazard;
}
bool hasLdsBranchVmemWARHazard() const {
return HasLdsBranchVmemWARHazard;
}
bool hasNSAtoVMEMBug() const {
return HasNSAtoVMEMBug;
}
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;

View File

@ -999,6 +999,10 @@ public:
return AMDGPU::isGFX9(getSTI());
}
bool isGFX10() const {
return AMDGPU::isGFX10(getSTI());
}
bool hasInv2PiInlineImm() const {
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
@ -1407,7 +1411,7 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
if (AsmParser->isVI())
return isVReg32();
else if (AsmParser->isGFX9())
else if (AsmParser->isGFX9() || AsmParser->isGFX10())
return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
else
return false;
@ -2953,7 +2957,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getParser().parseIdentifier(KernelName))
return true;
kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
StringSet<> Seen;

View File

@ -164,3 +164,10 @@ def : ProcessorModel<"gfx909", SIQuarterSpeedModel,
FeatureISAVersion9_0_9.Features
>;
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
def : ProcessorModel<"gfx1010", GFX10SpeedModel,
FeatureISAVersion10_1_0.Features
>;

View File

@ -60,39 +60,40 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
AMDGPU::GPUKind AK;
switch (ElfMach) {
case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
}
StringRef GPUName = getArchNameAMDGCN(AK);
@ -139,6 +140,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
@ -324,6 +326,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
if (IVersion.Major >= 10) {
PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
}
PRINT_FIELD(
OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
compute_pgm_rsrc2,

View File

@ -523,6 +523,15 @@ enum DppCtrl : unsigned {
#define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
#define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
#define C_00B848_IEEE_MODE 0xFF7FFFFF
#define S_00B848_WGP_MODE(x) (((x) & 0x1) << 29)
#define G_00B848_WGP_MODE(x) (((x) >> 29) & 0x1)
#define C_00B848_WGP_MODE 0xDFFFFFFF
#define S_00B848_MEM_ORDERED(x) (((x) & 0x1) << 30)
#define G_00B848_MEM_ORDERED(x) (((x) >> 30) & 0x1)
#define C_00B848_MEM_ORDERED 0xBFFFFFFF
#define S_00B848_FWD_PROGRESS(x) (((x) & 0x1) << 31)
#define G_00B848_FWD_PROGRESS(x) (((x) >> 31) & 0x1)
#define C_00B848_FWD_PROGRESS 0x7FFFFFFF
// Helpers for setting FLOAT_MODE
@ -553,6 +562,15 @@ enum DppCtrl : unsigned {
#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
#define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
#define S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21)
#define S_028B54_GS_W32_EN(x) (((x) & 0x1) << 22)
#define S_028B54_VS_W32_EN(x) (((x) & 0x1) << 23)
#define R_0286D8_SPI_PS_IN_CONTROL 0x0286D8
#define S_0286D8_PS_W32_EN(x) (((x) & 0x1) << 15)
#define R_00B800_COMPUTE_DISPATCH_INITIATOR 0x00B800
#define S_00B800_CS_W32_EN(x) (((x) & 0x1) << 15)
#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8
} // End namespace llvm

View File

@ -5591,7 +5591,9 @@ enum SIEncodingFamily {
SDWA = 2,
SDWA9 = 3,
GFX80 = 4,
GFX9 = 5
GFX9 = 5,
GFX10 = 6,
SDWA10 = 7
};
static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
@ -5604,6 +5606,8 @@ static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
case AMDGPUSubtarget::GFX9:
return SIEncodingFamily::VI;
case AMDGPUSubtarget::GFX10:
return SIEncodingFamily::GFX10;
}
llvm_unreachable("Unknown subtarget generation!");
}

View File

@ -23,6 +23,8 @@ def SIEncodingFamily {
int SDWA9 = 3;
int GFX80 = 4;
int GFX9 = 5;
int GFX10 = 6;
int SDWA10 = 7;
}
//===----------------------------------------------------------------------===//

View File

@ -112,9 +112,9 @@ def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
}
foreach Index = 0-15 in {
def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
def TTMP#Index#_gfx9 : SIReg<"ttmp"#Index, !add(108, Index)>;
def TTMP#Index : SIReg<"", 0>;
def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>;
def TTMP#Index : SIReg<"", 0>;
}
multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
@ -311,8 +311,8 @@ class TmpRegTuples<string tgt,
getSubRegs<size>.ret>;
foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>;
def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
def TTMP#Index#_TTMP#!add(Index,1)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 2, Index>;
}
foreach Index = {0, 4, 8, 12} in {
@ -321,7 +321,7 @@ foreach Index = {0, 4, 8, 12} in {
_TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 4, Index>;
def TTMP#Index#_TTMP#!add(Index,1)#
_TTMP#!add(Index,2)#
_TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>;
_TTMP#!add(Index,3)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 4, Index>;
}
foreach Index = {0, 4, 8} in {
@ -338,7 +338,7 @@ foreach Index = {0, 4, 8} in {
_TTMP#!add(Index,4)#
_TTMP#!add(Index,5)#
_TTMP#!add(Index,6)#
_TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>;
_TTMP#!add(Index,7)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 8, Index>;
}
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
@ -348,12 +348,12 @@ def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TT
TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 :
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9_gfx10 :
TmpRegTuplesBase<0, 16,
[TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9,
TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9,
TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9,
TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>;
[TTMP0_gfx9_gfx10, TTMP1_gfx9_gfx10, TTMP2_gfx9_gfx10, TTMP3_gfx9_gfx10,
TTMP4_gfx9_gfx10, TTMP5_gfx9_gfx10, TTMP6_gfx9_gfx10, TTMP7_gfx9_gfx10,
TTMP8_gfx9_gfx10, TTMP9_gfx9_gfx10, TTMP10_gfx9_gfx10, TTMP11_gfx9_gfx10,
TTMP12_gfx9_gfx10, TTMP13_gfx9_gfx10, TTMP14_gfx9_gfx10, TTMP15_gfx9_gfx10]>;
// VGPR 32-bit registers

View File

@ -37,6 +37,9 @@ def WriteDouble : SchedWrite;
// half rate f64 instruction (same as v_add_f64)
def WriteDoubleAdd : SchedWrite;
// Conversion to or from f64 instruction
def WriteDoubleCvt : SchedWrite;
// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;
@ -61,6 +64,7 @@ class SISchedMachineModel : SchedMachineModel {
def SIFullSpeedModel : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
def GFX10SpeedModel : SISchedMachineModel;
// XXX: Are the resource counts correct?
def HWBranch : ProcResource<1> {
@ -81,6 +85,9 @@ def HWVMEM : ProcResource<1> {
def HWVALU : ProcResource<1> {
let BufferSize = 1;
}
def HWRC : ProcResource<1> { // Register destination cache
let BufferSize = 1;
}
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
int latency> : WriteRes<write, resources> {
@ -124,6 +131,7 @@ defm : SICommonWriteRes;
def : HWVALUWriteRes<WriteFloatFMA, 1>;
def : HWVALUWriteRes<WriteDouble, 4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : InstRW<[WriteCopy], (instrs COPY)>;
@ -136,7 +144,32 @@ defm : SICommonWriteRes;
def : HWVALUWriteRes<WriteFloatFMA, 16>;
def : HWVALUWriteRes<WriteDouble, 16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : InstRW<[WriteCopy], (instrs COPY)>;
} // End SchedModel = SIQuarterSpeedModel
let SchedModel = GFX10SpeedModel in {
// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 9>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteBranch, [HWBranch], 32>;
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 5>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
def : InstRW<[WriteCopy], (instrs COPY)>;
} // End SchedModel = GFX10SpeedModel

View File

@ -435,11 +435,21 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.kernarg_segment_alignment = 4;
Header.group_segment_alignment = 4;
Header.private_segment_alignment = 4;
if (Version.Major >= 10) {
Header.compute_pgm_resource_registers |=
S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
S_00B848_MEM_ORDERED(1);
}
}
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
const MCSubtargetInfo *STI) {
IsaVersion Version = getIsaVersion(STI->getCPU());
amdhsa::kernel_descriptor_t KD;
memset(&KD, 0, sizeof(KD));
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
@ -449,6 +459,13 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
if (Version.Major >= 10) {
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
}
return KD;
}
@ -679,6 +696,10 @@ bool isGFX9(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
bool isGFX10(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
@ -704,46 +725,46 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
CASE_CI_VI(FLAT_SCR) \
CASE_CI_VI(FLAT_SCR_LO) \
CASE_CI_VI(FLAT_SCR_HI) \
CASE_VI_GFX9(TTMP0) \
CASE_VI_GFX9(TTMP1) \
CASE_VI_GFX9(TTMP2) \
CASE_VI_GFX9(TTMP3) \
CASE_VI_GFX9(TTMP4) \
CASE_VI_GFX9(TTMP5) \
CASE_VI_GFX9(TTMP6) \
CASE_VI_GFX9(TTMP7) \
CASE_VI_GFX9(TTMP8) \
CASE_VI_GFX9(TTMP9) \
CASE_VI_GFX9(TTMP10) \
CASE_VI_GFX9(TTMP11) \
CASE_VI_GFX9(TTMP12) \
CASE_VI_GFX9(TTMP13) \
CASE_VI_GFX9(TTMP14) \
CASE_VI_GFX9(TTMP15) \
CASE_VI_GFX9(TTMP0_TTMP1) \
CASE_VI_GFX9(TTMP2_TTMP3) \
CASE_VI_GFX9(TTMP4_TTMP5) \
CASE_VI_GFX9(TTMP6_TTMP7) \
CASE_VI_GFX9(TTMP8_TTMP9) \
CASE_VI_GFX9(TTMP10_TTMP11) \
CASE_VI_GFX9(TTMP12_TTMP13) \
CASE_VI_GFX9(TTMP14_TTMP15) \
CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
CASE_VI_GFX9_GFX10(TTMP0) \
CASE_VI_GFX9_GFX10(TTMP1) \
CASE_VI_GFX9_GFX10(TTMP2) \
CASE_VI_GFX9_GFX10(TTMP3) \
CASE_VI_GFX9_GFX10(TTMP4) \
CASE_VI_GFX9_GFX10(TTMP5) \
CASE_VI_GFX9_GFX10(TTMP6) \
CASE_VI_GFX9_GFX10(TTMP7) \
CASE_VI_GFX9_GFX10(TTMP8) \
CASE_VI_GFX9_GFX10(TTMP9) \
CASE_VI_GFX9_GFX10(TTMP10) \
CASE_VI_GFX9_GFX10(TTMP11) \
CASE_VI_GFX9_GFX10(TTMP12) \
CASE_VI_GFX9_GFX10(TTMP13) \
CASE_VI_GFX9_GFX10(TTMP14) \
CASE_VI_GFX9_GFX10(TTMP15) \
CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
}
#define CASE_CI_VI(node) \
assert(!isSI(STI)); \
case node: return isCI(STI) ? node##_ci : node##_vi;
#define CASE_VI_GFX9(node) \
case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
#define CASE_VI_GFX9_GFX10(node) \
case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
if (STI.getTargetTriple().getArch() == Triple::r600)
@ -752,17 +773,17 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
}
#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef CASE_VI_GFX9_GFX10
#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
unsigned mc2PseudoReg(unsigned Reg) {
MAP_REG2REG
}
#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef CASE_VI_GFX9_GFX10
#undef MAP_REG2REG
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
@ -1030,5 +1051,6 @@ const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
return lookupSourceOfDivergence(IntrID);
}
} // namespace AMDGPU
} // namespace llvm

View File

@ -244,7 +244,8 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const MCSubtargetInfo *STI);
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
const MCSubtargetInfo *STI);
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
@ -398,6 +399,7 @@ bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
/// Is Reg - scalar register
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

View File

@ -82,6 +82,9 @@ COMPPGM1(priv, compute_pgm_rsrc1_priv, PRIV
COMPPGM1(enable_dx10_clamp, compute_pgm_rsrc1_dx10_clamp, DX10_CLAMP),
COMPPGM1(debug_mode, compute_pgm_rsrc1_debug_mode, DEBUG_MODE),
COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE_MODE),
COMPPGM1(enable_wgp_mode, compute_pgm_rsrc1_wgp_mode, WGP_MODE),
COMPPGM1(enable_mem_ordered, compute_pgm_rsrc1_mem_ordered, MEM_ORDERED),
COMPPGM1(enable_fwd_progress, compute_pgm_rsrc1_fwd_progress, FWD_PROGRESS),
// TODO: bulky
// TODO: cdbg_user
COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),

View File

@ -47,6 +47,7 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx904 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX904 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx909 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
; ARCH-R600: Arch: r600
; ARCH-GCN: Arch: amdgcn
@ -87,6 +88,7 @@
; GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E)
; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31)
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
; ALL: ]
define amdgpu_kernel void @elf_header() {

View File

@ -24,6 +24,7 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX904 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX906 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx909 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX909 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1010 %s
; HSA: .hsa_code_object_version 2,1
; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU"
@ -42,3 +43,4 @@
; HSA-GFX904: .hsa_code_object_isa 9,0,4,"AMD","AMDGPU"
; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU"
; HSA-GFX909: .hsa_code_object_isa 9,0,9,"AMD","AMDGPU"
; HSA-GFX1010: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"

View File

@ -1275,6 +1275,7 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
};