forked from OSchip/llvm-project
[AMDGPU] Add gfx1010 target definitions
Differential Revision: https://reviews.llvm.org/D61041 llvm-svn: 359113
This commit is contained in:
parent
c60a4099a1
commit
cee607e414
|
@ -703,14 +703,17 @@ enum : unsigned {
|
|||
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
|
||||
// AMDGCN GFX10.
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
|
||||
|
||||
// Reserved for AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x030,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED2 = 0x032,
|
||||
|
||||
// First/last AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1010,
|
||||
|
||||
// Indicates if the "xnack" target feature is enabled for all code contained
|
||||
// in the object.
|
||||
|
|
|
@ -88,8 +88,11 @@ enum : int32_t {
|
|||
COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
|
||||
COMPUTE_PGM_RSRC1(BULKY, 24, 1),
|
||||
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
|
||||
COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
|
||||
COMPUTE_PGM_RSRC1(RESERVED0, 27, 5),
|
||||
COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
|
||||
COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
|
||||
COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
|
||||
COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
|
||||
COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
|
||||
};
|
||||
#undef COMPUTE_PGM_RSRC1
|
||||
|
||||
|
@ -119,6 +122,15 @@ enum : int32_t {
|
|||
};
|
||||
#undef COMPUTE_PGM_RSRC2
|
||||
|
||||
// Compute program resource register 3. Must match hardware definition.
// The two entries below cover bits [3:0] (SHARED_VGPR_COUNT) and bits [31:4]
// (RESERVED0), i.e. all 32 bits of the register. The COMPUTE_PGM_RSRC3 helper
// macro only prefixes the entry name and is undefined again after the enum.
// NOTE(review): AMDHSA_BITS_ENUM_ENTRY is defined outside this excerpt;
// presumably it expands NAME/SHIFT/WIDTH into shift, width and mask
// enumerators - confirm against its definition.
|
||||
#define COMPUTE_PGM_RSRC3(NAME, SHIFT, WIDTH) \
|
||||
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_ ## NAME, SHIFT, WIDTH)
|
||||
enum : int32_t {
|
||||
COMPUTE_PGM_RSRC3(SHARED_VGPR_COUNT, 0, 4), // GFX10+
|
||||
COMPUTE_PGM_RSRC3(RESERVED0, 4, 28),
|
||||
};
|
||||
#undef COMPUTE_PGM_RSRC3
|
||||
|
||||
// Kernel code properties. Must be kept backwards compatible.
|
||||
#define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
|
||||
AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
|
||||
|
@ -130,7 +142,8 @@ enum : int32_t {
|
|||
KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
|
||||
KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
|
||||
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
|
||||
KERNEL_CODE_PROPERTY(RESERVED0, 7, 9),
|
||||
KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
|
||||
KERNEL_CODE_PROPERTY(RESERVED1, 11, 5),
|
||||
};
|
||||
#undef KERNEL_CODE_PROPERTY
|
||||
|
||||
|
@ -140,7 +153,8 @@ struct kernel_descriptor_t {
|
|||
uint32_t private_segment_fixed_size;
|
||||
uint8_t reserved0[8];
|
||||
int64_t kernel_code_entry_byte_offset;
|
||||
uint8_t reserved1[24];
|
||||
uint8_t reserved1[20];
|
||||
uint32_t compute_pgm_rsrc3; // GFX10+
|
||||
uint32_t compute_pgm_rsrc1;
|
||||
uint32_t compute_pgm_rsrc2;
|
||||
uint16_t kernel_code_properties;
|
||||
|
@ -165,6 +179,9 @@ static_assert(
|
|||
static_assert(
|
||||
offsetof(kernel_descriptor_t, reserved1) == 24,
|
||||
"invalid offset for reserved1");
|
||||
static_assert(
|
||||
offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44,
|
||||
"invalid offset for compute_pgm_rsrc3");
|
||||
static_assert(
|
||||
offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48,
|
||||
"invalid offset for compute_pgm_rsrc1");
|
||||
|
|
|
@ -123,8 +123,10 @@ enum GPUKind : uint32_t {
|
|||
GK_GFX906 = 63,
|
||||
GK_GFX909 = 65,
|
||||
|
||||
GK_GFX1010 = 71,
|
||||
|
||||
GK_AMDGCN_FIRST = GK_GFX600,
|
||||
GK_AMDGCN_LAST = GK_GFX909,
|
||||
GK_AMDGCN_LAST = GK_GFX1010,
|
||||
};
|
||||
|
||||
/// Instruction set architecture version.
|
||||
|
|
|
@ -411,6 +411,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
|
|||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX904, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
|
||||
BCase(EF_AMDGPU_XNACK);
|
||||
BCase(EF_AMDGPU_SRAM_ECC);
|
||||
break;
|
||||
|
|
|
@ -62,7 +62,7 @@ constexpr GPUInfo R600GPUs[26] = {
|
|||
|
||||
// This table should be sorted by the value of GPUKind
|
||||
// Don't bother listing the implicitly true features
|
||||
constexpr GPUInfo AMDGCNGPUs[33] = {
|
||||
constexpr GPUInfo AMDGCNGPUs[34] = {
|
||||
// Name Canonical Kind Features
|
||||
// Name
|
||||
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
|
||||
|
@ -98,6 +98,7 @@ constexpr GPUInfo AMDGCNGPUs[33] = {
|
|||
{{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
};
|
||||
|
||||
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
|
||||
|
@ -179,22 +180,23 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
|
|||
}
|
||||
|
||||
switch (AK) {
|
||||
case GK_GFX600: return {6, 0, 0};
|
||||
case GK_GFX601: return {6, 0, 1};
|
||||
case GK_GFX700: return {7, 0, 0};
|
||||
case GK_GFX701: return {7, 0, 1};
|
||||
case GK_GFX702: return {7, 0, 2};
|
||||
case GK_GFX703: return {7, 0, 3};
|
||||
case GK_GFX704: return {7, 0, 4};
|
||||
case GK_GFX801: return {8, 0, 1};
|
||||
case GK_GFX802: return {8, 0, 2};
|
||||
case GK_GFX803: return {8, 0, 3};
|
||||
case GK_GFX810: return {8, 1, 0};
|
||||
case GK_GFX900: return {9, 0, 0};
|
||||
case GK_GFX902: return {9, 0, 2};
|
||||
case GK_GFX904: return {9, 0, 4};
|
||||
case GK_GFX906: return {9, 0, 6};
|
||||
case GK_GFX909: return {9, 0, 9};
|
||||
default: return {0, 0, 0};
|
||||
case GK_GFX600: return {6, 0, 0};
|
||||
case GK_GFX601: return {6, 0, 1};
|
||||
case GK_GFX700: return {7, 0, 0};
|
||||
case GK_GFX701: return {7, 0, 1};
|
||||
case GK_GFX702: return {7, 0, 2};
|
||||
case GK_GFX703: return {7, 0, 3};
|
||||
case GK_GFX704: return {7, 0, 4};
|
||||
case GK_GFX801: return {8, 0, 1};
|
||||
case GK_GFX802: return {8, 0, 2};
|
||||
case GK_GFX803: return {8, 0, 3};
|
||||
case GK_GFX810: return {8, 1, 0};
|
||||
case GK_GFX900: return {9, 0, 0};
|
||||
case GK_GFX902: return {9, 0, 2};
|
||||
case GK_GFX904: return {9, 0, 4};
|
||||
case GK_GFX906: return {9, 0, 6};
|
||||
case GK_GFX909: return {9, 0, 9};
|
||||
case GK_GFX1010: return {10, 1, 0};
|
||||
default: return {0, 0, 0};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -60,6 +60,12 @@ def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
|
|||
"Have scratch_* flat memory instructions"
|
||||
>;
|
||||
|
||||
// Subtarget feature "scalar-flat-scratch-insts": when enabled, sets the
// ScalarFlatScratchInsts subtarget attribute to true. This commit lists it in
// the GFX9 generation features and in FeatureISAVersion10_1_0, but not in the
// FeatureGFX10 generation list itself.
def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts",
|
||||
"ScalarFlatScratchInsts",
|
||||
"true",
|
||||
"Have s_scratch_* flat memory instructions"
|
||||
>;
|
||||
|
||||
def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
|
||||
"AddNoCarryInsts",
|
||||
"true",
|
||||
|
@ -115,12 +121,72 @@ def FeatureXNACK : SubtargetFeature<"xnack",
|
|||
"Enable XNACK support"
|
||||
>;
|
||||
|
||||
// Subtarget feature "cumode": when enabled, sets the EnableCuMode subtarget
// attribute to true. GCNSubtarget exposes it through isCuModeEnabled() and
// also consults it in hasLDSMisalignedBug().
def FeatureCuMode : SubtargetFeature<"cumode",
|
||||
"EnableCuMode",
|
||||
"true",
|
||||
"Enable CU wavefront execution mode"
|
||||
>;
|
||||
|
||||
// Subtarget feature "sgpr-init-bug": when enabled, sets the SGPRInitBug
// subtarget attribute to true.
def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
|
||||
"SGPRInitBug",
|
||||
"true",
|
||||
"VI SGPR initialization bug requiring a fixed SGPR allocation size"
|
||||
>;
|
||||
|
||||
// Subtarget feature "lds-misaligned-bug": when enabled, sets the
// LDSMisalignedBug subtarget attribute to true. It is listed directly in
// FeatureISAVersion10_1_0 (not in FeatureGroup.GFX10_1_Bugs), and
// GCNSubtarget::hasLDSMisalignedBug() reports it only while CU mode is off.
def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
|
||||
"LDSMisalignedBug",
|
||||
"true",
|
||||
"Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
|
||||
>;
|
||||
|
||||
// Subtarget feature "vcmpx-permlane-hazard": when enabled, sets the
// HasVcmpxPermlaneHazard subtarget attribute to true. Member of
// FeatureGroup.GFX10_1_Bugs.
// NOTE(review): the user-visible description string below is still the
// placeholder "TODO: describe me"; presumably the hazard involves V_CMPX and
// permlane instructions (judging by the name only) - replace it with a real
// description once confirmed.
def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
|
||||
"HasVcmpxPermlaneHazard",
|
||||
"true",
|
||||
"TODO: describe me"
|
||||
>;
|
||||
|
||||
// Subtarget feature "vmem-to-scalar-write-hazard": when enabled, sets the
// HasVMEMtoScalarWriteHazard subtarget attribute to true. Member of
// FeatureGroup.GFX10_1_Bugs.
def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<"vmem-to-scalar-write-hazard",
|
||||
"HasVMEMtoScalarWriteHazard",
|
||||
"true",
|
||||
"VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution."
|
||||
>;
|
||||
|
||||
// Subtarget feature "smem-to-vector-write-hazard": when enabled, sets the
// HasSMEMtoVectorWriteHazard subtarget attribute to true. Member of
// FeatureGroup.GFX10_1_Bugs.
def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<"smem-to-vector-write-hazard",
|
||||
"HasSMEMtoVectorWriteHazard",
|
||||
"true",
|
||||
"s_load_dword followed by v_cmp page faults"
|
||||
>;
|
||||
|
||||
// Subtarget feature "inst-fwd-prefetch-bug": when enabled, sets the
// HasInstFwdPrefetchBug subtarget attribute to true. Member of
// FeatureGroup.GFX10_1_Bugs.
def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug",
|
||||
"HasInstFwdPrefetchBug",
|
||||
"true",
|
||||
"S_INST_PREFETCH instruction causes shader to hang"
|
||||
>;
|
||||
|
||||
// Subtarget feature "vcmpx-exec-war-hazard": when enabled, sets the
// HasVcmpxExecWARHazard subtarget attribute to true. Member of
// FeatureGroup.GFX10_1_Bugs.
def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
|
||||
"HasVcmpxExecWARHazard",
|
||||
"true",
|
||||
"V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)"
|
||||
>;
|
||||
|
||||
// Subtarget feature "lds-branch-vmem-war-hazard": when enabled, sets the
// HasLdsBranchVmemWARHazard subtarget attribute to true. Member of
// FeatureGroup.GFX10_1_Bugs.
def FeatureLdsBranchVmemWARHazard : SubtargetFeature<"lds-branch-vmem-war-hazard",
|
||||
"HasLdsBranchVmemWARHazard",
|
||||
"true",
|
||||
"Switching between LDS and VMEM-tex not waiting VM_VSRC=0"
|
||||
>;
|
||||
|
||||
// Subtarget feature "nsa-to-vmem-bug": when enabled, sets the HasNSAtoVMEMBug
// subtarget attribute to true. Member of FeatureGroup.GFX10_1_Bugs.
def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug",
|
||||
"HasNSAtoVMEMBug",
|
||||
"true",
|
||||
"MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero"
|
||||
>;
|
||||
|
||||
// Subtarget feature "flat-segment-offset-bug": when enabled, sets the
// HasFlatSegmentOffsetBug subtarget attribute to true. Member of
// FeatureGroup.GFX10_1_Bugs.
def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
|
||||
"HasFlatSegmentOffsetBug",
|
||||
"true",
|
||||
"GFX10 bug, inst_offset ignored in flat segment"
|
||||
>;
|
||||
|
||||
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
|
||||
"ldsbankcount"#Value,
|
||||
"LDSBankCount",
|
||||
|
@ -155,6 +221,12 @@ def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
|
|||
"Additional instructions for GFX9+"
|
||||
>;
|
||||
|
||||
// Subtarget feature "gfx10-insts": when enabled, sets the GFX10Insts subtarget
// attribute to true. The assembler predicates in this file key on it:
// isGFX10Plus requires it, while the pre-GFX10 predicates (e.g. isGFX6GFX7,
// isGFX7Only, isGFX6GFX7GFX8GFX9) require its absence.
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
|
||||
"GFX10Insts",
|
||||
"true",
|
||||
"Additional instructions for GFX10+"
|
||||
>;
|
||||
|
||||
def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts",
|
||||
"GFX7GFX8GFX9Insts",
|
||||
"true",
|
||||
|
@ -257,6 +329,12 @@ def FeatureR128A16 : SubtargetFeature<"r128-a16",
|
|||
"Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
|
||||
>;
|
||||
|
||||
// Subtarget feature "nsa-encoding": when enabled, sets the HasNSAEncoding
// subtarget attribute to true. It is enabled per ISA version (see
// FeatureISAVersion10_1_0) rather than through the FeatureGFX10 generation
// list.
def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
|
||||
"HasNSAEncoding",
|
||||
"true",
|
||||
"Support NSA encoding for image instructions"
|
||||
>;
|
||||
|
||||
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
|
||||
"HasIntClamp",
|
||||
"true",
|
||||
|
@ -299,6 +377,36 @@ def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
|
|||
"Enable SRAM ECC"
|
||||
>;
|
||||
|
||||
// Subtarget feature "no-sdst-cmpx": when enabled, sets the HasNoSdstCMPX
// subtarget attribute to true. Part of the FeatureGFX10 generation list; the
// HasNoSdstCMPX / HasSdstCMPX predicates select on its presence / absence.
def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx",
|
||||
"HasNoSdstCMPX",
|
||||
"true",
|
||||
"V_CMPX does not write VCC/SGPR in addition to EXEC"
|
||||
>;
|
||||
|
||||
// Subtarget feature "vscnt": when enabled, sets the HasVscnt subtarget
// attribute to true. Part of the FeatureGFX10 generation list.
def FeatureVscnt : SubtargetFeature<"vscnt",
|
||||
"HasVscnt",
|
||||
"true",
|
||||
"Has separate store vscnt counter"
|
||||
>;
|
||||
|
||||
// Subtarget feature "register-banking": when enabled, sets the
// HasRegisterBanking subtarget attribute to true. Part of the FeatureGFX10
// generation list.
def FeatureRegisterBanking : SubtargetFeature<"register-banking",
|
||||
"HasRegisterBanking",
|
||||
"true",
|
||||
"Has register banking"
|
||||
>;
|
||||
|
||||
// Subtarget feature "vop3-literal": when enabled, sets the HasVOP3Literal
// subtarget attribute to true. Part of the FeatureGFX10 generation list.
def FeatureVOP3Literal : SubtargetFeature<"vop3-literal",
|
||||
"HasVOP3Literal",
|
||||
"true",
|
||||
"Can use one literal in VOP3"
|
||||
>;
|
||||
|
||||
// Subtarget feature "no-data-dep-hazard": when enabled, sets the
// HasNoDataDepHazard subtarget attribute to true. Part of the FeatureGFX10
// generation list.
def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
|
||||
"HasNoDataDepHazard",
|
||||
"true",
|
||||
"Does not need SW waitstates"
|
||||
>;
|
||||
|
||||
//===------------------------------------------------------------===//
|
||||
// Subtarget Features (options and debugging)
|
||||
//===------------------------------------------------------------===//
|
||||
|
@ -487,7 +595,24 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
|
|||
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
|
||||
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
|
||||
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
|
||||
FeatureScalarAtomics, FeatureR128A16
|
||||
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
|
||||
]
|
||||
>;
|
||||
|
||||
// Generation feature for GFX10 (feature string "gfx10"). The list below is the
// set of features implied for every GFX10 subtarget.
// Worth noting when comparing with the assembler predicates in this file: the
// list contains FeatureGFX10Insts but neither FeatureGCN3Encoding nor
// FeatureGFX7GFX8GFX9Insts, so GFX10 matches the "!FeatureGCN3Encoding"
// predicates (e.g. isGFX6GFX7GFX10, isGFX7GFX10, HasSDWA10).
// Per-chip extras and hardware-bug features are added on top by the
// FeatureISAVersion10_* sets.
def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
|
||||
"gfx10",
|
||||
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
|
||||
FeatureFlatAddressSpace,
|
||||
FeatureCIInsts, Feature16BitInsts,
|
||||
FeatureSMemRealTime, FeatureInv2PiInlineImm,
|
||||
FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P,
|
||||
FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
|
||||
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
|
||||
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
|
||||
FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
|
||||
FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
|
||||
FeatureVOP3Literal, FeatureNoDataDepHazard,
|
||||
FeatureDoesNotSupportSRAMECC
|
||||
]
|
||||
>;
|
||||
|
||||
|
@ -601,6 +726,34 @@ def FeatureISAVersion9_0_9 : FeatureSet<
|
|||
FeatureXNACK,
|
||||
FeatureCodeObjectV3]>;
|
||||
|
||||
// TODO: Organize more features into groups.
|
||||
// Container record holding named lists of SubtargetFeatures so that several
// ISA-version feature sets can share them via !listconcat. Currently it only
// provides GFX10_1_Bugs, which FeatureISAVersion10_1_0 prepends to its own
// list.
def FeatureGroup {
|
||||
// Bugs present on gfx10.1.
|
||||
list<SubtargetFeature> GFX10_1_Bugs = [
|
||||
FeatureVcmpxPermlaneHazard,
|
||||
FeatureVMEMtoScalarWriteHazard,
|
||||
FeatureSMEMtoVectorWriteHazard,
|
||||
FeatureInstFwdPrefetchBug,
|
||||
FeatureVcmpxExecWARHazard,
|
||||
FeatureLdsBranchVmemWARHazard,
|
||||
FeatureNSAtoVMEMBug,
|
||||
FeatureFlatSegmentOffsetBug
|
||||
];
|
||||
}
|
||||
|
||||
// Feature set for ISA version 10.1.0 (used by the "gfx1010" processor model).
// It is the concatenation of the shared gfx10.1 hardware-bug list with the
// GFX10 generation feature and the chip-specific features listed here.
// FeatureLdsMisalignedBug is added explicitly because it is not part of
// FeatureGroup.GFX10_1_Bugs.
def FeatureISAVersion10_1_0 : FeatureSet<
|
||||
!listconcat(FeatureGroup.GFX10_1_Bugs,
|
||||
[FeatureGFX10,
|
||||
FeatureLDSBankCount32,
|
||||
FeatureDLInsts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureWavefrontSize64,
|
||||
FeatureScalarStores,
|
||||
FeatureScalarAtomics,
|
||||
FeatureScalarFlatScratchInsts,
|
||||
FeatureLdsMisalignedBug,
|
||||
FeatureCodeObjectV3])>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def AMDGPUInstrInfo : InstrInfo {
|
||||
|
@ -687,10 +840,21 @@ def isGFX6 :
|
|||
// Matches the SOUTHERN_ISLANDS and SEA_ISLANDS generations. The Predicate
// string is the C++ condition checked against the Subtarget; the
// AssemblerPredicate is the feature-based equivalent: neither GCN3 encoding
// nor GFX10 instructions may be present.
def isGFX6GFX7 :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
|
||||
AssemblerPredicate<"!FeatureGCN3Encoding,!FeatureGFX10Insts">;
|
||||
|
||||
// Matches the SOUTHERN_ISLANDS, SEA_ISLANDS and GFX10 generations. On the
// assembler side these are identified as the subtargets without GCN3
// encoding.
def isGFX6GFX7GFX10 :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<"!FeatureGCN3Encoding">;
|
||||
|
||||
// Matches only the SEA_ISLANDS generation. Assembler side: CI instructions
// present, no GCN3 encoding, and no GFX10 instructions (the last term keeps
// GFX10, which also has FeatureCIInsts without GCN3 encoding, from matching).
def isGFX7Only :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
|
||||
AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts,!FeatureGFX10Insts">;
|
||||
|
||||
// Matches the SEA_ISLANDS and GFX10 generations. Assembler side: CI
// instructions present and no GCN3 encoding.
def isGFX7GFX10 :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">;
|
||||
|
||||
def isGFX7GFX8GFX9 :
|
||||
|
@ -699,6 +863,13 @@ def isGFX7GFX8GFX9 :
|
|||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
|
||||
AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">;
|
||||
|
||||
// Matches every GCN generation before GFX10 (SOUTHERN_ISLANDS, SEA_ISLANDS,
// VOLCANIC_ISLANDS, GFX9). Assembler side: simply the absence of GFX10
// instructions.
def isGFX6GFX7GFX8GFX9 :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
|
||||
AssemblerPredicate<"!FeatureGFX10Insts">;
|
||||
|
||||
// Matches SEA_ISLANDS and every later generation. Assembler side: CI
// instructions present.
def isGFX7Plus :
|
||||
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
|
||||
AssemblerPredicate<"FeatureCIInsts">;
|
||||
|
@ -724,6 +895,10 @@ def isGFX8GFX9 :
|
|||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
|
||||
AssemblerPredicate<"FeatureGFX8Insts,FeatureGCN3Encoding">;
|
||||
|
||||
// Matches GFX10 and every later generation. Assembler side: GFX10
// instructions present.
def isGFX10Plus :
|
||||
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
|
||||
AssemblerPredicate<"FeatureGFX10Insts">;
|
||||
|
||||
// True when the subtarget reports hasFlatAddressSpace(); assembler side keys
// on FeatureFlatAddressSpace.
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
|
||||
AssemblerPredicate<"FeatureFlatAddressSpace">;
|
||||
|
||||
|
@ -731,6 +906,8 @@ def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
|
|||
AssemblerPredicate<"FeatureFlatGlobalInsts">;
|
||||
// True when the subtarget reports hasFlatScratchInsts(); assembler side keys
// on FeatureFlatScratchInsts.
def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
|
||||
AssemblerPredicate<"FeatureFlatScratchInsts">;
|
||||
// True when the subtarget reports hasScalarFlatScratchInsts(); assembler side
// keys on FeatureScalarFlatScratchInsts.
def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
|
||||
AssemblerPredicate<"FeatureScalarFlatScratchInsts">;
|
||||
// True when the subtarget reports hasD16LoadStore(). Note that the assembler
// side has no dedicated feature and keys on FeatureGFX9Insts instead.
def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
|
||||
AssemblerPredicate<"FeatureGFX9Insts">;
|
||||
|
||||
|
@ -766,6 +943,10 @@ def HasSDWA9 :
|
|||
Predicate<"Subtarget->hasSDWA()">,
|
||||
AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">;
|
||||
|
||||
// SDWA as available on GFX10. The codegen predicate is the same hasSDWA()
// query used by HasSDWA9; the two are told apart on the assembler side, where
// this one requires SDWA together with GFX10 instructions and no GCN3
// encoding.
def HasSDWA10 :
|
||||
Predicate<"Subtarget->hasSDWA()">,
|
||||
AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureSDWA">;
|
||||
|
||||
// True when the subtarget reports hasDPP(); assembler side requires both
// FeatureGCN3Encoding and FeatureDPP.
// NOTE(review): FeatureGFX10 lists FeatureDPP but not FeatureGCN3Encoding, so
// this assembler predicate presumably does not hold for GFX10 even though the
// codegen predicate can - confirm whether that is intended.
def HasDPP : Predicate<"Subtarget->hasDPP()">,
|
||||
AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;
|
||||
|
||||
|
@ -778,9 +959,18 @@ def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
|
|||
// True when the subtarget reports hasMadMixInsts(); assembler side keys on
// FeatureMadMixInsts.
def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
|
||||
AssemblerPredicate<"FeatureMadMixInsts">;
|
||||
|
||||
// True when the subtarget reports hasScalarStores(); assembler side keys on
// FeatureScalarStores.
def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
|
||||
AssemblerPredicate<"FeatureScalarStores">;
|
||||
|
||||
// True when the subtarget reports hasScalarAtomics(); assembler side keys on
// FeatureScalarAtomics.
def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
|
||||
AssemblerPredicate<"FeatureScalarAtomics">;
|
||||
|
||||
// True when the subtarget reports hasNoSdstCMPX(); assembler side keys on
// FeatureNoSdstCMPX. Exact complement of HasSdstCMPX.
def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
|
||||
AssemblerPredicate<"FeatureNoSdstCMPX">;
|
||||
|
||||
// True when the subtarget does NOT report hasNoSdstCMPX(); assembler side
// requires the absence of FeatureNoSdstCMPX. Exact complement of
// HasNoSdstCMPX.
def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
|
||||
AssemblerPredicate<"!FeatureNoSdstCMPX">;
|
||||
|
||||
// Codegen-only predicate (no AssemblerPredicate): LDS bank count equals 16.
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
|
||||
// Codegen-only predicate (no AssemblerPredicate): LDS bank count equals 32.
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
|
||||
def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
|
||||
|
|
|
@ -181,6 +181,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
|||
|
||||
HasApertureRegs(false),
|
||||
EnableXNACK(false),
|
||||
EnableCuMode(false),
|
||||
TrapHandler(false),
|
||||
|
||||
EnableHugePrivateBuffer(false),
|
||||
|
@ -196,6 +197,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
|||
CIInsts(false),
|
||||
GFX8Insts(false),
|
||||
GFX9Insts(false),
|
||||
GFX10Insts(false),
|
||||
GFX7GFX8GFX9Insts(false),
|
||||
SGPRInitBug(false),
|
||||
HasSMemRealTime(false),
|
||||
|
@ -212,20 +214,37 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
|||
HasSDWAOutModsVOPC(false),
|
||||
HasDPP(false),
|
||||
HasR128A16(false),
|
||||
HasNSAEncoding(false),
|
||||
HasDLInsts(false),
|
||||
HasDot1Insts(false),
|
||||
HasDot2Insts(false),
|
||||
EnableSRAMECC(false),
|
||||
DoesNotSupportSRAMECC(false),
|
||||
HasNoSdstCMPX(false),
|
||||
HasVscnt(false),
|
||||
HasRegisterBanking(false),
|
||||
HasVOP3Literal(false),
|
||||
HasNoDataDepHazard(false),
|
||||
FlatAddressSpace(false),
|
||||
FlatInstOffsets(false),
|
||||
FlatGlobalInsts(false),
|
||||
FlatScratchInsts(false),
|
||||
ScalarFlatScratchInsts(false),
|
||||
AddNoCarryInsts(false),
|
||||
HasUnpackedD16VMem(false),
|
||||
LDSMisalignedBug(false),
|
||||
|
||||
ScalarizeGlobal(false),
|
||||
|
||||
HasVcmpxPermlaneHazard(false),
|
||||
HasVMEMtoScalarWriteHazard(false),
|
||||
HasSMEMtoVectorWriteHazard(false),
|
||||
HasInstFwdPrefetchBug(false),
|
||||
HasVcmpxExecWARHazard(false),
|
||||
HasLdsBranchVmemWARHazard(false),
|
||||
HasNSAtoVMEMBug(false),
|
||||
HasFlatSegmentOffsetBug(false),
|
||||
|
||||
FeatureDisable(false),
|
||||
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
|
||||
TLInfo(TM, *this),
|
||||
|
@ -243,6 +262,8 @@ unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
|
|||
return getLocalMemorySize();
|
||||
unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
|
||||
unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
|
||||
if (!WorkGroupsPerCu)
|
||||
return 0;
|
||||
unsigned MaxWaves = getMaxWavesPerEU();
|
||||
return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
|
||||
}
|
||||
|
@ -251,6 +272,8 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
|
|||
const Function &F) const {
|
||||
unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
|
||||
unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
|
||||
if (!WorkGroupsPerCu)
|
||||
return 0;
|
||||
unsigned MaxWaves = getMaxWavesPerEU();
|
||||
unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
|
||||
unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
|
||||
|
@ -271,7 +294,8 @@ AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
|
|||
case CallingConv::AMDGPU_CS:
|
||||
case CallingConv::AMDGPU_KERNEL:
|
||||
case CallingConv::SPIR_KERNEL:
|
||||
return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
|
||||
return std::make_pair(getWavefrontSize() * 2,
|
||||
std::max(getWavefrontSize() * 4, 256u));
|
||||
case CallingConv::AMDGPU_VS:
|
||||
case CallingConv::AMDGPU_LS:
|
||||
case CallingConv::AMDGPU_HS:
|
||||
|
@ -496,7 +520,14 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
|
|||
Policy.ShouldTrackLaneMasks = true;
|
||||
}
|
||||
|
||||
/// \returns true if the V_MAD_F16 pseudo maps to a real MC opcode on this
/// subtarget, i.e. pseudoToMCOpcode() does not return -1 for it.
/// NOTE(review): -1 is presumably the "no encoding for this subtarget"
/// sentinel of pseudoToMCOpcode - confirm against SIInstrInfo.
bool GCNSubtarget::hasMadF16() const {
|
||||
return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
|
||||
}
|
||||
|
||||
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
|
||||
if (getGeneration() >= AMDGPUSubtarget::GFX10)
|
||||
return 10;
|
||||
|
||||
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
if (SGPRs <= 80)
|
||||
return 10;
|
||||
|
@ -543,6 +574,9 @@ unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
|
|||
|
||||
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
if (getGeneration() >= AMDGPUSubtarget::GFX10)
|
||||
return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
|
||||
|
||||
if (MFI.hasFlatScratchInit()) {
|
||||
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
|
||||
|
|
|
@ -55,7 +55,8 @@ public:
|
|||
SOUTHERN_ISLANDS = 4,
|
||||
SEA_ISLANDS = 5,
|
||||
VOLCANIC_ISLANDS = 6,
|
||||
GFX9 = 7
|
||||
GFX9 = 7,
|
||||
GFX10 = 8
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -293,6 +294,7 @@ protected:
|
|||
bool UnalignedBufferAccess;
|
||||
bool HasApertureRegs;
|
||||
bool EnableXNACK;
|
||||
bool EnableCuMode;
|
||||
bool TrapHandler;
|
||||
|
||||
// Used as options.
|
||||
|
@ -313,6 +315,7 @@ protected:
|
|||
bool CIInsts;
|
||||
bool GFX8Insts;
|
||||
bool GFX9Insts;
|
||||
bool GFX10Insts;
|
||||
bool GFX7GFX8GFX9Insts;
|
||||
bool SGPRInitBug;
|
||||
bool HasSMemRealTime;
|
||||
|
@ -329,24 +332,41 @@ protected:
|
|||
bool HasSDWAOutModsVOPC;
|
||||
bool HasDPP;
|
||||
bool HasR128A16;
|
||||
bool HasNSAEncoding;
|
||||
bool HasDLInsts;
|
||||
bool HasDot1Insts;
|
||||
bool HasDot2Insts;
|
||||
bool EnableSRAMECC;
|
||||
bool DoesNotSupportSRAMECC;
|
||||
bool HasNoSdstCMPX;
|
||||
bool HasVscnt;
|
||||
bool HasRegisterBanking;
|
||||
bool HasVOP3Literal;
|
||||
bool HasNoDataDepHazard;
|
||||
bool FlatAddressSpace;
|
||||
bool FlatInstOffsets;
|
||||
bool FlatGlobalInsts;
|
||||
bool FlatScratchInsts;
|
||||
bool ScalarFlatScratchInsts;
|
||||
bool AddNoCarryInsts;
|
||||
bool HasUnpackedD16VMem;
|
||||
bool R600ALUInst;
|
||||
bool CaymanISA;
|
||||
bool CFALUBug;
|
||||
bool LDSMisalignedBug;
|
||||
bool HasVertexCache;
|
||||
short TexVTXClauseSize;
|
||||
bool ScalarizeGlobal;
|
||||
|
||||
bool HasVcmpxPermlaneHazard;
|
||||
bool HasVMEMtoScalarWriteHazard;
|
||||
bool HasSMEMtoVectorWriteHazard;
|
||||
bool HasInstFwdPrefetchBug;
|
||||
bool HasVcmpxExecWARHazard;
|
||||
bool HasLdsBranchVmemWARHazard;
|
||||
bool HasNSAtoVMEMBug;
|
||||
bool HasFlatSegmentOffsetBug;
|
||||
|
||||
// Dummy feature to use for assembler in tablegen.
|
||||
bool FeatureDisable;
|
||||
|
||||
|
@ -583,6 +603,10 @@ public:
|
|||
return EnableXNACK;
|
||||
}
|
||||
|
||||
/// \returns the EnableCuMode flag (set by the "cumode" subtarget feature).
bool isCuModeEnabled() const {
|
||||
return EnableCuMode;
|
||||
}
|
||||
|
||||
/// \returns the FlatAddressSpace subtarget flag.
bool hasFlatAddressSpace() const {
|
||||
return FlatAddressSpace;
|
||||
}
|
||||
|
@ -599,6 +623,14 @@ public:
|
|||
return FlatScratchInsts;
|
||||
}
|
||||
|
||||
/// \returns the ScalarFlatScratchInsts flag (set by the
/// "scalar-flat-scratch-insts" subtarget feature).
bool hasScalarFlatScratchInsts() const {
|
||||
return ScalarFlatScratchInsts;
|
||||
}
|
||||
|
||||
/// \returns the HasFlatSegmentOffsetBug flag (set by the
/// "flat-segment-offset-bug" subtarget feature).
bool hasFlatSegmentOffsetBug() const {
|
||||
return HasFlatSegmentOffsetBug;
|
||||
}
|
||||
|
||||
/// \returns true for every generation strictly newer than GFX9; this is
/// derived from the generation alone, not from a subtarget feature flag.
bool hasFlatLgkmVMemCountInOrder() const {
|
||||
return getGeneration() > GFX9;
|
||||
}
|
||||
|
@ -654,10 +686,6 @@ public:
|
|||
return HasSDWAOutModsVOPC;
|
||||
}
|
||||
|
||||
/// \returns true for generations older than SEA_ISLANDS; derived from the
/// generation alone.
bool vmemWriteNeedsExpWaitcnt() const {
|
||||
return getGeneration() < SEA_ISLANDS;
|
||||
}
|
||||
|
||||
/// \returns the HasDLInsts subtarget flag.
bool hasDLInsts() const {
|
||||
return HasDLInsts;
|
||||
}
|
||||
|
@ -674,6 +702,30 @@ public:
|
|||
return EnableSRAMECC;
|
||||
}
|
||||
|
||||
/// \returns the HasNoSdstCMPX flag (set by the "no-sdst-cmpx" subtarget
/// feature).
bool hasNoSdstCMPX() const {
|
||||
return HasNoSdstCMPX;
|
||||
}
|
||||
|
||||
/// \returns the HasVscnt flag (set by the "vscnt" subtarget feature).
bool hasVscnt() const {
|
||||
return HasVscnt;
|
||||
}
|
||||
|
||||
/// \returns the HasRegisterBanking flag (set by the "register-banking"
/// subtarget feature).
bool hasRegisterBanking() const {
|
||||
return HasRegisterBanking;
|
||||
}
|
||||
|
||||
/// \returns the HasVOP3Literal flag (set by the "vop3-literal" subtarget
/// feature).
bool hasVOP3Literal() const {
|
||||
return HasVOP3Literal;
|
||||
}
|
||||
|
||||
/// \returns the HasNoDataDepHazard flag (set by the "no-data-dep-hazard"
/// subtarget feature).
bool hasNoDataDepHazard() const {
|
||||
return HasNoDataDepHazard;
|
||||
}
|
||||
|
||||
/// \returns true for generations older than SEA_ISLANDS; derived from the
/// generation alone.
bool vmemWriteNeedsExpWaitcnt() const {
|
||||
return getGeneration() < SEA_ISLANDS;
|
||||
}
|
||||
|
||||
// Scratch is allocated in 256 dword per wave blocks for the entire
|
||||
// wavefront. When viewed from the perspective of an arbitrary workitem, this
|
||||
// is 4-byte aligned.
|
||||
|
@ -782,6 +834,12 @@ public:
|
|||
return HasR128A16;
|
||||
}
|
||||
|
||||
/// \returns the HasNSAEncoding flag (set by the "nsa-encoding" subtarget
/// feature).
bool hasNSAEncoding() const {
|
||||
return HasNSAEncoding;
|
||||
}
|
||||
|
||||
/// Defined out of line; the implementation checks whether the V_MAD_F16
/// pseudo has an MC opcode on this subtarget.
bool hasMadF16() const;
|
||||
|
||||
/// \returns the EnableSIScheduler subtarget flag.
bool enableSIScheduler() const {
|
||||
return EnableSIScheduler;
|
||||
}
|
||||
|
@ -816,6 +874,38 @@ public:
|
|||
getGeneration() <= AMDGPUSubtarget::GFX9;
|
||||
}
|
||||
|
||||
/// \returns the HasVcmpxPermlaneHazard flag (set by the
/// "vcmpx-permlane-hazard" subtarget feature).
bool hasVcmpxPermlaneHazard() const {
|
||||
return HasVcmpxPermlaneHazard;
|
||||
}
|
||||
|
||||
/// \returns the HasVMEMtoScalarWriteHazard flag (set by the
/// "vmem-to-scalar-write-hazard" subtarget feature).
bool hasVMEMtoScalarWriteHazard() const {
|
||||
return HasVMEMtoScalarWriteHazard;
|
||||
}
|
||||
|
||||
/// \returns the HasSMEMtoVectorWriteHazard flag (set by the
/// "smem-to-vector-write-hazard" subtarget feature).
bool hasSMEMtoVectorWriteHazard() const {
|
||||
return HasSMEMtoVectorWriteHazard;
|
||||
}
|
||||
|
||||
/// \returns true only if the LDSMisalignedBug feature flag is set AND CU mode
/// is not enabled. Unlike the neighbouring accessors this is not a plain
/// getter: enabling "cumode" suppresses the bug report (the feature's
/// description ties the bug to WGP mode).
bool hasLDSMisalignedBug() const {
|
||||
return LDSMisalignedBug && !EnableCuMode;
|
||||
}
|
||||
|
||||
/// \returns the HasInstFwdPrefetchBug flag (set by the
/// "inst-fwd-prefetch-bug" subtarget feature).
bool hasInstFwdPrefetchBug() const {
|
||||
return HasInstFwdPrefetchBug;
|
||||
}
|
||||
|
||||
/// \returns the HasVcmpxExecWARHazard flag (set by the
/// "vcmpx-exec-war-hazard" subtarget feature).
bool hasVcmpxExecWARHazard() const {
|
||||
return HasVcmpxExecWARHazard;
|
||||
}
|
||||
|
||||
/// \returns the HasLdsBranchVmemWARHazard flag (set by the
/// "lds-branch-vmem-war-hazard" subtarget feature).
bool hasLdsBranchVmemWARHazard() const {
|
||||
return HasLdsBranchVmemWARHazard;
|
||||
}
|
||||
|
||||
/// \returns the HasNSAtoVMEMBug flag (set by the "nsa-to-vmem-bug" subtarget
/// feature).
bool hasNSAtoVMEMBug() const {
|
||||
return HasNSAtoVMEMBug;
|
||||
}
|
||||
|
||||
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
|
||||
/// SGPRs
|
||||
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
|
||||
|
|
|
@ -999,6 +999,10 @@ public:
|
|||
return AMDGPU::isGFX9(getSTI());
|
||||
}
|
||||
|
||||
/// \returns whether the parser's current subtarget info is GFX10, by
/// forwarding to AMDGPU::isGFX10() (defined outside this excerpt).
bool isGFX10() const {
|
||||
return AMDGPU::isGFX10(getSTI());
|
||||
}
|
||||
|
||||
/// \returns whether FeatureInv2PiInlineImm is set in the current feature bits.
bool hasInv2PiInlineImm() const {
|
||||
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
|
||||
}
|
||||
|
@ -1407,7 +1411,7 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
|
|||
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
|
||||
if (AsmParser->isVI())
|
||||
return isVReg32();
|
||||
else if (AsmParser->isGFX9())
|
||||
else if (AsmParser->isGFX9() || AsmParser->isGFX10())
|
||||
return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
|
||||
else
|
||||
return false;
|
||||
|
@ -2953,7 +2957,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
|
|||
if (getParser().parseIdentifier(KernelName))
|
||||
return true;
|
||||
|
||||
kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
|
||||
kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
|
||||
|
||||
StringSet<> Seen;
|
||||
|
||||
|
|
|
@ -164,3 +164,10 @@ def : ProcessorModel<"gfx909", SIQuarterSpeedModel,
|
|||
FeatureISAVersion9_0_9.Features
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GCN GFX10.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Processor "gfx1010": uses the GFX10SpeedModel scheduling model and the
// feature list of ISA version 10.1.0.
// NOTE(review): GFX10SpeedModel is not defined anywhere in this excerpt -
// confirm it is introduced alongside this definition.
def : ProcessorModel<"gfx1010", GFX10SpeedModel,
|
||||
FeatureISAVersion10_1_0.Features
|
||||
>;
|
||||
|
|
|
@ -60,39 +60,40 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
|
|||
AMDGPU::GPUKind AK;
|
||||
|
||||
switch (ElfMach) {
|
||||
case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
|
||||
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
|
||||
case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
|
||||
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
|
||||
}
|
||||
|
||||
StringRef GPUName = getArchNameAMDGCN(AK);
|
||||
|
@ -139,6 +140,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
|
|||
case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
|
||||
case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
|
||||
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
|
||||
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
|
||||
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
|
||||
}
|
||||
|
||||
|
@ -324,6 +326,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
|
|||
PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
|
||||
compute_pgm_rsrc1,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
|
||||
if (IVersion.Major >= 10) {
|
||||
PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
|
||||
compute_pgm_rsrc1,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
|
||||
PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
|
||||
compute_pgm_rsrc1,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
|
||||
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
|
||||
compute_pgm_rsrc1,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
|
||||
}
|
||||
PRINT_FIELD(
|
||||
OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
|
||||
compute_pgm_rsrc2,
|
||||
|
|
|
@ -523,6 +523,15 @@ enum DppCtrl : unsigned {
|
|||
#define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
|
||||
#define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
|
||||
#define C_00B848_IEEE_MODE 0xFF7FFFFF
|
||||
#define S_00B848_WGP_MODE(x) (((x) & 0x1) << 29)
|
||||
#define G_00B848_WGP_MODE(x) (((x) >> 29) & 0x1)
|
||||
#define C_00B848_WGP_MODE 0xDFFFFFFF
|
||||
#define S_00B848_MEM_ORDERED(x) (((x) & 0x1) << 30)
|
||||
#define G_00B848_MEM_ORDERED(x) (((x) >> 30) & 0x1)
|
||||
#define C_00B848_MEM_ORDERED 0xBFFFFFFF
|
||||
#define S_00B848_FWD_PROGRESS(x) (((x) & 0x1) << 31)
|
||||
#define G_00B848_FWD_PROGRESS(x) (((x) >> 31) & 0x1)
|
||||
#define C_00B848_FWD_PROGRESS 0x7FFFFFFF
|
||||
|
||||
|
||||
// Helpers for setting FLOAT_MODE
|
||||
|
@ -553,6 +562,15 @@ enum DppCtrl : unsigned {
|
|||
#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
|
||||
#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
|
||||
|
||||
#define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
|
||||
#define S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21)
|
||||
#define S_028B54_GS_W32_EN(x) (((x) & 0x1) << 22)
|
||||
#define S_028B54_VS_W32_EN(x) (((x) & 0x1) << 23)
|
||||
#define R_0286D8_SPI_PS_IN_CONTROL 0x0286D8
|
||||
#define S_0286D8_PS_W32_EN(x) (((x) & 0x1) << 15)
|
||||
#define R_00B800_COMPUTE_DISPATCH_INITIATOR 0x00B800
|
||||
#define S_00B800_CS_W32_EN(x) (((x) & 0x1) << 15)
|
||||
|
||||
#define R_SPILLED_SGPRS 0x4
|
||||
#define R_SPILLED_VGPRS 0x8
|
||||
} // End namespace llvm
|
||||
|
|
|
@ -5591,7 +5591,9 @@ enum SIEncodingFamily {
|
|||
SDWA = 2,
|
||||
SDWA9 = 3,
|
||||
GFX80 = 4,
|
||||
GFX9 = 5
|
||||
GFX9 = 5,
|
||||
GFX10 = 6,
|
||||
SDWA10 = 7
|
||||
};
|
||||
|
||||
static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
|
||||
|
@ -5604,6 +5606,8 @@ static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
|
|||
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
|
||||
case AMDGPUSubtarget::GFX9:
|
||||
return SIEncodingFamily::VI;
|
||||
case AMDGPUSubtarget::GFX10:
|
||||
return SIEncodingFamily::GFX10;
|
||||
}
|
||||
llvm_unreachable("Unknown subtarget generation!");
|
||||
}
|
||||
|
|
|
@ -23,6 +23,8 @@ def SIEncodingFamily {
|
|||
int SDWA9 = 3;
|
||||
int GFX80 = 4;
|
||||
int GFX9 = 5;
|
||||
int GFX10 = 6;
|
||||
int SDWA10 = 7;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -112,9 +112,9 @@ def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
|
|||
}
|
||||
|
||||
foreach Index = 0-15 in {
|
||||
def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
|
||||
def TTMP#Index#_gfx9 : SIReg<"ttmp"#Index, !add(108, Index)>;
|
||||
def TTMP#Index : SIReg<"", 0>;
|
||||
def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
|
||||
def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>;
|
||||
def TTMP#Index : SIReg<"", 0>;
|
||||
}
|
||||
|
||||
multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
|
||||
|
@ -311,8 +311,8 @@ class TmpRegTuples<string tgt,
|
|||
getSubRegs<size>.ret>;
|
||||
|
||||
foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>;
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 2, Index>;
|
||||
}
|
||||
|
||||
foreach Index = {0, 4, 8, 12} in {
|
||||
|
@ -321,7 +321,7 @@ foreach Index = {0, 4, 8, 12} in {
|
|||
_TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 4, Index>;
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#
|
||||
_TTMP#!add(Index,2)#
|
||||
_TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>;
|
||||
_TTMP#!add(Index,3)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 4, Index>;
|
||||
}
|
||||
|
||||
foreach Index = {0, 4, 8} in {
|
||||
|
@ -338,7 +338,7 @@ foreach Index = {0, 4, 8} in {
|
|||
_TTMP#!add(Index,4)#
|
||||
_TTMP#!add(Index,5)#
|
||||
_TTMP#!add(Index,6)#
|
||||
_TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>;
|
||||
_TTMP#!add(Index,7)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 8, Index>;
|
||||
}
|
||||
|
||||
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
|
||||
|
@ -348,12 +348,12 @@ def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TT
|
|||
TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
|
||||
TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;
|
||||
|
||||
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 :
|
||||
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9_gfx10 :
|
||||
TmpRegTuplesBase<0, 16,
|
||||
[TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9,
|
||||
TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9,
|
||||
TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9,
|
||||
TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>;
|
||||
[TTMP0_gfx9_gfx10, TTMP1_gfx9_gfx10, TTMP2_gfx9_gfx10, TTMP3_gfx9_gfx10,
|
||||
TTMP4_gfx9_gfx10, TTMP5_gfx9_gfx10, TTMP6_gfx9_gfx10, TTMP7_gfx9_gfx10,
|
||||
TTMP8_gfx9_gfx10, TTMP9_gfx9_gfx10, TTMP10_gfx9_gfx10, TTMP11_gfx9_gfx10,
|
||||
TTMP12_gfx9_gfx10, TTMP13_gfx9_gfx10, TTMP14_gfx9_gfx10, TTMP15_gfx9_gfx10]>;
|
||||
|
||||
|
||||
// VGPR 32-bit registers
|
||||
|
|
|
@ -37,6 +37,9 @@ def WriteDouble : SchedWrite;
|
|||
// half rate f64 instruction (same as v_add_f64)
|
||||
def WriteDoubleAdd : SchedWrite;
|
||||
|
||||
// Conversion to or from f64 instruction
|
||||
def WriteDoubleCvt : SchedWrite;
|
||||
|
||||
// Half rate 64-bit instructions.
|
||||
def Write64Bit : SchedWrite;
|
||||
|
||||
|
@ -61,6 +64,7 @@ class SISchedMachineModel : SchedMachineModel {
|
|||
|
||||
def SIFullSpeedModel : SISchedMachineModel;
|
||||
def SIQuarterSpeedModel : SISchedMachineModel;
|
||||
def GFX10SpeedModel : SISchedMachineModel;
|
||||
|
||||
// XXX: Are the resource counts correct?
|
||||
def HWBranch : ProcResource<1> {
|
||||
|
@ -81,6 +85,9 @@ def HWVMEM : ProcResource<1> {
|
|||
def HWVALU : ProcResource<1> {
|
||||
let BufferSize = 1;
|
||||
}
|
||||
def HWRC : ProcResource<1> { // Register destination cache
|
||||
let BufferSize = 1;
|
||||
}
|
||||
|
||||
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
|
||||
int latency> : WriteRes<write, resources> {
|
||||
|
@ -124,6 +131,7 @@ defm : SICommonWriteRes;
|
|||
def : HWVALUWriteRes<WriteFloatFMA, 1>;
|
||||
def : HWVALUWriteRes<WriteDouble, 4>;
|
||||
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
|
||||
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
|
||||
|
||||
def : InstRW<[WriteCopy], (instrs COPY)>;
|
||||
|
||||
|
@ -136,7 +144,32 @@ defm : SICommonWriteRes;
|
|||
def : HWVALUWriteRes<WriteFloatFMA, 16>;
|
||||
def : HWVALUWriteRes<WriteDouble, 16>;
|
||||
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
|
||||
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
|
||||
|
||||
def : InstRW<[WriteCopy], (instrs COPY)>;
|
||||
|
||||
} // End SchedModel = SIQuarterSpeedModel
|
||||
|
||||
// Write-resource bindings that apply while SchedModel is GFX10SpeedModel.
// Each HWWriteRes entry below is (SchedWrite, processor resources, latency),
// following the HWWriteRes class parameters declared earlier in this file.
// Every entry except WriteBranch and WriteBarrier also lists HWRC, the
// resource this file annotates as the "Register destination cache".
let SchedModel = GFX10SpeedModel in {
|
||||
|
||||
// The latency values are 1 / (operations / cycle).
|
||||
// Add 1 stall cycle for VGPR read.
|
||||
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
|
||||
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 9>;
|
||||
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 17>;
|
||||
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
|
||||
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 17>;
|
||||
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 17>;
|
||||
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 17>;
|
||||
|
||||
// Non-VALU writes: branch, export, LDS, scalar ALU, scalar memory, vector
// memory and barrier. WriteBranch and WriteBarrier use HWBranch alone.
def : HWWriteRes<WriteBranch, [HWBranch], 32>;
|
||||
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
|
||||
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
|
||||
def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 5>;
|
||||
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
|
||||
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
|
||||
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
|
||||
|
||||
// COPY is given the WriteCopy scheduling class, matching the same InstRW
// line in the other speed-model blocks of this file.
def : InstRW<[WriteCopy], (instrs COPY)>;
|
||||
|
||||
} // End SchedModel = GFX10SpeedModel
|
||||
|
|
|
@ -435,11 +435,21 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
|||
Header.kernarg_segment_alignment = 4;
|
||||
Header.group_segment_alignment = 4;
|
||||
Header.private_segment_alignment = 4;
|
||||
|
||||
if (Version.Major >= 10) {
|
||||
Header.compute_pgm_resource_registers |=
|
||||
S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
|
||||
S_00B848_MEM_ORDERED(1);
|
||||
}
|
||||
}
|
||||
|
||||
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
|
||||
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo *STI) {
|
||||
IsaVersion Version = getIsaVersion(STI->getCPU());
|
||||
|
||||
amdhsa::kernel_descriptor_t KD;
|
||||
memset(&KD, 0, sizeof(KD));
|
||||
|
||||
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
|
||||
amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
|
||||
|
@ -449,6 +459,13 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
|
|||
amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
|
||||
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
|
||||
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
|
||||
if (Version.Major >= 10) {
|
||||
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
|
||||
STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
|
||||
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
|
||||
}
|
||||
return KD;
|
||||
}
|
||||
|
||||
|
@ -679,6 +696,10 @@ bool isGFX9(const MCSubtargetInfo &STI) {
|
|||
return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
|
||||
}
|
||||
|
||||
// Returns whether AMDGPU::FeatureGFX10 is set in the feature bits of STI.
bool isGFX10(const MCSubtargetInfo &STI) {
|
||||
return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
|
||||
}
|
||||
|
||||
// Returns whether AMDGPU::FeatureGCN3Encoding is set in the feature bits of
// STI.
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
|
||||
return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
|
||||
}
|
||||
|
@ -704,46 +725,46 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
|
|||
CASE_CI_VI(FLAT_SCR) \
|
||||
CASE_CI_VI(FLAT_SCR_LO) \
|
||||
CASE_CI_VI(FLAT_SCR_HI) \
|
||||
CASE_VI_GFX9(TTMP0) \
|
||||
CASE_VI_GFX9(TTMP1) \
|
||||
CASE_VI_GFX9(TTMP2) \
|
||||
CASE_VI_GFX9(TTMP3) \
|
||||
CASE_VI_GFX9(TTMP4) \
|
||||
CASE_VI_GFX9(TTMP5) \
|
||||
CASE_VI_GFX9(TTMP6) \
|
||||
CASE_VI_GFX9(TTMP7) \
|
||||
CASE_VI_GFX9(TTMP8) \
|
||||
CASE_VI_GFX9(TTMP9) \
|
||||
CASE_VI_GFX9(TTMP10) \
|
||||
CASE_VI_GFX9(TTMP11) \
|
||||
CASE_VI_GFX9(TTMP12) \
|
||||
CASE_VI_GFX9(TTMP13) \
|
||||
CASE_VI_GFX9(TTMP14) \
|
||||
CASE_VI_GFX9(TTMP15) \
|
||||
CASE_VI_GFX9(TTMP0_TTMP1) \
|
||||
CASE_VI_GFX9(TTMP2_TTMP3) \
|
||||
CASE_VI_GFX9(TTMP4_TTMP5) \
|
||||
CASE_VI_GFX9(TTMP6_TTMP7) \
|
||||
CASE_VI_GFX9(TTMP8_TTMP9) \
|
||||
CASE_VI_GFX9(TTMP10_TTMP11) \
|
||||
CASE_VI_GFX9(TTMP12_TTMP13) \
|
||||
CASE_VI_GFX9(TTMP14_TTMP15) \
|
||||
CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
|
||||
CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
|
||||
CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
|
||||
CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
|
||||
CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
|
||||
CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
|
||||
CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
|
||||
CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
|
||||
CASE_VI_GFX9_GFX10(TTMP0) \
|
||||
CASE_VI_GFX9_GFX10(TTMP1) \
|
||||
CASE_VI_GFX9_GFX10(TTMP2) \
|
||||
CASE_VI_GFX9_GFX10(TTMP3) \
|
||||
CASE_VI_GFX9_GFX10(TTMP4) \
|
||||
CASE_VI_GFX9_GFX10(TTMP5) \
|
||||
CASE_VI_GFX9_GFX10(TTMP6) \
|
||||
CASE_VI_GFX9_GFX10(TTMP7) \
|
||||
CASE_VI_GFX9_GFX10(TTMP8) \
|
||||
CASE_VI_GFX9_GFX10(TTMP9) \
|
||||
CASE_VI_GFX9_GFX10(TTMP10) \
|
||||
CASE_VI_GFX9_GFX10(TTMP11) \
|
||||
CASE_VI_GFX9_GFX10(TTMP12) \
|
||||
CASE_VI_GFX9_GFX10(TTMP13) \
|
||||
CASE_VI_GFX9_GFX10(TTMP14) \
|
||||
CASE_VI_GFX9_GFX10(TTMP15) \
|
||||
CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
|
||||
CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
|
||||
CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
|
||||
CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
|
||||
CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
|
||||
CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
|
||||
CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
|
||||
CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
|
||||
CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
|
||||
CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
|
||||
CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
|
||||
CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
|
||||
CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
|
||||
CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
|
||||
CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
|
||||
CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
|
||||
}
|
||||
|
||||
#define CASE_CI_VI(node) \
|
||||
assert(!isSI(STI)); \
|
||||
case node: return isCI(STI) ? node##_ci : node##_vi;
|
||||
|
||||
#define CASE_VI_GFX9(node) \
|
||||
case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
|
||||
#define CASE_VI_GFX9_GFX10(node) \
|
||||
case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
|
||||
|
||||
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
|
||||
if (STI.getTargetTriple().getArch() == Triple::r600)
|
||||
|
@ -752,17 +773,17 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
|
|||
}
|
||||
|
||||
#undef CASE_CI_VI
|
||||
#undef CASE_VI_GFX9
|
||||
#undef CASE_VI_GFX9_GFX10
|
||||
|
||||
#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
|
||||
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
|
||||
#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
|
||||
|
||||
// Translates an encoding-specific register (e.g. a `_ci`, `_vi` or
// `_gfx9_gfx10` variant) back to the un-suffixed register it was derived
// from. The whole body is the MAP_REG2REG macro, expanded with the CASE_*
// helpers #defined just before this function; each helper emits
// `case <reg>_<suffix>: ... return <reg>;` labels.
// NOTE(review): what is returned for a register with no matching case is
// decided by the part of MAP_REG2REG not visible in this hunk -- confirm.
unsigned mc2PseudoReg(unsigned Reg) {
|
||||
MAP_REG2REG
|
||||
}
|
||||
|
||||
#undef CASE_CI_VI
|
||||
#undef CASE_VI_GFX9
|
||||
#undef CASE_VI_GFX9_GFX10
|
||||
#undef MAP_REG2REG
|
||||
|
||||
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
|
||||
|
@ -1030,5 +1051,6 @@ const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
|
|||
// Returns true when lookupSourceOfDivergence() yields a non-null entry for
// IntrID; the pointer it returns is implicitly converted to bool.
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
|
||||
return lookupSourceOfDivergence(IntrID);
|
||||
}
|
||||
|
||||
} // namespace AMDGPU
|
||||
} // namespace llvm
|
||||
|
|
|
@ -244,7 +244,8 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen);
|
|||
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
||||
const MCSubtargetInfo *STI);
|
||||
|
||||
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
|
||||
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
|
||||
const MCSubtargetInfo *STI);
|
||||
|
||||
bool isGroupSegment(const GlobalValue *GV);
|
||||
bool isGlobalSegment(const GlobalValue *GV);
|
||||
|
@ -398,6 +399,7 @@ bool isSI(const MCSubtargetInfo &STI);
|
|||
bool isCI(const MCSubtargetInfo &STI);
|
||||
bool isVI(const MCSubtargetInfo &STI);
|
||||
bool isGFX9(const MCSubtargetInfo &STI);
|
||||
bool isGFX10(const MCSubtargetInfo &STI);
|
||||
|
||||
/// Is Reg - scalar register
|
||||
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
|
||||
|
|
|
@ -82,6 +82,9 @@ COMPPGM1(priv, compute_pgm_rsrc1_priv, PRIV
|
|||
COMPPGM1(enable_dx10_clamp, compute_pgm_rsrc1_dx10_clamp, DX10_CLAMP),
|
||||
COMPPGM1(debug_mode, compute_pgm_rsrc1_debug_mode, DEBUG_MODE),
|
||||
COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE_MODE),
|
||||
COMPPGM1(enable_wgp_mode, compute_pgm_rsrc1_wgp_mode, WGP_MODE),
|
||||
COMPPGM1(enable_mem_ordered, compute_pgm_rsrc1_mem_ordered, MEM_ORDERED),
|
||||
COMPPGM1(enable_fwd_progress, compute_pgm_rsrc1_fwd_progress, FWD_PROGRESS),
|
||||
// TODO: bulky
|
||||
// TODO: cdbg_user
|
||||
COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx904 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX904 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX906 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx909 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
|
||||
|
||||
; ARCH-R600: Arch: r600
|
||||
; ARCH-GCN: Arch: amdgcn
|
||||
|
@ -87,6 +88,7 @@
|
|||
; GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E)
|
||||
; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
|
||||
; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31)
|
||||
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
|
||||
; ALL: ]
|
||||
|
||||
define amdgpu_kernel void @elf_header() {
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX904 %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX906 %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx909 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX909 %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1010 %s
|
||||
|
||||
; HSA: .hsa_code_object_version 2,1
|
||||
; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU"
|
||||
|
@ -42,3 +43,4 @@
|
|||
; HSA-GFX904: .hsa_code_object_isa 9,0,4,"AMD","AMDGPU"
|
||||
; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU"
|
||||
; HSA-GFX909: .hsa_code_object_isa 9,0,9,"AMD","AMDGPU"
|
||||
; HSA-GFX1010: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"
|
||||
|
|
|
@ -1275,6 +1275,7 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
|
|||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue