[AMDGPU] gfx1011/gfx1012 targets

Differential Revision: https://reviews.llvm.org/D63307

llvm-svn: 363344
This commit is contained in:
Stanislav Mekhanoshin 2019-06-14 00:33:31 +00:00
parent e4147ea1ef
commit c43e67bfff
29 changed files with 814 additions and 42 deletions

View File

@ -705,6 +705,8 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
// AMDGCN GFX10.
EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
// Reserved for AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
@ -713,7 +715,7 @@ enum : unsigned {
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1010,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1012,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.

View File

@ -124,9 +124,11 @@ enum GPUKind : uint32_t {
GK_GFX909 = 65,
GK_GFX1010 = 71,
GK_GFX1011 = 72,
GK_GFX1012 = 73,
GK_AMDGCN_FIRST = GK_GFX600,
GK_AMDGCN_LAST = GK_GFX1010,
GK_AMDGCN_LAST = GK_GFX1012,
};
/// Instruction set architecture version.

View File

@ -412,6 +412,8 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
BCase(EF_AMDGPU_SRAM_ECC);
break;

View File

@ -62,7 +62,7 @@ constexpr GPUInfo R600GPUs[26] = {
// This table should be sorted by the value of GPUKind
// Don't bother listing the implicitly true features
constexpr GPUInfo AMDGCNGPUs[34] = {
constexpr GPUInfo AMDGCNGPUs[36] = {
// Name Canonical Kind Features
// Name
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
@ -99,6 +99,8 @@ constexpr GPUInfo AMDGCNGPUs[34] = {
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
};
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
@ -197,6 +199,8 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX906: return {9, 0, 6};
case GK_GFX909: return {9, 0, 9};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};
default: return {0, 0, 0};
}
}

View File

@ -378,6 +378,18 @@ def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
"Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
>;
def FeatureDot5Insts : SubtargetFeature<"dot5-insts",
"HasDot5Insts",
"true",
"Has v_dot2c_f32_f16 instruction"
>;
def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
"HasDot6Insts",
"true",
"Has v_dot4c_i32_i8 instruction"
>;
def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
"DoesNotSupportSRAMECC",
"true",
@ -773,6 +785,41 @@ def FeatureISAVersion10_1_0 : FeatureSet<
FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3])>;
def FeatureISAVersion10_1_1 : FeatureSet<
!listconcat(FeatureGroup.GFX10_1_Bugs,
[FeatureGFX10,
FeatureLDSBankCount32,
FeatureDLInsts,
FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot5Insts,
FeatureDot6Insts,
FeatureNSAEncoding,
FeatureWavefrontSize64,
FeatureScalarStores,
FeatureScalarAtomics,
FeatureScalarFlatScratchInsts,
FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3])>;
def FeatureISAVersion10_1_2 : FeatureSet<
!listconcat(FeatureGroup.GFX10_1_Bugs,
[FeatureGFX10,
FeatureLDSBankCount32,
FeatureDLInsts,
FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot5Insts,
FeatureDot6Insts,
FeatureNSAEncoding,
FeatureWavefrontSize64,
FeatureScalarStores,
FeatureScalarAtomics,
FeatureScalarFlatScratchInsts,
FeatureLdsMisalignedBug,
FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3])>;
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
@ -1015,6 +1062,11 @@ def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
AssemblerPredicate<"FeatureDot2Insts">;
def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
AssemblerPredicate<"FeatureDot5Insts">;
def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
AssemblerPredicate<"FeatureDot6Insts">;
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;

View File

@ -234,6 +234,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasDLInsts(false),
HasDot1Insts(false),
HasDot2Insts(false),
HasDot5Insts(false),
HasDot6Insts(false),
EnableSRAMECC(false),
DoesNotSupportSRAMECC(false),
HasNoSdstCMPX(false),

View File

@ -337,6 +337,8 @@ protected:
bool HasDLInsts;
bool HasDot1Insts;
bool HasDot2Insts;
bool HasDot5Insts;
bool HasDot6Insts;
bool EnableSRAMECC;
bool DoesNotSupportSRAMECC;
bool HasNoSdstCMPX;
@ -705,6 +707,14 @@ public:
return HasDot2Insts;
}
bool hasDot5Insts() const {
return HasDot5Insts;
}
bool hasDot6Insts() const {
return HasDot6Insts;
}
bool isSRAMECCEnabled() const {
return EnableSRAMECC;
}

View File

@ -171,3 +171,11 @@ def : ProcessorModel<"gfx909", SIQuarterSpeedModel,
def : ProcessorModel<"gfx1010", GFX10SpeedModel,
FeatureISAVersion10_1_0.Features
>;
def : ProcessorModel<"gfx1011", GFX10SpeedModel,
FeatureISAVersion10_1_1.Features
>;
def : ProcessorModel<"gfx1012", GFX10SpeedModel,
FeatureISAVersion10_1_2.Features
>;

View File

@ -93,6 +93,8 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
}
@ -141,6 +143,8 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}

View File

@ -318,6 +318,20 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
def VOP_MAC_F16 : VOP_MAC <f16>;
def VOP_MAC_F32 : VOP_MAC <f32>;
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
let HasClamp = 0;
let HasExtSDWA = 0;
let HasModifiers = 1;
let HasOpSel = 0;
let IsPacked = 0;
}
def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
let Src0ModDPP = FPVRegInputMods;
let Src1ModDPP = FPVRegInputMods;
}
def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32>;
// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
let Asm32 = "$vdst, vcc, $src0, $src1";
@ -634,6 +648,31 @@ defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
} // End SubtargetPredicate = HasDLInsts
let Constraints = "$vdst = $src2",
DisableEncoding="$src2",
isConvertibleToThreeAddress = 1,
isCommutable = 1 in {
let SubtargetPredicate = HasDot5Insts in
defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
let SubtargetPredicate = HasDot6Insts in
defm V_DOT4C_I32_I8 : VOP2Inst_e32<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;
}
let AddedComplexity = 30 in {
def : GCNPat<
(f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
(f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
> {
let SubtargetPredicate = HasDot5Insts;
}
def : GCNPat<
(i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
(i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
> {
let SubtargetPredicate = HasDot6Insts;
}
} // End AddedComplexity = 30
let SubtargetPredicate = isGFX10Plus in {
def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
@ -1492,3 +1531,18 @@ defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;
} // End SubtargetPredicate = HasDLInsts
multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
VOP2_Real_e32_gfx10<op>,
VOP2_Real_dpp_gfx10<op>,
VOP2_Real_dpp8_gfx10<op>;
let SubtargetPredicate = HasDot5Insts in {
// NB: Opcode conflicts with V_DOT8C_I32_I4
// This opcode exists in gfx 10.1* only
defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
}
let SubtargetPredicate = HasDot6Insts in {
defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>;
}

View File

@ -412,3 +412,20 @@ defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x012>;
defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>;
let SubtargetPredicate = HasDot2Insts in {
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>;
defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>;
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>;
defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x017>;
defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x019>;
} // End SubtargetPredicate = HasDot2Insts
let SubtargetPredicate = HasDot1Insts in {
defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x016>;
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x018>;
} // End SubtargetPredicate = HasDot1Insts

View File

@ -48,6 +48,8 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx909 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1011 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1012 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
; ARCH-R600: Arch: r600
; ARCH-GCN: Arch: amdgcn
@ -89,6 +91,8 @@
; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31)
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)
; ALL: ]
define amdgpu_kernel void @elf_header() {

View File

@ -1,5 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
; RUN: llc -march=amdgcn -mcpu=gfx906 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-UNSAFE
; RUN: llc -march=amdgcn -mcpu=gfx906 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
; RUN: llc -march=amdgcn -mcpu=gfx1011 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
; RUN: llc -march=amdgcn -mcpu=gfx1012 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp64-fp16-denormals,-fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+fp64-fp16-denormals,+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT
@ -14,7 +16,8 @@
; GFX906: v_mul_f16_e32
; GFX906: v_mul_f16_e32
; GFX906-UNSAFE: v_fma_f16
; GFX906-DL-UNSAFE: v_fma_f16
; GFX10-CONTRACT: v_fmac_f16
; GFX906-CONTRACT: v_mac_f16_e32
; GFX906-DENORM-CONTRACT: v_fma_f16
@ -50,7 +53,8 @@ entry:
; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32
; GFX906-UNSAFE: v_dot2_f32_f16
; GFX906-DL-UNSAFE: v_dot2_f32_f16
; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32
; GFX906-CONTRACT: v_dot2_f32_f16
@ -90,7 +94,8 @@ entry:
; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32
; GFX906-UNSAFE: v_dot2_f32_f16
; GFX906-DL-UNSAFE: v_dot2_f32_f16
; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32
; GFX906-CONTRACT: v_dot2_f32_f16
; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
@ -127,7 +132,7 @@ entry:
; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32
; GFX906-UNSAFE: v_fma_mix_f32
; GCN-DL-UNSAFE: v_fma_mix_f32
; GFX906-CONTRACT: v_fma_mix_f32
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
@ -164,7 +169,7 @@ entry:
; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32
; GFX906-UNSAFE: v_fma_mix_f32
; GCN-DL-UNSAFE: v_fma_mix_f32
; GFX906-CONTRACT: v_fma_mix_f32
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
@ -201,7 +206,7 @@ entry:
; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32
; GFX906-UNSAFE: v_fma_mix_f32
; GCN-DL-UNSAFE: v_fma_mix_f32
; GFX906-CONTRACT: v_fma_mix_f32
; GFX906-DENORM-CONTRACT: v_fma_mix_f32

View File

@ -25,6 +25,8 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX906 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx909 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX909 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1010 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1011 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1011 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1012 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1012 %s
; HSA: .hsa_code_object_version 2,1
; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU"
@ -44,3 +46,5 @@
; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU"
; HSA-GFX909: .hsa_code_object_isa 9,0,9,"AMD","AMDGPU"
; HSA-GFX1010: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"
; HSA-GFX1011: .hsa_code_object_isa 10,1,1,"AMD","AMDGPU"
; HSA-GFX1012: .hsa_code_object_isa 10,1,2,"AMD","AMDGPU"

View File

@ -1,4 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VECT %s
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,VECT %s
; GCN-LABEL: test_local_misaligned_v2:
@ -112,15 +114,17 @@ bb:
ret void
}
; TODO: Reinstate the test below once v3i32/v3f32 is reinstated.
; GCN-LABEL: test_flat_misaligned_v3:
; VECT-DAG: flat_load_dwordx3 v
; VECT-DAG: flat_store_dwordx3 v
; SPLIT-DAG: flat_load_dword v
; SPLIT-DAG: flat_load_dword v
; SPLIT-DAG: flat_load_dword v
; SPLIT-DAG: flat_store_dword v
; SPLIT-DAG: flat_store_dword v
; SPLIT-DAG: flat_store_dword v
; xVECT-DAG: flat_load_dwordx3 v
; xVECT-DAG: flat_store_dwordx3 v
; xSPLIT-DAG: flat_load_dword v
; xSPLIT-DAG: flat_load_dword v
; xSPLIT-DAG: flat_load_dword v
; xSPLIT-DAG: flat_store_dword v
; xSPLIT-DAG: flat_store_dword v
; xSPLIT-DAG: flat_store_dword v
define amdgpu_kernel void @test_flat_misaligned_v3(i32* %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()

View File

@ -1,9 +1,12 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX906
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 %clamp)
; GFX906-LABEL: {{^}}test_llvm_amdgcn_fdot2_clamp
; GCN-LABEL: {{^}}test_llvm_amdgcn_fdot2_clamp
; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_clamp(
float addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
@ -18,8 +21,9 @@ entry:
ret void
}
; GFX906-LABEL: {{^}}test_llvm_amdgcn_fdot2_no_clamp
; GCN-LABEL: {{^}}test_llvm_amdgcn_fdot2_no_clamp
; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot2c_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_no_clamp(
float addrspace(1)* %r,
<2 x half> addrspace(1)* %a,

View File

@ -1,9 +1,12 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
declare i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp)
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2_clamp
; GFX906: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot2_clamp(
i32 addrspace(1)* %r,
<2 x i16> addrspace(1)* %a,
@ -20,6 +23,7 @@ entry:
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2_no_clamp
; GFX906: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot2_no_clamp(
i32 addrspace(1)* %r,
<2 x i16> addrspace(1)* %a,

View File

@ -1,9 +1,12 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_clamp
; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot4_clamp(
i32 addrspace(1)* %r,
<4 x i8> addrspace(1)* %a,
@ -22,6 +25,7 @@ entry:
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_no_clamp
; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot4c_i32_i8_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp(
i32 addrspace(1)* %r,
<4 x i8> addrspace(1)* %a,

View File

@ -1,9 +1,12 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10,GFX1011
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10,GFX1011
declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8_clamp
; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot8_clamp(
i32 addrspace(1)* %r,
<8 x i4> addrspace(1)* %a,
@ -22,6 +25,7 @@ entry:
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8_no_clamp
; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX1011: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot8_no_clamp(
i32 addrspace(1)* %r,
<8 x i4> addrspace(1)* %a,

View File

@ -1,9 +1,12 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
declare i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp)
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_clamp
; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_udot2_clamp(
i32 addrspace(1)* %r,
<2 x i16> addrspace(1)* %a,
@ -20,6 +23,7 @@ entry:
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_no_clamp
; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_udot2_no_clamp(
i32 addrspace(1)* %r,
<2 x i16> addrspace(1)* %a,

View File

@ -1,9 +1,12 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
declare i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4_clamp
; GFX906: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_udot4_clamp(
i32 addrspace(1)* %r,
<4 x i8> addrspace(1)* %a,
@ -22,6 +25,7 @@ entry:
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4_no_clamp
; GFX906: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_udot4_no_clamp(
i32 addrspace(1)* %r,
<4 x i8> addrspace(1)* %a,

View File

@ -1,9 +1,12 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
declare i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8_clamp
; GFX906: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_udot8_clamp(
i32 addrspace(1)* %r,
<8 x i4> addrspace(1)* %a,
@ -22,6 +25,7 @@ entry:
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8_no_clamp
; GFX906: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_udot8_no_clamp(
i32 addrspace(1)* %r,
<8 x i4> addrspace(1)* %a,

View File

@ -0,0 +1,53 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1011 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
v_dot2_f32_f16 v0, v1, v2, v3
// GFX10: encoding: [0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c]
v_dot2_i32_i16 v0, v1, v2, v3
// GFX10: encoding: [0x00,0x40,0x14,0xcc,0x01,0x05,0x0e,0x1c]
v_dot2_u32_u16 v0, v1, v2, v3
// GFX10: encoding: [0x00,0x40,0x15,0xcc,0x01,0x05,0x0e,0x1c]
v_dot4_i32_i8 v0, v1, v2, v3
// GFX10: encoding: [0x00,0x40,0x16,0xcc,0x01,0x05,0x0e,0x1c]
v_dot4_u32_u8 v0, v1, v2, v3
// GFX10: encoding: [0x00,0x40,0x17,0xcc,0x01,0x05,0x0e,0x1c]
v_dot8_i32_i4 v0, v1, v2, v3
// GFX10: encoding: [0x00,0x40,0x18,0xcc,0x01,0x05,0x0e,0x1c]
v_dot8_u32_u4 v0, v1, v2, v3
// GFX10: encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c]
v_dot2c_f32_f16 v5, v1, v2
// GFX10: encoding: [0x01,0x05,0x0a,0x04]
v_dot2c_f32_f16 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX10: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00]
v_dot2c_f32_f16 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
// GFX10: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x04,0x00]
v_dot2c_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX10: encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05]
v_dot2c_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX10: encoding: [0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05]
v_dot4c_i32_i8 v5, v1, v2
// GFX10: encoding: [0x01,0x05,0x0a,0x1a]
v_dot4c_i32_i8 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX10: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00]
v_dot4c_i32_i8 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
// GFX10: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x04,0x00]
v_dot4c_i32_i8 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX10: encoding: [0xe9,0x04,0x0a,0x1a,0x01,0x77,0x39,0x05]
v_dot4c_i32_i8 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX10: encoding: [0xea,0x04,0x0a,0x1a,0x01,0x77,0x39,0x05]

View File

@ -0,0 +1,50 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1011 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s
v_dot8c_i32_i4 v5, v1, v2
// GFX10: error:
v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX10: error:
v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
// GFX10: error:
v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX10: error:
v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX10: error:
s_getreg_b32 s2, hwreg(HW_REG_SHADER_CYCLES)
// GFX10: error:
v_fma_legacy_f32 v0, v1, v2, v3
// GFX10: error:
image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
// GFX10: error:
image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
// GFX10: error:
image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7]
// GFX10: error:
image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16
// GFX10: error:
image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
// GFX10: error:
image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D glc
// GFX10: error:
image_msaa_load v5, v[1:2], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D d16
// GFX10: error:
image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
// GFX10: error:
image_msaa_load v14, [v204,v11,v14,v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
// GFX10: error:

View File

@ -2,38 +2,46 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SICI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX1012 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI -check-prefix=NOSICIVIGFX10 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=NOGFX9 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX10 -check-prefix=NOGFX9 %s
s_dcache_wb
// GFX89: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00]
// GFX1012: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xf4,0x00,0x00,0x00,0x00]
// NOSICI: error: instruction not supported on this GPU
s_dcache_wb_vol
// GFX89: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00]
// NOSICI: error: instruction not supported on this GPU
// NOSICIGFX10: error: instruction not supported on this GPU
s_atc_probe 0x7, s[4:5], s0
// GFX89: s_atc_probe 7, s[4:5], s0 ; encoding: [0xc2,0x01,0x98,0xc0,0x00,0x00,0x00,0x00]
// GFX10: s_atc_probe 7, s[4:5], s0 ; encoding: [0xc2,0x01,0x98,0xf4,0x00,0x00,0x00,0x00]
// NOSICI: error: instruction not supported on this GPU
s_atc_probe 0x0, s[4:5], 0x0
// GFX89: s_atc_probe 0, s[4:5], 0x0 ; encoding: [0x02,0x00,0x9a,0xc0,0x00,0x00,0x00,0x00]
// GFX10: s_atc_probe 0, s[4:5], 0x0 ; encoding: [0x02,0x00,0x98,0xf4,0x00,0x00,0x00,0xfa]
// NOSICI: error: instruction not supported on this GPU
s_atc_probe_buffer 0x1, s[8:11], s0
// GFX89: s_atc_probe_buffer 1, s[8:11], s0 ; encoding: [0x44,0x00,0x9c,0xc0,0x00,0x00,0x00,0x00]
// GFX10: s_atc_probe_buffer 1, s[8:11], s0 ; encoding: [0x44,0x00,0x9c,0xf4,0x00,0x00,0x00,0x00]
// NOSICI: error: instruction not supported on this GPU
s_atc_probe_buffer 0x0, s[8:11], s101
// GFX89: s_atc_probe_buffer 0, s[8:11], s101 ; encoding: [0x04,0x00,0x9c,0xc0,0x65,0x00,0x00,0x00]
// GFX10: s_atc_probe_buffer 0, s[8:11], s101 ; encoding: [0x04,0x00,0x9c,0xf4,0x00,0x00,0x00,0xca]
// NOSICI: error: instruction not supported on this GPU
s_memrealtime s[4:5]
// GFX89: s_memrealtime s[4:5] ; encoding: [0x00,0x01,0x94,0xc0,0x00,0x00,0x00,0x00]
// GFX10: s_memrealtime s[4:5] ; encoding: [0x00,0x01,0x94,0xf4,0x00,0x00,0x00,0x00]
// NOSICI: error: instruction not supported on this GPU
s_memrealtime tba
@ -47,25 +55,30 @@ s_memrealtime tma
// NOGFX9: error: not a valid operand.
s_memrealtime ttmp[0:1]
// VI: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1c,0x94,0xc0,0x00,0x00,0x00,0x00]
// GFX9: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1b,0x94,0xc0,0x00,0x00,0x00,0x00]
// VI: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1c,0x94,0xc0,0x00,0x00,0x00,0x00]
// GFX9: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1b,0x94,0xc0,0x00,0x00,0x00,0x00]
// GFX10: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1b,0x94,0xf4,0x00,0x00,0x00,0x00]
// NOSICI: error: instruction not supported on this GPU
// FIXME: Should error about instruction on GPU
s_store_dword s1, s[2:3], 0xfc
// GFX89: s_store_dword s1, s[2:3], 0xfc ; encoding: [0x41,0x00,0x42,0xc0,0xfc,0x00,0x00,0x00]
// GFX1012: s_store_dword s1, s[2:3], 0xfc ; encoding: [0x41,0x00,0x40,0xf4,0xfc,0x00,0x00,0xfa]
// NOSICI: error: instruction not supported on this GPU
s_store_dword s1, s[2:3], 0xfc glc
// GFX89: s_store_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x43,0xc0,0xfc,0x00,0x00,0x00]
// GFX1012: s_store_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x41,0xf4,0xfc,0x00,0x00,0xfa]
// NOSICI: error: invalid operand for instruction
s_store_dword s1, s[2:3], s4
// GFX89: s_store_dword s1, s[2:3], s4 ; encoding: [0x41,0x00,0x40,0xc0,0x04,0x00,0x00,0x00]
// GFX1012: s_store_dword s1, s[2:3], s4 ; encoding: [0x41,0x00,0x40,0xf4,0x00,0x00,0x00,0x08]
// NOSICI: error: instruction not supported on this GPU
s_store_dword s1, s[2:3], s4 glc
// GFX89: s_store_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x41,0xc0,0x04,0x00,0x00,0x00]
// GFX1012: s_store_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x41,0xf4,0x00,0x00,0x00,0x08]
// NOSICI: error: invalid operand for instruction
s_store_dword tba_lo, s[2:3], s4
@ -91,9 +104,11 @@ s_store_dword tma_hi, s[2:3], s4
// FIXME: Should error on SI instead of silently ignoring glc
s_load_dword s1, s[2:3], 0xfc glc
// GFX89: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x03,0xc0,0xfc,0x00,0x00,0x00]
// GFX10: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x01,0xf4,0xfc,0x00,0x00,0xfa]
s_load_dword s1, s[2:3], s4 glc
// GFX89: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xc0,0x04,0x00,0x00,0x00]
// GFX10: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xf4,0x00,0x00,0x00,0x08]
s_buffer_store_dword s10, s[92:95], m0
// GFX89: s_buffer_store_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x60,0xc0,0x7c,0x00,0x00,0x00]
@ -139,7 +154,7 @@ s_buffer_store_dwordx2 tba, s[92:95], m0 glc
s_buffer_load_dword s10, s[92:95], m0
// GFX89: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x20,0xc0,0x7c,0x00,0x00,0x00]
// SICI: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0x7c,0x5c,0x05,0xc2]
// SICIGFX10: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0x7c,0x5c,0x05,0xc2]
s_buffer_load_dword tba_lo, s[92:95], m0
// VI: s_buffer_load_dword tba_lo, s[92:95], m0 ; encoding: [0x2e,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00]
@ -162,13 +177,15 @@ s_buffer_load_dword tma_hi, s[92:95], m0
// NOGFX9: error: not a valid operand.
s_buffer_load_dword ttmp0, s[92:95], m0
// VI: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1c,0x20,0xc0,0x7c,0x00,0x00,0x00]
// GFX9: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00]
// SICI: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x7c,0x5c,0x38,0xc2]
// VI: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1c,0x20,0xc0,0x7c,0x00,0x00,0x00]
// GFX9: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00]
// SICI: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x7c,0x5c,0x38,0xc2]
// GFX10: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x20,0xf4,0x00,0x00,0x00,0xf8]
s_buffer_load_dwordx2 s[10:11], s[92:95], m0
// GFX89: s_buffer_load_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0xae,0x02,0x24,0xc0,0x7c,0x00,0x00,0x00]
// SICI: s_buffer_load_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0x7c,0x5c,0x45,0xc2]
// SICI: s_buffer_load_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0x7c,0x5c,0x45,0xc2]
// GFX10: s_buffer_load_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0xae,0x02,0x24,0xf4,0x00,0x00,0x00,0xf8]
s_buffer_load_dwordx2 tba, s[92:95], m0
// VI: s_buffer_load_dwordx2 tba, s[92:95], m0 ; encoding: [0x2e,0x1b,0x24,0xc0,0x7c,0x00,0x00,0x00]
@ -181,13 +198,15 @@ s_buffer_load_dwordx2 tma, s[92:95], m0
// NOGFX9: error: not a valid operand.
s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0
// VI: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1c,0x24,0xc0,0x7c,0x00,0x00,0x00]
// GFX9: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1b,0x24,0xc0,0x7c,0x00,0x00,0x00]
// SICI: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x7c,0x5c,0x78,0xc2]
// VI: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1c,0x24,0xc0,0x7c,0x00,0x00,0x00]
// GFX9: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1b,0x24,0xc0,0x7c,0x00,0x00,0x00]
// SICI: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x7c,0x5c,0x78,0xc2]
// GFX10: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1b,0x24,0xf4,0x00,0x00,0x00,0xf8]
// FIXME: Should error on SI instead of silently ignoring glc
s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc
// GFX89: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xc0,0x7c,0x00,0x00,0x00]
// GFX10: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xf4,0x00,0x00,0x00,0xf8]
//===----------------------------------------------------------------------===//
// s_scratch instructions
@ -195,38 +214,47 @@ s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc
s_scratch_load_dword s5, s[2:3], s101
// GFX9: s_scratch_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xc0,0x65,0x00,0x00,0x00]
// GFX1012: s_scratch_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xf4,0x00,0x00,0x00,0xca]
// NOSICIVI: error: instruction not supported on this GPU
s_scratch_load_dword s5, s[2:3], s0 glc
// GFX9: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xc0,0x00,0x00,0x00,0x00]
// GFX1012: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xf4,0x00,0x00,0x00,0x00]
// NOSICIVI: error
s_scratch_load_dwordx2 s[100:101], s[2:3], s0
// GFX9: s_scratch_load_dwordx2 s[100:101], s[2:3], s0 ; encoding: [0x01,0x19,0x18,0xc0,0x00,0x00,0x00,0x00]
// GFX1012: s_scratch_load_dwordx2 s[100:101], s[2:3], s0 ; encoding: [0x01,0x19,0x18,0xf4,0x00,0x00,0x00,0x00]
// NOSICIVI: error: instruction not supported on this GPU
s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc
// GFX9: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x1b,0xc0,0x01,0x00,0x00,0x00]
// GFX1012: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x19,0xf4,0x01,0x00,0x00,0xfa]
// NOSICIVI: error
s_scratch_load_dwordx4 s[20:23], s[4:5], s0
// GFX9: s_scratch_load_dwordx4 s[20:23], s[4:5], s0 ; encoding: [0x02,0x05,0x1c,0xc0,0x00,0x00,0x00,0x00]
// GFX1012: s_scratch_load_dwordx4 s[20:23], s[4:5], s0 ; encoding: [0x02,0x05,0x1c,0xf4,0x00,0x00,0x00,0x00]
// NOSICIVI: error: instruction not supported on this GPU
s_scratch_store_dword s101, s[4:5], s0
// GFX9: s_scratch_store_dword s101, s[4:5], s0 ; encoding: [0x42,0x19,0x54,0xc0,0x00,0x00,0x00,0x00]
// GFX1012: s_scratch_store_dword s101, s[4:5], s0 ; encoding: [0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00]
// NOSICIVI: error: instruction not supported on this GPU
s_scratch_store_dword s1, s[4:5], 0x123 glc
// GFX9: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x57,0xc0,0x23,0x01,0x00,0x00]
// GFX1012: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa]
// NOSICIVI: error
s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc
// GFX9: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xc0,0x65,0x00,0x00,0x00]
// GFX1012: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca]
// NOSICIVI: error
s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc
// GFX9: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xc0,0x00,0x00,0x00,0x00]
// GFX1012: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xf4,0x00,0x00,0x00,0x00]
// NOSICIVI: error
//===----------------------------------------------------------------------===//
@ -235,18 +263,22 @@ s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc
s_dcache_discard s[2:3], s0
// GFX9: s_dcache_discard s[2:3], s0 ; encoding: [0x01,0x00,0xa0,0xc0,0x00,0x00,0x00,0x00]
// GFX1012: s_dcache_discard s[2:3], s0 ; encoding: [0x01,0x00,0xa0,0xf4,0x00,0x00,0x00,0x00]
// NOSICIVI: error: instruction not supported on this GPU
s_dcache_discard s[2:3], 0x0
// GFX9: s_dcache_discard s[2:3], 0x0 ; encoding: [0x01,0x00,0xa2,0xc0,0x00,0x00,0x00,0x00]
// GFX1012: s_dcache_discard s[2:3], 0x0 ; encoding: [0x01,0x00,0xa0,0xf4,0x00,0x00,0x00,0xfa]
// NOSICIVI: error: instruction not supported on this GPU
s_dcache_discard_x2 s[2:3], s101
// GFX9: s_dcache_discard_x2 s[2:3], s101 ; encoding: [0x01,0x00,0xa4,0xc0,0x65,0x00,0x00,0x00]
// GFX1012: s_dcache_discard_x2 s[2:3], s101 ; encoding: [0x01,0x00,0xa4,0xf4,0x00,0x00,0x00,0xca]
// NOSICIVI: error: instruction not supported on this GPU
s_dcache_discard_x2 s[2:3], 0x0
// GFX9: s_dcache_discard_x2 s[2:3], 0x0 ; encoding: [0x01,0x00,0xa6,0xc0,0x00,0x00,0x00,0x00]
// GFX1012: s_dcache_discard_x2 s[2:3], 0x0 ; encoding: [0x01,0x00,0xa4,0xf4,0x00,0x00,0x00,0xfa]
// NOSICIVI: error: instruction not supported on this GPU
//===----------------------------------------------------------------------===//
@ -255,130 +287,162 @@ s_dcache_discard_x2 s[2:3], 0x0
s_atomic_add s5, s[2:3], s101
// GFX9: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_add s5, s[2:3], 0x0
// GFX9: s_atomic_add s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x0a,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_add s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xfa]
// NOSICIVI: error:
s_atomic_add s5, s[2:3], s0 glc
// GFX9: s_atomic_add s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x09,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_add s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x09,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_add_x2 s[10:11], s[2:3], s101
// GFX9: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x88,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_and s5, s[2:3], s101
// GFX9: s_atomic_and s5, s[2:3], s101 ; encoding: [0x41,0x01,0x20,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_and s5, s[2:3], s101 ; encoding: [0x41,0x01,0x20,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_and_x2 s[10:11], s[2:3], 0x0
// GFX9: s_atomic_and_x2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0xa2,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_and_x2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0xa0,0xf6,0x00,0x00,0x00,0xfa]
// NOSICIVI: error:
s_atomic_cmpswap s[10:11], s[2:3], s101
// GFX9: s_atomic_cmpswap s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x04,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_cmpswap s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x04,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_cmpswap s[10:11], s[2:3], 0x0
// GFX9: s_atomic_cmpswap s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x06,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_cmpswap s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x04,0xf6,0x00,0x00,0x00,0xfa]
// NOSICIVI: error:
s_atomic_cmpswap s[10:11], s[2:3], s0 glc
// GFX9: s_atomic_cmpswap s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_cmpswap s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_cmpswap_x2 s[20:23], s[2:3], s101
// GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 ; encoding: [0x01,0x05,0x84,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 ; encoding: [0x01,0x05,0x84,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0
// GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x86,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x84,0xf6,0x00,0x00,0x00,0xfa]
// NOSICIVI: error:
s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc
// GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x85,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x85,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_dec s5, s[2:3], s0 glc
// GFX9: s_atomic_dec s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x31,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_dec s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x31,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_dec_x2 s[10:11], s[2:3], s101
// GFX9: s_atomic_dec_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xb0,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_dec_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xb0,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_inc s5, s[2:3], s0 glc
// GFX9: s_atomic_inc s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x2d,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_inc s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x2d,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_inc_x2 s[10:11], s[2:3], s101
// GFX9: s_atomic_inc_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xac,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_inc_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xac,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_or s5, s[2:3], 0x0
// GFX9: s_atomic_or s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x26,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_or s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x24,0xf6,0x00,0x00,0x00,0xfa]
// NOSICIVI: error:
s_atomic_or_x2 s[10:11], s[2:3], s0 glc
// GFX9: s_atomic_or_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa5,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_or_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa5,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_smax s5, s[2:3], s101
// GFX9: s_atomic_smax s5, s[2:3], s101 ; encoding: [0x41,0x01,0x18,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_smax s5, s[2:3], s101 ; encoding: [0x41,0x01,0x18,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_smax_x2 s[10:11], s[2:3], s0 glc
// GFX9: s_atomic_smax_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x99,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_smax_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x99,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_smin s5, s[2:3], s101
// GFX9: s_atomic_smin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x10,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_smin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x10,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_smin_x2 s[10:11], s[2:3], s0 glc
// GFX9: s_atomic_smin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x91,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_smin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x91,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_sub s5, s[2:3], s101
// GFX9: s_atomic_sub s5, s[2:3], s101 ; encoding: [0x41,0x01,0x0c,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_sub s5, s[2:3], s101 ; encoding: [0x41,0x01,0x0c,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_sub_x2 s[10:11], s[2:3], s0 glc
// GFX9: s_atomic_sub_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x8d,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_sub_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x8d,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_swap s5, s[2:3], s101
// GFX9: s_atomic_swap s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_swap s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_swap_x2 s[10:11], s[2:3], s0 glc
// GFX9: s_atomic_swap_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x81,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_swap_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x81,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_umax s5, s[2:3], s0 glc
// GFX9: s_atomic_umax s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x1d,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_umax s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x1d,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_umax_x2 s[10:11], s[2:3], s101
// GFX9: s_atomic_umax_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x9c,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_umax_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x9c,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_umin s5, s[2:3], s101
// GFX9: s_atomic_umin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_umin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_umin_x2 s[10:11], s[2:3], s0 glc
// GFX9: s_atomic_umin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x95,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_umin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x95,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_atomic_xor s5, s[2:3], s101
// GFX9: s_atomic_xor s5, s[2:3], s101 ; encoding: [0x41,0x01,0x28,0xc2,0x65,0x00,0x00,0x00]
// GFX1012: s_atomic_xor s5, s[2:3], s101 ; encoding: [0x41,0x01,0x28,0xf6,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_atomic_xor_x2 s[10:11], s[2:3], s0 glc
// GFX9: s_atomic_xor_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa9,0xc2,0x00,0x00,0x00,0x00]
// GFX1012: s_atomic_xor_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa9,0xf6,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
//===----------------------------------------------------------------------===//
@ -387,128 +451,160 @@ s_atomic_xor_x2 s[10:11], s[2:3], s0 glc
s_buffer_atomic_add s5, s[4:7], s101
// GFX9: s_buffer_atomic_add s5, s[4:7], s101 ; encoding: [0x42,0x01,0x08,0xc1,0x65,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_add s5, s[4:7], s101 ; encoding: [0x42,0x01,0x08,0xf5,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_buffer_atomic_add s5, s[4:7], 0x0
// GFX9: s_buffer_atomic_add s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x0a,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_add s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x08,0xf5,0x00,0x00,0x00,0xfa]
// NOSICIVI: error:
s_buffer_atomic_add s5, s[4:7], s0 glc
// GFX9: s_buffer_atomic_add s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x09,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_add s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x09,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_add_x2 s[10:11], s[4:7], s0
// GFX9: s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x88,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x88,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_and s101, s[4:7], s0
// GFX9: s_buffer_atomic_and s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_and s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_and_x2 s[10:11], s[8:11], s0
// GFX9: s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 ; encoding: [0x84,0x02,0xa0,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 ; encoding: [0x84,0x02,0xa0,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_cmpswap s[10:11], s[4:7], s0
// GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x04,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x04,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0
// GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x06,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x04,0xf5,0x00,0x00,0x00,0xfa]
// NOSICIVI: error:
s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc
// GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x05,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x05,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101
// GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 ; encoding: [0x02,0x05,0x84,0xc1,0x65,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 ; encoding: [0x02,0x05,0x84,0xf5,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0
// GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x86,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x84,0xf5,0x00,0x00,0x00,0xfa]
// NOSICIVI: error:
s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc
// GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x85,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x85,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_dec s5, s[4:7], s0
// GFX9: s_buffer_atomic_dec s5, s[4:7], s0 ; encoding: [0x42,0x01,0x30,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_dec s5, s[4:7], s0 ; encoding: [0x42,0x01,0x30,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc
// GFX9: s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xb1,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xb1,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_inc s101, s[4:7], s0
// GFX9: s_buffer_atomic_inc s101, s[4:7], s0 ; encoding: [0x42,0x19,0x2c,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_inc s101, s[4:7], s0 ; encoding: [0x42,0x19,0x2c,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0
// GFX9: s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0xae,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0xac,0xf5,0x00,0x00,0x00,0xfa]
// NOSICIVI: error:
s_buffer_atomic_or s5, s[8:11], s0
// GFX9: s_buffer_atomic_or s5, s[8:11], s0 ; encoding: [0x44,0x01,0x24,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_or s5, s[8:11], s0 ; encoding: [0x44,0x01,0x24,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_or_x2 s[10:11], s[96:99], s0
// GFX9: s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 ; encoding: [0xb0,0x02,0xa4,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 ; encoding: [0xb0,0x02,0xa4,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_smax s5, s[4:7], s101
// GFX9: s_buffer_atomic_smax s5, s[4:7], s101 ; encoding: [0x42,0x01,0x18,0xc1,0x65,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_smax s5, s[4:7], s101 ; encoding: [0x42,0x01,0x18,0xf5,0x00,0x00,0x00,0xca]
// NOSICIVI: error:
s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0
// GFX9: s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x98,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x98,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_smin s5, s[4:7], 0x0
// GFX9: s_buffer_atomic_smin s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x12,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_smin s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x10,0xf5,0x00,0x00,0x00,0xfa]
// NOSICIVI: error:
s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0
// GFX9: s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 ; encoding: [0x02,0x03,0x90,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 ; encoding: [0x02,0x03,0x90,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_sub s5, s[4:7], s0 glc
// GFX9: s_buffer_atomic_sub s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x0d,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_sub s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x0d,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0
// GFX9: s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x8c,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x8c,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_swap s5, s[4:7], s0
// GFX9: s_buffer_atomic_swap s5, s[4:7], s0 ; encoding: [0x42,0x01,0x00,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_swap s5, s[4:7], s0 ; encoding: [0x42,0x01,0x00,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc
// GFX9: s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x81,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x81,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_umax s5, s[4:7], s0
// GFX9: s_buffer_atomic_umax s5, s[4:7], s0 ; encoding: [0x42,0x01,0x1c,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_umax s5, s[4:7], s0 ; encoding: [0x42,0x01,0x1c,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc
// GFX9: s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x9d,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x9d,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_umin s5, s[4:7], s0
// GFX9: s_buffer_atomic_umin s5, s[4:7], s0 ; encoding: [0x42,0x01,0x14,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_umin s5, s[4:7], s0 ; encoding: [0x42,0x01,0x14,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc
// GFX9: s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x95,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x95,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_xor s5, s[4:7], s0
// GFX9: s_buffer_atomic_xor s5, s[4:7], s0 ; encoding: [0x42,0x01,0x28,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_xor s5, s[4:7], s0 ; encoding: [0x42,0x01,0x28,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:
s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc
// GFX9: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xc1,0x00,0x00,0x00,0x00]
// GFX1012: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xf5,0x00,0x00,0x00,0x00]
// NOSICIVI: error:

View File

@ -0,0 +1,158 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1011 -show-encoding %s | FileCheck %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s | FileCheck %s
v_dot2c_f32_f16_e32 v5, v1, v2
// CHECK: encoding: [0x01,0x05,0x0a,0x04]
v_dot2c_f32_f16_e32 v255, v1, v2
// CHECK: encoding: [0x01,0x05,0xfe,0x05]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00]
v_dot2c_f32_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0xfe,0x05,0x01,0xe4,0x00,0x00]
v_dot2c_f32_f16_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0xff,0xe4,0x00,0x00]
v_dot2c_f32_f16_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0xfe,0x0b,0x04,0x01,0xe4,0x00,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0x00]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x10]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x30]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x01]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x03]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f]
v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x08,0x00]
v_dot2c_f32_f16_dpp v5, -v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x10,0x00]
v_dot2c_f32_f16_dpp v5, |v1|, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x20,0x00]
v_dot2c_f32_f16_dpp v5, v1, -v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x40,0x00]
v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00]
v_dot4c_i32_i8_e32 v5, v1, v2
// CHECK: encoding: [0x01,0x05,0x0a,0x1a]
v_dot4c_i32_i8_e32 v255, v1, v2
// CHECK: encoding: [0x01,0x05,0xfe,0x1b]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00]
v_dot4c_i32_i8_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0xfe,0x1b,0x01,0xe4,0x00,0x00]
v_dot4c_i32_i8_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0xff,0xe4,0x00,0x00]
v_dot4c_i32_i8_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0xfe,0x0b,0x1a,0x01,0xe4,0x00,0x00]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x1b,0x00,0x00]
v_dot4c_i32_i8_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x40,0x01,0x00]
v_dot4c_i32_i8_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x41,0x01,0x00]
v_dot4c_i32_i8_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x01,0x01,0x00]
v_dot4c_i32_i8_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x0f,0x01,0x00]
v_dot4c_i32_i8_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x11,0x01,0x00]
v_dot4c_i32_i8_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x1f,0x01,0x00]
v_dot4c_i32_i8_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x21,0x01,0x00]
v_dot4c_i32_i8_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x2f,0x01,0x00]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x10]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x30]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x01]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x03]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0
// CHECK: encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x08,0x00]

View File

@ -0,0 +1,53 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1011 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX10 %s
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1012 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX10 %s
# GFX10: v_dot2_f32_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c
# GFX10: v_dot2_i32_i16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x14,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x14,0xcc,0x01,0x05,0x0e,0x1c
# GFX10: v_dot2_u32_u16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x15,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x15,0xcc,0x01,0x05,0x0e,0x1c
# GFX10: v_dot4_i32_i8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x16,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x16,0xcc,0x01,0x05,0x0e,0x1c
# GFX10: v_dot4_u32_u8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x17,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x17,0xcc,0x01,0x05,0x0e,0x1c
# GFX10: v_dot8_i32_i4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x18,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x18,0xcc,0x01,0x05,0x0e,0x1c
# GFX10: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c
# GFX10: v_dot2c_f32_f16_e32 v5, v1, v2
0x01,0x05,0x0a,0x04
# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00
# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x04,0x00
# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05
# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05
# GFX10: v_dot4c_i32_i8_e32 v5, v1, v2
0x01,0x05,0x0a,0x1a
# GFX10: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00
# GFX10: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x04,0x00
# GFX10: v_dot4c_i32_i8_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
0xe9,0x04,0x0a,0x1a,0x01,0x77,0x39,0x05
# GFX10: v_dot4c_i32_i8_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
0xea,0x04,0x0a,0x1a,0x01,0x77,0x39,0x05

View File

@ -0,0 +1,158 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1011 -disassemble -show-encoding < %s | FileCheck %s
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1012 -disassemble -show-encoding < %s | FileCheck %s
# CHECK: v_dot2c_f32_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04]
0x01,0x05,0x0a,0x04
# CHECK: v_dot2c_f32_f16_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x05]
0x01,0x05,0xfe,0x05
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00
# CHECK: v_dot2c_f32_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x05,0x01,0xe4,0x00,0x00]
0xfa,0x04,0xfe,0x05,0x01,0xe4,0x00,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0xff,0xe4,0x00,0x00]
0xfa,0x04,0x0a,0x04,0xff,0xe4,0x00,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xfe,0x0b,0x04,0x01,0xe4,0x00,0x00]
0xfa,0xfe,0x0b,0x04,0x01,0xe4,0x00,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0x00]
0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0x00]
0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0x00]
0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0x00]
0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0x00]
0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0x00]
0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0x00]
0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0x00]
0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0x00]
0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x10]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x10
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x30]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x30
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xf0
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x01]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x01
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x03]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x03
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x0f
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x08,0x00]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x08,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, -v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x10,0x00]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x10,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, |v1|, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x20,0x00]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x20,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, -v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x40,0x00]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x40,0x00
# CHECK: v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00
# CHECK: v_dot4c_i32_i8_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1a]
0x01,0x05,0x0a,0x1a
# CHECK: v_dot4c_i32_i8_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x1b]
0x01,0x05,0xfe,0x1b
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00]
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00
# CHECK: v_dot4c_i32_i8_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x1b,0x01,0xe4,0x00,0x00]
0xfa,0x04,0xfe,0x1b,0x01,0xe4,0x00,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0xff,0xe4,0x00,0x00]
0xfa,0x04,0x0a,0x1a,0xff,0xe4,0x00,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xfe,0x0b,0x1a,0x01,0xe4,0x00,0x00]
0xfa,0xfe,0x0b,0x1a,0x01,0xe4,0x00,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x1b,0x00,0x00]
0xfa,0x04,0x0a,0x1a,0x01,0x1b,0x00,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x40,0x01,0x00]
0xfa,0x04,0x0a,0x1a,0x01,0x40,0x01,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x41,0x01,0x00]
0xfa,0x04,0x0a,0x1a,0x01,0x41,0x01,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x01,0x01,0x00]
0xfa,0x04,0x0a,0x1a,0x01,0x01,0x01,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x0f,0x01,0x00]
0xfa,0x04,0x0a,0x1a,0x01,0x0f,0x01,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x11,0x01,0x00]
0xfa,0x04,0x0a,0x1a,0x01,0x11,0x01,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x1f,0x01,0x00]
0xfa,0x04,0x0a,0x1a,0x01,0x1f,0x01,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x21,0x01,0x00]
0xfa,0x04,0x0a,0x1a,0x01,0x21,0x01,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0x2f,0x01,0x00]
0xfa,0x04,0x0a,0x1a,0x01,0x2f,0x01,0x00
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x10]
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x10
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x30]
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x30
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0]
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0]
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0xf0
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x01]
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x01
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x03]
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x03
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f]
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f]
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x0f
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x08,0x00]
0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x08,0x00

View File

@ -1297,6 +1297,8 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
};