[AMDGPU] Support for gfx940 fp8 conversions

Differential Revision: https://reviews.llvm.org/D129902
This commit is contained in:
Stanislav Mekhanoshin 2022-07-15 13:20:08 -07:00
parent 30e53b8c03
commit 9fa5a6b7e8
15 changed files with 902 additions and 7 deletions

View File

@ -346,5 +346,14 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x16_bf16, "V16fV4sV8sV16fiIiIi",
TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x64_i8, "V4iV2iV4iV4iiIiIi", "nc", "mai-insts")
TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_32x32x32_i8, "V16iV2iV4iV16iiIiIi", "nc", "mai-insts")
// FP8/BF8 conversion builtins; every entry requires the "fp8-insts" feature.
// The last argument of each builtin is a selector carrying the 'I' marker in
// its type string (NOTE(review): per Clang's builtin type encoding this means
// an integer constant expression is required - confirm). Shapes:
//   cvt_f32_{bf8,fp8}:    float  (int src, const int byte_sel)
//   cvt_pk_f32_{bf8,fp8}: float2 (int src, const bool word_sel)
//   cvt_pk_{bf8,fp8}_f32: int    (float a, float b, int old, const bool word_sel)
//   cvt_sr_{bf8,fp8}_f32: int    (float a, int b, int old, const int byte_sel)
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_bf8, "fiIi", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8, "fiIi", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_bf8, "V2fiIb", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_fp8, "V2fiIb", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f32, "iffiIb", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
#undef BUILTIN
#undef TARGET_BUILTIN

View File

@ -250,6 +250,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
break;
case GK_GFX940:
Features["gfx940-insts"] = true;
Features["fp8-insts"] = true;
LLVM_FALLTHROUGH;
case GK_GFX90A:
Features["gfx90a-insts"] = true;

View File

@ -64,7 +64,7 @@
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"

View File

@ -0,0 +1,60 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX940
typedef float v2f __attribute__((ext_vector_type(2)));
// CHECK-GFX940-LABEL: @test_cvt_f32_bf8
// CHECK-GFX940: call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
// The builtin yields a float, so the result is stored through a float
// pointer. Storing through `int*` would insert an unrelated float-to-int
// conversion between the call under test and the store.
void test_cvt_f32_bf8(global float* out, int a)
{
  // Byte selector 0: convert byte 0 of `a`.
  *out = __builtin_amdgcn_cvt_f32_bf8(a, 0);
}
// CHECK-GFX940-LABEL: @test_cvt_f32_fp8
// CHECK-GFX940: call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
// The builtin yields a float, so the result is stored through a float
// pointer. Storing through `int*` would insert an unrelated float-to-int
// conversion between the call under test and the store.
void test_cvt_f32_fp8(global float* out, int a)
{
  // Byte selector 1: convert byte 1 of `a`.
  *out = __builtin_amdgcn_cvt_f32_fp8(a, 1);
}
// CHECK-GFX940-LABEL: @test_cvt_pk_f32_bf8
// CHECK-GFX940: call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
// Packed bf8 -> float2 conversion with the word selector cleared.
void test_cvt_pk_f32_bf8(global v2f* out, int a) {
  const v2f unpacked = __builtin_amdgcn_cvt_pk_f32_bf8(a, false);
  out[0] = unpacked;
}
// CHECK-GFX940-LABEL: @test_cvt_pk_f32_fp8
// CHECK-GFX940: call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
// Packed fp8 -> float2 conversion with the word selector set.
void test_cvt_pk_f32_fp8(global v2f* out, int a) {
  const v2f unpacked = __builtin_amdgcn_cvt_pk_f32_fp8(a, true);
  out[0] = unpacked;
}
// CHECK-GFX940-LABEL: @test_cvt_pk_bf8_f32
// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %a, float %b, i32 %old, i1 false)
// Two floats -> packed bf8 merged into `old`, word selector cleared.
void test_cvt_pk_bf8_f32(global int* out, int old, float a, float b) {
  const int packed = __builtin_amdgcn_cvt_pk_bf8_f32(a, b, old, false);
  out[0] = packed;
}
// CHECK-GFX940-LABEL: @test_cvt_pk_fp8_f32
// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %a, float %b, i32 %old, i1 true)
// Two floats -> packed fp8 merged into `old`, word selector set.
void test_cvt_pk_fp8_f32(global int* out, int old, float a, float b) {
  const int packed = __builtin_amdgcn_cvt_pk_fp8_f32(a, b, old, true);
  out[0] = packed;
}
// CHECK-GFX940-LABEL: @test_cvt_sr_bf8_f32
// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %a, i32 %b, i32 %old, i32 2)
// float -> bf8 "sr" conversion merged into `old`, byte selector 2.
void test_cvt_sr_bf8_f32(global int* out, int old, float a, int b) {
  const int merged = __builtin_amdgcn_cvt_sr_bf8_f32(a, b, old, 2);
  out[0] = merged;
}
// CHECK-GFX940-LABEL: @test_cvt_sr_fp8_f32
// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %a, i32 %b, i32 %old, i32 3)
// float -> fp8 "sr" conversion merged into `old`, byte selector 3.
void test_cvt_sr_fp8_f32(global int* out, int old, float a, int b) {
  const int merged = __builtin_amdgcn_cvt_sr_fp8_f32(a, b, old, 3);
  out[0] = merged;
}

View File

@ -2320,6 +2320,58 @@ def int_amdgcn_smfmac_f32_32x32x16_bf16 : AMDGPUMSmfmacIntrinsic<llvm_v16f32_ty,
def int_amdgcn_smfmac_i32_16x16x64_i8 : AMDGPUMSmfmacIntrinsic<llvm_v4i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>;
def int_amdgcn_smfmac_i32_32x32x32_i8 : AMDGPUMSmfmacIntrinsic<llvm_v16i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>;
// llvm.amdgcn.cvt.f32.bf8 float vdst, int srcA, imm byte_sel [0..3]
// byte_sel selects byte from srcA.
// Operand 1 (byte_sel) is an ImmArg, so it must be a compile-time constant.
// The intrinsic is declared IntrNoMem and IntrWillReturn.
def int_amdgcn_cvt_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_bf8">,
Intrinsic<[llvm_float_ty],
[llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
// llvm.amdgcn.cvt.f32.fp8 float vdst, int srcA, imm byte_sel [0..3]
// Same signature as the bf8 form: i32 source plus an i32 byte_sel that is an
// ImmArg (operand 1) and therefore must be a compile-time constant.
def int_amdgcn_cvt_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_fp8">,
Intrinsic<[llvm_float_ty],
[llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
// llvm.amdgcn.cvt.pk.f32.bf8 float2 vdst, int srcA, imm word_sel
// word_sel = 1 selects 2 high bytes, 0 selects 2 low bytes.
// word_sel is an i1 ImmArg (operand 1); the result is a <2 x float>.
def int_amdgcn_cvt_pk_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_bf8">,
Intrinsic<[llvm_v2f32_ty],
[llvm_i32_ty, llvm_i1_ty],
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
// llvm.amdgcn.cvt.pk.f32.fp8 float2 vdst, int srcA, imm word_sel.
// Same signature as the bf8 form: word_sel is an i1 ImmArg (operand 1) and
// the result is a <2 x float>.
def int_amdgcn_cvt_pk_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_fp8">,
Intrinsic<[llvm_v2f32_ty],
[llvm_i32_ty, llvm_i1_ty],
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
// llvm.amdgcn.cvt.pk.bf8.f32 int vdst, float srcA, float srcB, int old, imm word_sel
// word_sel = 1 selects 2 high bytes in the vdst, 0 selects 2 low bytes.
// `old` is passed in as an i32 operand alongside the two float sources;
// word_sel is an i1 ImmArg (operand 3).
def int_amdgcn_cvt_pk_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f32">,
Intrinsic<[llvm_i32_ty],
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
// llvm.amdgcn.cvt.pk.fp8.f32 int vdst, float srcA, float srcB, int old, imm word_sel
// Same signature as the bf8 form: two float sources, an i32 `old` value and
// an i1 word_sel that is an ImmArg (operand 3).
def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">,
Intrinsic<[llvm_i32_ty],
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
// llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
// byte_sel selects byte to write into vdst.
// byte_sel is an i32 ImmArg (operand 3).
// NOTE(review): "sr" presumably stands for stochastic rounding, with srcB
// supplying the random bits - confirm against the ISA documentation.
def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">,
Intrinsic<[llvm_i32_ty],
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
// llvm.amdgcn.cvt.sr.fp8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
// Same signature as the bf8 form: float source, i32 srcB, i32 `old` and an
// i32 byte_sel that is an ImmArg (operand 3).
def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">,
Intrinsic<[llvm_i32_ty],
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.

View File

@ -585,6 +585,12 @@ def FeatureMAIInsts : SubtargetFeature<"mai-insts",
"Has mAI instructions"
>;
// Subtarget feature "fp8-insts": sets the HasFP8Insts subtarget field to true
// on targets that provide the fp8/bf8 instructions.
def FeatureFP8Insts : SubtargetFeature<"fp8-insts",
"HasFP8Insts",
"true",
"Has fp8 and bf8 instructions"
>;
def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"HasPkFmacF16Inst",
"true",
@ -1124,6 +1130,7 @@ def FeatureISAVersion9_4_0 : FeatureSet<
Feature64BitDPP,
FeaturePackedFP32Ops,
FeatureMAIInsts,
FeatureFP8Insts,
FeaturePkFmacF16Inst,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
@ -1704,6 +1711,9 @@ def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">,
// Each predicate below pairs a codegen check (a call on the Subtarget) with
// an assembler predicate keyed on the corresponding subtarget feature.
def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">,
AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>;
// True when the subtarget has the fp8/bf8 conversion instructions.
def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
AssemblerPredicate<(all_of FeatureFP8Insts)>;
def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>;

View File

@ -8257,6 +8257,12 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
Inst.addOperand(Inst.getOperand(0));
}
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
assert(!IsPacked);
Inst.addOperand(Inst.getOperand(0));
@ -9061,12 +9067,27 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
// v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
switch (BasicInstType) {
case SIInstrFlags::VOP1:
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::clamp) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyClampSI, 0);
}
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::omod) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOModSI, 0);
}
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::dst_sel) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
}
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::dst_unused) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTySdwaDstUnused,
DstUnused::UNUSED_PRESERVE);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
break;

View File

@ -145,6 +145,7 @@ protected:
bool HasDot7Insts = false;
bool HasDot8Insts = false;
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasPkFmacF16Inst = false;
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
@ -721,6 +722,10 @@ public:
return HasMAIInsts;
}
/// \returns the value of the HasFP8Insts subtarget flag.
bool hasFP8Insts() const { return HasFP8Insts; }
/// \returns the value of the HasPkFmacF16Inst subtarget flag.
bool hasPkFmacF16Inst() const { return HasPkFmacF16Inst; }

View File

@ -324,7 +324,8 @@ class isFloatType<ValueType SrcVT> {
// XXX - do v2i16 instructions?
class isIntType<ValueType SrcVT> {
bit ret = !or(!eq(SrcVT.Value, i16.Value),
bit ret = !or(!eq(SrcVT.Value, i8.Value),
!eq(SrcVT.Value, i16.Value),
!eq(SrcVT.Value, i32.Value),
!eq(SrcVT.Value, i64.Value),
!eq(SrcVT.Value, v4i16.Value),
@ -1411,6 +1412,10 @@ class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
// Assembler operand match classes for SDWA integer sources, by operand size.
def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
// 32-bit variant with its own match-class name that is parsed with
// parseRegOrImm instead of the parser method inherited from the base class.
// NOTE(review): presumably this means no input-modifier syntax is accepted
// on the operand - confirm against AMDGPUAsmParser.
def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
let Name = "SDWAWithBin32InputMods";
let ParserMethod = "parseRegOrImm";
}
class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
InputMods <matchClass> {
@ -1419,6 +1424,7 @@ class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
// SDWA integer input-modifier operands, one per match class.
def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
// Operand built on Bin32SDWAInputModsMatchClass.
def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;
def IntVRegInputModsMatchClass : AsmOperandClass {
let Name = "VRegWithIntInputMods";

View File

@ -499,6 +499,59 @@ let SubtargetPredicate = isGFX9Only in {
defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only
// Profile for the f8 -> f32 conversions: an i32 source producing a result of
// type `vt`. SDWA and SDWA9 forms are enabled, but the SDWA operand list only
// carries src0 modifiers, src0, clamp, omod and src0_sel. No dst_sel or
// dst_unused operands are declared, the asm string omits them, and
// EmitDstSel is cleared.
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
let HasExtSDWA = 1;
let HasExtSDWA9 = 1;
let HasExt = 1;
let DstRCSDWA = getVALUDstForVT<vt>.ret;
let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0,
clampmod:$clamp, omod:$omod, src0_sel:$src0_sel);
let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel
let AsmSDWA9 = AsmSDWA;
let EmitDstSel = 0;
}
// Concrete profiles: scalar f32 result and packed v2f32 result.
def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>;
def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>;
// VOP1 pseudos for the f8 -> f32 conversions. They are gated on HasFP8Insts,
// marked as not raising FP exceptions, and scheduled as WriteFloatCvt.
let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
SchedRW = [WriteFloatCvt] in {
defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>;
defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>;
defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>;
defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>;
}
// Selects `node(src, index)` for one constant byte selector `index`.
// A zero index picks the plain e32 instruction. A non-zero index picks the
// SDWA instruction with src0_modifiers, clamp and omod all zero, passing
// `index` through unchanged as the src0_sel operand.
class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
(f32 (node i32:$src, index)),
!if (index,
(inst_sdwa 0, $src, 0, 0, index),
(inst_e32 $src))
>;
// One pattern per byte selector value (0..3) for each of the fp8 and bf8
// single-value conversion intrinsics.
foreach Index = [0, 1, 2, 3] in {
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index,
V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>;
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index,
V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>;
}
// Selects the packed `node(src, index)` for one constant word selector.
// A zero index picks the plain e32 instruction. A non-zero index picks the
// SDWA instruction with src0_sel fixed to SDWA.WORD_1 and the modifier,
// clamp and omod operands all zero.
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
(v2f32 (node i32:$src, index)),
!if (index,
(inst_sdwa 0, $src, 0, 0, SDWA.WORD_1),
(inst_e32 $src))
>;
// One pattern per word selector value for each packed intrinsic.
// NOTE(review): -1 is presumably how the i1 immediate `true` is spelled in a
// selection pattern - confirm.
foreach Index = [0, -1] in {
def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
}
let SubtargetPredicate = isGFX10Plus in {
defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;
@ -1106,11 +1159,36 @@ multiclass VOP1_Real_gfx9 <bits<10> op> {
}
// Real (encoded) GFX9 definitions for a VOP1 opcode whose SDWA form has no
// dst_sel operand. Produces the e32/e64 reals, plus an SDWA9 real and a DPP
// real guarded by the profile's HasExtSDWA9 / HasExtDPP flags.
// NOTE(review): `foreach _ = BoolToList<...>.ret` is presumably the usual
// "define only if the flag is set" idiom - confirm BoolToList's definition.
multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> {
let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
defm NAME : VOP1_Real_e32e64_vi <op>;
}
foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
// Encoding bits 42-40 are hard-wired to 6 rather than taken from an operand.
// NOTE(review): presumably this is the dst_sel field forced to DWORD -
// confirm against the SDWA9 encoding.
let Inst{42-40} = 6;
}
foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
def _dpp_gfx9 :
VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;
// f8 -> f32 conversions occupy VOP1 opcodes 0x54-0x57 and are additionally
// gated on HasFP8Insts.
let OtherPredicates = [HasFP8Insts] in {
defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
}
//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//

View File

@ -481,6 +481,30 @@ def shl_0_to_4 : PatFrag<
}];
}
// VOP3 op_sel profile for the packed f32 -> f8 conversions (i32 result from
// two f32 sources). The op_sel operand list is overridden to add a VGPR
// $vdst_in input after the two sources. Clamp and the VOP3 DPP extension are
// disabled.
def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> {
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
VGPR_32:$vdst_in, op_sel0:$op_sel);
let HasClamp = 0;
let HasExtVOP3DPP = 0;
}
// VOP3 op_sel profile for the "sr" f32 -> f8 conversions. A third source is
// declared in the operand list ($src2_modifiers plus a VGPR $src2) while
// HasSrc2 is cleared and HasSrc2Mods is set. The generated op_sel asm string
// has ", $src2_modifiers" stripped, so the extra modifiers operand is not
// printed. Clamp and the VOP3 DPP extension are disabled.
def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
VOP3_OPSEL> {
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
FP32InputMods:$src2_modifiers, VGPR_32:$src2,
op_sel0:$op_sel);
let HasClamp = 0;
let HasSrc2 = 0;
let HasSrc2Mods = 1;
let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
getAsmVOP3OpSel<3, HasClamp,
HasSrc0FloatMods, HasSrc1FloatMods,
HasSrc2FloatMods>.ret);
let HasExtVOP3DPP = 0;
}
let SubtargetPredicate = isGFX9Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@ -526,6 +550,43 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
let SubtargetPredicate = isGFX940Plus in
defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
// VOP3 pseudos for the f32 -> f8 conversions. They are gated on HasFP8Insts,
// marked as not raising FP exceptions, and scheduled as WriteFloatCvt.
let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
SchedRW = [WriteFloatCvt] in {
// The destination is tied to $vdst_in, which is excluded from the encoding.
let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in {
defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>;
defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>;
}
// These instructions have non-standard use of op_sel. In particular they are
// using op_sel bits 2 and 3 while only having two sources. Therefore dummy
// src2 is used to hold the op_sel value.
// The destination is tied to that dummy $src2, which is excluded from the
// encoding.
let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in {
defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>;
defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>;
}
}
// Selects the packed f32 -> f8 intrinsic for one constant word selector.
// Output operands, in order: src0_modifiers, $src0, src1_modifiers (0),
// $src1, $old, op_sel. A non-zero index sets DST_OP_SEL in src0_modifiers
// and passes OP_SEL_0 as the op_sel operand; a zero index leaves both at 0.
class Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat<
(i32 (node f32:$src0, f32:$src1, i32:$old, index)),
(inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, !if(index, SRCMODS.OP_SEL_0, 0))
>;
// Selects the "sr" f32 -> f8 intrinsic for one constant 2-bit byte selector.
// Output operands, in order: src0_modifiers, $src0, src1_modifiers (0),
// $src1, src2_modifiers, $old, op_sel.
//   index bit 0 -> OP_SEL_0 in src2_modifiers.
//   index bit 1 -> DST_OP_SEL in src0_modifiers and OP_SEL_0 as op_sel.
class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat<
(i32 (node f32:$src0, i32:$src1, i32:$old, index)),
(inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1,
!if(index{0}, SRCMODS.OP_SEL_0, 0), $old, !if(index{1}, SRCMODS.OP_SEL_0, 0))
>;
// Packed conversions: one pattern per word selector value.
// NOTE(review): -1 is presumably how the i1 immediate `true` is spelled in a
// selection pattern - confirm.
foreach Index = [0, -1] in {
def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>;
def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>;
}
// "sr" conversions: one pattern per byte selector value (0..3).
foreach Index = [0, 1, 2, 3] in {
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>;
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>;
}
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
// This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
(ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
@ -1161,6 +1222,13 @@ multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
}
// Real VI-family encoding for a VOP3 op_sel instruction that additionally
// drives encoding bit 13 from bit 2 of src2_modifiers, so that op_sel(2) is
// encoded from the modifiers of the third source operand.
multiclass VOP3OpSel_Real_gfx9_forced_opsel2<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
let Inst{13} = src2_modifiers{2}; // op_sel(2)
}
}
multiclass VOP3Interp_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
@ -1352,3 +1420,10 @@ defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>;
defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;
defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
// f32 -> f8 conversions occupy VOP3 opcodes 0x2a2-0x2a5 and are gated on
// HasFP8Insts. The "sr" forms use the variant that also encodes op_sel(2).
let OtherPredicates = [HasFP8Insts] in {
defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
}

View File

@ -0,0 +1,190 @@
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare float @llvm.amdgcn.cvt.f32.bf8(i32, i32)
declare float @llvm.amdgcn.cvt.f32.fp8(i32, i32)
declare <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32, i1)
declare <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32, i1)
declare i32 @llvm.amdgcn.cvt.pk.bf8.f32(float, float, i32, i1)
declare i32 @llvm.amdgcn.cvt.pk.fp8.f32(float, float, i32, i1)
declare i32 @llvm.amdgcn.cvt.sr.bf8.f32(float, i32, i32, i32)
declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)
; Single-value f8 -> f32 conversions, one function per byte selector.
; Selector 0 is expected to select the e32 form with no trailing operands;
; selectors 1-3 are expected to select the SDWA form with the matching
; src0_sel:BYTE_<n>.
; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte0:
; GCN: v_cvt_f32_bf8_e32 v0, v0{{$}}
define float @test_cvt_f32_bf8_byte0(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
ret float %ret
}
; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte1:
; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
define float @test_cvt_f32_bf8_byte1(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1)
ret float %ret
}
; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte2:
; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_2
define float @test_cvt_f32_bf8_byte2(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 2)
ret float %ret
}
; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte3:
; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_3
define float @test_cvt_f32_bf8_byte3(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 3)
ret float %ret
}
; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte0:
; GCN: v_cvt_f32_fp8_e32 v0, v0{{$}}
define float @test_cvt_f32_fp8_byte0(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
ret float %ret
}
; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte1:
; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
define float @test_cvt_f32_fp8_byte1(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
ret float %ret
}
; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte2:
; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_2
define float @test_cvt_f32_fp8_byte2(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 2)
ret float %ret
}
; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte3:
; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_3
define float @test_cvt_f32_fp8_byte3(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 3)
ret float %ret
}
; Packed f8 -> <2 x float> conversions, one function per word selector.
; `i1 false` is expected to select the e32 form; `i1 true` is expected to
; select the SDWA form with src0_sel:WORD_1.
; GCN-LABEL: {{^}}test_cvt_pk_f32_bf8_word0:
; GCN: v_cvt_pk_f32_bf8_e32 v[0:1], v0{{$}}
define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
ret <2 x float> %ret
}
; GCN-LABEL: {{^}}test_cvt_pk_f32_bf8_word1:
; GCN: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 true)
ret <2 x float> %ret
}
; GCN-LABEL: {{^}}test_cvt_pk_f32_fp8_word0:
; GCN: v_cvt_pk_f32_fp8_e32 v[0:1], v0{{$}}
define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 false)
ret <2 x float> %ret
}
; GCN-LABEL: {{^}}test_cvt_pk_f32_fp8_word1:
; GCN: v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1
define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
ret <2 x float> %ret
}
; Packed f32 -> f8 conversions, one function per word selector.
; The conversion is expected to write the register that holds %old (v2),
; followed by a copy into the return register v0. `i1 true` is expected to
; show up as op_sel:[0,0,1]; `i1 false` as no op_sel at all.
; GCN-LABEL: {{^}}test_cvt_pk_bf8_f32_word0:
; GCN: v_cvt_pk_bf8_f32 v2, v0, v1{{$}}
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 false)
ret i32 %ret
}
; GCN-LABEL: {{^}}test_cvt_pk_bf8_f32_word1:
; GCN: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 true)
ret i32 %ret
}
; GCN-LABEL: {{^}}test_cvt_pk_fp8_f32_word0:
; GCN: v_cvt_pk_fp8_f32 v2, v0, v1{{$}}
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 false)
ret i32 %ret
}
; GCN-LABEL: {{^}}test_cvt_pk_fp8_f32_word1:
; GCN: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 true)
ret i32 %ret
}
; "sr" f32 -> f8 conversions, one function per byte selector.
; The conversion is expected to write the register that holds %old (v2),
; followed by a copy into the return register v0. Expected op_sel per
; selector: 0 -> none, 1 -> [0,0,1,0], 2 -> [0,0,0,1], 3 -> [0,0,1,1].
; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte0:
; GCN: v_cvt_sr_bf8_f32 v2, v0, v1{{$}}
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_sr_bf8_f32_byte0(float %x, i32 %r, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 0)
ret i32 %ret
}
; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte1:
; GCN: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0]
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 1)
ret i32 %ret
}
; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte2:
; GCN: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1]
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 2)
ret i32 %ret
}
; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte3:
; GCN: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1]
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 3)
ret i32 %ret
}
; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte0:
; GCN: v_cvt_sr_fp8_f32 v2, v0, v1{{$}}
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 0)
ret i32 %ret
}
; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte1:
; GCN: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0]
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 1)
ret i32 %ret
}
; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte2:
; GCN: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1]
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 2)
ret i32 %ret
}
; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte3:
; GCN: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1]
; GCN: v_mov_b32_e32 v0, v2
define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 3)
ret i32 %ret
}

View File

@ -401,3 +401,211 @@ buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1
// GFX10: error: instruction not supported on this GPU
// GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xaa,0x02,0x7e]
v_cvt_f32_bf8 v1, s3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xaa,0x02,0x7e]
v_cvt_f32_bf8 v1, 3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xab,0x02,0x7e]
v_cvt_f32_bf8 v1, v3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_bf8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00]
v_cvt_f32_bf8 v1, s3 src0_sel:BYTE_1
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff]
v_cvt_f32_bf8 v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_bf8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08]
v_cvt_f32_bf8 v1, s3 mul:2
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_bf8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00]
v_cvt_f32_bf8 v1, s3 clamp mul:2 src0_sel:BYTE_1
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_bf8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00]
v_cvt_f32_bf8 v1, s3 clamp
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xa8,0x02,0x7e]
v_cvt_f32_fp8 v1, s3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_fp8_e32 v1, 3 ; encoding: [0x83,0xa8,0x02,0x7e]
v_cvt_f32_fp8 v1, 3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xa9,0x02,0x7e]
v_cvt_f32_fp8 v1, v3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_fp8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00]
v_cvt_f32_fp8 v1, s3 src0_sel:BYTE_1
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff]
v_cvt_f32_fp8 v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_fp8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08]
v_cvt_f32_fp8 v1, s3 mul:2
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_fp8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00]
v_cvt_f32_fp8 v1, s3 clamp mul:2 src0_sel:BYTE_1
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_fp8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00]
v_cvt_f32_fp8 v1, s3 clamp
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_f32_fp8_sdwa v1, 3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00]
v_cvt_f32_fp8 v1, 3 src0_sel:BYTE_1
// v_cvt_pk_f32_bf8: one source, destination is a VGPR pair. Covers SGPR,
// inline-constant and VGPR sources (_e32), a WORD_1 source select (_sdwa),
// quad_perm (_dpp), and mul/clamp output modifiers (_e64, or _sdwa when
// combined with src0_sel). Other targets must reject the mnemonic.
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xae,0x04,0x7e]
v_cvt_pk_f32_bf8 v[2:3], s3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xae,0x04,0x7e]
v_cvt_pk_f32_bf8 v[2:3], 3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xaf,0x04,0x7e]
v_cvt_pk_f32_bf8 v[2:3], v3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00]
v_cvt_pk_f32_bf8 v[2:3], s3 src0_sel:WORD_1
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff]
v_cvt_pk_f32_bf8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08]
v_cvt_pk_f32_bf8 v[2:3], s3 mul:2
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00]
v_cvt_pk_f32_bf8 v[2:3], s3 clamp mul:2 src0_sel:WORD_1
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00]
v_cvt_pk_f32_bf8 v[2:3], s3 clamp
// v_cvt_pk_f32_fp8: one source, destination is a VGPR pair. Covers SGPR,
// inline-constant and VGPR sources (_e32), WORD_1 source select with an SGPR
// and with an inline constant (_sdwa), quad_perm (_dpp), and mul/clamp output
// modifiers (_e64, or _sdwa when combined with src0_sel).
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xac,0x04,0x7e]
v_cvt_pk_f32_fp8 v[2:3], s3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xac,0x04,0x7e]
v_cvt_pk_f32_fp8 v[2:3], 3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xad,0x04,0x7e]
v_cvt_pk_f32_fp8 v[2:3], v3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00]
v_cvt_pk_f32_fp8 v[2:3], s3 src0_sel:WORD_1
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00]
v_cvt_pk_f32_fp8 v[2:3], 3 src0_sel:WORD_1
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff]
v_cvt_pk_f32_fp8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08]
v_cvt_pk_f32_fp8 v[2:3], s3 mul:2
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00]
v_cvt_pk_f32_fp8 v[2:3], s3 clamp mul:2 src0_sel:WORD_1
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00]
v_cvt_pk_f32_fp8 v[2:3], s3 clamp
// v_cvt_pk_bf8_f32: two sources, one VGPR destination, printed without an
// encoding suffix. Covers plain VGPR sources, neg/abs source modifiers, an
// SGPR plus inline constant, and op_sel:[0,0,1] (sets 0x40 in the second
// encoding byte).
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00]
v_cvt_pk_bf8_f32 v1, v2, v3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20]
v_cvt_pk_bf8_f32 v1, -v2, |v3|
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00]
v_cvt_pk_bf8_f32 v1, s2, 3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00]
v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1]
// v_cvt_pk_fp8_f32: two sources, one VGPR destination, printed without an
// encoding suffix. Covers plain VGPR sources, neg/abs source modifiers, an
// SGPR plus inline constant, and op_sel:[0,0,1] (sets 0x40 in the second
// encoding byte).
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00]
v_cvt_pk_fp8_f32 v1, v2, v3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20]
v_cvt_pk_fp8_f32 v1, -v2, |v3|
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00]
v_cvt_pk_fp8_f32 v1, s2, 3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00]
v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1]
// v_cvt_sr_bf8_f32: two sources, one VGPR destination. Covers plain VGPR
// sources, an SGPR plus inline constant, four-element op_sel values
// ([0,0,1,1] -> 0x60 and [0,0,0,1] -> 0x40 in the second encoding byte), and
// combined neg+abs modifiers on the first source.
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f32 v1, v2, v3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_sr_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00]
v_cvt_sr_bf8_f32 v1, s2, 3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1]
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00]
v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1]
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_sr_bf8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20]
v_cvt_sr_bf8_f32 v1, -|s2|, v3
// v_cvt_sr_fp8_f32: two sources, one VGPR destination. Covers plain VGPR
// sources, an SGPR plus inline constant, four-element op_sel values
// ([0,0,1,1] -> 0x60 and [0,0,0,1] -> 0x40 in the second encoding byte), and
// combined neg+abs modifiers on the first source.
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f32 v1, v2, v3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_sr_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00]
v_cvt_sr_fp8_f32 v1, s2, 3
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1]
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00]
v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1]
// NOT-GFX940: error: instruction not supported on this GPU
// GFX940: v_cvt_sr_fp8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20]
v_cvt_sr_fp8_f32 v1, -|s2|, v3

View File

@ -72,6 +72,30 @@ buffer_wbl2 scc
// Negative test: this op_sel / op_sel_hi combination on v_dot2_u32_u16 must be
// diagnosed instead of assembled. In this file the expected diagnostic
// follows the instruction it belongs to.
v_dot2_u32_u16 v0, 1, v0, s2 op_sel:[0,1,0,1] op_sel_hi:[0,0,1,1]
// GFX940: error: invalid op_sel operand
// Negative tests for the fp8/bf8 conversions. Each instruction is followed by
// the diagnostic it must produce:
//  - sext() on the source of v_cvt_f32_fp8 / v_cvt_pk_f32_bf8 (SDWA syntax),
//  - a neg modifier on the second source of v_cvt_sr_bf8_f32 / v_cvt_sr_fp8_f32,
//  - clamp and mul:2 output modifiers on v_cvt_sr_fp8_f32 and v_cvt_pk_fp8_f32.
v_cvt_f32_fp8 v1, sext(v3) src0_sel:BYTE_1
// GFX940: error: not a valid operand.
v_cvt_pk_f32_bf8 v[2:3], sext(v3) src0_sel:BYTE_1
// GFX940: error: not a valid operand.
v_cvt_sr_bf8_f32 v1, v2, -v3
// GFX940: error: not a valid operand.
v_cvt_sr_fp8_f32 v1, v2, -v3
// GFX940: error: not a valid operand.
v_cvt_sr_fp8_f32 v1, v2, v3 clamp
// GFX940: error: invalid operand for instruction
v_cvt_sr_fp8_f32 v1, v2, v3 mul:2
// GFX940: error: invalid operand for instruction
v_cvt_pk_fp8_f32 v1, v2, v3 clamp
// GFX940: error: invalid operand for instruction
v_cvt_pk_fp8_f32 v1, v2, v3 mul:2
// GFX940: error: invalid operand for instruction
// Negative test: reading HW_REG_FLAT_SCR_LO through hwreg() must be reported
// as an unsupported hardware register for this target.
s_getreg_b32 s1, hwreg(HW_REG_FLAT_SCR_LO)
// GFX940: error: specified hardware register is not supported on this GPU

View File

@ -263,3 +263,159 @@
# Disassembly vector: the bytes below must decode to buffer_atomic_min_f64
# with the sc1 modifier printed.
# GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03
# v_cvt_f32_bf8 disassembly vectors. Each byte sequence must decode to the
# text on the check line above it: 4-byte _e32 forms (SGPR, inline constant,
# VGPR source) and 8-byte _sdwa, _dpp and _e64 forms with mul/clamp modifiers.
# GFX940: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xaa,0x02,0x7e]
0x03,0xaa,0x02,0x7e
# GFX940: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xaa,0x02,0x7e]
0x83,0xaa,0x02,0x7e
# GFX940: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xab,0x02,0x7e]
0x03,0xab,0x02,0x7e
# GFX940: v_cvt_f32_bf8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00]
0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00
# GFX940: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff]
0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff
# GFX940: v_cvt_f32_bf8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08]
0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08
# GFX940: v_cvt_f32_bf8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00]
0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00
# GFX940: v_cvt_f32_bf8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00]
0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00
# v_cvt_f32_fp8 disassembly vectors: _e32 forms (SGPR, inline constant, VGPR
# source), _sdwa with BYTE_1 source select (SGPR and inline-constant source),
# _dpp with quad_perm, and _e64 with mul/clamp modifiers.
# GFX940: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xa8,0x02,0x7e]
0x03,0xa8,0x02,0x7e
# GFX940: v_cvt_f32_fp8_e32 v1, 3 ; encoding: [0x83,0xa8,0x02,0x7e]
0x83,0xa8,0x02,0x7e
# GFX940: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xa9,0x02,0x7e]
0x03,0xa9,0x02,0x7e
# GFX940: v_cvt_f32_fp8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00]
0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00
# GFX940: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff]
0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff
# GFX940: v_cvt_f32_fp8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08]
0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08
# GFX940: v_cvt_f32_fp8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00]
0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00
# GFX940: v_cvt_f32_fp8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00]
0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00
# GFX940: v_cvt_f32_fp8_sdwa v1, 3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00]
0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00
# v_cvt_pk_f32_bf8 disassembly vectors (destination prints as a VGPR pair):
# _e32 forms, _sdwa with WORD_1 source select, _dpp with quad_perm, and _e64
# with mul/clamp modifiers.
# GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xae,0x04,0x7e]
0x03,0xae,0x04,0x7e
# GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xae,0x04,0x7e]
0x83,0xae,0x04,0x7e
# GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xaf,0x04,0x7e]
0x03,0xaf,0x04,0x7e
# GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00]
0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00
# GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff]
0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff
# GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08]
0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08
# GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00]
0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00
# GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00]
0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00
# v_cvt_pk_f32_fp8 disassembly vectors (destination prints as a VGPR pair):
# _e32 forms, _sdwa with WORD_1 source select (SGPR and inline-constant
# source), _dpp with quad_perm, and _e64 with mul/clamp modifiers.
# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xac,0x04,0x7e]
0x03,0xac,0x04,0x7e
# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xac,0x04,0x7e]
0x83,0xac,0x04,0x7e
# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xad,0x04,0x7e]
0x03,0xad,0x04,0x7e
# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00]
0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00
# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00]
0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00
# GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff]
0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff
# GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08]
0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08
# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00]
0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00
# GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00]
0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00
# v_cvt_pk_bf8_f32 disassembly vectors: plain VGPR sources, neg/abs source
# modifiers, SGPR plus inline constant, and op_sel:[0,0,1].
# GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00]
0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00
# GFX940: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20]
0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20
# GFX940: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00]
0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00
# GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00]
0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00
# v_cvt_pk_fp8_f32 disassembly vectors: plain VGPR sources, neg/abs source
# modifiers, SGPR plus inline constant, and op_sel:[0,0,1].
# GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00]
0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00
# GFX940: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20]
0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20
# GFX940: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00]
0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00
# GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00]
0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00
# v_cvt_sr_bf8_f32 disassembly vectors: plain VGPR sources, SGPR plus inline
# constant, four-element op_sel values, and neg+abs on the first source.
# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00]
0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00
# GFX940: v_cvt_sr_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00]
0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00
# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00]
0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00
# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00]
0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00
# GFX940: v_cvt_sr_bf8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20]
0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20
# v_cvt_sr_fp8_f32 disassembly vectors: plain VGPR sources, SGPR plus inline
# constant, four-element op_sel values, and neg+abs on the first source.
# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00]
0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00
# GFX940: v_cvt_sr_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00]
0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00
# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00]
0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00
# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00]
0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00
# GFX940: v_cvt_sr_fp8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20]
0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20