forked from OSchip/llvm-project
[AMDGPU] Support for gfx940 fp8 conversions
Differential Revision: https://reviews.llvm.org/D129902
This commit is contained in:
parent
30e53b8c03
commit
9fa5a6b7e8
|
@ -346,5 +346,14 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x16_bf16, "V16fV4sV8sV16fiIiIi",
|
|||
TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x64_i8, "V4iV2iV4iV4iiIiIi", "nc", "mai-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_32x32x32_i8, "V16iV2iV4iV16iiIiIi", "nc", "mai-insts")
|
||||
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_bf8, "fiIi", "nc", "fp8-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8, "fiIi", "nc", "fp8-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_bf8, "V2fiIb", "nc", "fp8-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_fp8, "V2fiIb", "nc", "fp8-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f32, "iffiIb", "nc", "fp8-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
|
||||
|
||||
#undef BUILTIN
|
||||
#undef TARGET_BUILTIN
|
||||
|
|
|
@ -250,6 +250,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
|
|||
break;
|
||||
case GK_GFX940:
|
||||
Features["gfx940-insts"] = true;
|
||||
Features["fp8-insts"] = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case GK_GFX90A:
|
||||
Features["gfx90a-insts"] = true;
|
||||
|
|
|
@ -64,7 +64,7 @@
|
|||
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX940
|
||||
|
||||
typedef float v2f __attribute__((ext_vector_type(2)));
|
||||
|
||||
// CHECK-GFX940-LABEL: @test_cvt_f32_bf8
|
||||
// CHECK-GFX940: call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
|
||||
void test_cvt_f32_bf8(global int* out, int a)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_f32_bf8(a, 0);
|
||||
}
|
||||
|
||||
// CHECK-GFX940-LABEL: @test_cvt_f32_fp8
|
||||
// CHECK-GFX940: call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
|
||||
void test_cvt_f32_fp8(global int* out, int a)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_f32_fp8(a, 1);
|
||||
}
|
||||
|
||||
// CHECK-GFX940-LABEL: @test_cvt_pk_f32_bf8
|
||||
// CHECK-GFX940: call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
|
||||
void test_cvt_pk_f32_bf8(global v2f* out, int a)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_pk_f32_bf8(a, false);
|
||||
}
|
||||
|
||||
// CHECK-GFX940-LABEL: @test_cvt_pk_f32_fp8
|
||||
// CHECK-GFX940: call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
|
||||
void test_cvt_pk_f32_fp8(global v2f* out, int a)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_pk_f32_fp8(a, true);
|
||||
}
|
||||
|
||||
// CHECK-GFX940-LABEL: @test_cvt_pk_bf8_f32
|
||||
// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %a, float %b, i32 %old, i1 false)
|
||||
void test_cvt_pk_bf8_f32(global int* out, int old, float a, float b)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_pk_bf8_f32(a, b, old, false);
|
||||
}
|
||||
|
||||
// CHECK-GFX940-LABEL: @test_cvt_pk_fp8_f32
|
||||
// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %a, float %b, i32 %old, i1 true)
|
||||
void test_cvt_pk_fp8_f32(global int* out, int old, float a, float b)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_pk_fp8_f32(a, b, old, true);
|
||||
}
|
||||
|
||||
// CHECK-GFX940-LABEL: @test_cvt_sr_bf8_f32
|
||||
// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %a, i32 %b, i32 %old, i32 2)
|
||||
void test_cvt_sr_bf8_f32(global int* out, int old, float a, int b)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_sr_bf8_f32(a, b, old, 2);
|
||||
}
|
||||
|
||||
// CHECK-GFX940-LABEL: @test_cvt_sr_fp8_f32
|
||||
// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %a, i32 %b, i32 %old, i32 3)
|
||||
void test_cvt_sr_fp8_f32(global int* out, int old, float a, int b)
|
||||
{
|
||||
*out = __builtin_amdgcn_cvt_sr_fp8_f32(a, b, old, 3);
|
||||
}
|
|
@ -2320,6 +2320,58 @@ def int_amdgcn_smfmac_f32_32x32x16_bf16 : AMDGPUMSmfmacIntrinsic<llvm_v16f32_ty,
|
|||
def int_amdgcn_smfmac_i32_16x16x64_i8 : AMDGPUMSmfmacIntrinsic<llvm_v4i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>;
|
||||
def int_amdgcn_smfmac_i32_32x32x32_i8 : AMDGPUMSmfmacIntrinsic<llvm_v16i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>;
|
||||
|
||||
// llvm.amdgcn.cvt.f32.bf8 float vdst, int srcA, imm byte_sel [0..3]
|
||||
// byte_sel selects byte from srcA.
|
||||
def int_amdgcn_cvt_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_bf8">,
|
||||
Intrinsic<[llvm_float_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
|
||||
|
||||
// llvm.amdgcn.cvt.f32.fp8 float vdst, int srcA, imm byte_sel [0..3]
|
||||
def int_amdgcn_cvt_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_fp8">,
|
||||
Intrinsic<[llvm_float_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
|
||||
|
||||
// llvm.amdgcn.cvt.pk.f32.bf8 float2 vdst, int srcA, imm word_sel
|
||||
// word_sel = 1 selects 2 high bytes, 0 selects 2 low bytes.
|
||||
def int_amdgcn_cvt_pk_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_bf8">,
|
||||
Intrinsic<[llvm_v2f32_ty],
|
||||
[llvm_i32_ty, llvm_i1_ty],
|
||||
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
|
||||
|
||||
// llvm.amdgcn.cvt.pk.f32.fp8 float2 vdst, int srcA, imm word_sel.
|
||||
def int_amdgcn_cvt_pk_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_fp8">,
|
||||
Intrinsic<[llvm_v2f32_ty],
|
||||
[llvm_i32_ty, llvm_i1_ty],
|
||||
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
|
||||
|
||||
// llvm.amdgcn.cvt.pk.bf8.f32 int vdst, float srcA, float srcB, int old, imm word_sel
|
||||
// word_sel = 1 selects 2 high bytes in the vdst, 0 selects 2 low bytes.
|
||||
def int_amdgcn_cvt_pk_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f32">,
|
||||
Intrinsic<[llvm_i32_ty],
|
||||
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
|
||||
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
|
||||
|
||||
// llvm.amdgcn.cvt.pk.fp8.f32 int vdst, float srcA, float srcB, int old, imm word_sel
|
||||
def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">,
|
||||
Intrinsic<[llvm_i32_ty],
|
||||
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
|
||||
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
|
||||
|
||||
// llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
|
||||
// byte_sel selects byte to write into vdst.
|
||||
def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">,
|
||||
Intrinsic<[llvm_i32_ty],
|
||||
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
|
||||
|
||||
// llvm.amdgcn.cvt.sr.fp8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
|
||||
def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">,
|
||||
Intrinsic<[llvm_i32_ty],
|
||||
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Special Intrinsics for backend internal use only. No frontend
|
||||
// should emit calls to these.
|
||||
|
|
|
@ -585,6 +585,12 @@ def FeatureMAIInsts : SubtargetFeature<"mai-insts",
|
|||
"Has mAI instructions"
|
||||
>;
|
||||
|
||||
def FeatureFP8Insts : SubtargetFeature<"fp8-insts",
|
||||
"HasFP8Insts",
|
||||
"true",
|
||||
"Has fp8 and bf8 instructions"
|
||||
>;
|
||||
|
||||
def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
|
||||
"HasPkFmacF16Inst",
|
||||
"true",
|
||||
|
@ -1124,6 +1130,7 @@ def FeatureISAVersion9_4_0 : FeatureSet<
|
|||
Feature64BitDPP,
|
||||
FeaturePackedFP32Ops,
|
||||
FeatureMAIInsts,
|
||||
FeatureFP8Insts,
|
||||
FeaturePkFmacF16Inst,
|
||||
FeatureAtomicFaddRtnInsts,
|
||||
FeatureAtomicFaddNoRtnInsts,
|
||||
|
@ -1704,6 +1711,9 @@ def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">,
|
|||
def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">,
|
||||
AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>;
|
||||
|
||||
def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
|
||||
AssemblerPredicate<(all_of FeatureFP8Insts)>;
|
||||
|
||||
def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
|
||||
AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>;
|
||||
|
||||
|
|
|
@ -8257,6 +8257,12 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
|
|||
|
||||
const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
|
||||
|
||||
if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
|
||||
Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
|
||||
Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
|
||||
Inst.addOperand(Inst.getOperand(0));
|
||||
}
|
||||
|
||||
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
|
||||
assert(!IsPacked);
|
||||
Inst.addOperand(Inst.getOperand(0));
|
||||
|
@ -9061,12 +9067,27 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
|
|||
// v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
|
||||
switch (BasicInstType) {
|
||||
case SIInstrFlags::VOP1:
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
|
||||
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
|
||||
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
|
||||
AMDGPU::OpName::clamp) != -1) {
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx,
|
||||
AMDGPUOperand::ImmTyClampSI, 0);
|
||||
}
|
||||
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
|
||||
AMDGPU::OpName::omod) != -1) {
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx,
|
||||
AMDGPUOperand::ImmTyOModSI, 0);
|
||||
}
|
||||
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
|
||||
AMDGPU::OpName::dst_sel) != -1) {
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx,
|
||||
AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
|
||||
}
|
||||
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
|
||||
AMDGPU::OpName::dst_unused) != -1) {
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx,
|
||||
AMDGPUOperand::ImmTySdwaDstUnused,
|
||||
DstUnused::UNUSED_PRESERVE);
|
||||
}
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
|
||||
break;
|
||||
|
||||
|
|
|
@ -145,6 +145,7 @@ protected:
|
|||
bool HasDot7Insts = false;
|
||||
bool HasDot8Insts = false;
|
||||
bool HasMAIInsts = false;
|
||||
bool HasFP8Insts = false;
|
||||
bool HasPkFmacF16Inst = false;
|
||||
bool HasAtomicFaddRtnInsts = false;
|
||||
bool HasAtomicFaddNoRtnInsts = false;
|
||||
|
@ -721,6 +722,10 @@ public:
|
|||
return HasMAIInsts;
|
||||
}
|
||||
|
||||
bool hasFP8Insts() const {
|
||||
return HasFP8Insts;
|
||||
}
|
||||
|
||||
bool hasPkFmacF16Inst() const {
|
||||
return HasPkFmacF16Inst;
|
||||
}
|
||||
|
|
|
@ -324,7 +324,8 @@ class isFloatType<ValueType SrcVT> {
|
|||
|
||||
// XXX - do v2i16 instructions?
|
||||
class isIntType<ValueType SrcVT> {
|
||||
bit ret = !or(!eq(SrcVT.Value, i16.Value),
|
||||
bit ret = !or(!eq(SrcVT.Value, i8.Value),
|
||||
!eq(SrcVT.Value, i16.Value),
|
||||
!eq(SrcVT.Value, i32.Value),
|
||||
!eq(SrcVT.Value, i64.Value),
|
||||
!eq(SrcVT.Value, v4i16.Value),
|
||||
|
@ -1411,6 +1412,10 @@ class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
|
|||
|
||||
def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
|
||||
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
|
||||
def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
|
||||
let Name = "SDWAWithBin32InputMods";
|
||||
let ParserMethod = "parseRegOrImm";
|
||||
}
|
||||
|
||||
class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
|
||||
InputMods <matchClass> {
|
||||
|
@ -1419,6 +1424,7 @@ class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
|
|||
|
||||
def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
|
||||
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
|
||||
def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;
|
||||
|
||||
def IntVRegInputModsMatchClass : AsmOperandClass {
|
||||
let Name = "VRegWithIntInputMods";
|
||||
|
|
|
@ -499,6 +499,59 @@ let SubtargetPredicate = isGFX9Only in {
|
|||
defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
|
||||
} // End SubtargetPredicate = isGFX9Only
|
||||
|
||||
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
|
||||
let HasExtSDWA = 1;
|
||||
let HasExtSDWA9 = 1;
|
||||
let HasExt = 1;
|
||||
let DstRCSDWA = getVALUDstForVT<vt>.ret;
|
||||
let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0,
|
||||
clampmod:$clamp, omod:$omod, src0_sel:$src0_sel);
|
||||
let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel
|
||||
let AsmSDWA9 = AsmSDWA;
|
||||
let EmitDstSel = 0;
|
||||
}
|
||||
|
||||
def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>;
|
||||
def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>;
|
||||
|
||||
let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
|
||||
SchedRW = [WriteFloatCvt] in {
|
||||
defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>;
|
||||
defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>;
|
||||
defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>;
|
||||
defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>;
|
||||
}
|
||||
|
||||
class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
|
||||
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
|
||||
(f32 (node i32:$src, index)),
|
||||
!if (index,
|
||||
(inst_sdwa 0, $src, 0, 0, index),
|
||||
(inst_e32 $src))
|
||||
>;
|
||||
|
||||
foreach Index = [0, 1, 2, 3] in {
|
||||
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index,
|
||||
V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>;
|
||||
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index,
|
||||
V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>;
|
||||
}
|
||||
|
||||
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
|
||||
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
|
||||
(v2f32 (node i32:$src, index)),
|
||||
!if (index,
|
||||
(inst_sdwa 0, $src, 0, 0, SDWA.WORD_1),
|
||||
(inst_e32 $src))
|
||||
>;
|
||||
|
||||
foreach Index = [0, -1] in {
|
||||
def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
|
||||
V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
|
||||
def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
|
||||
V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = isGFX10Plus in {
|
||||
defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;
|
||||
|
||||
|
@ -1106,11 +1159,36 @@ multiclass VOP1_Real_gfx9 <bits<10> op> {
|
|||
|
||||
}
|
||||
|
||||
multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> {
|
||||
let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
|
||||
defm NAME : VOP1_Real_e32e64_vi <op>;
|
||||
}
|
||||
|
||||
foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
|
||||
def _sdwa_gfx9 :
|
||||
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
|
||||
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
|
||||
let Inst{42-40} = 6;
|
||||
}
|
||||
|
||||
foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
|
||||
def _dpp_gfx9 :
|
||||
VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
|
||||
VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
|
||||
}
|
||||
|
||||
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
|
||||
|
||||
let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in
|
||||
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;
|
||||
|
||||
let OtherPredicates = [HasFP8Insts] in {
|
||||
defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
|
||||
defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
|
||||
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
|
||||
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX10
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -481,6 +481,30 @@ def shl_0_to_4 : PatFrag<
|
|||
}];
|
||||
}
|
||||
|
||||
def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> {
|
||||
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
|
||||
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
|
||||
VGPR_32:$vdst_in, op_sel0:$op_sel);
|
||||
let HasClamp = 0;
|
||||
let HasExtVOP3DPP = 0;
|
||||
}
|
||||
|
||||
def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
|
||||
VOP3_OPSEL> {
|
||||
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
|
||||
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
|
||||
FP32InputMods:$src2_modifiers, VGPR_32:$src2,
|
||||
op_sel0:$op_sel);
|
||||
let HasClamp = 0;
|
||||
let HasSrc2 = 0;
|
||||
let HasSrc2Mods = 1;
|
||||
let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
|
||||
getAsmVOP3OpSel<3, HasClamp,
|
||||
HasSrc0FloatMods, HasSrc1FloatMods,
|
||||
HasSrc2FloatMods>.ret);
|
||||
let HasExtVOP3DPP = 0;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = isGFX9Plus in {
|
||||
let isCommutable = 1, isReMaterializable = 1 in {
|
||||
defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
|
||||
|
@ -526,6 +550,43 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
|
|||
let SubtargetPredicate = isGFX940Plus in
|
||||
defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
|
||||
|
||||
let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
|
||||
SchedRW = [WriteFloatCvt] in {
|
||||
let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in {
|
||||
defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>;
|
||||
defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>;
|
||||
}
|
||||
|
||||
// These instructions have non-standard use of op_sel. In particular they are
|
||||
// using op_sel bits 2 and 3 while only having two sources. Therefore dummy
|
||||
// src2 is used to hold the op_sel value.
|
||||
let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in {
|
||||
defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>;
|
||||
defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>;
|
||||
}
|
||||
}
|
||||
|
||||
class Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat<
|
||||
(i32 (node f32:$src0, f32:$src1, i32:$old, index)),
|
||||
(inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, !if(index, SRCMODS.OP_SEL_0, 0))
|
||||
>;
|
||||
|
||||
class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat<
|
||||
(i32 (node f32:$src0, i32:$src1, i32:$old, index)),
|
||||
(inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1,
|
||||
!if(index{0}, SRCMODS.OP_SEL_0, 0), $old, !if(index{1}, SRCMODS.OP_SEL_0, 0))
|
||||
>;
|
||||
|
||||
foreach Index = [0, -1] in {
|
||||
def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>;
|
||||
def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>;
|
||||
}
|
||||
|
||||
foreach Index = [0, 1, 2, 3] in {
|
||||
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>;
|
||||
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>;
|
||||
}
|
||||
|
||||
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
|
||||
// This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
|
||||
(ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
|
||||
|
@ -1161,6 +1222,13 @@ multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
|
|||
VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
|
||||
}
|
||||
|
||||
multiclass VOP3OpSel_Real_gfx9_forced_opsel2<bits<10> op> {
|
||||
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
|
||||
VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
|
||||
let Inst{13} = src2_modifiers{2}; // op_sel(2)
|
||||
}
|
||||
}
|
||||
|
||||
multiclass VOP3Interp_Real_vi<bits<10> op> {
|
||||
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
|
||||
VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
|
||||
|
@ -1352,3 +1420,10 @@ defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>;
|
|||
defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;
|
||||
|
||||
defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
|
||||
|
||||
let OtherPredicates = [HasFP8Insts] in {
|
||||
defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
|
||||
defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
|
||||
defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
|
||||
defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,190 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare float @llvm.amdgcn.cvt.f32.bf8(i32, i32)
|
||||
declare float @llvm.amdgcn.cvt.f32.fp8(i32, i32)
|
||||
declare <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32, i1)
|
||||
declare <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32, i1)
|
||||
declare i32 @llvm.amdgcn.cvt.pk.bf8.f32(float, float, i32, i1)
|
||||
declare i32 @llvm.amdgcn.cvt.pk.fp8.f32(float, float, i32, i1)
|
||||
declare i32 @llvm.amdgcn.cvt.sr.bf8.f32(float, i32, i32, i32)
|
||||
declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte0:
|
||||
; GCN: v_cvt_f32_bf8_e32 v0, v0{{$}}
|
||||
define float @test_cvt_f32_bf8_byte0(i32 %a) {
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte1:
|
||||
; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
|
||||
define float @test_cvt_f32_bf8_byte1(i32 %a) {
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte2:
|
||||
; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_2
|
||||
define float @test_cvt_f32_bf8_byte2(i32 %a) {
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 2)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte3:
|
||||
; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_3
|
||||
define float @test_cvt_f32_bf8_byte3(i32 %a) {
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 3)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte0:
|
||||
; GCN: v_cvt_f32_fp8_e32 v0, v0{{$}}
|
||||
define float @test_cvt_f32_fp8_byte0(i32 %a) {
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte1:
|
||||
; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
|
||||
define float @test_cvt_f32_fp8_byte1(i32 %a) {
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte2:
|
||||
; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_2
|
||||
define float @test_cvt_f32_fp8_byte2(i32 %a) {
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 2)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte3:
|
||||
; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_3
|
||||
define float @test_cvt_f32_fp8_byte3(i32 %a) {
|
||||
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 3)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_pk_f32_bf8_word0:
|
||||
; GCN: v_cvt_pk_f32_bf8_e32 v[0:1], v0{{$}}
|
||||
define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
|
||||
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
|
||||
ret <2 x float> %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_pk_f32_bf8_word1:
|
||||
; GCN: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
|
||||
define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
|
||||
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 true)
|
||||
ret <2 x float> %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_pk_f32_fp8_word0:
|
||||
; GCN: v_cvt_pk_f32_fp8_e32 v[0:1], v0{{$}}
|
||||
define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
|
||||
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 false)
|
||||
ret <2 x float> %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_pk_f32_fp8_word1:
|
||||
; GCN: v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1
|
||||
define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
|
||||
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
|
||||
ret <2 x float> %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_pk_bf8_f32_word0:
|
||||
; GCN: v_cvt_pk_bf8_f32 v2, v0, v1{{$}}
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 false)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_pk_bf8_f32_word1:
|
||||
; GCN: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 true)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_pk_fp8_f32_word0:
|
||||
; GCN: v_cvt_pk_fp8_f32 v2, v0, v1{{$}}
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 false)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_pk_fp8_f32_word1:
|
||||
; GCN: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 true)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte0:
|
||||
; GCN: v_cvt_sr_bf8_f32 v2, v0, v1{{$}}
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_sr_bf8_f32_byte0(float %x, i32 %r, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 0)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte1:
|
||||
; GCN: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0]
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 1)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte2:
|
||||
; GCN: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1]
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 2)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_sr_bf8_f32_byte3:
|
||||
; GCN: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1]
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 3)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte0:
|
||||
; GCN: v_cvt_sr_fp8_f32 v2, v0, v1{{$}}
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 0)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte1:
|
||||
; GCN: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0]
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 1)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte2:
|
||||
; GCN: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1]
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 2)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_cvt_sr_fp8_f32_byte3:
|
||||
; GCN: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1]
|
||||
; GCN: v_mov_b32_e32 v0, v2
|
||||
define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
|
||||
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 3)
|
||||
ret i32 %ret
|
||||
}
|
|
@ -401,3 +401,211 @@ buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1
|
|||
// GFX10: error: instruction not supported on this GPU
|
||||
// GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
|
||||
buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xaa,0x02,0x7e]
|
||||
v_cvt_f32_bf8 v1, s3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xaa,0x02,0x7e]
|
||||
v_cvt_f32_bf8 v1, 3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xab,0x02,0x7e]
|
||||
v_cvt_f32_bf8 v1, v3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_bf8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00]
|
||||
v_cvt_f32_bf8 v1, s3 src0_sel:BYTE_1
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff]
|
||||
v_cvt_f32_bf8 v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_bf8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08]
|
||||
v_cvt_f32_bf8 v1, s3 mul:2
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_bf8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00]
|
||||
v_cvt_f32_bf8 v1, s3 clamp mul:2 src0_sel:BYTE_1
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_bf8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00]
|
||||
v_cvt_f32_bf8 v1, s3 clamp
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xa8,0x02,0x7e]
|
||||
v_cvt_f32_fp8 v1, s3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_fp8_e32 v1, 3 ; encoding: [0x83,0xa8,0x02,0x7e]
|
||||
v_cvt_f32_fp8 v1, 3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xa9,0x02,0x7e]
|
||||
v_cvt_f32_fp8 v1, v3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_fp8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00]
|
||||
v_cvt_f32_fp8 v1, s3 src0_sel:BYTE_1
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff]
|
||||
v_cvt_f32_fp8 v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_fp8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08]
|
||||
v_cvt_f32_fp8 v1, s3 mul:2
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_fp8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00]
|
||||
v_cvt_f32_fp8 v1, s3 clamp mul:2 src0_sel:BYTE_1
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_fp8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00]
|
||||
v_cvt_f32_fp8 v1, s3 clamp
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_f32_fp8_sdwa v1, 3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00]
|
||||
v_cvt_f32_fp8 v1, 3 src0_sel:BYTE_1
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xae,0x04,0x7e]
|
||||
v_cvt_pk_f32_bf8 v[2:3], s3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xae,0x04,0x7e]
|
||||
v_cvt_pk_f32_bf8 v[2:3], 3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xaf,0x04,0x7e]
|
||||
v_cvt_pk_f32_bf8 v[2:3], v3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00]
|
||||
v_cvt_pk_f32_bf8 v[2:3], s3 src0_sel:WORD_1
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff]
|
||||
v_cvt_pk_f32_bf8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08]
|
||||
v_cvt_pk_f32_bf8 v[2:3], s3 mul:2
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00]
|
||||
v_cvt_pk_f32_bf8 v[2:3], s3 clamp mul:2 src0_sel:WORD_1
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00]
|
||||
v_cvt_pk_f32_bf8 v[2:3], s3 clamp
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xac,0x04,0x7e]
|
||||
v_cvt_pk_f32_fp8 v[2:3], s3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xac,0x04,0x7e]
|
||||
v_cvt_pk_f32_fp8 v[2:3], 3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xad,0x04,0x7e]
|
||||
v_cvt_pk_f32_fp8 v[2:3], v3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00]
|
||||
v_cvt_pk_f32_fp8 v[2:3], s3 src0_sel:WORD_1
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00]
|
||||
v_cvt_pk_f32_fp8 v[2:3], 3 src0_sel:WORD_1
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff]
|
||||
v_cvt_pk_f32_fp8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08]
|
||||
v_cvt_pk_f32_fp8 v[2:3], s3 mul:2
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00]
|
||||
v_cvt_pk_f32_fp8 v[2:3], s3 clamp mul:2 src0_sel:WORD_1
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00]
|
||||
v_cvt_pk_f32_fp8 v[2:3], s3 clamp
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pk_bf8_f32 v1, v2, v3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20]
|
||||
v_cvt_pk_bf8_f32 v1, -v2, |v3|
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00]
|
||||
v_cvt_pk_bf8_f32 v1, s2, 3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1]
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pk_fp8_f32 v1, v2, v3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20]
|
||||
v_cvt_pk_fp8_f32 v1, -v2, |v3|
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00]
|
||||
v_cvt_pk_fp8_f32 v1, s2, 3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1]
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_sr_bf8_f32 v1, v2, v3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_sr_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00]
|
||||
v_cvt_sr_bf8_f32 v1, s2, 3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1]
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1]
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_sr_bf8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20]
|
||||
v_cvt_sr_bf8_f32 v1, -|s2|, v3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_sr_fp8_f32 v1, v2, v3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_sr_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00]
|
||||
v_cvt_sr_fp8_f32 v1, s2, 3
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1]
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1]
|
||||
|
||||
// NOT-GFX940: error: instruction not supported on this GPU
|
||||
// GFX940: v_cvt_sr_fp8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20]
|
||||
v_cvt_sr_fp8_f32 v1, -|s2|, v3
|
||||
|
|
|
@ -72,6 +72,30 @@ buffer_wbl2 scc
|
|||
v_dot2_u32_u16 v0, 1, v0, s2 op_sel:[0,1,0,1] op_sel_hi:[0,0,1,1]
|
||||
// GFX940: error: invalid op_sel operand
|
||||
|
||||
// Rejected operand forms for the fp8/bf8 conversions. Each instruction is
// followed by the diagnostic the assembler is expected to report:
//   - sext() on the source when src0_sel is used,
//   - a negated second source on v_cvt_sr_bf8_f32 / v_cvt_sr_fp8_f32,
//   - clamp or mul:2 on v_cvt_sr_fp8_f32 and v_cvt_pk_fp8_f32.
v_cvt_f32_fp8 v1, sext(v3) src0_sel:BYTE_1
|
||||
// GFX940: error: not a valid operand.
|
||||
|
||||
v_cvt_pk_f32_bf8 v[2:3], sext(v3) src0_sel:BYTE_1
|
||||
// GFX940: error: not a valid operand.
|
||||
|
||||
v_cvt_sr_bf8_f32 v1, v2, -v3
|
||||
// GFX940: error: not a valid operand.
|
||||
|
||||
v_cvt_sr_fp8_f32 v1, v2, -v3
|
||||
// GFX940: error: not a valid operand.
|
||||
|
||||
v_cvt_sr_fp8_f32 v1, v2, v3 clamp
|
||||
// GFX940: error: invalid operand for instruction
|
||||
|
||||
v_cvt_sr_fp8_f32 v1, v2, v3 mul:2
|
||||
// GFX940: error: invalid operand for instruction
|
||||
|
||||
v_cvt_pk_fp8_f32 v1, v2, v3 clamp
|
||||
// GFX940: error: invalid operand for instruction
|
||||
|
||||
v_cvt_pk_fp8_f32 v1, v2, v3 mul:2
|
||||
// GFX940: error: invalid operand for instruction
|
||||
|
||||
s_getreg_b32 s1, hwreg(HW_REG_FLAT_SCR_LO)
|
||||
// GFX940: error: specified hardware register is not supported on this GPU
|
||||
|
||||
|
|
|
@ -263,3 +263,159 @@
|
|||
|
||||
# GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
|
||||
0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03
|
||||
|
||||
# GFX940: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xaa,0x02,0x7e]
|
||||
0x03,0xaa,0x02,0x7e
|
||||
|
||||
# GFX940: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xaa,0x02,0x7e]
|
||||
0x83,0xaa,0x02,0x7e
|
||||
|
||||
# GFX940: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xab,0x02,0x7e]
|
||||
0x03,0xab,0x02,0x7e
|
||||
|
||||
# GFX940: v_cvt_f32_bf8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00]
|
||||
0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00
|
||||
|
||||
# GFX940: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff]
|
||||
0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff
|
||||
|
||||
# GFX940: v_cvt_f32_bf8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08]
|
||||
0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08
|
||||
|
||||
# GFX940: v_cvt_f32_bf8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00]
|
||||
0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00
|
||||
|
||||
# GFX940: v_cvt_f32_bf8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00]
|
||||
0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00
|
||||
|
||||
# GFX940: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xa8,0x02,0x7e]
|
||||
0x03,0xa8,0x02,0x7e
|
||||
|
||||
# GFX940: v_cvt_f32_fp8_e32 v1, 3 ; encoding: [0x83,0xa8,0x02,0x7e]
|
||||
0x83,0xa8,0x02,0x7e
|
||||
|
||||
# GFX940: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xa9,0x02,0x7e]
|
||||
0x03,0xa9,0x02,0x7e
|
||||
|
||||
# GFX940: v_cvt_f32_fp8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00]
|
||||
0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00
|
||||
|
||||
# GFX940: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff]
|
||||
0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff
|
||||
|
||||
# GFX940: v_cvt_f32_fp8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08]
|
||||
0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08
|
||||
|
||||
# GFX940: v_cvt_f32_fp8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00]
|
||||
0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00
|
||||
|
||||
# GFX940: v_cvt_f32_fp8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00]
|
||||
0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00
|
||||
|
||||
# GFX940: v_cvt_f32_fp8_sdwa v1, 3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00]
|
||||
0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xae,0x04,0x7e]
|
||||
0x03,0xae,0x04,0x7e
|
||||
|
||||
# GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xae,0x04,0x7e]
|
||||
0x83,0xae,0x04,0x7e
|
||||
|
||||
# GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xaf,0x04,0x7e]
|
||||
0x03,0xaf,0x04,0x7e
|
||||
|
||||
# GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00]
|
||||
0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff]
|
||||
0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff
|
||||
|
||||
# GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08]
|
||||
0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08
|
||||
|
||||
# GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00]
|
||||
0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00]
|
||||
0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xac,0x04,0x7e]
|
||||
0x03,0xac,0x04,0x7e
|
||||
|
||||
# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xac,0x04,0x7e]
|
||||
0x83,0xac,0x04,0x7e
|
||||
|
||||
# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xad,0x04,0x7e]
|
||||
0x03,0xad,0x04,0x7e
|
||||
|
||||
# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00]
|
||||
0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00]
|
||||
0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff]
|
||||
0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff
|
||||
|
||||
# GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08]
|
||||
0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08
|
||||
|
||||
# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00]
|
||||
0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00]
|
||||
0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20]
|
||||
0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20
|
||||
|
||||
# GFX940: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00]
|
||||
0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20]
|
||||
0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20
|
||||
|
||||
# GFX940: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00]
|
||||
0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00
|
||||
|
||||
# GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00
|
||||
|
||||
# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00
|
||||
|
||||
# GFX940: v_cvt_sr_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00]
|
||||
0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00
|
||||
|
||||
# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00
|
||||
|
||||
# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00
|
||||
|
||||
# GFX940: v_cvt_sr_bf8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20]
|
||||
0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20
|
||||
|
||||
# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00
|
||||
|
||||
# GFX940: v_cvt_sr_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00]
|
||||
0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00
|
||||
|
||||
# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00
|
||||
|
||||
# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00
|
||||
|
||||
# GFX940: v_cvt_sr_fp8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20]
|
||||
0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20
|
||||
|
|
Loading…
Reference in New Issue