forked from OSchip/llvm-project
[AMDGPU][MC] Enable lds_direct operand for v_readfirstlane_b32, v_readlane_b32 and v_writelane_b32
See bug 40662: https://bugs.llvm.org/show_bug.cgi?id=40662 Reviewers: artem.tamazov, arsenm, rampitec Differential Revision: https://reviews.llvm.org/D58713 llvm-svn: 355312
This commit is contained in:
parent
9735d9011a
commit
6023d5990d
|
@ -415,6 +415,11 @@ public:
|
|||
return isSSrcF16();
|
||||
}
|
||||
|
||||
// Operand predicate for the SSrcOrLdsB32 match class.
// Returns true when isRegOrInlineNoMods() accepts this operand for the
// SRegOrLds_32 register class with type i32, or when the operand is an i32
// literal immediate (isLiteralImm) or an expression (isExpr).
bool isSSrcOrLdsB32() const {
|
||||
return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
|
||||
isLiteralImm(MVT::i32) || isExpr();
|
||||
}
|
||||
|
||||
bool isVCSrcB32() const {
|
||||
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
|
||||
}
|
||||
|
@ -2477,6 +2482,73 @@ bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Returns true if Opcode is one of the 'rev' opcodes enumerated below
// (V_SUBREV_*, V_SUBBREV_*, V_LSHLREV_*, V_LSHRREV_*, V_ASHRREV_* and the
// packed V_PK_*REV_* variants, in their listed encodings), and false for any
// other opcode.
// validateLdsDirect() calls this to reject lds_direct with these opcodes.
static bool IsRevOpcode(const unsigned Opcode)
|
||||
{
|
||||
switch (Opcode) {
|
||||
case AMDGPU::V_SUBREV_F32_e32:
|
||||
case AMDGPU::V_SUBREV_F32_e64:
|
||||
case AMDGPU::V_SUBREV_F32_e32_si:
|
||||
case AMDGPU::V_SUBREV_F32_e32_vi:
|
||||
case AMDGPU::V_SUBREV_F32_e64_si:
|
||||
case AMDGPU::V_SUBREV_F32_e64_vi:
|
||||
case AMDGPU::V_SUBREV_I32_e32:
|
||||
case AMDGPU::V_SUBREV_I32_e64:
|
||||
case AMDGPU::V_SUBREV_I32_e32_si:
|
||||
case AMDGPU::V_SUBREV_I32_e64_si:
|
||||
case AMDGPU::V_SUBBREV_U32_e32:
|
||||
case AMDGPU::V_SUBBREV_U32_e64:
|
||||
case AMDGPU::V_SUBBREV_U32_e32_si:
|
||||
case AMDGPU::V_SUBBREV_U32_e32_vi:
|
||||
case AMDGPU::V_SUBBREV_U32_e64_si:
|
||||
case AMDGPU::V_SUBBREV_U32_e64_vi:
|
||||
case AMDGPU::V_SUBREV_U32_e32:
|
||||
case AMDGPU::V_SUBREV_U32_e64:
|
||||
case AMDGPU::V_SUBREV_U32_e32_gfx9:
|
||||
case AMDGPU::V_SUBREV_U32_e32_vi:
|
||||
case AMDGPU::V_SUBREV_U32_e64_gfx9:
|
||||
case AMDGPU::V_SUBREV_U32_e64_vi:
|
||||
case AMDGPU::V_SUBREV_F16_e32:
|
||||
case AMDGPU::V_SUBREV_F16_e64:
|
||||
case AMDGPU::V_SUBREV_F16_e32_vi:
|
||||
case AMDGPU::V_SUBREV_F16_e64_vi:
|
||||
case AMDGPU::V_SUBREV_U16_e32:
|
||||
case AMDGPU::V_SUBREV_U16_e64:
|
||||
case AMDGPU::V_SUBREV_U16_e32_vi:
|
||||
case AMDGPU::V_SUBREV_U16_e64_vi:
|
||||
case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
|
||||
case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
|
||||
case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
|
||||
case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
|
||||
case AMDGPU::V_LSHLREV_B32_e32_si:
|
||||
case AMDGPU::V_LSHLREV_B32_e64_si:
|
||||
case AMDGPU::V_LSHLREV_B16_e32_vi:
|
||||
case AMDGPU::V_LSHLREV_B16_e64_vi:
|
||||
case AMDGPU::V_LSHLREV_B32_e32_vi:
|
||||
case AMDGPU::V_LSHLREV_B32_e64_vi:
|
||||
case AMDGPU::V_LSHLREV_B64_vi:
|
||||
case AMDGPU::V_LSHRREV_B32_e32_si:
|
||||
case AMDGPU::V_LSHRREV_B32_e64_si:
|
||||
case AMDGPU::V_LSHRREV_B16_e32_vi:
|
||||
case AMDGPU::V_LSHRREV_B16_e64_vi:
|
||||
case AMDGPU::V_LSHRREV_B32_e32_vi:
|
||||
case AMDGPU::V_LSHRREV_B32_e64_vi:
|
||||
case AMDGPU::V_LSHRREV_B64_vi:
|
||||
case AMDGPU::V_ASHRREV_I32_e64_si:
|
||||
case AMDGPU::V_ASHRREV_I32_e32_si:
|
||||
case AMDGPU::V_ASHRREV_I16_e32_vi:
|
||||
case AMDGPU::V_ASHRREV_I16_e64_vi:
|
||||
case AMDGPU::V_ASHRREV_I32_e32_vi:
|
||||
case AMDGPU::V_ASHRREV_I32_e64_vi:
|
||||
case AMDGPU::V_ASHRREV_I64_vi:
|
||||
case AMDGPU::V_PK_LSHLREV_B16_vi:
|
||||
case AMDGPU::V_PK_LSHRREV_B16_vi:
|
||||
case AMDGPU::V_PK_ASHRREV_I16_vi:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
|
||||
|
||||
using namespace SIInstrFlags;
|
||||
|
@ -2511,50 +2583,7 @@ bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
|
|||
return true;
|
||||
|
||||
// lds_direct is specified as src0. Check additional limitations.
|
||||
|
||||
// FIXME: This is a workaround for bug 37943
|
||||
// which allows 64-bit VOP3 opcodes to use 32-bit operands.
|
||||
if (AMDGPU::getRegOperandSize(getMRI(), Desc, Src0Idx) != 4)
|
||||
return false;
|
||||
|
||||
// Documentation does not disable lds_direct for SDWA, but SP3 assembler does.
|
||||
// FIXME: This inconsistency needs to be investigated further.
|
||||
if (Desc.TSFlags & SIInstrFlags::SDWA)
|
||||
return false;
|
||||
|
||||
// The following opcodes do not accept lds_direct which is explicitly stated
|
||||
// in AMD documentation. However SP3 disables lds_direct for most other 'rev'
|
||||
// opcodes as well (e.g. for v_subrev_u32 but not for v_subrev_f32).
|
||||
// FIXME: This inconsistency needs to be investigated further.
|
||||
switch (Opcode) {
|
||||
case AMDGPU::V_LSHLREV_B32_e32_si:
|
||||
case AMDGPU::V_LSHLREV_B32_e64_si:
|
||||
case AMDGPU::V_LSHLREV_B16_e32_vi:
|
||||
case AMDGPU::V_LSHLREV_B16_e64_vi:
|
||||
case AMDGPU::V_LSHLREV_B32_e32_vi:
|
||||
case AMDGPU::V_LSHLREV_B32_e64_vi:
|
||||
case AMDGPU::V_LSHLREV_B64_vi:
|
||||
case AMDGPU::V_LSHRREV_B32_e32_si:
|
||||
case AMDGPU::V_LSHRREV_B32_e64_si:
|
||||
case AMDGPU::V_LSHRREV_B16_e32_vi:
|
||||
case AMDGPU::V_LSHRREV_B16_e64_vi:
|
||||
case AMDGPU::V_LSHRREV_B32_e32_vi:
|
||||
case AMDGPU::V_LSHRREV_B32_e64_vi:
|
||||
case AMDGPU::V_LSHRREV_B64_vi:
|
||||
case AMDGPU::V_ASHRREV_I32_e64_si:
|
||||
case AMDGPU::V_ASHRREV_I32_e32_si:
|
||||
case AMDGPU::V_ASHRREV_I16_e32_vi:
|
||||
case AMDGPU::V_ASHRREV_I16_e64_vi:
|
||||
case AMDGPU::V_ASHRREV_I32_e32_vi:
|
||||
case AMDGPU::V_ASHRREV_I32_e64_vi:
|
||||
case AMDGPU::V_ASHRREV_I64_vi:
|
||||
case AMDGPU::V_PK_LSHLREV_B16_vi:
|
||||
case AMDGPU::V_PK_LSHRREV_B16_vi:
|
||||
case AMDGPU::V_PK_ASHRREV_I16_vi:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
|
||||
}
|
||||
|
||||
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
|
||||
|
|
|
@ -97,6 +97,7 @@ static DecodeStatus StaticDecoderName(MCInst &Inst, \
|
|||
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
|
||||
|
||||
DECODE_OPERAND_REG(VGPR_32)
|
||||
DECODE_OPERAND_REG(VRegOrLds_32)
|
||||
DECODE_OPERAND_REG(VS_32)
|
||||
DECODE_OPERAND_REG(VS_64)
|
||||
DECODE_OPERAND_REG(VS_128)
|
||||
|
@ -108,6 +109,7 @@ DECODE_OPERAND_REG(VReg_128)
|
|||
DECODE_OPERAND_REG(SReg_32)
|
||||
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
|
||||
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
|
||||
DECODE_OPERAND_REG(SRegOrLds_32)
|
||||
DECODE_OPERAND_REG(SReg_64)
|
||||
DECODE_OPERAND_REG(SReg_64_XEXEC)
|
||||
DECODE_OPERAND_REG(SReg_128)
|
||||
|
@ -469,6 +471,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
|
|||
return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
|
||||
}
|
||||
|
||||
// Decodes the encoded value Val of a VRegOrLds_32 operand by delegating to
// decodeSrcOp() with the OPW32 operand width.
// NOTE(review): presumably the generic source-operand path is used so that
// the lds_direct encoding is decoded in addition to VGPRs — confirm against
// decodeSrcOp().
MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
|
||||
return decodeSrcOp(OPW32, Val);
|
||||
}
|
||||
|
||||
MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
|
||||
return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
|
||||
}
|
||||
|
@ -500,6 +506,13 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
|
|||
return decodeOperand_SReg_32(Val);
|
||||
}
|
||||
|
||||
// Decodes the encoded value Val of an SRegOrLds_32 operand by delegating to
// decodeSrcOp() with the OPW32 operand width.
MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
|
||||
// The TableGen-generated disassembler does not care about operand types;
|
||||
// it keeps only the register class, so an SSrc_32 operand turns into SReg_32.
|
||||
// Therefore immediates and literals are accepted here as well.
|
||||
return decodeSrcOp(OPW32, Val);
|
||||
}
|
||||
|
||||
MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
|
||||
return decodeSrcOp(OPW64, Val);
|
||||
}
|
||||
|
|
|
@ -71,6 +71,8 @@ public:
|
|||
DecodeStatus convertMIMGInst(MCInst &MI) const;
|
||||
|
||||
MCOperand decodeOperand_VGPR_32(unsigned Val) const;
|
||||
MCOperand decodeOperand_VRegOrLds_32(unsigned Val) const;
|
||||
|
||||
MCOperand decodeOperand_VS_32(unsigned Val) const;
|
||||
MCOperand decodeOperand_VS_64(unsigned Val) const;
|
||||
MCOperand decodeOperand_VS_128(unsigned Val) const;
|
||||
|
@ -84,6 +86,7 @@ public:
|
|||
MCOperand decodeOperand_SReg_32(unsigned Val) const;
|
||||
MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const;
|
||||
MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const;
|
||||
MCOperand decodeOperand_SRegOrLds_32(unsigned Val) const;
|
||||
MCOperand decodeOperand_SReg_64(unsigned Val) const;
|
||||
MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const;
|
||||
MCOperand decodeOperand_SReg_128(unsigned Val) const;
|
||||
|
|
|
@ -442,6 +442,11 @@ def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
|||
let AllocationPriority = 7;
|
||||
}
|
||||
|
||||
// 32-bit register class combining SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI and
// SReg_32_XEXEC_HI with LDS_DIRECT_CLASS. It is marked non-allocatable.
// NOTE(review): presumably intended only for operand matching/decoding, not
// for register allocation — confirm.
def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
(add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> {
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
|
||||
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> {
|
||||
let CopyCost = 1;
|
||||
let AllocationPriority = 8;
|
||||
|
@ -511,6 +516,11 @@ def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
|
|||
let AllocationPriority = 12;
|
||||
}
|
||||
|
||||
// 32-bit register class combining VGPR_32 with LDS_DIRECT_CLASS. It is marked
// non-allocatable.
// NOTE(review): presumably intended only for operand matching/decoding, not
// for register allocation — confirm.
def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
(add VGPR_32, LDS_DIRECT_CLASS)> {
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
|
||||
// Register class for all vector registers (VGPRs + Interpolation Registers)
|
||||
def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add VGPR_64)> {
|
||||
let Size = 64;
|
||||
|
@ -631,6 +641,12 @@ multiclass RegInlineOperand <string rc, string MatchName>
|
|||
|
||||
defm SSrc : RegImmOperand<"SReg", "SSrc">;
|
||||
|
||||
// Register operand over the SRegOrLds_32 register class. It is typed as
// OPERAND_REG_IMM_INT32 in the AMDGPU operand namespace and is matched by the
// assembly parser through the "SSrcOrLdsB32" RegImmMatcher class.
def SSrcOrLds_b32 : RegisterOperand<SRegOrLds_32> {
|
||||
let OperandNamespace = "AMDGPU";
|
||||
let OperandType = "OPERAND_REG_IMM_INT32";
|
||||
let ParserMatchClass = RegImmMatcher<"SSrcOrLdsB32">;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SCSrc_* Operands with an SGPR or an inline constant
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -802,9 +802,11 @@ unsigned getRegBitWidth(unsigned RCID) {
|
|||
switch (RCID) {
|
||||
case AMDGPU::SGPR_32RegClassID:
|
||||
case AMDGPU::VGPR_32RegClassID:
|
||||
case AMDGPU::VRegOrLds_32RegClassID:
|
||||
case AMDGPU::VS_32RegClassID:
|
||||
case AMDGPU::SReg_32RegClassID:
|
||||
case AMDGPU::SReg_32_XM0RegClassID:
|
||||
case AMDGPU::SRegOrLds_32RegClassID:
|
||||
return 32;
|
||||
case AMDGPU::SGPR_64RegClassID:
|
||||
case AMDGPU::VS_64RegClassID:
|
||||
|
|
|
@ -142,7 +142,7 @@ defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
|
|||
// TODO: Make profile for this, there is VOP3 encoding also
|
||||
def V_READFIRSTLANE_B32 :
|
||||
InstSI <(outs SReg_32:$vdst),
|
||||
(ins VGPR_32:$src0),
|
||||
(ins VRegOrLds_32:$src0),
|
||||
"v_readfirstlane_b32 $vdst, $src0",
|
||||
[(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>,
|
||||
Enc32 {
|
||||
|
|
|
@ -360,7 +360,7 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
|
|||
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
|
||||
let Outs32 = (outs SReg_32:$vdst);
|
||||
let Outs64 = Outs32;
|
||||
let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1);
|
||||
let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
|
||||
let Ins64 = Ins32;
|
||||
let Asm32 = " $vdst, $src0, $src1";
|
||||
let Asm64 = Asm32;
|
||||
|
@ -765,7 +765,7 @@ defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>;
|
|||
|
||||
defm V_READLANE_B32 : VOP2_Real_si <0x01>;
|
||||
|
||||
let InOperandList = (ins SSrc_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
|
||||
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
|
||||
defm V_WRITELANE_B32 : VOP2_Real_si <0x02>;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=CI
|
||||
|
||||
v_readfirstlane_b32 s0, lds_direct
|
||||
// CI: v_readfirstlane_b32 s0, src_lds_direct ; encoding: [0xfe,0x04,0x00,0x7e]
|
||||
|
||||
v_readlane_b32 s0, lds_direct, s0
|
||||
// CI: v_readlane_b32 s0, src_lds_direct, s0 ; encoding: [0xfe,0x00,0x00,0x02]
|
||||
|
||||
v_writelane_b32 v0, lds_direct, s0
|
||||
// CI: v_writelane_b32 v0, src_lds_direct, s0 ; encoding: [0xfe,0x00,0x00,0x04]
|
|
@ -5,55 +5,77 @@
|
|||
//---------------------------------------------------------------------------//
|
||||
|
||||
s_and_b32 s2, lds_direct, s1
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid operand for instruction
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
// lds_direct may not be used with V_{LSHL,LSHR,ASHL}REV opcodes
|
||||
// lds_direct may not be used with "REV" opcodes
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
v_ashrrev_i16 v0, lds_direct, v0
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_ashrrev_i32 v0, lds_direct, v0
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_lshlrev_b16 v0, lds_direct, v0
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_lshlrev_b32 v0, lds_direct, v0
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_lshrrev_b16 v0, lds_direct, v0
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_lshrrev_b32 v0, lds_direct, v0
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_pk_ashrrev_i16 v0, lds_direct, v0
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_pk_lshlrev_b16 v0, lds_direct, v0
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_pk_lshrrev_b16 v0, lds_direct, v0
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_subbrev_co_u32 v0, vcc, src_lds_direct, v0, vcc
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_subrev_co_u32 v0, vcc, src_lds_direct, v0
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_subrev_f16 v0, src_lds_direct, v0
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_subrev_u16 v0, src_lds_direct, v0
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_subrev_u32 v0, src_lds_direct, v0
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
// lds_direct may not be used with v_writelane_b32 for VI/GFX9
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
v_writelane_b32 v0, lds_direct, s0
|
||||
// NOGFX9: error: instruction not supported on this GPU
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
// lds_direct cannot be used with 64-bit and larger operands
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
v_add_f64 v[0:1], lds_direct, v[0:1]
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid operand for instruction
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
// Only SRC0 may specify lds_direct
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
v_add_i32 v0, v0, lds_direct
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
||||
v_add_i32 lds_direct, v0, v0
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid operand for instruction
|
||||
|
||||
v_fma_f32 v0, v0, v0, lds_direct
|
||||
// NOGFX9: error
|
||||
// NOGFX9: error: invalid use of lds_direct
|
||||
|
|
|
@ -31,6 +31,9 @@ v_fract_f32_e64 v0, src_lds_direct
|
|||
v_cvt_f16_u16 v0, src_lds_direct
|
||||
// GFX9: v_cvt_f16_u16_e32 v0, src_lds_direct ; encoding: [0xfe,0x72,0x00,0x7e]
|
||||
|
||||
v_readfirstlane_b32 s0, src_lds_direct
|
||||
// GFX9: v_readfirstlane_b32 s0, src_lds_direct ; encoding: [0xfe,0x04,0x00,0x7e]
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
// VOP2/3
|
||||
//---------------------------------------------------------------------------//
|
||||
|
@ -78,6 +81,9 @@ v_min3_i16 v0, src_lds_direct, v0, v0
|
|||
v_max3_f16 v0, src_lds_direct, v0, v0
|
||||
// GFX9: v_max3_f16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x00,0xf7,0xd1,0xfe,0x00,0x02,0x04]
|
||||
|
||||
v_readlane_b32 s0, src_lds_direct, s0
|
||||
// GFX9: v_readlane_b32 s0, src_lds_direct, s0 ; encoding: [0x00,0x00,0x89,0xd2,0xfe,0x00,0x00,0x00]
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
// VOP3P
|
||||
//---------------------------------------------------------------------------//
|
||||
|
@ -107,10 +113,3 @@ v_cmpx_neq_f32 vcc, src_lds_direct, v0
|
|||
|
||||
v_cmp_lt_f16 vcc, lds_direct, v0
|
||||
// GFX9: v_cmp_lt_f16_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x42,0x7c]
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
// FIXME: enable lds_direct for the following opcodes and add tests
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
//v_readfirstlane_b32 s0, src_lds_direct
|
||||
//v_readlane_b32 s0, src_lds_direct, s0
|
||||
|
|
|
@ -17,3 +17,9 @@
|
|||
|
||||
# GFX9: v_cmpx_le_i32_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0xa6,0x7d]
|
||||
0xfe,0x00,0xa6,0x7d
|
||||
|
||||
# GFX9: v_readlane_b32 s0, src_lds_direct, s0 ; encoding: [0x00,0x00,0x89,0xd2,0xfe,0x00,0x00,0x00]
|
||||
0x00,0x00,0x89,0xd2,0xfe,0x00,0x00,0x00
|
||||
|
||||
# GFX9: v_readfirstlane_b32 s0, src_lds_direct ; encoding: [0xfe,0x04,0x00,0x7e]
|
||||
0xfe,0x04,0x00,0x7e
|
||||
|
|
Loading…
Reference in New Issue