[AMDGPU][MC] Added support of lds_direct operand

See bug 39293: https://bugs.llvm.org/show_bug.cgi?id=39293

Reviewers: artem.tamazov, rampitec

Differential Revision: https://reviews.llvm.org/D57889

llvm-svn: 353524
This commit is contained in:
Dmitry Preobrazhensky 2019-02-08 14:57:37 +00:00
parent 01d6bfc94d
commit 942c273d64
9 changed files with 301 additions and 1 deletions

View File

@ -686,6 +686,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
case AMDGPU::XNACK_MASK_HI:
llvm_unreachable("xnack_mask registers should not be used");
case AMDGPU::LDS_DIRECT:
llvm_unreachable("lds_direct register should not be used");
case AMDGPU::TBA:
case AMDGPU::TBA_LO:
case AMDGPU::TBA_HI:

View File

@ -1095,6 +1095,7 @@ private:
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
bool validateLdsDirect(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
@ -1599,6 +1600,8 @@ static unsigned getSpecialRegForName(StringRef RegName) {
.Case("vcc", AMDGPU::VCC)
.Case("flat_scratch", AMDGPU::FLAT_SCR)
.Case("xnack_mask", AMDGPU::XNACK_MASK)
.Case("lds_direct", AMDGPU::LDS_DIRECT)
.Case("src_lds_direct", AMDGPU::LDS_DIRECT)
.Case("m0", AMDGPU::M0)
.Case("scc", AMDGPU::SCC)
.Case("tba", AMDGPU::TBA)
@ -2465,6 +2468,86 @@ bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
return true;
}
bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
using namespace SIInstrFlags;
const unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
// lds_direct register is defined so that it can be used
// with 9-bit operands only. Ignore encodings which do not accept these.
if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
return true;
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
const int SrcIndices[] = { Src1Idx, Src2Idx };
// lds_direct cannot be specified as either src1 or src2.
for (int SrcIdx : SrcIndices) {
if (SrcIdx == -1) break;
const MCOperand &Src = Inst.getOperand(SrcIdx);
if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
return false;
}
}
if (Src0Idx == -1)
return true;
const MCOperand &Src = Inst.getOperand(Src0Idx);
if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
return true;
// lds_direct is specified as src0. Check additional limitations.
// FIXME: This is a workaround for bug 37943
// which allows 64-bit VOP3 opcodes use 32-bit operands.
if (AMDGPU::getRegOperandSize(getMRI(), Desc, Src0Idx) != 4)
return false;
// Documentation does not disable lds_direct for SDWA, but SP3 assembler does.
// FIXME: This inconsistence needs to be investigated further.
if (Desc.TSFlags & SIInstrFlags::SDWA)
return false;
// The following opcodes do not accept lds_direct which is explicitly stated
// in AMD documentation. However SP3 disables lds_direct for most other 'rev'
// opcodes as well (e.g. for v_subrev_u32 but not for v_subrev_f32).
// FIXME: This inconsistence needs to be investigated further.
switch (Opcode) {
case AMDGPU::V_LSHLREV_B32_e32_si:
case AMDGPU::V_LSHLREV_B32_e64_si:
case AMDGPU::V_LSHLREV_B16_e32_vi:
case AMDGPU::V_LSHLREV_B16_e64_vi:
case AMDGPU::V_LSHLREV_B32_e32_vi:
case AMDGPU::V_LSHLREV_B32_e64_vi:
case AMDGPU::V_LSHLREV_B64_vi:
case AMDGPU::V_LSHRREV_B32_e32_si:
case AMDGPU::V_LSHRREV_B32_e64_si:
case AMDGPU::V_LSHRREV_B16_e32_vi:
case AMDGPU::V_LSHRREV_B16_e64_vi:
case AMDGPU::V_LSHRREV_B32_e32_vi:
case AMDGPU::V_LSHRREV_B32_e64_vi:
case AMDGPU::V_LSHRREV_B64_vi:
case AMDGPU::V_ASHRREV_I32_e64_si:
case AMDGPU::V_ASHRREV_I32_e32_si:
case AMDGPU::V_ASHRREV_I16_e32_vi:
case AMDGPU::V_ASHRREV_I16_e64_vi:
case AMDGPU::V_ASHRREV_I32_e32_vi:
case AMDGPU::V_ASHRREV_I32_e64_vi:
case AMDGPU::V_ASHRREV_I64_vi:
case AMDGPU::V_PK_LSHLREV_B16_vi:
case AMDGPU::V_PK_LSHRREV_B16_vi:
case AMDGPU::V_PK_ASHRREV_I16_vi:
return false;
default:
return true;
}
}
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
@ -2500,6 +2583,11 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
const SMLoc &IDLoc) {
if (!validateLdsDirect(Inst)) {
Error(IDLoc,
"invalid use of lds_direct");
return false;
}
if (!validateSOPLiteral(Inst)) {
Error(IDLoc,
"only one literal operand is allowed");

View File

@ -781,6 +781,7 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
// ToDo: no support for execz register
case 252: break;
case 253: return createRegOperand(SCC);
case 254: return createRegOperand(LDS_DIRECT);
default: break;
}
return errOperand(Val, "unknown operand encoding " + Twine(Val));

View File

@ -268,6 +268,9 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
case AMDGPU::XNACK_MASK:
O << "xnack_mask";
return;
case AMDGPU::LDS_DIRECT:
O << "src_lds_direct";
return;
case AMDGPU::VCC_LO:
O << "vcc_lo";
return;

View File

@ -163,6 +163,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Reserve xnack_mask registers - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
// Reserve lds_direct register - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
// Reserve Trap Handler registers - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::TBA);
reserveRegisterTuples(Reserved, AMDGPU::TMA);

View File

@ -75,6 +75,8 @@ def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
def LDS_DIRECT : SIReg <"lds_direct", 254>;
def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
@ -409,6 +411,12 @@ def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
let CopyCost = -1;
}
// Register class whose only member is the LDS_DIRECT register.
// It is marked non-allocatable and given a CopyCost of -1.
def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add LDS_DIRECT)> {
let isAllocatable = 0;
let CopyCost = -1;
}
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
@ -545,7 +553,7 @@ def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
}
def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, SReg_32)> {
(add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
}

View File

@ -0,0 +1,59 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9
//---------------------------------------------------------------------------//
// lds_direct may be used only with vector ALU instructions
//---------------------------------------------------------------------------//
s_and_b32 s2, lds_direct, s1
// NOGFX9: error
//---------------------------------------------------------------------------//
// lds_direct may not be used with V_{LSHL,LSHR,ASHR}REV opcodes
//---------------------------------------------------------------------------//
v_ashrrev_i16 v0, lds_direct, v0
// NOGFX9: error
v_ashrrev_i32 v0, lds_direct, v0
// NOGFX9: error
v_lshlrev_b16 v0, lds_direct, v0
// NOGFX9: error
v_lshlrev_b32 v0, lds_direct, v0
// NOGFX9: error
v_lshrrev_b16 v0, lds_direct, v0
// NOGFX9: error
v_lshrrev_b32 v0, lds_direct, v0
// NOGFX9: error
v_pk_ashrrev_i16 v0, lds_direct, v0
// NOGFX9: error
v_pk_lshlrev_b16 v0, lds_direct, v0
// NOGFX9: error
v_pk_lshrrev_b16 v0, lds_direct, v0
// NOGFX9: error
//---------------------------------------------------------------------------//
// lds_direct cannot be used with 64-bit and larger operands
//---------------------------------------------------------------------------//
v_add_f64 v[0:1], lds_direct, v[0:1]
// NOGFX9: error
//---------------------------------------------------------------------------//
// Only SRC0 may specify lds_direct
//---------------------------------------------------------------------------//
v_add_i32 v0, v0, lds_direct
// NOGFX9: error
v_add_i32 lds_direct, v0, v0
// NOGFX9: error
v_fma_f32 v0, v0, v0, lds_direct
// NOGFX9: error

View File

@ -0,0 +1,116 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9
//---------------------------------------------------------------------------//
// VOP1/3
//---------------------------------------------------------------------------//
v_mov_b32 v0, src_lds_direct
// GFX9: v_mov_b32_e32 v0, src_lds_direct ; encoding: [0xfe,0x02,0x00,0x7e]
v_mov_b32_e64 v0, src_lds_direct
// GFX9: v_mov_b32_e64 v0, src_lds_direct ; encoding: [0x00,0x00,0x41,0xd1,0xfe,0x00,0x00,0x00]
v_cvt_f64_i32 v[0:1], src_lds_direct
// GFX9: v_cvt_f64_i32_e32 v[0:1], src_lds_direct ; encoding: [0xfe,0x08,0x00,0x7e]
v_cvt_f64_i32_e64 v[0:1], src_lds_direct
// GFX9: v_cvt_f64_i32_e64 v[0:1], src_lds_direct ; encoding: [0x00,0x00,0x44,0xd1,0xfe,0x00,0x00,0x00]
v_mov_fed_b32 v0, src_lds_direct
// GFX9: v_mov_fed_b32_e32 v0, src_lds_direct ; encoding: [0xfe,0x12,0x00,0x7e]
v_mov_fed_b32_e64 v0, src_lds_direct
// GFX9: v_mov_fed_b32_e64 v0, src_lds_direct ; encoding: [0x00,0x00,0x49,0xd1,0xfe,0x00,0x00,0x00]
v_fract_f32 v0, src_lds_direct
// GFX9: v_fract_f32_e32 v0, src_lds_direct ; encoding: [0xfe,0x36,0x00,0x7e]
v_fract_f32_e64 v0, src_lds_direct
// GFX9: v_fract_f32_e64 v0, src_lds_direct ; encoding: [0x00,0x00,0x5b,0xd1,0xfe,0x00,0x00,0x00]
v_cvt_f16_u16 v0, src_lds_direct
// GFX9: v_cvt_f16_u16_e32 v0, src_lds_direct ; encoding: [0xfe,0x72,0x00,0x7e]
//---------------------------------------------------------------------------//
// VOP2/3
//---------------------------------------------------------------------------//
v_cndmask_b32 v0, src_lds_direct, v0, vcc
// GFX9: v_cndmask_b32_e32 v0, src_lds_direct, v0, vcc ; encoding: [0xfe,0x00,0x00,0x00]
v_cndmask_b32_e64 v0, src_lds_direct, v0, s[0:1]
// GFX9: v_cndmask_b32_e64 v0, src_lds_direct, v0, s[0:1] ; encoding: [0x00,0x00,0x00,0xd1,0xfe,0x00,0x02,0x00]
v_add_f32 v0, src_lds_direct, v0
// GFX9: v_add_f32_e32 v0, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x00,0x02]
v_add_f32_e64 v0, src_lds_direct, v0
// GFX9: v_add_f32_e64 v0, src_lds_direct, v0 ; encoding: [0x00,0x00,0x01,0xd1,0xfe,0x00,0x02,0x00]
v_mul_i32_i24 v0, src_lds_direct, v0
// GFX9: v_mul_i32_i24_e32 v0, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x00,0x0c]
v_add_co_u32 v0, vcc, src_lds_direct, v0
// GFX9: v_add_co_u32_e32 v0, vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x00,0x32]
//---------------------------------------------------------------------------//
// VOP3
//---------------------------------------------------------------------------//
v_add_co_u32_e64 v0, s[0:1], src_lds_direct, v0
// GFX9: v_add_co_u32_e64 v0, s[0:1], src_lds_direct, v0 ; encoding: [0x00,0x00,0x19,0xd1,0xfe,0x00,0x02,0x00]
v_madmk_f16 v0, src_lds_direct, 0x1121, v0
// GFX9: v_madmk_f16 v0, src_lds_direct, 0x1121, v0 ; encoding: [0xfe,0x00,0x00,0x48,0x21,0x11,0x00,0x00]
v_madak_f16 v0, src_lds_direct, v0, 0x1121
// GFX9: v_madak_f16 v0, src_lds_direct, v0, 0x1121 ; encoding: [0xfe,0x00,0x00,0x4a,0x21,0x11,0x00,0x00]
v_mad_f32 v0, src_lds_direct, v0, v0
// GFX9: v_mad_f32 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x00,0xc1,0xd1,0xfe,0x00,0x02,0x04]
v_fma_f32 v0, src_lds_direct, v0, v0
// GFX9: v_fma_f32 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x00,0xcb,0xd1,0xfe,0x00,0x02,0x04]
v_min3_i16 v0, src_lds_direct, v0, v0
// GFX9: v_min3_i16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x00,0xf5,0xd1,0xfe,0x00,0x02,0x04]
v_max3_f16 v0, src_lds_direct, v0, v0
// GFX9: v_max3_f16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x00,0xf7,0xd1,0xfe,0x00,0x02,0x04]
//---------------------------------------------------------------------------//
// VOP3P
//---------------------------------------------------------------------------//
v_pk_mad_i16 v0, src_lds_direct, v0, v0
// GFX9: v_pk_mad_i16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x40,0x80,0xd3,0xfe,0x00,0x02,0x1c]
v_pk_add_i16 v0, src_lds_direct, v0
// GFX9: v_pk_add_i16 v0, src_lds_direct, v0 ; encoding: [0x00,0x00,0x82,0xd3,0xfe,0x00,0x02,0x18]
//---------------------------------------------------------------------------//
// VOPC
//---------------------------------------------------------------------------//
v_cmp_lt_f16 vcc, src_lds_direct, v0
// GFX9: v_cmp_lt_f16_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x42,0x7c]
v_cmp_eq_f32 vcc, src_lds_direct, v0
// GFX9: v_cmp_eq_f32_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x84,0x7c]
v_cmpx_neq_f32 vcc, src_lds_direct, v0
// GFX9: v_cmpx_neq_f32_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0xba,0x7c]
//---------------------------------------------------------------------------//
// lds_direct alias
//---------------------------------------------------------------------------//
v_cmp_lt_f16 vcc, lds_direct, v0
// GFX9: v_cmp_lt_f16_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x42,0x7c]
//---------------------------------------------------------------------------//
// FIXME: enable lds_direct for the following opcodes and add tests
//---------------------------------------------------------------------------//
//v_readfirstlane_b32 s0, src_lds_direct
//v_readlane_b32 s0, src_lds_direct, s0

View File

@ -0,0 +1,19 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX9
# GFX9: v_mov_b32_e32 v0, src_lds_direct ; encoding: [0xfe,0x02,0x00,0x7e]
0xfe,0x02,0x00,0x7e
# GFX9: v_mov_b32_e64 v0, src_lds_direct ; encoding: [0x00,0x00,0x41,0xd1,0xfe,0x00,0x00,0x00]
0x00,0x00,0x41,0xd1,0xfe,0x00,0x00,0x00
# GFX9: v_add_f32_e32 v0, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x00,0x02]
0xfe,0x00,0x00,0x02
# GFX9: v_pk_mad_i16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x40,0x80,0xd3,0xfe,0x00,0x02,0x1c]
0x00,0x40,0x80,0xd3,0xfe,0x00,0x02,0x1c
# GFX9: v_pk_mul_lo_u16 v0, src_lds_direct, v0 ; encoding: [0x00,0x00,0x81,0xd3,0xfe,0x00,0x02,0x18]
0x00,0x00,0x81,0xd3,0xfe,0x00,0x02,0x18
# GFX9: v_cmpx_le_i32_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0xa6,0x7d]
0xfe,0x00,0xa6,0x7d