[AMDGPU][MC][GFX11] Add validation of constant bus limitations for VOPD

Differential Revision: https://reviews.llvm.org/D133881
This commit is contained in:
Dmitry Preobrazhensky 2022-09-15 16:36:19 +03:00
parent c89e60bf1f
commit 0e868aff43
3 changed files with 186 additions and 57 deletions

View File

@ -3468,9 +3468,8 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
}
}
bool
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
const OperandVector &Operands) {
bool AMDGPUAsmParser::validateConstantBusLimitations(
const MCInst &Inst, const OperandVector &Operands) {
const unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
unsigned LastSGPR = AMDGPU::NoRegister;
@ -3478,69 +3477,67 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
unsigned NumLiterals = 0;
unsigned LiteralSize;
if (Desc.TSFlags &
(SIInstrFlags::VOPC |
SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
SIInstrFlags::SDWA)) {
// Check special imm operands (used by madmk, etc)
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
++NumLiterals;
LiteralSize = 4;
}
if (!(Desc.TSFlags &
(SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
!isVOPD(Opcode))
return true;
SmallDenseSet<unsigned> SGPRsUsed;
unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
if (SGPRUsed != AMDGPU::NoRegister) {
SGPRsUsed.insert(SGPRUsed);
++ConstantBusUseCount;
}
// Check special imm operands (used by madmk, etc)
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
++NumLiterals;
LiteralSize = 4;
}
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
SmallDenseSet<unsigned> SGPRsUsed;
unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
if (SGPRUsed != AMDGPU::NoRegister) {
SGPRsUsed.insert(SGPRUsed);
++ConstantBusUseCount;
}
const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
OperandIndices OpIndices = getSrcOperandIndices(Opcode);
for (int OpIdx : OpIndices) {
if (OpIdx == -1) break;
for (int OpIdx : OpIndices) {
if (OpIdx == -1)
continue;
const MCOperand &MO = Inst.getOperand(OpIdx);
if (usesConstantBus(Inst, OpIdx)) {
if (MO.isReg()) {
LastSGPR = mc2PseudoReg(MO.getReg());
// Pairs of registers with a partial intersections like these
// s0, s[0:1]
// flat_scratch_lo, flat_scratch
// flat_scratch_lo, flat_scratch_hi
// are theoretically valid but they are disabled anyway.
// Note that this code mimics SIInstrInfo::verifyInstruction
if (SGPRsUsed.insert(LastSGPR).second) {
++ConstantBusUseCount;
}
} else { // Expression or a literal
const MCOperand &MO = Inst.getOperand(OpIdx);
if (usesConstantBus(Inst, OpIdx)) {
if (MO.isReg()) {
LastSGPR = mc2PseudoReg(MO.getReg());
// Pairs of registers with a partial intersections like these
// s0, s[0:1]
// flat_scratch_lo, flat_scratch
// flat_scratch_lo, flat_scratch_hi
// are theoretically valid but they are disabled anyway.
// Note that this code mimics SIInstrInfo::verifyInstruction
if (SGPRsUsed.insert(LastSGPR).second) {
++ConstantBusUseCount;
}
} else { // Expression or a literal
if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
continue; // special operand like VINTERP attr_chan
if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
continue; // special operand like VINTERP attr_chan
// An instruction may use only one literal.
// This has been validated on the previous step.
// See validateVOPLiteral.
// This literal may be used as more than one operand.
// If all these operands are of the same size,
// this literal counts as one scalar value.
// Otherwise it counts as 2 scalar values.
// See "GFX10 Shader Programming", section 3.6.2.3.
// An instruction may use only one literal.
// This has been validated on the previous step.
// See validateVOPLiteral.
// This literal may be used as more than one operand.
// If all these operands are of the same size,
// this literal counts as one scalar value.
// Otherwise it counts as 2 scalar values.
// See "GFX10 Shader Programming", section 3.6.2.3.
unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
if (Size < 4) Size = 4;
unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
if (Size < 4)
Size = 4;
if (NumLiterals == 0) {
NumLiterals = 1;
LiteralSize = Size;
} else if (LiteralSize != Size) {
NumLiterals = 2;
}
if (NumLiterals == 0) {
NumLiterals = 1;
LiteralSize = Size;
} else if (LiteralSize != Size) {
NumLiterals = 2;
}
}
}

View File

@ -72,3 +72,72 @@ v_dual_fmamk_f32 v122, s0, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0,
// GFX11: error: only one literal operand is allowed
// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v122, s0, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0, 0x1234, v162
// GFX11-NEXT:{{^}} ^
//===----------------------------------------------------------------------===//
// A VOPD instruction cannot use more than 2 scalar operands
//===----------------------------------------------------------------------===//
// 2 different SGPRs + LITERAL
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s75, v98
// GFX11: error: invalid operand (violates constant bus restrictions)
// GFX11-NEXT:{{^}}v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s75, v98
// GFX11-NEXT:{{^}} ^
v_dual_mov_b32 v247, s73 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
// GFX11: error: invalid operand (violates constant bus restrictions)
// GFX11-NEXT:{{^}}v_dual_mov_b32 v247, s73 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
// GFX11-NEXT:{{^}} ^
v_dual_fmamk_f32 v122, s0, 0xbabe, v161 :: v_dual_fmamk_f32 v123, s1, 0xbabe, v162
// GFX11: error: invalid operand (violates constant bus restrictions)
// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v122, s0, 0xbabe, v161 :: v_dual_fmamk_f32 v123, s1, 0xbabe, v162
// GFX11-NEXT:{{^}} ^
// 2 different SGPRs + VCC
v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
// GFX11: error: invalid operand (violates constant bus restrictions)
// GFX11-NEXT:{{^}}v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
// GFX11-NEXT:{{^}} ^
v_dual_cndmask_b32 v6, s1, v3 :: v_dual_add_f32 v255, s2, v2
// GFX11: error: invalid operand (violates constant bus restrictions)
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v6, s1, v3 :: v_dual_add_f32 v255, s2, v2
// GFX11-NEXT:{{^}} ^
v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
// GFX11: error: invalid operand (violates constant bus restrictions)
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
// GFX11-NEXT:{{^}} ^
// SGPR + LITERAL + VCC
v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mov_b32 v254, 0xbabe
// GFX11: error: invalid operand (violates constant bus restrictions)
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mov_b32 v254, 0xbabe
// GFX11-NEXT:{{^}} ^
v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_mov_b32 v254, s1
// GFX11: error: invalid operand (violates constant bus restrictions)
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_mov_b32 v254, s1
// GFX11-NEXT:{{^}} ^
v_dual_cndmask_b32 v255, s3, v2 :: v_dual_fmamk_f32 v254, v1, 0xbabe, v162
// GFX11: error: invalid operand (violates constant bus restrictions)
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s3, v2 :: v_dual_fmamk_f32 v254, v1, 0xbabe, v162
// GFX11-NEXT:{{^}} ^
v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmamk_f32 v254, s3, 0xbabe, v162
// GFX11: error: invalid operand (violates constant bus restrictions)
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmamk_f32 v254, s3, 0xbabe, v162
// GFX11-NEXT:{{^}} ^
// SGPR + VCC + VCC_LO
// This is a special case because implicit VCC operand has 64 bit size.
// SP3 does not accept this instruction as well.
v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, s1, v3
// GFX11: error: invalid operand (violates constant bus restrictions)
// GFX11-NEXT:{{^}}v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, s1, v3
// GFX11-NEXT:{{^}} ^

View File

@ -39,3 +39,66 @@ v_dual_fmamk_f32 v122, v74, 0xfe0b, v162 :: v_dual_dot2acc_f32_f16 v24
v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162
// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
//===----------------------------------------------------------------------===//
// A VOPD instruction can use 2 scalar operands,
// but implicit VCC must be counted in.
//===----------------------------------------------------------------------===//
// 2 different SGPRs
v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5
// GFX11: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
// SGPR + LITERAL
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, v160, v98
// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
// GFX11: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
// SGPR*2 + LITERAL
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s74, v98
// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0x4a,0xc4,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
// SGPR + LITERAL*2
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, v6, 2.741, v1
// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
// SGPR*2 + LITERAL*2
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, s74, 2.741, v1
// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x4a,0x02,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
// LITERAL + VCC
v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, v2, v3
// GFX11: encoding: [0x00,0x43,0x53,0xc8,0x02,0x07,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
// LITERAL*2 + VCC
v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, 2.741, v3
// GFX11: encoding: [0x00,0x43,0x53,0xc8,0xff,0x06,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
// LITERAL*2 + VCC*2
v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_cndmask_b32 v6, 0xbabe, v3
// GFX11: encoding: [0xff,0x04,0x52,0xca,0xff,0x06,0x06,0xff,0xbe,0xba,0x00,0x00]
// SGPR*2 + VCC
v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3
// GFX11: encoding: [0x69,0x04,0x12,0xc9,0x69,0x06,0x06,0xff]
// SGPR*2 + VCC*2
v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3
// GFX11: encoding: [0x01,0x04,0x52,0xca,0x01,0x06,0x06,0xff]
// VCC*2
v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, v1, v3
// GFX11: encoding: [0x6a,0x04,0x12,0xc9,0x01,0x07,0x06,0xff]