forked from OSchip/llvm-project
[AMDGPU][MC][GFX11] Add validation of constant bus limitations for VOPD
Differential Revision: https://reviews.llvm.org/D133881
This commit is contained in:
parent
c89e60bf1f
commit
0e868aff43
|
@ -3468,9 +3468,8 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
|
|||
}
|
||||
}
|
||||
|
||||
bool
|
||||
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
|
||||
const OperandVector &Operands) {
|
||||
bool AMDGPUAsmParser::validateConstantBusLimitations(
|
||||
const MCInst &Inst, const OperandVector &Operands) {
|
||||
const unsigned Opcode = Inst.getOpcode();
|
||||
const MCInstrDesc &Desc = MII.get(Opcode);
|
||||
unsigned LastSGPR = AMDGPU::NoRegister;
|
||||
|
@ -3478,69 +3477,67 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
|
|||
unsigned NumLiterals = 0;
|
||||
unsigned LiteralSize;
|
||||
|
||||
if (Desc.TSFlags &
|
||||
(SIInstrFlags::VOPC |
|
||||
SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
|
||||
SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
|
||||
SIInstrFlags::SDWA)) {
|
||||
// Check special imm operands (used by madmk, etc)
|
||||
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
|
||||
++NumLiterals;
|
||||
LiteralSize = 4;
|
||||
}
|
||||
if (!(Desc.TSFlags &
|
||||
(SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
|
||||
SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
|
||||
!isVOPD(Opcode))
|
||||
return true;
|
||||
|
||||
SmallDenseSet<unsigned> SGPRsUsed;
|
||||
unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
|
||||
if (SGPRUsed != AMDGPU::NoRegister) {
|
||||
SGPRsUsed.insert(SGPRUsed);
|
||||
++ConstantBusUseCount;
|
||||
}
|
||||
// Check special imm operands (used by madmk, etc)
|
||||
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
|
||||
++NumLiterals;
|
||||
LiteralSize = 4;
|
||||
}
|
||||
|
||||
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
|
||||
const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
|
||||
const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
|
||||
SmallDenseSet<unsigned> SGPRsUsed;
|
||||
unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
|
||||
if (SGPRUsed != AMDGPU::NoRegister) {
|
||||
SGPRsUsed.insert(SGPRUsed);
|
||||
++ConstantBusUseCount;
|
||||
}
|
||||
|
||||
const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
|
||||
OperandIndices OpIndices = getSrcOperandIndices(Opcode);
|
||||
|
||||
for (int OpIdx : OpIndices) {
|
||||
if (OpIdx == -1) break;
|
||||
for (int OpIdx : OpIndices) {
|
||||
if (OpIdx == -1)
|
||||
continue;
|
||||
|
||||
const MCOperand &MO = Inst.getOperand(OpIdx);
|
||||
if (usesConstantBus(Inst, OpIdx)) {
|
||||
if (MO.isReg()) {
|
||||
LastSGPR = mc2PseudoReg(MO.getReg());
|
||||
// Pairs of registers with partial intersections like these
|
||||
// s0, s[0:1]
|
||||
// flat_scratch_lo, flat_scratch
|
||||
// flat_scratch_lo, flat_scratch_hi
|
||||
// are theoretically valid but they are disabled anyway.
|
||||
// Note that this code mimics SIInstrInfo::verifyInstruction
|
||||
if (SGPRsUsed.insert(LastSGPR).second) {
|
||||
++ConstantBusUseCount;
|
||||
}
|
||||
} else { // Expression or a literal
|
||||
const MCOperand &MO = Inst.getOperand(OpIdx);
|
||||
if (usesConstantBus(Inst, OpIdx)) {
|
||||
if (MO.isReg()) {
|
||||
LastSGPR = mc2PseudoReg(MO.getReg());
|
||||
// Pairs of registers with partial intersections like these
|
||||
// s0, s[0:1]
|
||||
// flat_scratch_lo, flat_scratch
|
||||
// flat_scratch_lo, flat_scratch_hi
|
||||
// are theoretically valid but they are disabled anyway.
|
||||
// Note that this code mimics SIInstrInfo::verifyInstruction
|
||||
if (SGPRsUsed.insert(LastSGPR).second) {
|
||||
++ConstantBusUseCount;
|
||||
}
|
||||
} else { // Expression or a literal
|
||||
|
||||
if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
|
||||
continue; // special operand like VINTERP attr_chan
|
||||
if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
|
||||
continue; // special operand like VINTERP attr_chan
|
||||
|
||||
// An instruction may use only one literal.
|
||||
// This has been validated on the previous step.
|
||||
// See validateVOPLiteral.
|
||||
// This literal may be used as more than one operand.
|
||||
// If all these operands are of the same size,
|
||||
// this literal counts as one scalar value.
|
||||
// Otherwise it counts as 2 scalar values.
|
||||
// See "GFX10 Shader Programming", section 3.6.2.3.
|
||||
// An instruction may use only one literal.
|
||||
// This has been validated on the previous step.
|
||||
// See validateVOPLiteral.
|
||||
// This literal may be used as more than one operand.
|
||||
// If all these operands are of the same size,
|
||||
// this literal counts as one scalar value.
|
||||
// Otherwise it counts as 2 scalar values.
|
||||
// See "GFX10 Shader Programming", section 3.6.2.3.
|
||||
|
||||
unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
|
||||
if (Size < 4) Size = 4;
|
||||
unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
|
||||
if (Size < 4)
|
||||
Size = 4;
|
||||
|
||||
if (NumLiterals == 0) {
|
||||
NumLiterals = 1;
|
||||
LiteralSize = Size;
|
||||
} else if (LiteralSize != Size) {
|
||||
NumLiterals = 2;
|
||||
}
|
||||
if (NumLiterals == 0) {
|
||||
NumLiterals = 1;
|
||||
LiteralSize = Size;
|
||||
} else if (LiteralSize != Size) {
|
||||
NumLiterals = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -72,3 +72,72 @@ v_dual_fmamk_f32 v122, s0, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0,
|
|||
// GFX11: error: only one literal operand is allowed
|
||||
// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v122, s0, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0, 0x1234, v162
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// A VOPD instruction cannot use more than 2 scalar operands
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// 2 different SGPRs + LITERAL
|
||||
|
||||
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s75, v98
|
||||
// GFX11: error: invalid operand (violates constant bus restrictions)
|
||||
// GFX11-NEXT:{{^}}v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s75, v98
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
||||
v_dual_mov_b32 v247, s73 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
|
||||
// GFX11: error: invalid operand (violates constant bus restrictions)
|
||||
// GFX11-NEXT:{{^}}v_dual_mov_b32 v247, s73 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
||||
v_dual_fmamk_f32 v122, s0, 0xbabe, v161 :: v_dual_fmamk_f32 v123, s1, 0xbabe, v162
|
||||
// GFX11: error: invalid operand (violates constant bus restrictions)
|
||||
// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v122, s0, 0xbabe, v161 :: v_dual_fmamk_f32 v123, s1, 0xbabe, v162
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
||||
// 2 different SGPRs + VCC
|
||||
|
||||
v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
|
||||
// GFX11: error: invalid operand (violates constant bus restrictions)
|
||||
// GFX11-NEXT:{{^}}v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
||||
v_dual_cndmask_b32 v6, s1, v3 :: v_dual_add_f32 v255, s2, v2
|
||||
// GFX11: error: invalid operand (violates constant bus restrictions)
|
||||
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v6, s1, v3 :: v_dual_add_f32 v255, s2, v2
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
||||
v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
|
||||
// GFX11: error: invalid operand (violates constant bus restrictions)
|
||||
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
||||
// SGPR + LITERAL + VCC
|
||||
|
||||
v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mov_b32 v254, 0xbabe
|
||||
// GFX11: error: invalid operand (violates constant bus restrictions)
|
||||
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mov_b32 v254, 0xbabe
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
||||
v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_mov_b32 v254, s1
|
||||
// GFX11: error: invalid operand (violates constant bus restrictions)
|
||||
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_mov_b32 v254, s1
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
||||
v_dual_cndmask_b32 v255, s3, v2 :: v_dual_fmamk_f32 v254, v1, 0xbabe, v162
|
||||
// GFX11: error: invalid operand (violates constant bus restrictions)
|
||||
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s3, v2 :: v_dual_fmamk_f32 v254, v1, 0xbabe, v162
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
||||
v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmamk_f32 v254, s3, 0xbabe, v162
|
||||
// GFX11: error: invalid operand (violates constant bus restrictions)
|
||||
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmamk_f32 v254, s3, 0xbabe, v162
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
||||
// SGPR + VCC + VCC_LO
|
||||
// This is a special case because the implicit VCC operand has a 64-bit size.
|
||||
// SP3 does not accept this instruction either.
|
||||
|
||||
v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, s1, v3
|
||||
// GFX11: error: invalid operand (violates constant bus restrictions)
|
||||
// GFX11-NEXT:{{^}}v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, s1, v3
|
||||
// GFX11-NEXT:{{^}} ^
|
||||
|
|
|
@ -39,3 +39,66 @@ v_dual_fmamk_f32 v122, v74, 0xfe0b, v162 :: v_dual_dot2acc_f32_f16 v24
|
|||
|
||||
v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162
|
||||
// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// A VOPD instruction can use 2 scalar operands,
|
||||
// but implicit VCC must be counted in.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// 2 different SGPRs
|
||||
|
||||
v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5
|
||||
// GFX11: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
|
||||
|
||||
// SGPR + LITERAL
|
||||
|
||||
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, v160, v98
|
||||
// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
|
||||
|
||||
v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
|
||||
// GFX11: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
|
||||
|
||||
// SGPR*2 + LITERAL
|
||||
|
||||
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s74, v98
|
||||
// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0x4a,0xc4,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
|
||||
|
||||
// SGPR + LITERAL*2
|
||||
|
||||
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, v6, 2.741, v1
|
||||
// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
|
||||
|
||||
// SGPR*2 + LITERAL*2
|
||||
|
||||
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, s74, 2.741, v1
|
||||
// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x4a,0x02,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
|
||||
|
||||
// LITERAL + VCC
|
||||
|
||||
v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, v2, v3
|
||||
// GFX11: encoding: [0x00,0x43,0x53,0xc8,0x02,0x07,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
|
||||
|
||||
// LITERAL*2 + VCC
|
||||
|
||||
v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, 2.741, v3
|
||||
// GFX11: encoding: [0x00,0x43,0x53,0xc8,0xff,0x06,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
|
||||
|
||||
// LITERAL*2 + VCC*2
|
||||
|
||||
v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_cndmask_b32 v6, 0xbabe, v3
|
||||
// GFX11: encoding: [0xff,0x04,0x52,0xca,0xff,0x06,0x06,0xff,0xbe,0xba,0x00,0x00]
|
||||
|
||||
// SGPR*2 + VCC
|
||||
|
||||
v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3
|
||||
// GFX11: encoding: [0x69,0x04,0x12,0xc9,0x69,0x06,0x06,0xff]
|
||||
|
||||
// SGPR*2 + VCC*2
|
||||
|
||||
v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3
|
||||
// GFX11: encoding: [0x01,0x04,0x52,0xca,0x01,0x06,0x06,0xff]
|
||||
|
||||
// VCC*2
|
||||
|
||||
v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, v1, v3
|
||||
// GFX11: encoding: [0x6a,0x04,0x12,0xc9,0x01,0x07,0x06,0xff]
|
||||
|
|
Loading…
Reference in New Issue