[AMDGPU] Truncate packed inline constant

If a packed inline constant is sign extended, it must be truncated
after the shift. E.g. a constant (0xH0000, 0xHBC00) will be represented
as 0xFFFFFFFFBC000000 in the IR because the immediate is sign extended
to 64 bits. After the value is shifted right by 16 to use it in the low
part with op_sel_hi, it becomes 0xFFFFFFFFBC00 and no longer qualifies
as an inline constant.

Fixed the error and added verification code. Without the fix, but with
the verification in place, the bug causes pk_max_f16_literal.ll to fail.

Differential Revision: https://reviews.llvm.org/D45987

llvm-svn: 330752
This commit is contained in:
Stanislav Mekhanoshin 2018-04-24 18:17:55 +00:00
parent 81cb67ad82
commit a4bfb3c446
3 changed files with 9 additions and 2 deletions

View File

@ -178,7 +178,7 @@ static bool updateOperand(FoldCandidate &Fold,
if (!(Fold.ImmToFold & 0xffff)) {
Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
Old.ChangeToImmediate(Fold.ImmToFold >> 16);
Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
return true;
}
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);

View File

@ -2725,6 +2725,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
unsigned ConstantBusCount = 0;
unsigned LiteralCount = 0;
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
++ConstantBusCount;
@ -2744,6 +2745,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
SGPRUsed = MO.getReg();
} else {
++ConstantBusCount;
++LiteralCount;
}
}
}
@ -2751,6 +2753,11 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "VOP* instruction uses the constant bus more than once";
return false;
}
if (isVOP3(MI) && LiteralCount) {
ErrInfo = "VOP3 instruction uses literal";
return false;
}
}
// Verify misc. restrictions on specific instructions.

View File

@ -40,7 +40,7 @@ bb:
}
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_m1:
; GFX9: v_pk_max_f16 v{{[0-9]+}}, -1.0, v{{[0-9]+}} op_sel:[1,0] op_sel_hi:[0,1]{{$}}
; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_m1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()