llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1456 lines
46 KiB
C++
Raw Normal View History

//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// \file
//===----------------------------------------------------------------------===//
#include "AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetParser.h"
using namespace llvm;
using namespace llvm::AMDGPU;
static cl::opt<bool> Keep16BitSuffixes(
"amdgpu-keep-16-bit-reg-suffixes",
cl::desc("Keep .l and .h suffixes in asm for debugging purposes"),
cl::init(false),
cl::ReallyHidden);
void AMDGPUInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
// FIXME: The current implementation of
// AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this
// as an integer or we provide a name which represents a physical register.
// For CFI instructions we really want to emit a name for the DWARF register
// instead, because there may be multiple DWARF registers corresponding to a
// single physical register. One case where this problem manifests is with
// wave32/wave64 where using the physical register name is ambiguous: if we
// write e.g. `.cfi_undefined v0` we lose information about the wavefront
// size which we need to encode the register in the final DWARF. Ideally we
// would extend MC to support parsing DWARF register names so we could do
// something like `.cfi_undefined dwarf_wave32_v0`. For now we just live with
// non-pretty DWARF register names in assembly text.
OS << RegNo;
}
void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot, const MCSubtargetInfo &STI,
raw_ostream &OS) {
OS.flush();
printInstruction(MI, Address, STI, OS);
printAnnotation(OS, Annot);
}
void AMDGPUInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
O << formatHex(MI->getOperand(OpNo).getImm() & 0xf);
}
void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
O << formatHex(MI->getOperand(OpNo).getImm() & 0xff);
}
void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
// It's possible to end up with a 32-bit literal used with a 16-bit operand
// with ignored high bits. Print as 32-bit anyway in that case.
int64_t Imm = MI->getOperand(OpNo).getImm();
if (isInt<16>(Imm) || isUInt<16>(Imm))
O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
else
printU32ImmOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printU4ImmDecOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
O << formatDec(MI->getOperand(OpNo).getImm() & 0xf);
}
void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
}
void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
}
void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
}
void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
raw_ostream &O, StringRef BitName) {
if (MI->getOperand(OpNo).getImm()) {
O << ' ' << BitName;
}
}
void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printNamedBit(MI, OpNo, O, "offen");
}
void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printNamedBit(MI, OpNo, O, "idxen");
}
void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printNamedBit(MI, OpNo, O, "addr64");
}
void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).getImm()) {
O << " offset:";
printU16ImmDecOperand(MI, OpNo, O);
}
}
void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
O << " offset:";
printU16ImmDecOperand(MI, OpNo, O);
}
}
void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
O << " offset:";
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
bool IsFlatSeg = !(Desc.TSFlags &
(SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch));
if (IsFlatSeg) { // Unsigned offset
printU16ImmDecOperand(MI, OpNo, O);
} else { // Signed offset
if (AMDGPU::isGFX10Plus(STI)) {
O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
} else {
O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
}
}
}
}
void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNo).getImm()) {
O << " offset0:";
printU8ImmDecOperand(MI, OpNo, O);
}
}
void AMDGPUInstPrinter::printOffset1(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNo).getImm()) {
O << " offset1:";
printU8ImmDecOperand(MI, OpNo, O);
}
}
void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printU32ImmOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
O << formatHex(MI->getOperand(OpNo).getImm());
}
void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printU32ImmOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "gds");
}
void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
auto Imm = MI->getOperand(OpNo).getImm();
if (Imm & CPol::GLC)
O << " glc";
if (Imm & CPol::SLC)
O << " slc";
if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
O << " dlc";
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
O << " scc";
if (Imm & ~CPol::ALL)
O << " /* unexpected cache policy bit */";
}
void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
}
void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "tfe");
}
void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
if (MI->getOperand(OpNo).getImm()) {
O << " dmask:";
printU16ImmOperand(MI, OpNo, STI, O);
}
}
void AMDGPUInstPrinter::printDim(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned Dim = MI->getOperand(OpNo).getImm();
O << " dim:SQ_RSRC_IMG_";
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
if (DimInfo)
O << DimInfo->AsmSuffix;
else
O << Dim;
}
void AMDGPUInstPrinter::printUNorm(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "unorm");
}
void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "da");
}
void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
if (STI.hasFeature(AMDGPU::FeatureR128A16))
printNamedBit(MI, OpNo, O, "a16");
else
printNamedBit(MI, OpNo, O, "r128");
}
void AMDGPUInstPrinter::printGFX10A16(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "a16");
}
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "lwe");
AMDGPU: Turn D16 for MIMG instructions into a regular operand Summary: This allows us to reduce the number of different machine instruction opcodes, which reduces the table sizes and helps flatten the TableGen multiclass hierarchies. We can do this because for each hardware MIMG opcode, we have a full set of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata and vaddr registers. Instead of having separate D16 machine instructions, a packed D16 instructions loading e.g. 4 components can simply use the same V2 opcode variant that non-D16 instructions use. We still require a TSFlag for D16 buffer instructions, because the D16-ness of buffer instructions is part of the opcode. Renaming the flag should help avoid future confusion. The one non-obvious code change is that for gather4 instructions, the disassembler can no longer automatically decide whether to use a V2 or a V4 variant. The existing logic which choose the correct variant for other MIMG instruction is extended to cover gather4 as well. As a bonus, some of the assembler error messages are now more helpful (e.g., complaining about a wrong data size instead of a non-existing instruction). While we're at it, delete a whole bunch of dead legacy TableGen code. Change-Id: I89b02c2841c06f95e662541433e597f5d4553978 Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D47434 llvm-svn: 335222
2018-06-21 21:36:01 +08:00
}
void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "d16");
}
void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printNamedBit(MI, OpNo, O, "compr");
}
void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printNamedBit(MI, OpNo, O, "vm");
}
[AMDGPU] New tbuffer intrinsics Summary: This commit adds new intrinsics llvm.amdgcn.raw.tbuffer.load llvm.amdgcn.struct.tbuffer.load llvm.amdgcn.raw.tbuffer.store llvm.amdgcn.struct.tbuffer.store with the following changes from the llvm.amdgcn.tbuffer.* intrinsics: * there are separate raw and struct versions: raw does not have an index arg and sets idxen=0 in the instruction, and struct always sets idxen=1 in the instruction even if the index is 0, to allow for the fact that gfx9 does bounds checking differently depending on whether idxen is set; * there is a combined format arg (dfmt+nfmt) * there is a combined cachepolicy arg (glc+slc) * there are now only two offset args: one for the offset that is included in bounds checking and swizzling, to be split between the instruction's voffset and immoffset fields, and one for the offset that is excluded from bounds checking and swizzling, to go into the instruction's soffset field. The AMDISD::TBUFFER_* SD nodes always have an index operand, all three offset operands, combined format operand, combined cachepolicy operand, and an extra idxen operand. The tbuffer pseudo- and real instructions now also have a combined format operand. The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store intrinsics continue to work. V2: Separate raw and struct intrinsics. V3: Moved extract_glc and extract_slc defs to a more sensible place. V4: Rebased on D49995. V5: Only two separate offset args instead of three. V6: Pseudo- and real instructions have joint format operand. V7: Restored optionality of dfmt and nfmt in assembler. V8: Addressed minor review comments. Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D49026 Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4 llvm-svn: 340268
2018-08-21 19:06:05 +08:00
void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
}
void AMDGPUInstPrinter::printSymbolicFormat(const MCInst *MI,
const MCSubtargetInfo &STI,
raw_ostream &O) {
using namespace llvm::AMDGPU::MTBUFFormat;
int OpNo =
AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::format);
assert(OpNo != -1);
unsigned Val = MI->getOperand(OpNo).getImm();
if (AMDGPU::isGFX10Plus(STI)) {
if (Val == UFMT_DEFAULT)
return;
if (isValidUnifiedFormat(Val)) {
O << " format:[" << getUnifiedFormatName(Val) << ']';
} else {
O << " format:" << Val;
}
} else {
if (Val == DFMT_NFMT_DEFAULT)
return;
if (isValidDfmtNfmt(Val, STI)) {
unsigned Dfmt;
unsigned Nfmt;
decodeDfmtNfmt(Val, Dfmt, Nfmt);
O << " format:[";
if (Dfmt != DFMT_DEFAULT) {
O << getDfmtName(Dfmt);
if (Nfmt != NFMT_DEFAULT) {
O << ',';
}
}
if (Nfmt != NFMT_DEFAULT) {
O << getNfmtName(Nfmt, STI);
}
O << ']';
} else {
O << " format:" << Val;
}
}
}
void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
const MCRegisterInfo &MRI) {
#if !defined(NDEBUG)
switch (RegNo) {
case AMDGPU::FP_REG:
case AMDGPU::SP_REG:
case AMDGPU::PRIVATE_RSRC_REG:
llvm_unreachable("pseudo-register should not ever be emitted");
case AMDGPU::SCC:
llvm_unreachable("pseudo scc should not ever be emitted");
default:
break;
}
#endif
StringRef RegName(getRegisterName(RegNo));
if (!Keep16BitSuffixes)
if (!RegName.consume_back(".l"))
RegName.consume_back(".h");
O << RegName;
}
void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
auto Opcode = MI->getOpcode();
auto Flags = MII.get(Opcode).TSFlags;
if (OpNo == 0) {
if (Flags & SIInstrFlags::VOP3) {
if (!getVOP3IsSingle(Opcode))
O << "_e64";
} else if (Flags & SIInstrFlags::DPP) {
O << "_dpp";
} else if (Flags & SIInstrFlags::SDWA) {
O << "_sdwa";
} else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) ||
((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode))) {
O << "_e32";
}
O << " ";
}
printOperand(MI, OpNo, STI, O);
// Print default vcc/vcc_lo operand.
switch (Opcode) {
default: break;
case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
printDefaultVccOperand(1, STI, O);
break;
}
}
void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI))
O << " ";
else
O << "_e32 ";
printOperand(MI, OpNo, STI, O);
}
AMDGPU: Don't use 16-bit FP inline constants in integer operands It seems to be a hardware defect that the half inline constants do not work as expected for the 16-bit integer operations (the inverse does work correctly). Experimentation seems to show these are really reading the 32-bit inline constants, which can be observed by writing inline asm using op_sel to see what's in the high half of the constant. Theoretically we could fold the high halves of the 32-bit constants using op_sel. The *_asm_all.s MC tests are broken, and I don't know where the script to autogenerate these are. I started manually fixing it, but there's just too many cases to fix. This also does break the assembler/disassembler support for these values, and I'm not sure what to do about it. These are still valid encodings, so it seems like you should be able to use them in some way. If you wrote assembly using them, you could have really meant it (perhaps to read the high bits with op_sel?). The disassembler will print the invalid literal constant which will fail to re-assemble. The behavior is also different depending on the use context. Consider this example, which was previously accepted and encoded using the inline constant: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04] In contexts where an inline immediate is required (such as on gfx8/9), this will now be rejected. For gfx10, this will produce the literal encoding and change the printed format: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] This is just another variation of the issue that we don't perfectly handle round trip assembly/disassembly due to not tracking how immediates were encoded. This doesn't matter much in practice, since compilers don't emit the suboptimal encoding. I doubt any users are relying on this behavior (although I did make use of the old behavior to figure out what was wrong). Fixes bug 46302.
2020-06-15 01:09:02 +08:00
void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
int16_t SImm = static_cast<int16_t>(Imm);
if (isInlinableIntLiteral(SImm)) {
AMDGPU: Don't use 16-bit FP inline constants in integer operands It seems to be a hardware defect that the half inline constants do not work as expected for the 16-bit integer operations (the inverse does work correctly). Experimentation seems to show these are really reading the 32-bit inline constants, which can be observed by writing inline asm using op_sel to see what's in the high half of the constant. Theoretically we could fold the high halves of the 32-bit constants using op_sel. The *_asm_all.s MC tests are broken, and I don't know where the script to autogenerate these are. I started manually fixing it, but there's just too many cases to fix. This also does break the assembler/disassembler support for these values, and I'm not sure what to do about it. These are still valid encodings, so it seems like you should be able to use them in some way. If you wrote assembly using them, you could have really meant it (perhaps to read the high bits with op_sel?). The disassembler will print the invalid literal constant which will fail to re-assemble. The behavior is also different depending on the use context. Consider this example, which was previously accepted and encoded using the inline constant: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04] In contexts where an inline immediate is required (such as on gfx8/9), this will now be rejected. For gfx10, this will produce the literal encoding and change the printed format: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] This is just another variation of the issue that we don't perfectly handle round trip assembly/disassembly due to not tracking how immediates were encoded. This doesn't matter much in practice, since compilers don't emit the suboptimal encoding. I doubt any users are relying on this behavior (although I did make use of the old behavior to figure out what was wrong). Fixes bug 46302.
2020-06-15 01:09:02 +08:00
O << SImm;
} else {
uint64_t Imm16 = static_cast<uint16_t>(Imm);
O << formatHex(Imm16);
}
AMDGPU: Don't use 16-bit FP inline constants in integer operands It seems to be a hardware defect that the half inline constants do not work as expected for the 16-bit integer operations (the inverse does work correctly). Experimentation seems to show these are really reading the 32-bit inline constants, which can be observed by writing inline asm using op_sel to see what's in the high half of the constant. Theoretically we could fold the high halves of the 32-bit constants using op_sel. The *_asm_all.s MC tests are broken, and I don't know where the script to autogenerate these are. I started manually fixing it, but there's just too many cases to fix. This also does break the assembler/disassembler support for these values, and I'm not sure what to do about it. These are still valid encodings, so it seems like you should be able to use them in some way. If you wrote assembly using them, you could have really meant it (perhaps to read the high bits with op_sel?). The disassembler will print the invalid literal constant which will fail to re-assemble. The behavior is also different depending on the use context. Consider this example, which was previously accepted and encoded using the inline constant: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04] In contexts where an inline immediate is required (such as on gfx8/9), this will now be rejected. For gfx10, this will produce the literal encoding and change the printed format: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] This is just another variation of the issue that we don't perfectly handle round trip assembly/disassembly due to not tracking how immediates were encoded. This doesn't matter much in practice, since compilers don't emit the suboptimal encoding. I doubt any users are relying on this behavior (although I did make use of the old behavior to figure out what was wrong). Fixes bug 46302.
2020-06-15 01:09:02 +08:00
}
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
int16_t SImm = static_cast<int16_t>(Imm);
AMDGPU: Don't use 16-bit FP inline constants in integer operands It seems to be a hardware defect that the half inline constants do not work as expected for the 16-bit integer operations (the inverse does work correctly). Experimentation seems to show these are really reading the 32-bit inline constants, which can be observed by writing inline asm using op_sel to see what's in the high half of the constant. Theoretically we could fold the high halves of the 32-bit constants using op_sel. The *_asm_all.s MC tests are broken, and I don't know where the script to autogenerate these are. I started manually fixing it, but there's just too many cases to fix. This also does break the assembler/disassembler support for these values, and I'm not sure what to do about it. These are still valid encodings, so it seems like you should be able to use them in some way. If you wrote assembly using them, you could have really meant it (perhaps to read the high bits with op_sel?). The disassembler will print the invalid literal constant which will fail to re-assemble. The behavior is also different depending on the use context. Consider this example, which was previously accepted and encoded using the inline constant: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04] In contexts where an inline immediate is required (such as on gfx8/9), this will now be rejected. For gfx10, this will produce the literal encoding and change the printed format: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] This is just another variation of the issue that we don't perfectly handle round trip assembly/disassembly due to not tracking how immediates were encoded. This doesn't matter much in practice, since compilers don't emit the suboptimal encoding. I doubt any users are relying on this behavior (although I did make use of the old behavior to figure out what was wrong). Fixes bug 46302.
2020-06-15 01:09:02 +08:00
if (isInlinableIntLiteral(SImm)) {
O << SImm;
return;
}
if (Imm == 0x3C00)
O<< "1.0";
else if (Imm == 0xBC00)
O<< "-1.0";
else if (Imm == 0x3800)
O<< "0.5";
else if (Imm == 0xB800)
O<< "-0.5";
else if (Imm == 0x4000)
O<< "2.0";
else if (Imm == 0xC000)
O<< "-2.0";
else if (Imm == 0x4400)
O<< "4.0";
else if (Imm == 0xC400)
O<< "-4.0";
else if (Imm == 0x3118 &&
STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) {
O << "0.15915494";
} else {
uint64_t Imm16 = static_cast<uint16_t>(Imm);
O << formatHex(Imm16);
}
}
void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
uint16_t Lo16 = static_cast<uint16_t>(Imm);
printImmediate16(Lo16, STI, O);
}
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
int32_t SImm = static_cast<int32_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
O << SImm;
return;
}
if (Imm == FloatToBits(0.0f))
O << "0.0";
else if (Imm == FloatToBits(1.0f))
O << "1.0";
else if (Imm == FloatToBits(-1.0f))
O << "-1.0";
else if (Imm == FloatToBits(0.5f))
O << "0.5";
else if (Imm == FloatToBits(-0.5f))
O << "-0.5";
else if (Imm == FloatToBits(2.0f))
O << "2.0";
else if (Imm == FloatToBits(-2.0f))
O << "-2.0";
else if (Imm == FloatToBits(4.0f))
O << "4.0";
else if (Imm == FloatToBits(-4.0f))
O << "-4.0";
else if (Imm == 0x3e22f983 &&
STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
O << "0.15915494";
else
O << formatHex(static_cast<uint64_t>(Imm));
}
void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
int64_t SImm = static_cast<int64_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
O << SImm;
return;
}
if (Imm == DoubleToBits(0.0))
O << "0.0";
else if (Imm == DoubleToBits(1.0))
O << "1.0";
else if (Imm == DoubleToBits(-1.0))
O << "-1.0";
else if (Imm == DoubleToBits(0.5))
O << "0.5";
else if (Imm == DoubleToBits(-0.5))
O << "-0.5";
else if (Imm == DoubleToBits(2.0))
O << "2.0";
else if (Imm == DoubleToBits(-2.0))
O << "-2.0";
else if (Imm == DoubleToBits(4.0))
O << "4.0";
else if (Imm == DoubleToBits(-4.0))
O << "-4.0";
else if (Imm == 0x3fc45f306dc9c882 &&
STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
O << "0.15915494309189532";
else {
AMDGPU] Assembler: better support for immediate literals in assembler. Summary: Prevously assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals. E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least. With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction). Here are rules how we convert literals: - We parsed fp literal: - Instruction expects 64-bit operand: - If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5) - then we do nothing this literal - Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5) - report error - Else literal is not-inlinable but we can encode it as additional 32-bit literal constant - If instruction expect fp operand type (f64) - Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5) - If so then do nothing - Else (e.g. v_fract_f64 v[0:1], 3.1415) - report warning that low 32 bits will be set to zeroes and precision will be lost - set low 32 bits of literal to zeroes - Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5) - report error as it is unclear how to encode this literal - Instruction expects 32-bit operand: - Convert parsed 64 bit fp literal to 32 bit fp. Allow lose of precision but not overflow or underflow - Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5) - do nothing - Else report error - Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0) - Parsed binary literal: - Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35) - do nothing - Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35) - report error - Else, literal is not-inlinable and we are not required to inline it - Are high 32 bit of literal zeroes or same as sign bit (32 bit) - do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef) - Else - report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0) For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types: ''' enum OperandType { OPERAND_REG_IMM32_INT, OPERAND_REG_IMM32_FP, OPERAND_REG_INLINE_C_INT, OPERAND_REG_INLINE_C_FP, } ''' This is not working yet: - Several tests are failing - Problems with predicate methods for inline immediates - LLVM generated assembler parts try to select e64 encoding before e32. More changes are required for several AsmOperands. Reviewers: vpykhtin, tstellarAMD Subscribers: arsenm, kzhuravl, artem.tamazov Differential Revision: https://reviews.llvm.org/D22922 llvm-svn: 281050
2016-09-09 22:44:04 +08:00
assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
// In rare situations, we will have a 32-bit literal in a 64-bit
// operand. This is technically allowed for the encoding of s_mov_b64.
O << formatHex(static_cast<uint64_t>(Imm));
}
}
void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
if (!Imm)
return;
O << " blgp:" << Imm;
}
void AMDGPUInstPrinter::printCBSZ(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
if (!Imm)
return;
O << " cbsz:" << Imm;
}
void AMDGPUInstPrinter::printABID(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
if (!Imm)
return;
O << " abid:" << Imm;
}
void AMDGPUInstPrinter::printDefaultVccOperand(unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
if (OpNo > 0)
O << ", ";
printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
AMDGPU::VCC : AMDGPU::VCC_LO, O, MRI);
if (OpNo == 0)
O << ", ";
}
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
// Print default vcc/vcc_lo operand of VOPC.
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
if (OpNo == 0 && (Desc.TSFlags & SIInstrFlags::VOPC) &&
(Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)))
printDefaultVccOperand(OpNo, STI, O);
if (OpNo >= MI->getNumOperands()) {
O << "/*Missing OP" << OpNo << "*/";
return;
}
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
printRegOperand(Op.getReg(), O, MRI);
} else if (Op.isImm()) {
AMDGPU: Don't use 16-bit FP inline constants in integer operands It seems to be a hardware defect that the half inline constants do not work as expected for the 16-bit integer operations (the inverse does work correctly). Experimentation seems to show these are really reading the 32-bit inline constants, which can be observed by writing inline asm using op_sel to see what's in the high half of the constant. Theoretically we could fold the high halves of the 32-bit constants using op_sel. The *_asm_all.s MC tests are broken, and I don't know where the script to autogenerate these are. I started manually fixing it, but there's just too many cases to fix. This also does break the assembler/disassembler support for these values, and I'm not sure what to do about it. These are still valid encodings, so it seems like you should be able to use them in some way. If you wrote assembly using them, you could have really meant it (perhaps to read the high bits with op_sel?). The disassembler will print the invalid literal constant which will fail to re-assemble. The behavior is also different depending on the use context. Consider this example, which was previously accepted and encoded using the inline constant: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04] In contexts where an inline immediate is required (such as on gfx8/9), this will now be rejected. For gfx10, this will produce the literal encoding and change the printed format: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] This is just another variation of the issue that we don't perfectly handle round trip assembly/disassembly due to not tracking how immediates were encoded. This doesn't matter much in practice, since compilers don't emit the suboptimal encoding. I doubt any users are relying on this behavior (although I did make use of the old behavior to figure out what was wrong). Fixes bug 46302.
2020-06-15 01:09:02 +08:00
const uint8_t OpTy = Desc.OpInfo[OpNo].OperandType;
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
case MCOI::OPERAND_IMMEDIATE:
printImmediate32(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
printImmediate64(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
case AMDGPU::OPERAND_REG_IMM_INT16:
AMDGPU: Don't use 16-bit FP inline constants in integer operands It seems to be a hardware defect that the half inline constants do not work as expected for the 16-bit integer operations (the inverse does work correctly). Experimentation seems to show these are really reading the 32-bit inline constants, which can be observed by writing inline asm using op_sel to see what's in the high half of the constant. Theoretically we could fold the high halves of the 32-bit constants using op_sel. The *_asm_all.s MC tests are broken, and I don't know where the script to autogenerate these are. I started manually fixing it, but there's just too many cases to fix. This also does break the assembler/disassembler support for these values, and I'm not sure what to do about it. These are still valid encodings, so it seems like you should be able to use them in some way. If you wrote assembly using them, you could have really meant it (perhaps to read the high bits with op_sel?). The disassembler will print the invalid literal constant which will fail to re-assemble. The behavior is also different depending on the use context. Consider this example, which was previously accepted and encoded using the inline constant: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04] In contexts where an inline immediate is required (such as on gfx8/9), this will now be rejected. For gfx10, this will produce the literal encoding and change the printed format: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] This is just another variation of the issue that we don't perfectly handle round trip assembly/disassembly due to not tracking how immediates were encoded. This doesn't matter much in practice, since compilers don't emit the suboptimal encoding. I doubt any users are relying on this behavior (although I did make use of the old behavior to figure out what was wrong). Fixes bug 46302.
2020-06-15 01:09:02 +08:00
printImmediateInt16(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
printImmediate16(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
if (!isUInt<16>(Op.getImm()) &&
STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
printImmediate32(Op.getImm(), STI, O);
break;
}
AMDGPU: Don't use 16-bit FP inline constants in integer operands It seems to be a hardware defect that the half inline constants do not work as expected for the 16-bit integer operations (the inverse does work correctly). Experimentation seems to show these are really reading the 32-bit inline constants, which can be observed by writing inline asm using op_sel to see what's in the high half of the constant. Theoretically we could fold the high halves of the 32-bit constants using op_sel. The *_asm_all.s MC tests are broken, and I don't know where the script to autogenerate these are. I started manually fixing it, but there's just too many cases to fix. This also does break the assembler/disassembler support for these values, and I'm not sure what to do about it. These are still valid encodings, so it seems like you should be able to use them in some way. If you wrote assembly using them, you could have really meant it (perhaps to read the high bits with op_sel?). The disassembler will print the invalid literal constant which will fail to re-assemble. The behavior is also different depending on the use context. Consider this example, which was previously accepted and encoded using the inline constant: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04] In contexts where an inline immediate is required (such as on gfx8/9), this will now be rejected. For gfx10, this will produce the literal encoding and change the printed format: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] This is just another variation of the issue that we don't perfectly handle round trip assembly/disassembly due to not tracking how immediates were encoded. This doesn't matter much in practice, since compilers don't emit the suboptimal encoding. I doubt any users are relying on this behavior (although I did make use of the old behavior to figure out what was wrong). Fixes bug 46302.
2020-06-15 01:09:02 +08:00
// Deal with 16-bit FP inline immediates not working.
if (OpTy == AMDGPU::OPERAND_REG_IMM_V2FP16) {
printImmediate16(static_cast<uint16_t>(Op.getImm()), STI, O);
break;
}
LLVM_FALLTHROUGH;
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
AMDGPU: Don't use 16-bit FP inline constants in integer operands It seems to be a hardware defect that the half inline constants do not work as expected for the 16-bit integer operations (the inverse does work correctly). Experimentation seems to show these are really reading the 32-bit inline constants, which can be observed by writing inline asm using op_sel to see what's in the high half of the constant. Theoretically we could fold the high halves of the 32-bit constants using op_sel. The *_asm_all.s MC tests are broken, and I don't know where the script to autogenerate these are. I started manually fixing it, but there's just too many cases to fix. This also does break the assembler/disassembler support for these values, and I'm not sure what to do about it. These are still valid encodings, so it seems like you should be able to use them in some way. If you wrote assembly using them, you could have really meant it (perhaps to read the high bits with op_sel?). The disassembler will print the invalid literal constant which will fail to re-assemble. The behavior is also different depending on the use context. Consider this example, which was previously accepted and encoded using the inline constant: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04] In contexts where an inline immediate is required (such as on gfx8/9), this will now be rejected. For gfx10, this will produce the literal encoding and change the printed format: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] This is just another variation of the issue that we don't perfectly handle round trip assembly/disassembly due to not tracking how immediates were encoded. This doesn't matter much in practice, since compilers don't emit the suboptimal encoding. I doubt any users are relying on this behavior (although I did make use of the old behavior to figure out what was wrong). Fixes bug 46302.
2020-06-15 01:09:02 +08:00
printImmediateInt16(static_cast<uint16_t>(Op.getImm()), STI, O);
break;
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
printImmediateV216(Op.getImm(), STI, O);
break;
case MCOI::OPERAND_UNKNOWN:
case MCOI::OPERAND_PCREL:
O << formatDec(Op.getImm());
break;
case MCOI::OPERAND_REGISTER:
// FIXME: This should be removed and handled somewhere else. Seems to come
// from a disassembler bug.
O << "/*invalid immediate*/";
break;
default:
// We hit this for the immediate instruction bits that don't yet have a
// custom printer.
llvm_unreachable("unexpected immediate operand type");
}
} else if (Op.isDFPImm()) {
double Value = bit_cast<double>(Op.getDFPImm());
// We special case 0.0 because otherwise it will be printed as an integer.
if (Value == 0.0)
O << "0.0";
else {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
int RCID = Desc.OpInfo[OpNo].RegClass;
unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
if (RCBits == 32)
printImmediate32(FloatToBits(Value), STI, O);
else if (RCBits == 64)
printImmediate64(DoubleToBits(Value), STI, O);
else
llvm_unreachable("Invalid register class size");
}
} else if (Op.isExpr()) {
const MCExpr *Exp = Op.getExpr();
Exp->print(O, &MAI);
} else {
O << "/*INV_OP*/";
}
// Print default vcc/vcc_lo operand of v_cndmask_b32_e32.
switch (MI->getOpcode()) {
default: break;
case AMDGPU::V_CNDMASK_B32_e32_gfx10:
case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
case AMDGPU::V_CNDMASK_B32_dpp_gfx10:
case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
case AMDGPU::V_CNDMASK_B32_dpp8_gfx10:
case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
case AMDGPU::V_CNDMASK_B32_e32_vi:
if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src1))
printDefaultVccOperand(OpNo, STI, O);
break;
}
if (Desc.TSFlags & SIInstrFlags::MTBUF) {
int SOffsetIdx =
AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::soffset);
assert(SOffsetIdx != -1);
if ((int)OpNo == SOffsetIdx)
printSymbolicFormat(MI, STI, O);
}
}
void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned InputModifiers = MI->getOperand(OpNo).getImm();
// Use 'neg(...)' instead of '-' to avoid ambiguity.
// This is important for integer literals because
// -1 is not the same value as neg(1).
bool NegMnemo = false;
if (InputModifiers & SISrcMods::NEG) {
if (OpNo + 1 < MI->getNumOperands() &&
(InputModifiers & SISrcMods::ABS) == 0) {
const MCOperand &Op = MI->getOperand(OpNo + 1);
NegMnemo = Op.isImm() || Op.isDFPImm();
}
if (NegMnemo) {
O << "neg(";
} else {
O << '-';
}
}
if (InputModifiers & SISrcMods::ABS)
O << '|';
printOperand(MI, OpNo + 1, STI, O);
if (InputModifiers & SISrcMods::ABS)
O << '|';
if (NegMnemo) {
O << ')';
}
}
void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned InputModifiers = MI->getOperand(OpNo).getImm();
if (InputModifiers & SISrcMods::SEXT)
O << "sext(";
printOperand(MI, OpNo + 1, STI, O);
if (InputModifiers & SISrcMods::SEXT)
O << ')';
// Print default vcc/vcc_lo operand of VOP2b.
switch (MI->getOpcode()) {
default: break;
case AMDGPU::V_CNDMASK_B32_sdwa_gfx10:
case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
if ((int)OpNo + 1 == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src1))
printDefaultVccOperand(OpNo, STI, O);
break;
}
}
void AMDGPUInstPrinter::printDPP8(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
if (!AMDGPU::isGFX10Plus(STI))
llvm_unreachable("dpp8 is not supported on ASICs earlier than GFX10");
unsigned Imm = MI->getOperand(OpNo).getImm();
O << "dpp8:[" << formatDec(Imm & 0x7);
for (size_t i = 1; i < 8; ++i) {
O << ',' << formatDec((Imm >> (3 * i)) & 0x7);
}
O << ']';
}
void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
using namespace AMDGPU::DPP;
unsigned Imm = MI->getOperand(OpNo).getImm();
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src0);
if (Src0Idx >= 0 &&
Desc.OpInfo[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID &&
!AMDGPU::isLegal64BitDPPControl(Imm)) {
O << " /* 64 bit dpp only supports row_newbcast */";
return;
} else if (Imm <= DppCtrl::QUAD_PERM_LAST) {
O << "quad_perm:[";
O << formatDec(Imm & 0x3) << ',';
O << formatDec((Imm & 0xc) >> 2) << ',';
O << formatDec((Imm & 0x30) >> 4) << ',';
O << formatDec((Imm & 0xc0) >> 6) << ']';
} else if ((Imm >= DppCtrl::ROW_SHL_FIRST) &&
(Imm <= DppCtrl::ROW_SHL_LAST)) {
O << "row_shl:";
printU4ImmDecOperand(MI, OpNo, O);
} else if ((Imm >= DppCtrl::ROW_SHR_FIRST) &&
(Imm <= DppCtrl::ROW_SHR_LAST)) {
O << "row_shr:";
printU4ImmDecOperand(MI, OpNo, O);
} else if ((Imm >= DppCtrl::ROW_ROR_FIRST) &&
(Imm <= DppCtrl::ROW_ROR_LAST)) {
O << "row_ror:";
printU4ImmDecOperand(MI, OpNo, O);
} else if (Imm == DppCtrl::WAVE_SHL1) {
if (AMDGPU::isGFX10Plus(STI)) {
O << "/* wave_shl is not supported starting from GFX10 */";
return;
}
O << "wave_shl:1";
} else if (Imm == DppCtrl::WAVE_ROL1) {
if (AMDGPU::isGFX10Plus(STI)) {
O << "/* wave_rol is not supported starting from GFX10 */";
return;
}
O << "wave_rol:1";
} else if (Imm == DppCtrl::WAVE_SHR1) {
if (AMDGPU::isGFX10Plus(STI)) {
O << "/* wave_shr is not supported starting from GFX10 */";
return;
}
O << "wave_shr:1";
} else if (Imm == DppCtrl::WAVE_ROR1) {
if (AMDGPU::isGFX10Plus(STI)) {
O << "/* wave_ror is not supported starting from GFX10 */";
return;
}
O << "wave_ror:1";
} else if (Imm == DppCtrl::ROW_MIRROR) {
O << "row_mirror";
} else if (Imm == DppCtrl::ROW_HALF_MIRROR) {
O << "row_half_mirror";
} else if (Imm == DppCtrl::BCAST15) {
if (AMDGPU::isGFX10Plus(STI)) {
O << "/* row_bcast is not supported starting from GFX10 */";
return;
}
O << "row_bcast:15";
} else if (Imm == DppCtrl::BCAST31) {
if (AMDGPU::isGFX10Plus(STI)) {
O << "/* row_bcast is not supported starting from GFX10 */";
return;
}
O << "row_bcast:31";
} else if ((Imm >= DppCtrl::ROW_SHARE_FIRST) &&
(Imm <= DppCtrl::ROW_SHARE_LAST)) {
if (AMDGPU::isGFX90A(STI)) {
O << "row_newbcast:";
} else if (AMDGPU::isGFX10Plus(STI)) {
O << "row_share:";
} else {
O << " /* row_newbcast/row_share is not supported on ASICs earlier "
"than GFX90A/GFX10 */";
return;
}
printU4ImmDecOperand(MI, OpNo, O);
} else if ((Imm >= DppCtrl::ROW_XMASK_FIRST) &&
(Imm <= DppCtrl::ROW_XMASK_LAST)) {
if (!AMDGPU::isGFX10Plus(STI)) {
O << "/* row_xmask is not supported on ASICs earlier than GFX10 */";
return;
}
O << "row_xmask:";
printU4ImmDecOperand(MI, OpNo, O);
} else {
O << "/* Invalid dpp_ctrl value */";
}
}
void AMDGPUInstPrinter::printRowMask(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
O << " row_mask:";
printU4ImmOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printBankMask(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
O << " bank_mask:";
printU4ImmOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printBoundCtrl(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
if (Imm) {
O << " bound_ctrl:1";
}
}
void AMDGPUInstPrinter::printFI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
using namespace llvm::AMDGPU::DPP;
unsigned Imm = MI->getOperand(OpNo).getImm();
if (Imm == DPP_FI_1 || Imm == DPP8_FI_1) {
O << " fi:1";
}
}
void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
using namespace llvm::AMDGPU::SDWA;
unsigned Imm = MI->getOperand(OpNo).getImm();
switch (Imm) {
case SdwaSel::BYTE_0: O << "BYTE_0"; break;
case SdwaSel::BYTE_1: O << "BYTE_1"; break;
case SdwaSel::BYTE_2: O << "BYTE_2"; break;
case SdwaSel::BYTE_3: O << "BYTE_3"; break;
case SdwaSel::WORD_0: O << "WORD_0"; break;
case SdwaSel::WORD_1: O << "WORD_1"; break;
case SdwaSel::DWORD: O << "DWORD"; break;
default: llvm_unreachable("Invalid SDWA data select operand");
}
}
void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "dst_sel:";
printSDWASel(MI, OpNo, O);
}
void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "src0_sel:";
printSDWASel(MI, OpNo, O);
}
void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "src1_sel:";
printSDWASel(MI, OpNo, O);
}
void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
using namespace llvm::AMDGPU::SDWA;
O << "dst_unused:";
unsigned Imm = MI->getOperand(OpNo).getImm();
switch (Imm) {
case DstUnused::UNUSED_PAD: O << "UNUSED_PAD"; break;
case DstUnused::UNUSED_SEXT: O << "UNUSED_SEXT"; break;
case DstUnused::UNUSED_PRESERVE: O << "UNUSED_PRESERVE"; break;
default: llvm_unreachable("Invalid SDWA dest_unused operand");
}
}
void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O,
unsigned N) {
unsigned Opc = MI->getOpcode();
int EnIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::en);
unsigned En = MI->getOperand(EnIdx).getImm();
int ComprIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::compr);
// If compr is set, print as src0, src0, src1, src1
if (MI->getOperand(ComprIdx).getImm())
OpNo = OpNo - N + N / 2;
if (En & (1 << N))
printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
else
O << "off";
}
void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printExpSrcN(MI, OpNo, STI, O, 0);
}
void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printExpSrcN(MI, OpNo, STI, O, 1);
}
void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printExpSrcN(MI, OpNo, STI, O, 2);
}
void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printExpSrcN(MI, OpNo, STI, O, 3);
}
void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
using namespace llvm::AMDGPU::Exp;
// This is really a 6 bit field.
unsigned Id = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
int Index;
StringRef TgtName;
if (getTgtName(Id, TgtName, Index) && isSupportedTgtId(Id, STI)) {
O << ' ' << TgtName;
if (Index >= 0)
O << Index;
} else {
O << " invalid_target_" << Id;
}
}
static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod,
bool IsPacked, bool HasDstSel) {
int DefaultValue = IsPacked && (Mod == SISrcMods::OP_SEL_1);
for (int I = 0; I < NumOps; ++I) {
if (!!(Ops[I] & Mod) != DefaultValue)
return false;
}
if (HasDstSel && (Ops[0] & SISrcMods::DST_OP_SEL) != 0)
return false;
return true;
}
void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
StringRef Name,
unsigned Mod,
raw_ostream &O) {
unsigned Opc = MI->getOpcode();
int NumOps = 0;
int Ops[3];
for (int OpName : { AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers }) {
int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
if (Idx == -1)
break;
Ops[NumOps++] = MI->getOperand(Idx).getImm();
}
const bool HasDstSel =
NumOps > 0 &&
Mod == SISrcMods::OP_SEL_0 &&
MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3_OPSEL;
const bool IsPacked =
MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsPacked;
if (allOpsDefaultValue(Ops, NumOps, Mod, IsPacked, HasDstSel))
return;
O << Name;
for (int I = 0; I < NumOps; ++I) {
if (I != 0)
O << ',';
O << !!(Ops[I] & Mod);
}
if (HasDstSel) {
O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
}
O << ']';
}
void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Opc = MI->getOpcode();
if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0);
unsigned BC = !!(MI->getOperand(BCN).getImm() & SISrcMods::OP_SEL_0);
if (FI || BC)
O << " op_sel:[" << FI << ',' << BC << ']';
return;
}
printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
}
void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
}
void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
}
void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
}
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
switch (Imm) {
case 0:
O << "p10";
break;
case 1:
O << "p20";
break;
case 2:
O << "p0";
break;
default:
O << "invalid_param_" << Imm;
}
}
void AMDGPUInstPrinter::printInterpAttr(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Attr = MI->getOperand(OpNum).getImm();
O << "attr" << Attr;
}
void AMDGPUInstPrinter::printInterpAttrChan(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Chan = MI->getOperand(OpNum).getImm();
O << '.' << "xyzw"[Chan & 0x3];
}
void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
using namespace llvm::AMDGPU::VGPRIndexMode;
unsigned Val = MI->getOperand(OpNo).getImm();
if ((Val & ~ENABLE_MASK) != 0) {
O << formatHex(static_cast<uint64_t>(Val));
} else {
O << "gpr_idx(";
bool NeedComma = false;
for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
if (Val & (1 << ModeId)) {
if (NeedComma)
O << ',';
O << IdSymbolic[ModeId];
NeedComma = true;
}
}
O << ')';
}
}
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printOperand(MI, OpNo, STI, O);
O << ", ";
printOperand(MI, OpNo + 1, STI, O);
}
void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
raw_ostream &O, StringRef Asm,
StringRef Default) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isImm());
if (Op.getImm() == 1) {
O << Asm;
} else {
O << Default;
}
}
void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
raw_ostream &O, char Asm) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isImm());
if (Op.getImm() == 1)
O << Asm;
}
void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printNamedBit(MI, OpNo, O, "high");
}
void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printNamedBit(MI, OpNo, O, "clamp");
}
void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
int Imm = MI->getOperand(OpNo).getImm();
if (Imm == SIOutMods::MUL2)
O << " mul:2";
else if (Imm == SIOutMods::MUL4)
O << " mul:4";
else if (Imm == SIOutMods::DIV2)
O << " div:2";
}
void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
using namespace llvm::AMDGPU::SendMsg;
const unsigned Imm16 = MI->getOperand(OpNo).getImm();
uint16_t MsgId;
uint16_t OpId;
uint16_t StreamId;
decodeMsg(Imm16, MsgId, OpId, StreamId);
if (isValidMsgId(MsgId, STI) &&
isValidMsgOp(MsgId, OpId, STI) &&
isValidMsgStream(MsgId, OpId, StreamId, STI)) {
O << "sendmsg(" << getMsgName(MsgId);
if (msgRequiresOp(MsgId)) {
O << ", " << getMsgOpName(MsgId, OpId);
if (msgSupportsStream(MsgId, OpId)) {
O << ", " << StreamId;
}
}
O << ')';
} else if (encodeMsg(MsgId, OpId, StreamId) == Imm16) {
O << "sendmsg(" << MsgId << ", " << OpId << ", " << StreamId << ')';
} else {
O << Imm16; // Unknown imm16 code.
}
}
static void printSwizzleBitmask(const uint16_t AndMask,
const uint16_t OrMask,
const uint16_t XorMask,
raw_ostream &O) {
using namespace llvm::AMDGPU::Swizzle;
uint16_t Probe0 = ((0 & AndMask) | OrMask) ^ XorMask;
uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask;
O << "\"";
for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) {
uint16_t p0 = Probe0 & Mask;
uint16_t p1 = Probe1 & Mask;
if (p0 == p1) {
if (p0 == 0) {
O << "0";
} else {
O << "1";
}
} else {
if (p0 == 0) {
O << "p";
} else {
O << "i";
}
}
}
O << "\"";
}
void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
using namespace llvm::AMDGPU::Swizzle;
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm == 0) {
return;
}
O << " offset:";
if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) {
O << "swizzle(" << IdSymbolic[ID_QUAD_PERM];
for (unsigned I = 0; I < LANE_NUM; ++I) {
O << ",";
O << formatDec(Imm & LANE_MASK);
Imm >>= LANE_SHIFT;
}
O << ")";
} else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) {
uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK;
uint16_t OrMask = (Imm >> BITMASK_OR_SHIFT) & BITMASK_MASK;
uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK;
if (AndMask == BITMASK_MAX &&
OrMask == 0 &&
countPopulation(XorMask) == 1) {
O << "swizzle(" << IdSymbolic[ID_SWAP];
O << ",";
O << formatDec(XorMask);
O << ")";
} else if (AndMask == BITMASK_MAX &&
OrMask == 0 && XorMask > 0 &&
isPowerOf2_64(XorMask + 1)) {
O << "swizzle(" << IdSymbolic[ID_REVERSE];
O << ",";
O << formatDec(XorMask + 1);
O << ")";
} else {
uint16_t GroupSize = BITMASK_MAX - AndMask + 1;
if (GroupSize > 1 &&
isPowerOf2_64(GroupSize) &&
OrMask < GroupSize &&
XorMask == 0) {
O << "swizzle(" << IdSymbolic[ID_BROADCAST];
O << ",";
O << formatDec(GroupSize);
O << ",";
O << formatDec(OrMask);
O << ")";
} else {
O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM];
O << ",";
printSwizzleBitmask(AndMask, OrMask, XorMask, O);
O << ")";
}
}
} else {
printU16ImmDecOperand(MI, OpNo, O);
}
}
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
unsigned SImm16 = MI->getOperand(OpNo).getImm();
unsigned Vmcnt, Expcnt, Lgkmcnt;
decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
bool NeedSpace = false;
if (Vmcnt != getVmcntBitMask(ISA)) {
O << "vmcnt(" << Vmcnt << ')';
NeedSpace = true;
}
if (Expcnt != getExpcntBitMask(ISA)) {
if (NeedSpace)
O << ' ';
O << "expcnt(" << Expcnt << ')';
NeedSpace = true;
}
if (Lgkmcnt != getLgkmcntBitMask(ISA)) {
if (NeedSpace)
O << ' ';
O << "lgkmcnt(" << Lgkmcnt << ')';
}
}
void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned Id;
unsigned Offset;
unsigned Width;
using namespace llvm::AMDGPU::Hwreg;
unsigned Val = MI->getOperand(OpNo).getImm();
decodeHwreg(Val, Id, Offset, Width);
StringRef HwRegName = getHwreg(Id, STI);
O << "hwreg(";
if (!HwRegName.empty()) {
O << HwRegName;
} else {
O << Id;
}
if (Width != WIDTH_DEFAULT_ || Offset != OFFSET_DEFAULT_) {
O << ", " << Offset << ", " << Width;
}
O << ')';
}
void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm == 0) {
return;
}
O << ' ' << formatDec(Imm);
}
#include "AMDGPUGenAsmWriter.inc"