llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

5839 lines
183 KiB
C++

//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>
using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;
namespace {
class AMDGPUAsmParser;
// Coarse classification of a parsed register. KernelScopeInfo::usesRegister()
// only accounts for IS_VGPR and IS_SGPR; the other kinds are ignored there.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_TTMP,
  IS_SPECIAL
};
//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//
class AMDGPUOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
Immediate,
Register,
Expression
} Kind;
SMLoc StartLoc, EndLoc;
const AMDGPUAsmParser *AsmParser;
public:
// Builds an operand of the given kind and records the parser that created it.
// Only Kind and AsmParser are set here; the Create* factories of this class
// fill in the kind-specific payload and the source locations afterwards.
AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
: MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
using Ptr = std::unique_ptr<AMDGPUOperand>;
// Input modifiers that may be attached to a source operand. Abs and Neg form
// the floating-point group, Sext is the integer modifier; the two groups are
// not expected to be set at the same time (see getModifiersOperand()).
struct Modifiers {
  bool Abs = false;
  bool Neg = false;
  bool Sext = false;

  bool hasFPModifiers() const { return Abs || Neg; }
  bool hasIntModifiers() const { return Sext; }
  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

  // Packs Abs/Neg into their SISrcMods bits.
  int64_t getFPModifiersOperand() const {
    const int64_t AbsBit = Abs ? SISrcMods::ABS : 0u;
    const int64_t NegBit = Neg ? SISrcMods::NEG : 0u;
    return AbsBit | NegBit;
  }

  // Packs Sext into its SISrcMods bit.
  int64_t getIntModifiersOperand() const {
    return Sext ? SISrcMods::SEXT : 0u;
  }

  // Returns the encoding of whichever modifier group is in use, or 0 when no
  // modifier is set.
  int64_t getModifiersOperand() const {
    assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
    if (hasFPModifiers())
      return getFPModifiersOperand();
    if (hasIntModifiers())
      return getIntModifiersOperand();
    return 0;
  }

  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
};
// Tag stored in ImmOp::Type that says which kind of immediate an operand is.
// ImmTyNone marks a plain immediate value (the only kind that may carry
// Modifiers); every other enumerator makes isImmModifier() return true.
// printImmTy() provides a printable name for each enumerator.
enum ImmTy {
ImmTyNone,
ImmTyGDS,
ImmTyLDS,
ImmTyOffen,
ImmTyIdxen,
ImmTyAddr64,
ImmTyOffset,
ImmTyInstOffset,
ImmTyOffset0,
ImmTyOffset1,
ImmTyGLC,
ImmTySLC,
ImmTyTFE,
ImmTyD16,
ImmTyClampSI,
ImmTyOModSI,
ImmTyDppCtrl,
ImmTyDppRowMask,
ImmTyDppBankMask,
ImmTyDppBoundCtrl,
ImmTySdwaDstSel,
ImmTySdwaSrc0Sel,
ImmTySdwaSrc1Sel,
ImmTySdwaDstUnused,
ImmTyDMask,
ImmTyUNorm,
ImmTyDA,
ImmTyR128A16,
ImmTyLWE,
ImmTyExpTgt,
ImmTyExpCompr,
ImmTyExpVM,
ImmTyFORMAT,
ImmTyHwreg,
ImmTyOff,
ImmTySendMsg,
ImmTyInterpSlot,
ImmTyInterpAttr,
ImmTyAttrChan,
ImmTyOpSel,
ImmTyOpSelHi,
ImmTyNegLo,
ImmTyNegHi,
ImmTySwizzle,
ImmTyGprIdxMode,
ImmTyEndpgm,
ImmTyHigh
};
// Payload of a Token operand: a non-owning (pointer, length) view of the
// token text, as set up by CreateToken() from a StringRef.
struct TokOp {
const char *Data;
unsigned Length;
};
// Payload of an Immediate operand.
struct ImmOp {
// The immediate value.
int64_t Val;
// Which kind of immediate this is; ImmTyNone for a plain value.
ImmTy Type;
// Copied from the IsFPImm argument of CreateImm().
bool IsFPImm;
// Input modifiers; accessed through getModifiers()/setModifiers(), which
// assert Type == ImmTyNone for immediates.
Modifiers Mods;
};
// Payload of a Register operand.
struct RegOp {
// The register number returned by getReg().
unsigned RegNo;
// Copied from the ForceVOP3 argument of CreateReg().
bool IsForcedVOP3;
// Input modifiers attached to the register.
Modifiers Mods;
};
// Kind-specific payload. Which member is meaningful is determined by Kind:
// Token -> Tok, Immediate -> Imm, Register -> Reg, Expression -> Expr.
union {
TokOp Tok;
ImmOp Imm;
RegOp Reg;
const MCExpr *Expr;
};
// Reports whether this operand can be consumed as a token.
bool isToken() const override {
  if (Kind == Token)
    return true;
  // When parsing operands, we can't always tell whether something was meant
  // to be a token, like 'gds', or an expression that references a global
  // variable. Such input is parsed as an expression; if it later has to be
  // interpreted as a token, the symbol name is used as the token text. Hence
  // a non-null symbol-reference expression also counts as a token.
  return Kind == Expression && Expr && isa<MCSymbolRefExpr>(Expr);
}
// Kind tests for immediates and registers.
bool isImm() const override { return Kind == Immediate; }
bool isInlinableImm(MVT type) const;
bool isLiteralImm(MVT type) const;
bool isRegKind() const { return Kind == Register; }

// A register operand counts as a plain register only when it carries no
// input modifiers.
bool isReg() const override {
  if (!isRegKind())
    return false;
  return !hasModifiers();
}
// True when the operand is a register of class RCID or an immediate that is
// inlinable for the given type. No check on modifiers is made here (compare
// isRegOrInlineNoMods()).
bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
  if (isRegClass(RCID))
    return true;
  return isInlinableImm(type);
}

// Integer flavours: VS_32 for 16/32-bit values, VS_64 for 64-bit values.
bool isRegOrImmWithInt16InputMods() const {
  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
}
bool isRegOrImmWithInt32InputMods() const {
  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
bool isRegOrImmWithInt64InputMods() const {
  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
}

// Floating-point flavours, with the same register classes as above.
bool isRegOrImmWithFP16InputMods() const {
  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
}
bool isRegOrImmWithFP32InputMods() const {
  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
}
bool isRegOrImmWithFP64InputMods() const {
  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
}
// True when the operand belongs to any of the listed VGPR register classes
// (32 to 512 bits wide). Classes are probed in the order given.
bool isVReg() const {
  static const unsigned VRegClassIDs[] = {
    AMDGPU::VGPR_32RegClassID,
    AMDGPU::VReg_64RegClassID,
    AMDGPU::VReg_96RegClassID,
    AMDGPU::VReg_128RegClassID,
    AMDGPU::VReg_256RegClassID,
    AMDGPU::VReg_512RegClassID
  };
  for (unsigned RCID : VRegClassIDs) {
    if (isRegClass(RCID))
      return true;
  }
  return false;
}

// True for a single 32-bit VGPR.
bool isVReg32() const { return isRegClass(AMDGPU::VGPR_32RegClassID); }

// True for the 'off' immediate or a single 32-bit VGPR.
bool isVReg32OrOff() const {
  if (isOff())
    return true;
  return isVReg32();
}
// SDWA operand predicates; only declared here.
bool isSDWAOperand(MVT type) const;
bool isSDWAFP16Operand() const;
bool isSDWAFP32Operand() const;
bool isSDWAInt16Operand() const;
bool isSDWAInt32Operand() const;

// True when this is an immediate tagged with exactly the given ImmTy.
bool isImmTy(ImmTy ImmT) const {
  if (!isImm())
    return false;
  return Imm.Type == ImmT;
}

// True when this is an immediate with any tag other than ImmTyNone.
bool isImmModifier() const {
  if (!isImm())
    return false;
  return Imm.Type != ImmTyNone;
}
// Predicates for tagged immediates: each one is true only when this operand
// is an immediate whose ImmTy equals the tag named in the predicate.
bool isClampSI() const { return isImmTy(ImmTyClampSI); }
bool isOModSI() const { return isImmTy(ImmTyOModSI); }
bool isDMask() const { return isImmTy(ImmTyDMask); }
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
bool isDA() const { return isImmTy(ImmTyDA); }
bool isR128A16() const { return isImmTy(ImmTyR128A16); }
bool isLWE() const { return isImmTy(ImmTyLWE); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
bool isExpVM() const { return isImmTy(ImmTyExpVM); }
bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
bool isOffen() const { return isImmTy(ImmTyOffen); }
bool isIdxen() const { return isImmTy(ImmTyIdxen); }
bool isAddr64() const { return isImmTy(ImmTyAddr64); }
// In addition to the tag, the value must fit in 16 unsigned bits.
bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
// offset0 is one of the two split DS offsets. Like offset1 it is an 8-bit
// field, so only values in [0, 255] are accepted; a 16-bit check here would
// admit values that do not fit the field.
bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
// In addition to the tag, the value must fit in 8 unsigned bits.
bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
// These two accept either the Offset or the InstOffset tag; U12 requires an
// unsigned 12-bit value, S13 a signed 13-bit value.
bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
// Tag-only predicates: true when the immediate carries the named ImmTy.
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isLDS() const { return isImmTy(ImmTyLDS); }
bool isGLC() const { return isImmTy(ImmTyGLC); }
bool isSLC() const { return isImmTy(ImmTySLC); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isD16() const { return isImmTy(ImmTyD16); }
// In addition to the tag, the value must fit in 8 unsigned bits.
bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
bool isOpSel() const { return isImmTy(ImmTyOpSel); }
bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
bool isNegLo() const { return isImmTy(ImmTyNegLo); }
bool isNegHi() const { return isImmTy(ImmTyNegHi); }
bool isHigh() const { return isImmTy(ImmTyHigh); }
// True for a clamp or an omod immediate.
bool isMod() const { return isClampSI() || isOModSI(); }

// True for a modifier-free register or any immediate.
bool isRegOrImm() const { return isReg() || isImm(); }

bool isRegClass(unsigned RCID) const;

// True when the operand is a register of class RCID or an immediate that is
// inlinable for the given type, and in either case carries no modifiers.
bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
  const bool KindMatches = isRegClass(RCID) || isInlinableImm(type);
  return KindMatches && !hasModifiers();
}
// SCSrc* predicates: a modifier-free SGPR of the matching width (SReg_32 for
// 16/32-bit types, SReg_64 for 64-bit types) or an inline constant of the
// named type.
bool isSCSrcB16() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
}
// The packed v2 variant simply forwards to the scalar 16-bit check.
bool isSCSrcV2B16() const { return isSCSrcB16(); }
bool isSCSrcB32() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
}
bool isSCSrcB64() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
}
bool isSCSrcF16() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
}
// The packed v2 variant simply forwards to the scalar 16-bit check.
bool isSCSrcV2F16() const { return isSCSrcF16(); }
bool isSCSrcF32() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
}
bool isSCSrcF64() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
}
// SSrc* predicates: everything the corresponding SCSrc* check accepts, plus a
// literal immediate of the named type. The 32-bit variants also accept an
// expression operand.
bool isSSrcB32() const {
  if (isSCSrcB32())
    return true;
  return isLiteralImm(MVT::i32) || isExpr();
}
bool isSSrcB16() const {
  if (isSCSrcB16())
    return true;
  return isLiteralImm(MVT::i16);
}
// Marked unreachable: this predicate is not expected to be invoked.
bool isSSrcV2B16() const {
  llvm_unreachable("cannot happen");
  return isSSrcB16();
}
bool isSSrcB64() const {
  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
  // See isVSrc64().
  if (isSCSrcB64())
    return true;
  return isLiteralImm(MVT::i64);
}
// Note that the FP variants reuse the integer SCSrcB* register/inline check.
bool isSSrcF32() const {
  if (isSCSrcB32())
    return true;
  return isLiteralImm(MVT::f32) || isExpr();
}
bool isSSrcF64() const {
  if (isSCSrcB64())
    return true;
  return isLiteralImm(MVT::f64);
}
bool isSSrcF16() const {
  if (isSCSrcB16())
    return true;
  return isLiteralImm(MVT::f16);
}
// Marked unreachable: this predicate is not expected to be invoked.
bool isSSrcV2F16() const {
  llvm_unreachable("cannot happen");
  return isSSrcF16();
}
// Same as isSSrcB32() but checked against the SRegOrLds_32 register class.
bool isSSrcOrLdsB32() const {
  if (isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32))
    return true;
  return isLiteralImm(MVT::i32) || isExpr();
}
// VCSrc* predicates: a modifier-free register of class VS_32 (16/32-bit
// types) or VS_64 (64-bit types), or an inline constant of the named type.
bool isVCSrcB32() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
bool isVCSrcB64() const {
  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
}
bool isVCSrcB16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
}
// The packed v2 variant simply forwards to the scalar 16-bit check.
bool isVCSrcV2B16() const { return isVCSrcB16(); }
bool isVCSrcF32() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
}
bool isVCSrcF64() const {
  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
}
bool isVCSrcF16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
}
// The packed v2 variant simply forwards to the scalar 16-bit check.
bool isVCSrcV2F16() const { return isVCSrcF16(); }
// VSrc* predicates: everything the VCSrcF* check of the same width accepts,
// plus a literal immediate of the named type. The 32-bit variants also accept
// an expression operand. Note that the integer variants reuse the VCSrcF*
// register/inline check.
bool isVSrcB32() const {
  if (isVCSrcF32())
    return true;
  return isLiteralImm(MVT::i32) || isExpr();
}
bool isVSrcB64() const {
  if (isVCSrcF64())
    return true;
  return isLiteralImm(MVT::i64);
}
bool isVSrcB16() const {
  if (isVCSrcF16())
    return true;
  return isLiteralImm(MVT::i16);
}
// Marked unreachable: this predicate is not expected to be invoked.
bool isVSrcV2B16() const {
  llvm_unreachable("cannot happen");
  return isVSrcB16();
}
bool isVSrcF32() const {
  if (isVCSrcF32())
    return true;
  return isLiteralImm(MVT::f32) || isExpr();
}
bool isVSrcF64() const {
  if (isVCSrcF64())
    return true;
  return isLiteralImm(MVT::f64);
}
bool isVSrcF16() const {
  if (isVCSrcF16())
    return true;
  return isLiteralImm(MVT::f16);
}
// Marked unreachable: this predicate is not expected to be invoked.
bool isVSrcV2F16() const {
  llvm_unreachable("cannot happen");
  return isVSrcF16();
}
// KImm operands are literal immediates of the given floating-point type.
bool isKImmFP32() const { return isLiteralImm(MVT::f32); }
bool isKImmFP16() const { return isLiteralImm(MVT::f16); }

// This operand class never represents a memory operand.
bool isMem() const override { return false; }

bool isExpr() const { return Kind == Expression; }

// A SOPP branch target may be written as an expression or as an immediate.
bool isSoppBrTarget() const { return isExpr() || isImm(); }
// Further operand-class predicates. They are only declared here; their
// definitions are not part of this class body.
bool isSWaitCnt() const;
bool isHwreg() const;
bool isSendMsg() const;
bool isSwizzle() const;
bool isSMRDOffset8() const;
bool isSMRDOffset20() const;
bool isSMRDLiteralOffset() const;
bool isDPPCtrl() const;
bool isGPRIdxMode() const;
bool isS16Imm() const;
bool isU16Imm() const;
bool isEndpgm() const;
// Returns the name of the symbol referenced by an expression operand. The
// expression must be an MCSymbolRefExpr (enforced by cast<>).
StringRef getExpressionAsToken() const {
  assert(isExpr());
  return cast<MCSymbolRefExpr>(Expr)->getSymbol().getName();
}

// Returns the token text: the stored (data, length) pair for a real token, or
// the referenced symbol's name for an expression that is treated as a token.
StringRef getToken() const {
  assert(isToken());
  return Kind == Expression ? getExpressionAsToken()
                            : StringRef(Tok.Data, Tok.Length);
}
// Value of an immediate operand; asserts the operand is an immediate.
int64_t getImm() const {
  assert(isImm());
  const int64_t Value = Imm.Val;
  return Value;
}

// ImmTy tag of an immediate operand; asserts the operand is an immediate.
ImmTy getImmTy() const {
  assert(isImm());
  const ImmTy Tag = Imm.Type;
  return Tag;
}
unsigned getReg() const override {
return Reg.RegNo;
}
// Source-location accessors for the text this operand was parsed from.
SMLoc getStartLoc() const override { return StartLoc; }
SMLoc getEndLoc() const override { return EndLoc; }

// The [StartLoc, EndLoc] pair as a single range.
SMRange getLocRange() const {
  SMRange Range(StartLoc, EndLoc);
  return Range;
}
Modifiers getModifiers() const {
assert(isRegKind() || isImmTy(ImmTyNone));
return isRegKind() ? Reg.Mods : Imm.Mods;
}
void setModifiers(Modifiers Mods) {
assert(isRegKind() || isImmTy(ImmTyNone));
if (isRegKind())
Reg.Mods = Mods;
else
Imm.Mods = Mods;
}
bool hasModifiers() const {
return getModifiers().hasModifiers();
}
bool hasFPModifiers() const {
return getModifiers().hasFPModifiers();
}
bool hasIntModifiers() const {
return getModifiers().hasIntModifiers();
}
// Helpers that append this operand to an MCInst; those without a body are
// only declared here.
uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

template <unsigned Bitwidth>
void addKImmFPOperands(MCInst &Inst, unsigned N) const;

// Fixed-width front ends for addKImmFPOperands<Bitwidth>().
void addKImmFP16Operands(MCInst &Inst, unsigned N) const { addKImmFPOperands<16>(Inst, N); }
void addKImmFP32Operands(MCInst &Inst, unsigned N) const { addKImmFPOperands<32>(Inst, N); }

void addRegOperands(MCInst &Inst, unsigned N) const;
// Appends this operand to Inst as a register, an expression or an immediate,
// depending on its kind (tested in that order).
void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
  if (isRegKind()) {
    addRegOperands(Inst, N);
    return;
  }
  if (isExpr()) {
    Inst.addOperand(MCOperand::createExpr(Expr));
    return;
  }
  addImmOperands(Inst, N);
}
// Appends two MCInst operands: first the encoded input modifiers as an
// immediate, then the register or immediate itself. The immediate is added
// with ApplyModifiers == false.
void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
  const int64_t ModBits = getModifiers().getModifiersOperand();
  Inst.addOperand(MCOperand::createImm(ModBits));
  if (isRegKind()) {
    addRegOperands(Inst, N);
    return;
  }
  addImmOperands(Inst, N, false);
}

// As above; the operand must not carry integer modifiers.
void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasIntModifiers());
  addRegOrImmWithInputModsOperands(Inst, N);
}

// As above; the operand must not carry floating-point modifiers.
void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasFPModifiers());
  addRegOrImmWithInputModsOperands(Inst, N);
}

// Register-only variant: appends the encoded input modifiers followed by the
// register. The operand must be a register.
void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
  const int64_t ModBits = getModifiers().getModifiersOperand();
  Inst.addOperand(MCOperand::createImm(ModBits));
  assert(isRegKind());
  addRegOperands(Inst, N);
}

// As above; the operand must not carry integer modifiers.
void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasIntModifiers());
  addRegWithInputModsOperands(Inst, N);
}

// As above; the operand must not carry floating-point modifiers.
void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasFPModifiers());
  addRegWithInputModsOperands(Inst, N);
}
// Appends a SOPP branch target, which is either an immediate or an
// expression (see isSoppBrTarget()).
void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
  if (isImm()) {
    addImmOperands(Inst, N);
    return;
  }
  assert(isExpr());
  Inst.addOperand(MCOperand::createExpr(Expr));
}
// Writes the printable name of an ImmTy (the enumerator name without its
// "ImmTy" prefix) to OS. Nothing is written for a value outside the enum.
static void printImmTy(raw_ostream& OS, ImmTy Type) {
  const char *Name = "";
  switch (Type) {
  case ImmTyNone: Name = "None"; break;
  case ImmTyGDS: Name = "GDS"; break;
  case ImmTyLDS: Name = "LDS"; break;
  case ImmTyOffen: Name = "Offen"; break;
  case ImmTyIdxen: Name = "Idxen"; break;
  case ImmTyAddr64: Name = "Addr64"; break;
  case ImmTyOffset: Name = "Offset"; break;
  case ImmTyInstOffset: Name = "InstOffset"; break;
  case ImmTyOffset0: Name = "Offset0"; break;
  case ImmTyOffset1: Name = "Offset1"; break;
  case ImmTyGLC: Name = "GLC"; break;
  case ImmTySLC: Name = "SLC"; break;
  case ImmTyTFE: Name = "TFE"; break;
  case ImmTyD16: Name = "D16"; break;
  case ImmTyFORMAT: Name = "FORMAT"; break;
  case ImmTyClampSI: Name = "ClampSI"; break;
  case ImmTyOModSI: Name = "OModSI"; break;
  case ImmTyDppCtrl: Name = "DppCtrl"; break;
  case ImmTyDppRowMask: Name = "DppRowMask"; break;
  case ImmTyDppBankMask: Name = "DppBankMask"; break;
  case ImmTyDppBoundCtrl: Name = "DppBoundCtrl"; break;
  case ImmTySdwaDstSel: Name = "SdwaDstSel"; break;
  case ImmTySdwaSrc0Sel: Name = "SdwaSrc0Sel"; break;
  case ImmTySdwaSrc1Sel: Name = "SdwaSrc1Sel"; break;
  case ImmTySdwaDstUnused: Name = "SdwaDstUnused"; break;
  case ImmTyDMask: Name = "DMask"; break;
  case ImmTyUNorm: Name = "UNorm"; break;
  case ImmTyDA: Name = "DA"; break;
  case ImmTyR128A16: Name = "R128A16"; break;
  case ImmTyLWE: Name = "LWE"; break;
  case ImmTyOff: Name = "Off"; break;
  case ImmTyExpTgt: Name = "ExpTgt"; break;
  case ImmTyExpCompr: Name = "ExpCompr"; break;
  case ImmTyExpVM: Name = "ExpVM"; break;
  case ImmTyHwreg: Name = "Hwreg"; break;
  case ImmTySendMsg: Name = "SendMsg"; break;
  case ImmTyInterpSlot: Name = "InterpSlot"; break;
  case ImmTyInterpAttr: Name = "InterpAttr"; break;
  case ImmTyAttrChan: Name = "AttrChan"; break;
  case ImmTyOpSel: Name = "OpSel"; break;
  case ImmTyOpSelHi: Name = "OpSelHi"; break;
  case ImmTyNegLo: Name = "NegLo"; break;
  case ImmTyNegHi: Name = "NegHi"; break;
  case ImmTySwizzle: Name = "Swizzle"; break;
  case ImmTyGprIdxMode: Name = "GprIdxMode"; break;
  case ImmTyHigh: Name = "High"; break;
  case ImmTyEndpgm: Name = "Endpgm"; break;
  }
  OS << Name;
}
// Writes a debug representation of the operand to OS; the format depends on
// the operand kind.
void print(raw_ostream &OS) const override {
  switch (Kind) {
  case Register:
    OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
    return;
  case Immediate: {
    OS << '<' << getImm();
    // The type is only printed for tagged immediates.
    const ImmTy Tag = getImmTy();
    if (Tag != ImmTyNone) {
      OS << " type: ";
      printImmTy(OS, Tag);
    }
    OS << " mods: " << Imm.Mods << '>';
    return;
  }
  case Token:
    OS << '\'' << getToken() << '\'';
    return;
  case Expression:
    OS << "<expr " << *Expr << '>';
    return;
  }
}
// Creates an Immediate operand holding Val with the given tag and FP flag.
// Modifiers start out cleared and both locations are set to Loc.
static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                    int64_t Val, SMLoc Loc,
                                    ImmTy Type = ImmTyNone,
                                    bool IsFPImm = false) {
  auto Result = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
  ImmOp &Payload = Result->Imm;
  Payload.Val = Val;
  Payload.IsFPImm = IsFPImm;
  Payload.Type = Type;
  Payload.Mods = Modifiers();
  Result->StartLoc = Result->EndLoc = Loc;
  return Result;
}
// Creates a Token operand that refers to (does not copy) the characters of
// Str; both locations are set to Loc. HasExplicitEncodingSize is accepted but
// not used by this function.
static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                      StringRef Str, SMLoc Loc,
                                      bool HasExplicitEncodingSize = true) {
  auto Result = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
  TokOp &Payload = Result->Tok;
  Payload.Data = Str.data();
  Payload.Length = Str.size();
  Result->StartLoc = Result->EndLoc = Loc;
  return Result;
}
// Creates a Register operand for RegNo spanning source range [S, E].
// Modifiers start out cleared; ForceVOP3 is recorded in the payload.
static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                    unsigned RegNo, SMLoc S,
                                    SMLoc E,
                                    bool ForceVOP3) {
  auto Result = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
  RegOp &Payload = Result->Reg;
  Payload.RegNo = RegNo;
  Payload.Mods = Modifiers();
  Payload.IsForcedVOP3 = ForceVOP3;
  Result->StartLoc = S;
  Result->EndLoc = E;
  return Result;
}
// Creates an Expression operand wrapping Expr; both locations are set to S.
static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                     const class MCExpr *Expr, SMLoc S) {
  auto Result = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
  Result->Expr = Expr;
  Result->StartLoc = Result->EndLoc = S;
  return Result;
}
};
// Prints the three modifier flags of Mods to OS and returns OS.
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  return OS << "abs:" << Mods.Abs
            << " neg: " << Mods.Neg
            << " sext:" << Mods.Sext;
}
//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//
// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  // Smallest SGPR/VGPR index not yet seen in use, i.e. highest used index
  // plus one. -1 until initialize() has run.
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  // Context used to publish the counts as symbols; null until initialize().
  MCContext *Ctx = nullptr;

  // Records a use of SGPR number Index. When this raises the running count
  // and a context is available, ".kernel.sgpr_count" is updated to match.
  void usesSgprAt(int Index) {
    if (Index < SgprIndexUnusedMin)
      return;
    SgprIndexUnusedMin = Index + 1;
    if (!Ctx)
      return;
    MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
    Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
  }

  // Records a use of VGPR number Index. When this raises the running count
  // and a context is available, ".kernel.vgpr_count" is updated to match.
  void usesVgprAt(int Index) {
    if (Index < VgprIndexUnusedMin)
      return;
    VgprIndexUnusedMin = Index + 1;
    if (!Ctx)
      return;
    MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
    Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
  }

public:
  KernelScopeInfo() = default;

  // Binds the scope to Context and resets both counters. Each counter is set
  // to -1 and then "used" at -1, which leaves it at 0 and defines the
  // corresponding count symbol with value 0.
  void initialize(MCContext &Context) {
    Ctx = &Context;
    SgprIndexUnusedMin = -1;
    usesSgprAt(-1);
    VgprIndexUnusedMin = -1;
    usesVgprAt(-1);
  }

  // Records a use of RegWidth consecutive dwords starting at DwordRegIndex.
  // Only SGPRs and VGPRs are tracked; other register kinds are ignored.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    // Index of the last dword covered by the register.
    const unsigned LastDword = DwordRegIndex + RegWidth - 1;
    if (RegKind == IS_SGPR)
      usesSgprAt(LastDword);
    else if (RegKind == IS_VGPR)
      usesVgprAt(LastDword);
  }
};
class AMDGPUAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
// Number of extra operands parsed after the first optional operand.
// This may be necessary to skip hardcoded mandatory operands.
static const unsigned MAX_OPR_LOOKAHEAD = 8;
unsigned ForcedEncodingSize = 0;
bool ForcedDPP = false;
bool ForcedSDWA = false;
KernelScopeInfo KernelScope;
/// @name Auto-generated Match Functions
/// {
#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
/// }
private:
bool ParseAsAbsoluteExpression(uint32_t &Ret);
bool OutOfRangeError(SMRange Range);
/// Calculate VGPR/SGPR blocks required for given target, reserved
/// registers, and user-specified NextFreeXGPR values.
///
/// \param Features [in] Target features, used for bug corrections.
/// \param VCCUsed [in] Whether VCC special SGPR is reserved.
/// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
/// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
/// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
/// \param VGPRRange [in] Token range, used for VGPR diagnostics.
/// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
/// \param SGPRRange [in] Token range, used for SGPR diagnostics.
/// \param VGPRBlocks [out] Result VGPR block count.
/// \param SGPRBlocks [out] Result SGPR block count.
bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed,
unsigned NextFreeVGPR, SMRange VGPRRange,
unsigned NextFreeSGPR, SMRange SGPRRange,
unsigned &VGPRBlocks, unsigned &SGPRBlocks);
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSAKernel();
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
bool ParseDirectiveAMDGPUHsaKernel();
bool ParseDirectiveISAVersion();
bool ParseDirectiveHSAMetadata();
bool ParseDirectivePALMetadata();
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
RegisterKind RegKind, unsigned Reg1,
unsigned RegNum);
bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
unsigned& RegNum, unsigned& RegWidth,
unsigned *DwordRegIndex);
Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
void initializeGprCountSymbol(RegisterKind RegKind);
bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
unsigned RegWidth);
void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
bool IsGdsHardcoded);
public:
// Target-specific match result codes; numbering starts at
// FIRST_TARGET_MATCH_RESULT_TY.
enum AMDGPUMatchResultTy {
Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
};
// Maps an immediate tag (ImmTy) to an unsigned index.
using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
MCAsmParserExtension::Initialize(Parser);
if (getFeatureBits().none()) {
// Set default features.
copySTI().ToggleFeature("SOUTHERN_ISLANDS");
}
setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
{
// TODO: make those pre-defined variables read-only.
// Currently there is none suitable machinery in the core llvm-mc for this.
// MCSymbol::isRedefinable is intended for another purpose, and
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
} else {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
initializeGprCountSymbol(IS_VGPR);
initializeGprCountSymbol(IS_SGPR);
} else
KernelScope.initialize(getContext());
}
}
bool hasXNACK() const {
return AMDGPU::hasXNACK(getSTI());
}
bool hasMIMG_R128() const {
return AMDGPU::hasMIMG_R128(getSTI());
}
bool hasPackedD16() const {
return AMDGPU::hasPackedD16(getSTI());
}
bool isSI() const {
return AMDGPU::isSI(getSTI());
}
bool isCI() const {
return AMDGPU::isCI(getSTI());
}
bool isVI() const {
return AMDGPU::isVI(getSTI());
}
bool isGFX9() const {
return AMDGPU::isGFX9(getSTI());
}
bool hasInv2PiInlineImm() const {
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
bool hasFlatOffsets() const {
return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
}
bool hasSGPR102_SGPR103() const {
return !isVI();
}
bool hasIntClamp() const {
return getFeatureBits()[AMDGPU::FeatureIntClamp];
}
// Returns the streamer's target streamer viewed as an AMDGPUTargetStreamer.
// The pointer is dereferenced without a null check.
AMDGPUTargetStreamer &getTargetStreamer() {
  MCTargetStreamer *TS = getParser().getStreamer().getTargetStreamer();
  return static_cast<AMDGPUTargetStreamer &>(*TS);
}

const MCRegisterInfo *getMRI() const {
  // We need this const_cast because for some reason getContext() is not const
  // in MCAsmParser.
  auto *Self = const_cast<AMDGPUAsmParser*>(this);
  return Self->getContext().getRegisterInfo();
}

const MCInstrInfo *getMII() const { return &MII; }

const FeatureBitset &getFeatureBits() const {
  const MCSubtargetInfo &SubtargetInfo = getSTI();
  return SubtargetInfo.getFeatureBits();
}
// Accessors for the forced-encoding state (ForcedEncodingSize, ForcedDPP,
// ForcedSDWA). All three start out cleared (0 / false).
void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
// VOP3 is considered forced exactly when the forced encoding size is 64.
bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
bool isForcedDPP() const { return ForcedDPP; }
bool isForcedSDWA() const { return ForcedSDWA; }
ArrayRef<unsigned> getMatchedVariants() const;
std::unique_ptr<AMDGPUOperand> parseRegister();
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
bool ParseDirective(AsmToken DirectiveID) override;
OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
StringRef parseMnemonicSuffix(StringRef Name);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
//bool ProcessInstruction(MCInst &Inst);
OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
OperandMatchResultTy
parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
bool (*ConvertResult)(int64_t &) = nullptr);
OperandMatchResultTy parseOperandArrayWithPrefix(
const char *Prefix,
OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
bool (*ConvertResult)(int64_t&) = nullptr);
OperandMatchResultTy
parseNamedBit(const char *Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
StringRef &Value);
bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
OperandMatchResultTy parseReg(OperandVector &Operands);
OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
// DS conversion entry points; they differ only in the IsGdsHardcoded flag
// handed to cvtDSImpl().
void cvtDS(MCInst &Inst, const OperandVector &Operands) {
  cvtDSImpl(Inst, Operands, /*IsGdsHardcoded=*/false);
}
void cvtDSGds(MCInst &Inst, const OperandVector &Operands) {
  cvtDSImpl(Inst, Operands, /*IsGdsHardcoded=*/true);
}
void cvtExp(MCInst &Inst, const OperandVector &Operands);
bool parseCnt(int64_t &IntVal);
OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
OperandMatchResultTy parseHwreg(OperandVector &Operands);
private:
// Small record used as an in/out argument by parseSendMsgConstruct() and
// parseHwregConstruct(): a numeric Id plus a flag, initially false, named
// IsSymbolic.
struct OperandInfoTy {
int64_t Id;
bool IsSymbolic = false;
// Note: not explicit, so an int64_t converts implicitly to OperandInfoTy.
OperandInfoTy(int64_t Id_) : Id(Id_) {}
};
bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
void errorExpTgt();
OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
bool validateSOPLiteral(const MCInst &Inst) const;
bool validateConstantBusLimitations(const MCInst &Inst);
bool validateEarlyClobberLimitations(const MCInst &Inst);
bool validateIntClampSupported(const MCInst &Inst);
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
bool validateLdsDirect(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
bool trySkipId(const StringRef Id);
bool trySkipToken(const AsmToken::TokenKind Kind);
bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
bool parseExpr(int64_t &Imm);
public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
OperandMatchResultTy parseExpTgt(OperandVector &Operands);
OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
const unsigned MinVal,
const unsigned MaxVal,
const StringRef ErrMsg);
OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
bool parseSwizzleOffset(int64_t &Imm);
bool parseSwizzleMacro(int64_t &Imm);
bool parseSwizzleQuadPerm(int64_t &Imm);
bool parseSwizzleBitmaskPerm(int64_t &Imm);
bool parseSwizzleBroadcast(int64_t &Imm);
bool parseSwizzleSwap(int64_t &Imm);
bool parseSwizzleReverse(int64_t &Imm);
OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
int64_t parseGPRIdxMacro();
// MUBUF instruction converters. Each one forwards to cvtMubufImpl with a fixed
// combination of boolean flags.
// NOTE(review): the flag meanings are defined by cvtMubufImpl, which is outside
// this chunk - presumably (IsAtomic, IsAtomicReturn, IsLds); confirm.
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
// Passes (true, false).
void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
// Passes (true, true).
void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
// Passes (false, false) plus an explicit trailing true.
void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
AMDGPUOperand::Ptr defaultGLC() const;
AMDGPUOperand::Ptr defaultSLC() const;
AMDGPUOperand::Ptr defaultSMRDOffset8() const;
AMDGPUOperand::Ptr defaultSMRDOffset20() const;
AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
AMDGPUOperand::Ptr defaultOffsetU12() const;
AMDGPUOperand::Ptr defaultOffsetS13() const;
OperandMatchResultTy parseOModOperand(OperandVector &Operands);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
bool IsAtomic = false);
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
AMDGPUOperand::Ptr defaultRowMask() const;
AMDGPUOperand::Ptr defaultBankMask() const;
AMDGPUOperand::Ptr defaultBoundCtrl() const;
void cvtDPP(MCInst &Inst, const OperandVector &Operands);
OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
AMDGPUOperand::ImmTy Type);
OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
uint64_t BasicInstType, bool skipVcc = false);
OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};
// Plain record describing one optional operand.
// NOTE(review): the table that instantiates this struct and the code that
// consumes it are outside this chunk; field descriptions below are inferred
// from the field names and types only - confirm against the users.
struct OptionalOperand {
// Operand name as a C string.
const char *Name;
// Immediate kind associated with the operand.
AMDGPUOperand::ImmTy Type;
// Presumably true when the operand is a single-bit flag.
bool IsBit;
// Optional callback taking the parsed value by reference and returning a bool;
// presumably validates/rewrites the value in place.
bool (*ConvertResult)(int64_t&);
};
} // end anonymous namespace
// Returns the IEEE floating-point semantics whose storage width is Size bytes
// (2 -> half, 4 -> single, 8 -> double). Callers may also pass the byte width
// of an integer type of equivalent size. Any other width is a programming
// error.
static const fltSemantics *getFltSemantics(unsigned Size) {
  if (Size == 2)
    return &APFloat::IEEEhalf();
  if (Size == 4)
    return &APFloat::IEEEsingle();
  if (Size == 8)
    return &APFloat::IEEEdouble();
  llvm_unreachable("unsupported fp type");
}
// Convenience overload: derives the byte width from the value type and defers
// to the size-based overload.
static const fltSemantics *getFltSemantics(MVT VT) {
  const unsigned SizeInBytes = VT.getSizeInBits() / 8;
  return getFltSemantics(SizeInBytes);
}
// Maps an AMDGPU operand type to the floating-point semantics of matching
// width. Integer operand types map to the fp semantics of the same size, and
// packed 16-bit vector types map to half precision.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // 16-bit scalar and packed 2 x 16-bit operands.
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_INT16:
    return &APFloat::IEEEhalf();

  // 32-bit operands.
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_INT32:
    return &APFloat::IEEEsingle();

  // 64-bit operands.
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_INT64:
    return &APFloat::IEEEdouble();

  default:
    llvm_unreachable("unsupported fp type");
  }
}
//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//
// Converts FPLiteral in place to the floating-point semantics that match the
// size of VT (round-to-nearest-even) and reports whether the result is
// acceptable. Loss of precision is tolerated; the conversion is rejected only
// when it lost information AND signalled overflow or underflow.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool LosesInfo;
  const APFloat::opStatus ConvStatus = FPLiteral.convert(
      *getFltSemantics(VT), APFloat::rmNearestTiesToEven, &LosesInfo);

  // Exact conversions, and inexact ones that kept all information, are fine.
  if (ConvStatus == APFloat::opOK || !LosesInfo)
    return true;

  const bool Overflowed = (ConvStatus & APFloat::opOverflow) != 0;
  const bool Underflowed = (ConvStatus & APFloat::opUnderflow) != 0;
  return !(Overflowed || Underflowed);
}
// Returns true if this operand is a plain immediate that can be encoded as an
// inline constant for an operand of the given value type.
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
  if (!isImmTy(ImmTyNone))
    return false;

  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  // 64-bit operands are checked against the raw 64-bit value regardless of
  // whether the token was an fp or an integer literal.
  const bool Expects64Bit = (type == MVT::f64 || type == MVT::i64);
  if (Expects64Bit)
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());

  if (Imm.IsFPImm) {
    // Fp literal token: the stored bits are a double; narrow to the operand
    // type first and bail out if that overflows or underflows.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    const uint64_t NarrowedBits = FPLiteral.bitcastToAPInt().getZExtValue();
    if (type.getScalarSizeInBits() == 16)
      return AMDGPU::isInlinableLiteral16(static_cast<int16_t>(NarrowedBits),
                                          AsmParser->hasInv2PiInlineImm());

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(static_cast<int32_t>(NarrowedBits),
                                        AsmParser->hasInv2PiInlineImm());
  }

  // Integer literal token: only the low bits that fit the operand matter.
  const APInt Literal(64, Imm.Val);
  if (type.getScalarSizeInBits() == 16)
    return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        AsmParser->hasInv2PiInlineImm());

  return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
}
// Returns true if this operand is a plain immediate that may be emitted as a
// literal for an operand of the given value type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone))
    return false;

  if (Imm.IsFPImm) {
    // We got fp literal token
    if (type == MVT::f64) {
      // Expected 64-bit fp operand. The low 32 bits of the literal would be
      // set to zero when encoding, but such literals are accepted anyway.
      return true;
    }
    if (type == MVT::i64) {
      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them.
      return false;
    }
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    return canLosslesslyConvertToFPType(FPLiteral, type);
  }

  // We got int literal token.
  if (type == MVT::f64 && hasFPModifiers()) {
    // Cannot apply fp modifiers to int literals preserving the same semantics
    // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
    // disable these cases.
    return false;
  }

  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
  // types.
  const unsigned TypeBits = type.getSizeInBits();
  const unsigned LiteralBits = (TypeBits == 64) ? 32 : TypeBits;
  return isUIntN(LiteralBits, Imm.Val) || isIntN(LiteralBits, Imm.Val);
}
// Returns true if this operand is a register that belongs to the register
// class identified by RCID.
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  if (!isRegKind())
    return false;
  return AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}
// Returns true if this operand is acceptable as an SDWA source of the given
// type: on VI only a 32-bit VGPR qualifies; on GFX9 any VS_32 register or an
// inlinable immediate of that type does; other subtargets accept nothing.
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  if (!AsmParser->isGFX9())
    return false;
  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
}
// Type-specific SDWA operand predicates. Each one is isSDWAOperand() bound to
// a fixed value type.

// SDWA operand check for MVT::f16.
bool AMDGPUOperand::isSDWAFP16Operand() const {
return isSDWAOperand(MVT::f16);
}
// SDWA operand check for MVT::f32.
bool AMDGPUOperand::isSDWAFP32Operand() const {
return isSDWAOperand(MVT::f32);
}
// SDWA operand check for MVT::i16.
bool AMDGPUOperand::isSDWAInt16Operand() const {
return isSDWAOperand(MVT::i16);
}
// SDWA operand check for MVT::i32.
bool AMDGPUOperand::isSDWAInt32Operand() const {
return isSDWAOperand(MVT::i32);
}
// Folds this operand's abs/neg modifiers into the raw bit pattern Val of a
// floating-point value that is Size bytes wide (2, 4 or 8). abs clears the
// sign bit first; neg then flips it. Returns the modified bit pattern.
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  // The sign is the most significant bit of the Size-byte value.
  const unsigned SignBitIndex = Size * 8 - 1;
  const uint64_t SignBit = 1ULL << SignBitIndex;

  uint64_t Result = Val;
  if (Imm.Mods.Abs)
    Result &= ~SignBit;
  if (Imm.Mods.Neg)
    Result ^= SignBit;
  return Result;
}
// Appends this immediate to Inst.
//
// If the operand slot about to be filled (index == Inst's current operand
// count) is an SI source operand, the value goes through
// addLiteralImmOperand so that it is encoded for that operand type; fp
// modifiers are folded into the value only when the caller asked for it
// (ApplyModifiers), the immediate is a plain one, and it actually carries fp
// modifiers. Otherwise the raw value is appended unchanged.
//
// N is not used by this function.
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  if (AMDGPU::isSISrcOperand(InstDesc, Inst.getNumOperands())) {
    // All three conditions are boolean; use logical AND throughout rather than
    // mixing in a bitwise '&'.
    const bool FoldFPModifiers = ApplyModifiers &&
                                 isImmTy(ImmTyNone) &&
                                 Imm.Mods.hasFPModifiers();
    addLiteralImmOperand(Inst, Imm.Val, FoldFPModifiers);
    return;
  }

  // Non-source operands must not carry modifiers when they are plain
  // immediates.
  assert(!isImmTy(ImmTyNone) || !hasModifiers());
  Inst.addOperand(MCOperand::createImm(Imm.Val));
}
// Appends the immediate Val to Inst, encoded for the source operand slot it
// will occupy (the slot index is Inst's operand count on entry).
//
// Val is the raw token value: for fp literal tokens (Imm.IsFPImm) it holds the
// bit pattern of a double, otherwise it is an integer. When ApplyModifiers is
// set, this operand's abs/neg modifiers are folded into Val before encoding.
// The encoding chosen depends on the operand type recorded in the instruction
// description; unsupported operand types are a programming error.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
auto OpNum = Inst.getNumOperands();
// Check that this operand accepts literals
assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
if (ApplyModifiers) {
assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
// Fp tokens are still stored as a double at this point, so the sign bit is
// that of a 64-bit value; integer tokens use the operand's own size.
const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
Val = applyInputFPModifiers(Val, Size);
}
APInt Literal(64, Val);
uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
if (Imm.IsFPImm) { // We got fp literal token
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
// Inlinable 64-bit values are emitted with their full 64-bit pattern.
if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
return;
}
// Non-inlineable
if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
// For fp operands we check if low 32 bits are zeros
if (Literal.getLoBits(32) != 0) {
const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
"Can't encode literal as exact 64-bit floating-point operand. "
"Low 32-bits will be set to zero");
}
// Only the high 32 bits of the double are emitted.
Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
return;
}
// We don't allow fp literals in 64-bit integer instructions. It is
// unclear how we should encode them. This case should be checked earlier
// in predicate methods (isLiteralImm())
llvm_unreachable("fp literal in 64-bit integer instruction.");
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert the literal from double to the semantics of the operand type
// (single or half precision, as selected by getOpFltSemantics).
FPLiteral.convert(*getOpFltSemantics(OpTy),
APFloat::rmNearestTiesToEven, &lost);
// We allow precision lost but not overflow or underflow. This should be
// checked earlier in isLiteralImm()
uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
// Packed operands: replicate the 16-bit value into both halves.
ImmVal |= (ImmVal << 16);
}
Inst.addOperand(MCOperand::createImm(ImmVal));
return;
}
default:
llvm_unreachable("invalid operand size");
}
return;
}
// We got int literal token.
// Only sign extend inline immediates.
// FIXME: No errors on truncation
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
// Inline constants keep the (sign-extended) value; everything else is
// truncated to the low 32 bits.
if (isInt<32>(Val) &&
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
return;
}
Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
return;
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
// Inline constants keep the full value; other values are emitted as their
// low 32 bits.
if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
return;
}
Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
return;
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
// Inline constants keep the (sign-extended) value; everything else is
// truncated to the low 16 bits.
if (isInt<16>(Val) &&
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
return;
}
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
return;
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
// Packed operands only accept inline constants (asserted below); the 16-bit
// value is replicated into both halves of the 32-bit immediate.
auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
assert(AMDGPU::isInlinableLiteral16(LiteralVal,
AsmParser->hasInv2PiInlineImm()));
uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
static_cast<uint32_t>(LiteralVal);
Inst.addOperand(MCOperand::createImm(ImmVal));
return;
}
default:
llvm_unreachable("invalid operand size");
}
}
// Appends this immediate to Inst as a Bitwidth-bit constant. Integer tokens
// are truncated to their low Bitwidth bits; fp tokens (stored as a double) are
// converted to the fp format of that width (round-to-nearest-even) and the
// resulting bit pattern is emitted. N is not used.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) {
    // We got fp literal token.
    bool LosesInfo;
    APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
    FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                      APFloat::rmNearestTiesToEven, &LosesInfo);
    const uint64_t Bits = FPLiteral.bitcastToAPInt().getZExtValue();
    Inst.addOperand(MCOperand::createImm(Bits));
    return;
  }

  // We got int literal token.
  const uint64_t Truncated = Literal.getLoBits(Bitwidth).getZExtValue();
  Inst.addOperand(MCOperand::createImm(Truncated));
}
// Appends this register to Inst after mapping it through AMDGPU::getMCReg for
// the current subtarget. N is not used.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  const auto SubtargetReg = AMDGPU::getMCReg(getReg(), AsmParser->getSTI());
  Inst.addOperand(MCOperand::createReg(SubtargetReg));
}
//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//
// Returns the register class ID for a register of kind Is spanning RegWidth
// consecutive 32-bit registers, or -1 if that kind/width combination has no
// register class.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  switch (Is) {
  case IS_VGPR:
    switch (RegWidth) {
    case 1:  return AMDGPU::VGPR_32RegClassID;
    case 2:  return AMDGPU::VReg_64RegClassID;
    case 3:  return AMDGPU::VReg_96RegClassID;
    case 4:  return AMDGPU::VReg_128RegClassID;
    case 8:  return AMDGPU::VReg_256RegClassID;
    case 16: return AMDGPU::VReg_512RegClassID;
    default: return -1;
    }

  case IS_TTMP:
    switch (RegWidth) {
    case 1:  return AMDGPU::TTMP_32RegClassID;
    case 2:  return AMDGPU::TTMP_64RegClassID;
    case 4:  return AMDGPU::TTMP_128RegClassID;
    case 8:  return AMDGPU::TTMP_256RegClassID;
    case 16: return AMDGPU::TTMP_512RegClassID;
    default: return -1;
    }

  case IS_SGPR:
    switch (RegWidth) {
    case 1:  return AMDGPU::SGPR_32RegClassID;
    case 2:  return AMDGPU::SGPR_64RegClassID;
    case 4:  return AMDGPU::SGPR_128RegClassID;
    case 8:  return AMDGPU::SGPR_256RegClassID;
    case 16: return AMDGPU::SGPR_512RegClassID;
    default: return -1;
    }

  default:
    // Other register kinds have no width-indexed register class.
    return -1;
  }
}
// Looks up a special (non-numbered) register by its assembly name and returns
// its register number, or 0 if the name is not a special register.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
      // 64-bit registers and their 32-bit halves.
      .Case("exec", AMDGPU::EXEC)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("vcc", AMDGPU::VCC)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("tba", AMDGPU::TBA)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("tma", AMDGPU::TMA)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      // Registers without lo/hi halves; lds_direct has two spellings.
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("scc", AMDGPU::SCC)
      .Default(0);
}
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
auto R = parseRegister();
if (!R) return true;
assert(R->isReg());
RegNo = R->getReg();
StartLoc = R->getStartLoc();
EndLoc = R->getEndLoc();
return false;
}
// Tries to extend the register sequence accumulated in (Reg, RegWidth) with
// the next list element Reg1.
//
// For special registers the only valid extension is a *_lo register followed
// by its matching *_hi register; the pair is replaced by the combined 64-bit
// register and RegWidth becomes 2. For VGPR/SGPR/TTMP the new register must
// directly follow the sequence so far, and RegWidth grows by one.
//
// Returns true if the sequence was extended, false if Reg1 does not fit.
// RegNum is not used.
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  if (RegKind == IS_SPECIAL) {
    struct HalfPair {
      unsigned Lo;
      unsigned Hi;
      unsigned Full;
    };
    static const HalfPair Pairs[] = {
        {AMDGPU::EXEC_LO, AMDGPU::EXEC_HI, AMDGPU::EXEC},
        {AMDGPU::FLAT_SCR_LO, AMDGPU::FLAT_SCR_HI, AMDGPU::FLAT_SCR},
        {AMDGPU::XNACK_MASK_LO, AMDGPU::XNACK_MASK_HI, AMDGPU::XNACK_MASK},
        {AMDGPU::VCC_LO, AMDGPU::VCC_HI, AMDGPU::VCC},
        {AMDGPU::TBA_LO, AMDGPU::TBA_HI, AMDGPU::TBA},
        {AMDGPU::TMA_LO, AMDGPU::TMA_HI, AMDGPU::TMA},
    };

    for (const HalfPair &P : Pairs) {
      if (Reg == P.Lo && Reg1 == P.Hi) {
        Reg = P.Full;
        RegWidth = 2;
        return true;
      }
    }
    return false;
  }

  if (RegKind == IS_VGPR || RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // The next register must be exactly RegWidth past the first one.
    if (Reg1 != Reg + RegWidth)
      return false;
    ++RegWidth;
    return true;
  }

  llvm_unreachable("unexpected register kind");
}
// Parses one register reference at the current lexer position.
//
// Accepted forms:
//   - a special register name (exec, vcc, m0, ...);
//   - a single 32-bit register: vN, sN, ttmpN;
//   - a register range: v[LO:HI], s[LO:HI], ttmp[LO:HI] (":HI" is optional);
//   - a bracketed list of consecutive 32-bit registers: [s0,s1,s2,s3], or a
//     lo/hi pair of special registers.
//
// On success returns true and sets:
//   RegKind        - kind of the parsed register;
//   Reg            - the resulting register;
//   RegNum         - index of the register within its register class
//                    (0 for special registers);
//   RegWidth       - width in 32-bit registers (1 for special registers);
//   *DwordRegIndex - if non-null, index of the first 32-bit register
//                    (0 for special registers).
// Returns false if the text is not a valid register for the current
// subtarget. No diagnostic is emitted here, and tokens consumed before the
// failure was detected are not restored.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();
        if (getParser().parseAbsoluteExpression(RegLo))
          return false;
        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();
        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;
          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        // Reject reversed ranges such as s[1:0] explicitly instead of relying
        // on the unsigned wrap-around of the width computed below.
        if (RegHi < RegLo)
          return false;
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        // Every list element must be a single register of the same kind that
        // directly continues the sequence parsed so far.
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    // A zero width (possible when the range width wraps to 0 on truncation to
    // unsigned) would make the alignment divisor below zero.
    if (RegWidth == 0)
      return false;
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Convert the dword index into an index within the register class.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass &RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }
  default:
    llvm_unreachable("unexpected register kind");
  }
  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}
// Returns the name of the assembler symbol that tracks the next free register
// for the given register kind, or None for kinds that have no such symbol.
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  if (RegKind == IS_VGPR)
    return StringRef(".amdgcn.next_free_vgpr");
  if (RegKind == IS_SGPR)
    return StringRef(".amdgcn.next_free_sgpr");
  return None;
}
// Creates (or fetches) the next-free-register symbol for RegKind and sets it
// to the constant 0. RegKind must be a kind that has such a symbol.
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  const Optional<StringRef> CountSymName = getGprCountSymbolName(RegKind);
  assert(CountSymName && "initializing invalid register kind");

  MCSymbol *CountSym = getContext().getOrCreateSymbol(*CountSymName);
  const MCExpr *Zero = MCConstantExpr::create(0, getContext());
  CountSym->setVariableValue(Zero);
}
// Raises the next-free-register symbol for RegKind so that it covers the
// registers [DwordRegIndex, DwordRegIndex + RegWidth). Does nothing for
// pre-GCN targets and for register kinds without a count symbol.
//
// Returns false (after reporting an error) if the symbol exists but is not a
// variable or does not evaluate to an absolute value; returns true otherwise.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  const bool IsGCN = AMDGPU::getIsaVersion(getSTI().getCPU()).Major >= 6;
  if (!IsGCN)
    return true;

  const auto CountSymName = getGprCountSymbolName(RegKind);
  if (!CountSymName)
    return true;

  MCSymbol *CountSym = getContext().getOrCreateSymbol(*CountSymName);

  if (!CountSym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");

  int64_t CurrentCount;
  if (!CountSym->getVariableValue(false)->evaluateAsAbsolute(CurrentCount))
    return !Error(
        getParser().getTok().getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  // Index of the highest 32-bit register touched by this operand.
  const int64_t HighestUsed = DwordRegIndex + RegWidth - 1;
  if (HighestUsed >= CurrentCount)
    CountSym->setVariableValue(
        MCConstantExpr::create(HighestUsed + 1, getContext()));

  return true;
}
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
const auto &Tok = Parser.getTok();
SMLoc StartLoc = Tok.getLoc();
SMLoc EndLoc = Tok.getEndLoc();
RegisterKind RegKind;
unsigned Reg, RegNum, RegWidth, DwordRegIndex;
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
return nullptr;
}
if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
return nullptr;
} else
KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}
// Parses an absolute expression into Val. Returns true on failure, false on
// success.
//
// When AbsMod is set and the current token is a numeric literal immediately
// followed by '|', only that primary expression is parsed so that the closing
// '|' of an abs modifier is left for the caller.
bool
AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
  const bool LiteralBeforePipe =
      AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
      (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real));

  if (!LiteralBeforePipe)
    return getParser().parseAbsoluteExpression(Val);

  // This is a workaround for handling operands like these:
  //     |1.0|
  //     |-1|
  // This syntax is not compatible with syntax of standard
  // MC expressions (due to the trailing '|').
  SMLoc EndLoc;
  const MCExpr *Expr;
  if (getParser().parsePrimaryExpr(Expr, EndLoc))
    return true;

  return !Expr->evaluateAsAbsolute(Val);
}
// Parses an optionally negated integer or floating-point literal and appends
// it to Operands as an immediate. Returns NoMatch (without consuming input)
// if the operand does not start with a literal, ParseFail if the literal
// cannot be evaluated, and Success otherwise.
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
  // TODO: add syntactic sugar for 1/(2*PI)
  bool Minus = false;
  if (getLexer().is(AsmToken::Minus)) {
    // A leading '-' belongs to the literal only if a number follows it.
    const AsmToken Next = getLexer().peekTok();
    if (Next.isNot(AsmToken::Integer) && Next.isNot(AsmToken::Real))
      return MatchOperand_NoMatch;
    Minus = true;
    Parser.Lex();
  }

  const SMLoc Loc = Parser.getTok().getLoc();
  const AsmToken::TokenKind Kind = getLexer().getKind();
  if (Kind != AsmToken::Integer && Kind != AsmToken::Real)
    return MatchOperand_NoMatch;

  int64_t IntVal;
  if (parseAbsoluteExpr(IntVal, AbsMod))
    return MatchOperand_ParseFail;

  if (Kind == AsmToken::Integer) {
    if (Minus)
      IntVal *= -1;
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, Loc));
    return MatchOperand_Success;
  }

  // Real token: the parsed value holds the bits of a double.
  APFloat F(BitsToDouble(IntVal));
  if (Minus)
    F.changeSign();
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), Loc,
                               AMDGPUOperand::ImmTyNone, true));
  return MatchOperand_Success;
}
// Parses a register operand and appends it to Operands, tagging it with the
// current forced-VOP3 state. Returns NoMatch if no register could be parsed.
OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  auto RegOp = parseRegister();
  if (!RegOp)
    return MatchOperand_NoMatch;

  assert(RegOp->isReg());
  RegOp->Reg.IsForcedVOP3 = isForcedVOP3();
  Operands.push_back(std::move(RegOp));
  return MatchOperand_Success;
}
// Parses either an immediate or a register. The immediate is tried first; a
// register is attempted only if the input is not an immediate at all.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
  const OperandMatchResultTy ImmResult = parseImm(Operands, AbsMod);
  if (ImmResult == MatchOperand_NoMatch)
    return parseReg(Operands);
  return ImmResult;
}
// Parses a register (or, if AllowImm is set, a register or immediate)
// optionally wrapped in floating-point input modifiers, and appends it to
// Operands with the modifiers attached.
//
// Recognized modifier spellings:
//   -x        negation (only when x is not a numeric literal, see below)
//   neg(x)    negation
//   abs(x)    absolute value
//   |x|       absolute value
// Negation may wrap an absolute value, e.g. -|x| or neg(abs(x)).
//
// Returns ParseFail (after reporting an error) on malformed modifier syntax,
// the result of the inner operand parse if that did not succeed, and Success
// otherwise.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
bool AllowImm) {
// Negate/Abs track the symbolic forms ('-', '|'); Negate2/Abs2 track the
// functional forms (neg(...), abs(...)) which need a closing parenthesis.
bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
if (getLexer().getKind()== AsmToken::Minus) {
const AsmToken NextToken = getLexer().peekTok();
// Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
if (NextToken.is(AsmToken::Minus)) {
Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
return MatchOperand_ParseFail;
}
// '-' followed by an integer literal N should be interpreted as integer
// negation rather than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of floating-point NEG modifier
// results in different meaning of integer literals used with VOP1/2/C
// and VOP3, for example:
// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals should be handled likewise for uniformity
if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
Parser.Lex();
Negate = true;
}
}
if (getLexer().getKind() == AsmToken::Identifier &&
Parser.getTok().getString() == "neg") {
// '-' and neg(...) cannot be combined.
if (Negate) {
Error(Parser.getTok().getLoc(), "expected register or immediate");
return MatchOperand_ParseFail;
}
Parser.Lex();
Negate2 = true;
if (getLexer().isNot(AsmToken::LParen)) {
Error(Parser.getTok().getLoc(), "expected left paren after neg");
return MatchOperand_ParseFail;
}
Parser.Lex();
}
if (getLexer().getKind() == AsmToken::Identifier &&
Parser.getTok().getString() == "abs") {
Parser.Lex();
Abs2 = true;
if (getLexer().isNot(AsmToken::LParen)) {
Error(Parser.getTok().getLoc(), "expected left paren after abs");
return MatchOperand_ParseFail;
}
Parser.Lex();
}
if (getLexer().getKind() == AsmToken::Pipe) {
// abs(...) and |...| cannot be combined.
if (Abs2) {
Error(Parser.getTok().getLoc(), "expected register or immediate");
return MatchOperand_ParseFail;
}
Parser.Lex();
Abs = true;
}
// Parse the operand itself. Abs is forwarded so that a literal directly
// followed by the closing '|' is handled by the immediate parser.
OperandMatchResultTy Res;
if (AllowImm) {
Res = parseRegOrImm(Operands, Abs);
} else {
Res = parseReg(Operands);
}
if (Res != MatchOperand_Success) {
return Res;
}
// Consume the closing delimiters, innermost first, and collect the modifiers.
AMDGPUOperand::Modifiers Mods;
if (Abs) {
if (getLexer().getKind() != AsmToken::Pipe) {
Error(Parser.getTok().getLoc(), "expected vertical bar");
return MatchOperand_ParseFail;
}
Parser.Lex();
Mods.Abs = true;
}
if (Abs2) {
if (getLexer().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "expected closing parentheses");
return MatchOperand_ParseFail;
}
Parser.Lex();
Mods.Abs = true;
}
if (Negate) {
Mods.Neg = true;
} else if (Negate2) {
if (getLexer().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "expected closing parentheses");
return MatchOperand_ParseFail;
}
Parser.Lex();
Mods.Neg = true;
}
// Attach the modifiers to the operand that was just pushed.
if (Mods.hasFPModifiers()) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
Op.setModifiers(Mods);
}
return MatchOperand_Success;
}
// Parses a register (or, if AllowImm is set, a register or immediate)
// optionally wrapped in the integer input modifier sext(...), and appends it
// to Operands with the modifier attached.
//
// Returns ParseFail (after reporting an error) on malformed sext syntax, the
// result of the inner operand parse if that did not succeed, and Success
// otherwise.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  const bool Sext = getLexer().is(AsmToken::Identifier) &&
                    Parser.getTok().getString() == "sext";
  if (Sext) {
    Parser.Lex();
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after sext");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  const OperandMatchResultTy Res =
      AllowImm ? parseRegOrImm(Operands) : parseReg(Operands);
  if (Res != MatchOperand_Success)
    return Res;

  AMDGPUOperand::Modifiers Mods;
  if (Sext) {
    // The opening "sext(" must be matched by a closing parenthesis.
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Sext = true;
  }

  // Attach the modifiers to the operand that was just pushed.
  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
// Register-only variant of parseRegOrImmWithFPInputMods: immediates are not
// accepted (AllowImm = false).
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
return parseRegOrImmWithFPInputMods(Operands, false);
}
// Register-only variant of parseRegOrImmWithIntInputMods: immediates are not
// accepted (AllowImm = false).
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
return parseRegOrImmWithIntInputMods(Operands, false);
}
// Parses either a register or the keyword "off". A register is appended as a
// register operand; "off" is appended as an ImmTyOff immediate with value 0.
// Returns NoMatch if the input is neither.
OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  if (std::unique_ptr<AMDGPUOperand> RegOp = parseRegister()) {
    Operands.push_back(std::move(RegOp));
    return MatchOperand_Success;
  }

  const AsmToken &Tok = Parser.getTok();
  if (Tok.getString() != "off")
    return MatchOperand_NoMatch;

  // Build the operand from the token before consuming it.
  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
                                              AMDGPUOperand::ImmTyOff, false));
  Parser.Lex();
  return MatchOperand_Success;
}
// Target-specific filter applied to a candidate match. Rejects instructions
// whose encoding contradicts a forced encoding (_e32/_e64/DPP/SDWA), asks the
// matcher to prefer the 32-bit form where the instruction says so, and
// enforces a few opcode-specific operand restrictions.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const uint64_t TSFlags = MII.get(Opc).TSFlags;
  const bool IsVOP3 = (TSFlags & SIInstrFlags::VOP3) != 0;
  const unsigned ForcedSize = getForcedEncodingSize();

  // The candidate must agree with any encoding the user forced.
  if (ForcedSize == 32 && IsVOP3)
    return Match_InvalidOperand;
  if (ForcedSize == 64 && !IsVOP3)
    return Match_InvalidOperand;
  if (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP))
    return Match_InvalidOperand;
  if (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA))
    return Match_InvalidOperand;

  if (IsVOP3 && (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      ForcedSize != 64)
    return Match_PreferE32;

  if (Opc == AMDGPU::V_MAC_F32_sdwa_vi || Opc == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    const auto DstSelIdx =
        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dst_sel);
    const auto &DstSel = Inst.getOperand(DstSelIdx);
    if (!DstSel.isImm() || DstSel.getImm() != AMDGPU::SDWA::SdwaSel::DWORD)
      return Match_InvalidOperand;
  }

  if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
    // Without flat offset support only a zero offset is valid.
    // FIXME: Produces error without correct column reported.
    const auto OffsetIdx =
        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset);
    const auto &Offset = Inst.getOperand(OffsetIdx);
    if (Offset.getImm() != 0)
      return Match_InvalidOperand;
  }

  return Match_Success;
}
// Returns the list of asm variants the matcher should try, narrowed down when
// the mnemonic forces a particular encoding.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  static const unsigned DefaultOnly[] = {AMDGPUAsmVariants::DEFAULT};
  static const unsigned VOP3Only[] = {AMDGPUAsmVariants::VOP3};
  static const unsigned SDWAVariants[] = {AMDGPUAsmVariants::SDWA,
                                          AMDGPUAsmVariants::SDWA9};
  static const unsigned DPPOnly[] = {AMDGPUAsmVariants::DPP};
  static const unsigned AllVariants[] = {
      AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
      AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
      AMDGPUAsmVariants::DPP};

  if (getForcedEncodingSize() == 32)
    return makeArrayRef(DefaultOnly);
  if (isForcedVOP3())
    return makeArrayRef(VOP3Only);
  if (isForcedSDWA())
    return makeArrayRef(SDWAVariants);
  if (isForcedDPP())
    return makeArrayRef(DPPOnly);

  // Nothing forced: try every variant.
  return makeArrayRef(AllVariants);
}
// Returns the first of FLAT_SCR, VCC or M0 found among the implicit uses of
// Inst's opcode, or AMDGPU::NoRegister if the opcode implicitly reads none of
// them.
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned NumImplicitUses = Desc.getNumImplicitUses();

  for (unsigned Idx = 0; Idx != NumImplicitUses; ++Idx) {
    const unsigned Reg = Desc.ImplicitUses[Idx];
    const bool IsTrackedSGPR = Reg == AMDGPU::FLAT_SCR ||
                               Reg == AMDGPU::VCC ||
                               Reg == AMDGPU::M0;
    if (IsTrackedSGPR)
      return Reg;
  }
  return AMDGPU::NoRegister;
}
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
unsigned OpIdx) const {
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
return false;
}
const MCOperand &MO = Inst.getOperand(OpIdx);
int64_t Val = MO.getImm();
auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
switch (OpSize) { // expected operand size
case 8:
return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
case 4:
return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
case 2: {
const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
} else {
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
}
}
default:
llvm_unreachable("invalid operand size");
}
}
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
const MCOperand &MO = Inst.getOperand(OpIdx);
if (MO.isImm()) {
return !isInlineConstant(Inst, OpIdx);
}
return !MO.isReg() ||
isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
}
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
const unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
unsigned ConstantBusUseCount = 0;
if (Desc.TSFlags &
(SIInstrFlags::VOPC |
SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
SIInstrFlags::SDWA)) {
// Check special imm operands (used by madmk, etc)
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
++ConstantBusUseCount;
}
unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
if (SGPRUsed != AMDGPU::NoRegister) {
++ConstantBusUseCount;
}
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
for (int OpIdx : OpIndices) {
if (OpIdx == -1) break;
const MCOperand &MO = Inst.getOperand(OpIdx);
if (usesConstantBus(Inst, OpIdx)) {
if (MO.isReg()) {
const unsigned Reg = mc2PseudoReg(MO.getReg());
// Pairs of registers with a partial intersections like these
// s0, s[0:1]
// flat_scratch_lo, flat_scratch
// flat_scratch_lo, flat_scratch_hi
// are theoretically valid but they are disabled anyway.
// Note that this code mimics SIInstrInfo::verifyInstruction
if (Reg != SGPRUsed) {
++ConstantBusUseCount;
}
SGPRUsed = Reg;
} else { // Expression or a literal
++ConstantBusUseCount;
}
}
}
}
return ConstantBusUseCount <= 1;
}
// Returns false if Inst has an early-clobber vdst operand whose register
// intersects one of the src0/src1/src2 register operands; true otherwise.
bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  const bool HasEarlyClobberDst =
      DstIdx != -1 &&
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) != -1;
  if (!HasEarlyClobberDst)
    return true;

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int SrcIndices[] = {
      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0),
      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1),
      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)};

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());
  const unsigned DstReg = mc2PseudoReg(Dst.getReg());

  for (const int SrcIdx : SrcIndices) {
    // The first missing source operand ends the scan.
    if (SrcIdx == -1)
      break;

    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (!Src.isReg())
      continue;

    if (isRegIntersect(DstReg, mc2PseudoReg(Src.getReg()), TRI))
      return false;
  }

  return true;
}
// Returns false only when Inst is flagged IntClamp, the subtarget lacks
// integer clamp support, and the clamp operand is non-zero.
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  // Nothing to check unless this is an IntClamp instruction on a subtarget
  // without integer clamping.
  if ((Desc.TSFlags & SIInstrFlags::IntClamp) == 0 || hasIntClamp())
    return true;

  const int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
  assert(ClampIdx != -1);

  const auto ClampValue = Inst.getOperand(ClampIdx).getImm();
  return ClampValue == 0;
}
// For MIMG instructions, checks that the size of the vdata register operand
// agrees with the dmask, tfe and (on packed-d16 subtargets) d16 operands.
// Non-MIMG instructions always pass.
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsMIMG = (Desc.TSFlags & SIInstrFlags::MIMG) != 0;
  if (!IsMIMG)
    return true;

  const int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  const int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  const int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  const unsigned VDataSize =
      AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);

  // tfe contributes one extra element when set.
  const unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;

  // A zero dmask is treated like a dmask of 1.
  const unsigned RawDMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  const unsigned DMask = RawDMask != 0 ? RawDMask : 1;

  // Gather4 always counts 4 elements; otherwise one per set dmask bit.
  unsigned DataSize;
  if (Desc.TSFlags & SIInstrFlags::Gather4)
    DataSize = 4;
  else
    DataSize = countPopulation(DMask);

  // With packed d16 the element count is halved, rounding up.
  if (hasPackedD16()) {
    const int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    const bool D16Set = D16Idx >= 0 && Inst.getOperand(D16Idx).getImm();
    if (D16Set)
      DataSize = (DataSize + 1) / 2;
  }

  const unsigned Expected = DataSize + TFESize;
  return (VDataSize / 4) == Expected;
}
// For MIMG instructions that both load and store (treated here as atomics),
// checks that dmask is one of 0x1, 0x3 or 0xf. Everything else passes.
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  const bool IsAtomic = Desc.mayLoad() && Desc.mayStore();
  if (!IsAtomic)
    return true;

  const int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  const unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However these limitations are
  // verified when we check that dmask matches dst size.
  switch (DMask) {
  case 0x1:
  case 0x3:
  case 0xf:
    return true;
  default:
    return false;
  }
}
// For Gather4 instructions, checks that exactly one of the low four dmask
// bits is set. Other instructions always pass.
bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsGather4 = (Desc.TSFlags & SIInstrFlags::Gather4) != 0;
  if (!IsGather4)
    return true;

  const int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  const unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // GATHER4 instructions use dmask in a different fashion compared to
  // other MIMG instructions. The only useful DMASK values are
  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
  // (red,red,red,red) etc.) The ISA document doesn't mention
  // this.
  switch (DMask) {
  case 0x1:
  case 0x2:
  case 0x4:
  case 0x8:
    return true;
  default:
    return false;
  }
}
// Returns false when a MIMG instruction has its d16 operand set while
// assembling for SI or CI; true in every other case.
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  const int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  const bool D16Set = D16Idx >= 0 && Inst.getOperand(D16Idx).getImm();
  if (!D16Set)
    return true;

  return !(isCI() || isSI());
}
// Returns true if Opcode is one of the "rev" opcodes listed below
// (V_SUB*REV, V_LSHLREV, V_LSHRREV, V_ASHRREV and their packed forms).
static bool IsRevOpcode(const unsigned Opcode)
{
  static const unsigned RevOpcodes[] = {
    // v_subrev_f32
    AMDGPU::V_SUBREV_F32_e32,
    AMDGPU::V_SUBREV_F32_e64,
    AMDGPU::V_SUBREV_F32_e32_si,
    AMDGPU::V_SUBREV_F32_e32_vi,
    AMDGPU::V_SUBREV_F32_e64_si,
    AMDGPU::V_SUBREV_F32_e64_vi,
    // v_subrev_i32
    AMDGPU::V_SUBREV_I32_e32,
    AMDGPU::V_SUBREV_I32_e64,
    AMDGPU::V_SUBREV_I32_e32_si,
    AMDGPU::V_SUBREV_I32_e64_si,
    // v_subbrev_u32
    AMDGPU::V_SUBBREV_U32_e32,
    AMDGPU::V_SUBBREV_U32_e64,
    AMDGPU::V_SUBBREV_U32_e32_si,
    AMDGPU::V_SUBBREV_U32_e32_vi,
    AMDGPU::V_SUBBREV_U32_e64_si,
    AMDGPU::V_SUBBREV_U32_e64_vi,
    // v_subrev_u32
    AMDGPU::V_SUBREV_U32_e32,
    AMDGPU::V_SUBREV_U32_e64,
    AMDGPU::V_SUBREV_U32_e32_gfx9,
    AMDGPU::V_SUBREV_U32_e32_vi,
    AMDGPU::V_SUBREV_U32_e64_gfx9,
    AMDGPU::V_SUBREV_U32_e64_vi,
    // v_subrev_f16
    AMDGPU::V_SUBREV_F16_e32,
    AMDGPU::V_SUBREV_F16_e64,
    AMDGPU::V_SUBREV_F16_e32_vi,
    AMDGPU::V_SUBREV_F16_e64_vi,
    // v_subrev_u16
    AMDGPU::V_SUBREV_U16_e32,
    AMDGPU::V_SUBREV_U16_e64,
    AMDGPU::V_SUBREV_U16_e32_vi,
    AMDGPU::V_SUBREV_U16_e64_vi,
    // v_subrev_co_u32 / v_subbrev_co_u32
    AMDGPU::V_SUBREV_CO_U32_e32_gfx9,
    AMDGPU::V_SUBREV_CO_U32_e64_gfx9,
    AMDGPU::V_SUBBREV_CO_U32_e32_gfx9,
    AMDGPU::V_SUBBREV_CO_U32_e64_gfx9,
    // v_lshlrev
    AMDGPU::V_LSHLREV_B32_e32_si,
    AMDGPU::V_LSHLREV_B32_e64_si,
    AMDGPU::V_LSHLREV_B16_e32_vi,
    AMDGPU::V_LSHLREV_B16_e64_vi,
    AMDGPU::V_LSHLREV_B32_e32_vi,
    AMDGPU::V_LSHLREV_B32_e64_vi,
    AMDGPU::V_LSHLREV_B64_vi,
    // v_lshrrev
    AMDGPU::V_LSHRREV_B32_e32_si,
    AMDGPU::V_LSHRREV_B32_e64_si,
    AMDGPU::V_LSHRREV_B16_e32_vi,
    AMDGPU::V_LSHRREV_B16_e64_vi,
    AMDGPU::V_LSHRREV_B32_e32_vi,
    AMDGPU::V_LSHRREV_B32_e64_vi,
    AMDGPU::V_LSHRREV_B64_vi,
    // v_ashrrev
    AMDGPU::V_ASHRREV_I32_e64_si,
    AMDGPU::V_ASHRREV_I32_e32_si,
    AMDGPU::V_ASHRREV_I16_e32_vi,
    AMDGPU::V_ASHRREV_I16_e64_vi,
    AMDGPU::V_ASHRREV_I32_e32_vi,
    AMDGPU::V_ASHRREV_I32_e64_vi,
    AMDGPU::V_ASHRREV_I64_vi,
    // packed shifts
    AMDGPU::V_PK_LSHLREV_B16_vi,
    AMDGPU::V_PK_LSHRREV_B16_vi,
    AMDGPU::V_PK_ASHRREV_I16_vi,
  };

  for (const unsigned RevOpcode : RevOpcodes) {
    if (RevOpcode == Opcode)
      return true;
  }
  return false;
}
bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
using namespace SIInstrFlags;
const unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
// lds_direct register is defined so that it can be used
// with 9-bit operands only. Ignore encodings which do not accept these.
if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
return true;
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
const int SrcIndices[] = { Src1Idx, Src2Idx };
// lds_direct cannot be specified as either src1 or src2.
for (int SrcIdx : SrcIndices) {
if (SrcIdx == -1) break;
const MCOperand &Src = Inst.getOperand(SrcIdx);
if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
return false;
}
}
if (Src0Idx == -1)
return true;
const MCOperand &Src = Inst.getOperand(Src0Idx);
if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
return true;
// lds_direct is specified as src0. Check additional limitations.
return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
}
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
return true;
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
const int OpIndices[] = { Src0Idx, Src1Idx };
unsigned NumLiterals = 0;
uint32_t LiteralValue;
for (int OpIdx : OpIndices) {
if (OpIdx == -1) break;
const MCOperand &MO = Inst.getOperand(OpIdx);
if (MO.isImm() &&
// Exclude special imm operands (like that used by s_set_gpr_idx_on)
AMDGPU::isSISrcOperand(Desc, OpIdx) &&
!isInlineConstant(Inst, OpIdx)) {
uint32_t Value = static_cast<uint32_t>(MO.getImm());
if (NumLiterals == 0 || LiteralValue != Value) {
LiteralValue = Value;
++NumLiterals;
}
}
}
return NumLiterals <= 1;
}
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
const SMLoc &IDLoc) {
if (!validateLdsDirect(Inst)) {
Error(IDLoc,
"invalid use of lds_direct");
return false;
}
if (!validateSOPLiteral(Inst)) {
Error(IDLoc,
"only one literal operand is allowed");
return false;
}
if (!validateConstantBusLimitations(Inst)) {
Error(IDLoc,
"invalid operand (violates constant bus restrictions)");
return false;
}
if (!validateEarlyClobberLimitations(Inst)) {
Error(IDLoc,
"destination must be different than all sources");
return false;
}
if (!validateIntClampSupported(Inst)) {
Error(IDLoc,
"integer clamping is not supported on this GPU");
return false;
}
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
if (!validateMIMGD16(Inst)) {
Error(IDLoc,
"d16 modifier is not supported on this GPU");
return false;
}
if (!validateMIMGDataSize(Inst)) {
Error(IDLoc,
"image data size does not match dmask and tfe");
return false;
}
if (!validateMIMGAtomicDMask(Inst)) {
Error(IDLoc,
"invalid atomic image dmask");
return false;
}
if (!validateMIMGGatherDMask(Inst)) {
Error(IDLoc,
"invalid image_gather dmask: only one bit must be set");
return false;
}
return true;
}
// Forward declaration; the definition is not in this part of the file.
// MatchAndEmitInstruction appends the returned string to its
// "invalid instruction" diagnostic when the mnemonic fails to match.
static std::string AMDGPUMnemonicSpellCheck(StringRef S,
const FeatureBitset &FBS,
unsigned VariantID = 0);
// Tries every applicable assembler variant for the parsed operands. On a
// successful match the instruction is validated and emitted to Out and false
// is returned; otherwise the most specific failure seen across the variants
// is diagnosed and true is returned.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;

  for (auto Variant : getMatchedVariants()) {
    uint64_t VariantErrorInfo;
    const auto Status = MatchInstructionImpl(Operands, Inst, VariantErrorInfo,
                                             MatchingInlineAsm, Variant);

    // We order match statuses from least to most specific and keep the most
    // specific one as the overall result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature <
    // Match_PreferE32
    bool TakeStatus = false;
    switch (Status) {
    case Match_Success:
    case Match_PreferE32:
      TakeStatus = true;
      break;
    case Match_MissingFeature:
      TakeStatus = Result != Match_PreferE32;
      break;
    case Match_InvalidOperand:
      TakeStatus = Result != Match_MissingFeature &&
                   Result != Match_PreferE32;
      break;
    case Match_MnemonicFail:
      TakeStatus = Result != Match_InvalidOperand &&
                   Result != Match_MissingFeature &&
                   Result != Match_PreferE32;
      break;
    default:
      // Any other status never replaces the current result.
      break;
    }

    if (TakeStatus) {
      Result = Status;
      ErrorInfo = VariantErrorInfo;
    }

    if (Status == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc))
      return true;
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;
  }

  if (Result == Match_MissingFeature)
    return Error(IDLoc, "instruction not supported on this GPU");

  if (Result == Match_MnemonicFail) {
    // Operands[0] is the mnemonic token; use it for the spelling suggestion
    // and as the diagnostic range.
    AMDGPUOperand &Mnemonic = static_cast<AMDGPUOperand &>(*Operands[0]);
    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
    std::string Suggestion =
        AMDGPUMnemonicSpellCheck(Mnemonic.getToken(), FBS);
    return Error(IDLoc, "invalid instruction" + Suggestion,
                 Mnemonic.getLocRange());
  }

  if (Result == Match_InvalidOperand) {
    // Without operand information the error is reported at the mnemonic.
    if (ErrorInfo == ~0ULL)
      return Error(IDLoc, "invalid operand for instruction");

    if (ErrorInfo >= Operands.size())
      return Error(IDLoc, "too few operands for instruction");

    SMLoc ErrorLoc =
        static_cast<AMDGPUOperand &>(*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  if (Result == Match_PreferE32)
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");

  llvm_unreachable("Implement any new match types added!");
}
// Parses an absolute expression that starts with an integer or identifier
// token and stores its value, converted to uint32_t, in Ret. Returns true on
// failure, in which case Ret is left untouched.
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  const bool StartsExpression = getLexer().is(AsmToken::Integer) ||
                                getLexer().is(AsmToken::Identifier);
  if (!StartsExpression)
    return true;

  int64_t Value = -1;
  if (getParser().parseAbsoluteExpression(Value))
    return true;

  Ret = static_cast<uint32_t>(Value);
  return false;
}
// Parses "<major>, <minor>" into Major and Minor. Returns true (after
// reporting a diagnostic) on failure.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  const bool MajorFailed = ParseAsAbsoluteExpression(Major);
  if (MajorFailed)
    return TokError("invalid major version");

  if (!getLexer().is(AsmToken::Comma))
    return TokError("minor version number required, comma expected");
  Lex(); // Consume the comma.

  const bool MinorFailed = ParseAsAbsoluteExpression(Minor);
  if (MinorFailed)
    return TokError("invalid minor version");

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
return TokError("directive only supported for amdgcn architecture");
std::string Target;
SMLoc TargetStart = getTok().getLoc();
if (getParser().parseEscapedString(Target))
return true;
SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
std::string ExpectedTarget;
raw_string_ostream ExpectedTargetOS(ExpectedTarget);
IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
if (Target != ExpectedTargetOS.str())
return getParser().Error(TargetRange.Start, "target must match options",
TargetRange);
getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
return false;
}
// Reports "value out of range" for Range, anchored at the start of the range,
// and returns the result of the parser's Error call.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  const SMLoc Where = Range.Start;
  return getParser().Error(Where, "value out of range", Range);
}
bool AMDGPUAsmParser::calculateGPRBlocks(
const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
unsigned &SGPRBlocks) {
// TODO(scott.linder): These calculations are duplicated from
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
IsaVersion Version = getIsaVersion(getSTI().getCPU());
unsigned NumVGPRs = NextFreeVGPR;
unsigned NumSGPRs = NextFreeSGPR;
unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
NumSGPRs > MaxAddressableNumSGPRs)
return OutOfRangeError(SGPRRange);
NumSGPRs +=
IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
NumSGPRs > MaxAddressableNumSGPRs)
return OutOfRangeError(SGPRRange);
if (Features.test(FeatureSGPRInitBug))
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
return false;
}
// Parses a `.amdhsa_kernel <name>` block up to `.end_amdhsa_kernel`, filling
// in a kernel descriptor from the nested `.amdhsa_*` directives and emitting
// it through the target streamer.
//
// Each nested directive takes one non-negative absolute expression and may
// appear at most once. `.amdhsa_next_free_vgpr` and `.amdhsa_next_free_sgpr`
// are required. Returns true if an error was reported, false on success.
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();

  // Names of the nested directives seen so far, used to reject repeats and
  // to check for the required ones at the end.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();

  while (true) {
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");

    StringRef ID = getTok().getIdentifier();
    SMRange IDRange = getTok().getLocRange();
    Lex();

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getTok().getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getTok().getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-checks VALUE against the bit width of ENTRY, then stores it into the
// ENTRY bit-field of FIELD.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    // For the .amdhsa_user_sgpr_* directives below, an enabled feature
    // contributes the number of user SGPRs it occupies (see the kernel
    // descriptor / SGPR set-up description in the AMDGPU usage
    // documentation); a feature explicitly set to 0 contributes none.
    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 1;
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      // calculateGPRBlocks takes the count as `unsigned`; reject values that
      // would be silently truncated there.
      if (!isUInt<32>(Val))
        return OutOfRangeError(ValRange);
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      // Same truncation guard as for .amdhsa_next_free_vgpr.
      if (!isUInt<32>(Val))
        return OutOfRangeError(ValRange);
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return getParser().Error(IDRange.Start, "directive requires gfx7+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return getParser().Error(IDRange.Start, "directive requires gfx8+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return getParser().Error(IDRange.Start, "directive requires gfx9+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return getParser().Error(IDRange.Start,
                               "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  // Convert the next-free register numbers into the granulated block counts
  // stored in compute_pgm_rsrc1.
  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
                         SGPRRange, VGPRBlocks, SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}
// Handles .hsa_code_object_version: parses "<major>, <minor>" and forwards
// the pair to the target streamer. Returns true on a parse error.
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  const bool Failed = ParseDirectiveMajorMinor(Major, Minor);
  if (!Failed)
    getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);

  return Failed;
}
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
uint32_t Major;
uint32_t Minor;
uint32_t Stepping;
StringRef VendorName;
StringRef ArchName;
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
ISA.Stepping,
"AMD", "AMDGPU");
return false;
}
if (ParseDirectiveMajorMinor(Major, Minor))
return true;
if (getLexer().isNot(AsmToken::Comma))
return TokError("stepping version number required, comma expected");
Lex();
if (ParseAsAbsoluteExpression(Stepping))
return TokError("invalid stepping version");
if (getLexer().isNot(AsmToken::Comma))
return TokError("vendor name required, comma expected");
Lex();
if (getLexer().isNot(AsmToken::String))
return TokError("invalid vendor name");
VendorName = getLexer().getTok().getStringContents();
Lex();
if (getLexer().isNot(AsmToken::Comma))
return TokError("arch name required, comma expected");
Lex();
if (getLexer().isNot(AsmToken::String))
return TokError("invalid arch name");
ArchName = getLexer().getTok().getStringContents();
Lex();
getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
VendorName, ArchName);
return false;
}
// Parses the value for the amd_kernel_code_t field named ID into Header.
// Returns true if the field could not be parsed (the collected error text is
// reported), false otherwise.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  const bool IsDeprecatedField = ID == "max_scratch_backing_memory_byte_size";
  if (IsDeprecatedField) {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> Diagnostic;
  raw_svector_ostream DiagnosticOS(Diagnostic);
  const bool Parsed =
      parseAmdKernelCodeField(ID, getParser(), Header, DiagnosticOS);
  if (Parsed) {
    Lex();
    return false;
  }

  return TokError(DiagnosticOS.str());
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
// will set the current token to EndOfStatement.
while(getLexer().is(AsmToken::EndOfStatement))
Lex();
if (getLexer().isNot(AsmToken::Identifier))
return TokError("expected value identifier or .end_amd_kernel_code_t");
StringRef ID = getLexer().getTok().getIdentifier();
Lex();
if (ID == ".end_amd_kernel_code_t")
break;
if (ParseAMDKernelCodeTValue(ID, Header))
return true;
}
getTargetStreamer().EmitAMDKernelCodeT(Header);
return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
if (getLexer().isNot(AsmToken::Identifier))
return TokError("expected symbol name");
StringRef KernelName = Parser.getTok().getString();
getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
ELF::STT_AMDGPU_HSA_KERNEL);
Lex();
if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
KernelScope.initialize(getContext());
return false;
}
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
return Error(getParser().getTok().getLoc(),
".amd_amdgpu_isa directive is not available on non-amdgcn "
"architectures");
}
auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
std::string ISAVersionStringFromSTI;
raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
return Error(getParser().getTok().getLoc(),
".amd_amdgpu_isa directive does not match triple and/or mcpu "
"arguments specified through the command line");
}
getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
Lex();
return false;
}
// Handles the HSA metadata begin directive: collects every statement up to
// the matching end directive into one string and hands it to the target
// streamer (V3 or V2 flavour depending on the code object version).
// Returns true if a diagnostic was reported.
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  // The begin/end directive names depend on the code object version.
  const char *AssemblerDirectiveBegin;
  const char *AssemblerDirectiveEnd;
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  raw_string_ostream YamlStream(HSAMetadataString);

  // Whitespace is part of the collected text, so have the lexer return it.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    while (getLexer().is(AsmToken::Space)) {
      YamlStream << getLexer().getTok().getString();
      Lex();
    }

    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    YamlStream << Parser.parseStringToEndOfStatement()
               << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    // Name the end directive that was actually being searched for, which
    // differs between code object versions.
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  YamlStream.flush();

  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  }

  return false;
}
// Handles the PAL metadata directive: parses a comma-separated list of
// absolute expressions and emits them as PAL metadata. Only available when
// the target OS is amdpal. Returns true if a diagnostic was reported.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  const bool IsAMDPAL = getSTI().getTargetTriple().getOS() == Triple::AMDPAL;
  if (!IsAMDPAL) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  PALMD::Metadata PALMetadata;

  // At least one value is required; each further value follows a comma.
  bool ExpectValue = true;
  while (ExpectValue) {
    uint32_t Value;
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata.push_back(Value);

    ExpectValue = getLexer().is(AsmToken::Comma);
    if (ExpectValue)
      Lex();
  }

  getTargetStreamer().EmitPALMetadata(PALMetadata);
  return false;
}
// Dispatches a target-specific directive to its handler. Which directives
// are recognized depends on whether code object v3 is in use; the PAL
// metadata directive is recognized in both modes. Returns true for
// directives that are not handled here.
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  const StringRef Name = DirectiveID.getString();
  const bool IsCodeObjectV3 = AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI());

  if (!IsCodeObjectV3) {
    if (Name == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (Name == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (Name == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (Name == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (Name == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (Name == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (Name == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (Name == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (Name == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (Name == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
unsigned RegNo) const {
for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
R.isValid(); ++R) {
if (*R == RegNo)
return isGFX9();
}
switch (RegNo) {
case AMDGPU::TBA:
case AMDGPU::TBA_LO:
case AMDGPU::TBA_HI:
case AMDGPU::TMA:
case AMDGPU::TMA_LO:
case AMDGPU::TMA_HI:
return !isGFX9();
case AMDGPU::XNACK_MASK:
case AMDGPU::XNACK_MASK_LO:
case AMDGPU::XNACK_MASK_HI:
return !isCI() && !isSI() && hasXNACK();
default:
break;
}
if (isCI())
return true;
if (isSI()) {
// No flat_scr
switch (RegNo) {
case AMDGPU::FLAT_SCR:
case AMDGPU::FLAT_SCR_LO:
case AMDGPU::FLAT_SCR_HI:
return false;
default:
return true;
}
}
// VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
// SI/CI have.
for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
R.isValid(); ++R) {
if (*R == RegNo)
return false;
}
return true;
}
/// Parse one instruction operand and append it to \p Operands.
///
/// The attempts are made in this order: the custom operand parsers, a
/// register or immediate, a general expression, and finally a bare identifier
/// kept as a token operand.
///
/// \returns MatchOperand_Success when an operand was appended, otherwise the
/// failing status.
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  // The custom parsers get the first shot.
  OperandMatchResultTy Result = MatchOperandParserImpl(Operands, Mnemonic);

  // A definite answer (success or a hard parse error) ends the search.
  //
  // Reaching EndOfStatement also ends it: at that point the custom parsers
  // are only appending default values to the operand list, so the generic
  // parsing below must not run.
  const bool CustomParserDecided =
      Result == MatchOperand_Success || Result == MatchOperand_ParseFail;
  if (CustomParserDecided || getLexer().is(AsmToken::EndOfStatement))
    return Result;

  // Plain register or immediate.
  Result = parseRegOrImm(Operands);
  if (Result == MatchOperand_Success)
    return Result;

  const AsmToken &Tok = Parser.getTok();
  const SMLoc StartLoc = Tok.getLoc();

  // General expression.
  const MCExpr *Expr = nullptr;
  const bool ExprFailed = Parser.parseExpression(Expr);
  if (!ExprFailed) {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, StartLoc));
    return MatchOperand_Success;
  }

  // Possibly this is an instruction flag like 'gds'.
  if (Tok.getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  Operands.push_back(
      AMDGPUOperand::CreateToken(this, Tok.getString(), StartLoc));
  Parser.Lex();
  return MatchOperand_Success;
}
/// Strip an encoding suffix (_e64, _e32, _dpp, _sdwa) from a mnemonic and
/// record the encoding it forces.
///
/// \returns \p Name without the recognised suffix, or \p Name unchanged when
/// it carries none.
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Forget whatever the previous instruction forced.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.drop_back(4);
  }

  if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.drop_back(4);
  }

  if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.drop_back(4);
  }

  if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.drop_back(5);
  }

  return Name;
}
/// Parse a whole instruction statement: the mnemonic (with any encoding
/// suffix stripped) followed by its operands up to the end of the statement.
///
/// On an operand error the rest of the statement is skipped.
///
/// \returns true on error, false on success.
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // The mnemonic becomes the first operand.
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  while (!getLexer().is(AsmToken::EndOfStatement)) {
    const OperandMatchResultTy Res = parseOperand(Operands, Name);

    // Eat the comma or space if there is one.
    if (getLexer().is(AsmToken::Comma))
      Parser.Lex();

    const char *Msg = nullptr;
    if (Res == MatchOperand_ParseFail)
      Msg = "failed parsing operand.";
    else if (Res == MatchOperand_NoMatch)
      Msg = "not a valid operand.";

    if (!Msg)
      continue;

    // Report the problem, then discard the remainder of the statement.
    Error(getLexer().getLoc(), Msg);
    while (!getLexer().is(AsmToken::EndOfStatement))
      Parser.Lex();
    return true;
  }

  return false;
}
//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//
/// Parse `<Prefix>:<int>` or `<Prefix>:-<int>` and store the value in \p Int.
///
/// \returns MatchOperand_NoMatch when the current token is not the identifier
/// \p Prefix, MatchOperand_ParseFail when the prefix is present but the rest
/// is malformed, MatchOperand_Success otherwise.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  if (!Parser.getTok().getString().equals(Prefix))
    return MatchOperand_NoMatch;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;
  Parser.Lex();

  // An optional leading minus sign is handled here, ahead of the integer.
  const bool Negate = getLexer().getKind() == AsmToken::Minus;
  if (Negate)
    Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return MatchOperand_ParseFail;

  if (getParser().parseAbsoluteExpression(Int))
    return MatchOperand_ParseFail;

  if (Negate)
    Int = -Int;

  return MatchOperand_Success;
}
/// Parse `<Prefix>:<int>` and append the value to \p Operands as an immediate
/// of kind \p ImmTy.
///
/// \param ConvertResult optional callback that may rewrite the parsed value;
/// when it returns false the operand is rejected with MatchOperand_ParseFail.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  const SMLoc StartLoc = Parser.getTok().getLoc();

  int64_t Parsed = 0;
  const OperandMatchResultTy Status = parseIntWithPrefix(Prefix, Parsed);
  if (Status != MatchOperand_Success)
    return Status;

  const bool Rejected = ConvertResult && !ConvertResult(Parsed);
  if (Rejected)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Parsed, StartLoc, ImmTy));
  return MatchOperand_Success;
}
/// Parse an operand of the form `<Prefix>:[b0,b1,...]` where every element is
/// 0 or 1, and append the packed result (element I becomes bit I) to
/// \p Operands as an immediate of kind \p ImmTy.
///
/// Between one and four elements are accepted and the list must be closed by
/// ']'.
///
/// \param ConvertResult not used by this implementation.
/// \returns MatchOperand_NoMatch when the current token is not \p Prefix,
/// MatchOperand_ParseFail when the list is malformed, MatchOperand_Success
/// otherwise.
OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy,
    bool (*ConvertResult)(int64_t&)) {
  StringRef Name = Parser.getTok().getString();
  if (!Name.equals(Prefix))
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  unsigned Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; I < 4; ++I) {
    if (I != 0) {
      if (getLexer().is(AsmToken::RBrac))
        break;

      if (getLexer().isNot(AsmToken::Comma))
        return MatchOperand_ParseFail;
      Parser.Lex();
    }

    if (getLexer().isNot(AsmToken::Integer))
      return MatchOperand_ParseFail;

    int64_t Op;
    if (getParser().parseAbsoluteExpression(Op))
      return MatchOperand_ParseFail;

    if (Op != 0 && Op != 1)
      return MatchOperand_ParseFail;

    Val |= (Op << I);
  }

  // The loop also ends after the fourth element without having seen ']'.
  // Require the bracket explicitly instead of blindly consuming whatever
  // token follows, so unterminated or over-long lists are rejected.
  if (getLexer().isNot(AsmToken::RBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}
/// Parse a single-bit modifier spelled either `<Name>` (bit set) or
/// `no<Name>` (bit clear) and append it to \p Operands as an immediate of
/// kind \p ImmTy.
///
/// At the end of the statement nothing is consumed and a zero immediate is
/// appended as the default value.
///
/// \returns MatchOperand_NoMatch when the current token is neither spelling,
/// MatchOperand_Success otherwise.
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit = 0;
  SMLoc S = Parser.getTok().getLoc();

  // If we are at the end of the statement this is a default argument, so the
  // default value (0) is used and no token is consumed.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    if (getLexer().isNot(AsmToken::Identifier))
      return MatchOperand_NoMatch;

    StringRef Tok = Parser.getTok().getString();
    if (Tok == Name) {
      // These diagnostics are reported but parsing continues.
      if (Tok == "r128" && isGFX9())
        Error(S, "r128 modifier is not supported on this GPU");
      if (Tok == "a16" && !isGFX9())
        Error(S, "a16 modifier is not supported on this GPU");
      Bit = 1;
    } else if (Tok.startswith("no") && Tok.drop_front(2) == Name) {
      // Only the exact spelling "no<Name>" clears the bit. A looser
      // prefix/suffix test would also accept tokens such as "noxyz<Name>".
      Bit = 0;
    } else {
      return MatchOperand_NoMatch;
    }
    Parser.Lex();
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}
/// Append the optional immediate of kind \p ImmT to \p Inst.
///
/// When \p OptionalIdx records a parsed operand of that kind, that operand is
/// added; otherwise an immediate with value \p Default is added.
static void addOptionalImmOperand(
    MCInst& Inst, const OperandVector& Operands,
    AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
    AMDGPUOperand::ImmTy ImmT,
    int64_t Default = 0) {
  const auto Found = OptionalIdx.find(ImmT);
  if (Found == OptionalIdx.end()) {
    Inst.addOperand(MCOperand::createImm(Default));
    return;
  }

  const unsigned OperandPos = Found->second;
  static_cast<AMDGPUOperand &>(*Operands[OperandPos]).addImmOperands(Inst, 1);
}
/// Parse `<Prefix>:<identifier>` and return the identifier text in \p Value.
///
/// The prefix and the colon are consumed; the value identifier is left as the
/// current token.
///
/// \returns MatchOperand_NoMatch when the current token is not the identifier
/// \p Prefix, MatchOperand_ParseFail when the colon or the value identifier
/// is missing, MatchOperand_Success otherwise.
OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;

  if (Parser.getTok().getString() != Prefix)
    return MatchOperand_NoMatch;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;

  Value = Parser.getTok().getString();
  return MatchOperand_Success;
}
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
//
// Accepted ranges are dfmt in [0,15] and nfmt in [0,7]; the resulting
// immediate is dfmt | (nfmt << 4). Returns MatchOperand_NoMatch when neither
// modifier is present, and MatchOperand_ParseFail for a malformed or
// out-of-range value.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Dfmt = 0, Nfmt = 0;
  // dfmt and nfmt can appear in either order, and each is optional.
  bool GotDfmt = false, GotNfmt = false;
  while (!GotDfmt || !GotNfmt) {
    if (!GotDfmt) {
      auto Res = parseIntWithPrefix("dfmt", Dfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        // parseIntWithPrefix accepts a leading minus sign, so the lower bound
        // has to be checked as well.
        if (Dfmt < 0 || Dfmt >= 16) {
          Error(Parser.getTok().getLoc(), "out of range dfmt");
          return MatchOperand_ParseFail;
        }
        GotDfmt = true;
        Parser.Lex();
        continue;
      }
    }
    if (!GotNfmt) {
      auto Res = parseIntWithPrefix("nfmt", Nfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        // Same as above: reject negative values, not only too-large ones.
        if (Nfmt < 0 || Nfmt >= 8) {
          Error(Parser.getTok().getLoc(), "out of range nfmt");
          return MatchOperand_ParseFail;
        }
        GotNfmt = true;
        Parser.Lex();
        continue;
      }
    }
    // Neither modifier matched at this position: stop looking.
    break;
  }
  if (!GotDfmt && !GotNfmt)
    return MatchOperand_NoMatch;
  auto Format = Dfmt | Nfmt << 4;
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
  return MatchOperand_Success;
}
//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//
/// Build the MCInst operand list for a DS instruction that has separate
/// offset0/offset1 fields.
///
/// Register operands are added in source order, followed by offset0, offset1
/// and gds (each defaulting to 0 when absent) and finally the M0 register.
void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // Operands[0] is skipped; the scan starts at index 1.
  const unsigned NumOperands = Operands.size();
  for (unsigned OpIdx = 1; OpIdx != NumOperands; ++OpIdx) {
    auto &Op = static_cast<AMDGPUOperand &>(*Operands[OpIdx]);

    if (!Op.isReg()) {
      // Anything that is not a register is remembered by immediate kind.
      OptionalIdx[Op.getImmTy()] = OpIdx;
      continue;
    }

    Op.addRegOperands(Inst, 1);
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
/// Build the MCInst operand list for a DS instruction with a single offset
/// (or swizzle) field.
///
/// \param IsGdsHardcoded when true no gds immediate is emitted; it is also
/// forced to true when a literal "gds" token appears among the operands.
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  // Operands[0] is skipped; the scan starts at index 1.
  const unsigned NumOperands = Operands.size();
  for (unsigned OpIdx = 1; OpIdx != NumOperands; ++OpIdx) {
    auto &Op = static_cast<AMDGPUOperand &>(*Operands[OpIdx]);

    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
    } else {
      // Everything else is remembered by immediate kind.
      OptionalIdx[Op.getImmTy()] = OpIdx;
    }
  }

  // DS_SWIZZLE_B32 carries a swizzle immediate where the others carry a
  // plain offset.
  const unsigned Opcode = Inst.getOpcode();
  const bool IsSwizzle = Opcode == AMDGPU::DS_SWIZZLE_B32_si ||
                         Opcode == AMDGPU::DS_SWIZZLE_B32_vi;
  const AMDGPUOperand::ImmTy OffsetType =
      IsSwizzle ? AMDGPUOperand::ImmTySwizzle : AMDGPUOperand::ImmTyOffset;
  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
/// Build the MCInst operand list for an export instruction.
///
/// Exactly four source slots are expected, each a register or "off". The
/// enable mask is computed from the slots that hold a real register and is
/// appended last, after the vm and compr immediates.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // Position inside Inst of each of the four source operands.
  unsigned OperandIdx[4];
  int SrcIdx = 0;

  for (unsigned I = 1, E = Operands.size(); I != E; ++I) {
    auto &Op = static_cast<AMDGPUOperand &>(*Operands[I]);

    // A source slot: a real register, or "off" encoded as NoRegister.
    const bool IsReg = Op.isReg();
    if (IsReg || Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      if (IsReg)
        Op.addRegOperands(Inst, 1);
      else
        Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    // The export target is added in place.
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // The "done" token contributes no MCInst operand here.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Everything else is remembered by immediate kind.
    OptionalIdx[Op.getImmTy()] = I;
  }

  assert(SrcIdx == 4);

  const bool Compr =
      OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end();
  if (Compr) {
    // Compressed form: the third source moves into the second slot and the
    // last two slots are cleared.
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Each live source enables one mask bit, or two when compressed.
  unsigned EnMask = 0;
  for (int Src = 0; Src < SrcIdx; ++Src) {
    if (Inst.getOperand(OperandIdx[Src]).getReg() == AMDGPU::NoRegister)
      continue;
    EnMask |= Compr ? (0x3 << Src * 2) : (0x1 << Src);
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//
/// Encode counter value \p CntVal into the packed waitcnt value \p IntVal
/// using the supplied \p encode / \p decode pair.
///
/// If the value does not survive an encode/decode round trip it does not fit
/// the field: with \p Saturate the field is re-encoded from -1, otherwise the
/// call fails.
///
/// \returns true on failure, false on success.
static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  IntVal = encode(ISA, IntVal, CntVal);

  // Round trip succeeded: the value fits.
  if (CntVal == decode(ISA, IntVal))
    return false;

  if (!Saturate)
    return true;

  IntVal = encode(ISA, IntVal, -1);
  return false;
}
/// Parse one `<counter>(<value>)` term of an s_waitcnt operand and merge it
/// into the packed value \p IntVal.
///
/// Recognised counters are vmcnt, expcnt and lgkmcnt, each with an optional
/// "_sat" suffix that saturates the value instead of rejecting it. A trailing
/// '&' or ',' is consumed when the token after it is an identifier.
///
/// \returns true on error, false on success.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  SMLoc CntLoc = Parser.getTok().getLoc();
  StringRef CntName = Parser.getTok().getString();
  int64_t CntVal;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LParen))
    return true;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Integer))
    return true;

  SMLoc ValLoc = Parser.getTok().getLoc();
  if (getParser().parseAbsoluteExpression(CntVal))
    return true;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool KnownName = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    KnownName = false;
  }

  // An unrecognised counter is a naming problem, not a range problem, so it
  // gets its own diagnostic pointing at the name.
  if (!KnownName) {
    Error(CntLoc, "invalid counter name " + CntName);
    return true;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  Parser.Lex();
  // Skip a separator only when another counter term follows it.
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}
/// Parse the operand of s_waitcnt: either a plain integer or a sequence of
/// `<counter>(<value>)` terms parsed until the end of the statement.
///
/// Counters that are not mentioned keep the bits from getWaitcntBitMask().
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  const AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  const SMLoc StartLoc = Parser.getTok().getLoc();

  const AsmToken::TokenKind Kind = getLexer().getKind();
  if (Kind == AsmToken::Integer) {
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Waitcnt))
      return MatchOperand_ParseFail;
  } else if (Kind == AsmToken::Identifier) {
    // Symbolic form: keep folding counter terms into Waitcnt.
    do {
      if (parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    } while (getLexer().isNot(AsmToken::EndOfStatement));
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, StartLoc));
  return MatchOperand_Success;
}
/// Parse `hwreg(<id>)` or `hwreg(<id>, <offset>, <width>)`.
///
/// <id> is either a symbolic register name or an integer. An unknown symbolic
/// name is not an error here: HwReg.Id is left as ID_UNKNOWN_ for the caller
/// to diagnose. \p Offset and \p Width are written only when the optional
/// pair is present.
///
/// \returns true on a syntax error, false on success.
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (!trySkipToken(AsmToken::LParen))
    return true;

  if (getLexer().isNot(AsmToken::Identifier)) {
    // Numeric register code.
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer) ||
        getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  } else {
    // Symbolic register name.
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef RegName = Parser.getTok().getString();

    // On SI/CI/VI the lookup stops before the GFX9-only names.
    int End = ID_SYMBOLIC_LAST_;
    if (isSI() || isCI() || isVI())
      End = ID_SYMBOLIC_FIRST_GFX9_;

    for (int RegId = ID_SYMBOLIC_FIRST_; RegId < End; ++RegId) {
      if (RegName == IdSymbolic[RegId]) {
        HwReg.Id = RegId;
        break;
      }
    }
    Parser.Lex();
  }

  // Short form: just the register id.
  if (trySkipToken(AsmToken::RParen))
    return false;

  // Optional parameters: ", <offset>, <width>".
  if (!trySkipToken(AsmToken::Comma))
    return true;

  if (getLexer().isNot(AsmToken::Integer) ||
      getParser().parseAbsoluteExpression(Offset))
    return true;

  if (!trySkipToken(AsmToken::Comma))
    return true;

  if (getLexer().isNot(AsmToken::Integer) ||
      getParser().parseAbsoluteExpression(Width))
    return true;

  if (!trySkipToken(AsmToken::RParen))
    return true;

  return false;
}
/// Parse a hardware-register operand, given either as a raw 16-bit integer or
/// as a `hwreg(...)` construct, and append it as an ImmTyHwreg immediate.
///
/// Range violations are reported through Error() but an operand is still
/// created, so that parsing can continue with the next operand.
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  const SMLoc S = Parser.getTok().getLoc();
  const AsmToken::TokenKind Kind = getLexer().getKind();

  if (Kind == AsmToken::Integer) {
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      // No error code is returned here: the immediate is still created so
      // that no unnecessary follow-up error messages are produced.
      Error(S, "invalid immediate: only 16-bit values are legal");
    }
  } else if (Kind == AsmToken::Identifier) {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    int64_t Offset = OFFSET_DEFAULT_;
    int64_t Width = WIDTH_M1_DEFAULT_ + 1;
    if (parseHwregConstruct(HwReg, Offset, Width))
      return MatchOperand_ParseFail;

    const bool BadId = HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id);
    if (BadId) {
      if (HwReg.IsSymbolic)
        Error(S, "invalid symbolic name of hardware register");
      else
        Error(S, "invalid code of hardware register: only 6-bit values are legal");
    }

    if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
      Error(S, "invalid bit offset: only 5-bit values are legal");

    // The width is encoded as width-1.
    if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
      Error(S, "invalid bitfield width: only values from 1 to 32 are legal");

    Imm16Val = (HwReg.Id << ID_SHIFT_) |
               (Offset << OFFSET_SHIFT_) |
               ((Width-1) << WIDTH_M1_SHIFT_);
  } else {
    return MatchOperand_NoMatch;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}
/// An s_waitcnt operand is accepted whenever the operand is an immediate.
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
/// True when this operand is an immediate of kind ImmTyHwreg.
bool
AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
/// Parse `sendmsg(<msg>[, <op>[, <stream id>]])`.
///
/// <msg> and <op> may each be a symbolic name or an integer. Unknown symbolic
/// names are not an error here: the corresponding Id is left as ID_UNKNOWN_
/// for the caller to diagnose. \p StreamId is written only when a stream id
/// is present in the source.
///
/// \returns true on a syntax error, false otherwise.
bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
  using namespace llvm::AMDGPU::SendMsg;

  if (Parser.getTok().getString() != "sendmsg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  // Message id.
  if (getLexer().is(AsmToken::Identifier)) {
    Msg.IsSymbolic = true;
    Msg.Id = ID_UNKNOWN_;
    // The token text is only compared, never stored, so a StringRef view is
    // enough; no std::string copy is needed.
    const StringRef tok = Parser.getTok().getString();
    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
      switch(i) {
        default: continue; // Omit gaps.
        case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
      }
      if (tok == IdSymbolic[i]) {
        Msg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Msg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Msg.Id))
      return true;
    // NOTE(review): a second integer directly following the first overwrites
    // Msg.Id; the intent is not evident from this function -- confirm.
    if (getLexer().is(AsmToken::Integer))
      if (getParser().parseAbsoluteExpression(Msg.Id))
        Msg.Id = ID_UNKNOWN_;
  }
  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
    return false;

  // Only GS, GS_DONE and SYSMSG take an operation; for every other message
  // the construct must end here.
  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
    if (getLexer().isNot(AsmToken::RParen))
      return true;
    Parser.Lex();
    return false;
  }

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  // Operation id.
  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
  Operation.Id = ID_UNKNOWN_;
  if (getLexer().is(AsmToken::Identifier)) {
    Operation.IsSymbolic = true;
    // SYSMSG and GS/GS_DONE use different tables of operation names.
    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
    const StringRef Tok = Parser.getTok().getString();
    for (int i = F; i < L; ++i) {
      if (Tok == S[i]) {
        Operation.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Operation.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Operation.Id))
      return true;
  }

  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
    // Stream id is optional.
    if (getLexer().is(AsmToken::RParen)) {
      Parser.Lex();
      return false;
    }

    if (getLexer().isNot(AsmToken::Comma))
      return true;
    Parser.Lex();

    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(StreamId))
      return true;
  }

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();
  return false;
}
/// Parse an interpolation slot name (p10, p20 or p0) and append it as an
/// ImmTyInterpSlot immediate with value 0, 1 or 2 respectively.
///
/// \returns MatchOperand_NoMatch when the current token is not an identifier,
/// MatchOperand_ParseFail for an unknown slot name.
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  const StringRef SlotName = Parser.getTok().getString();
  const SMLoc SlotLoc = Parser.getTok().getLoc();

  const int Slot = StringSwitch<int>(SlotName)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, SlotLoc,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}
/// Parse an interpolation attribute of the form `attr<N>.<chan>` where <chan>
/// is one of x, y, z, w, and append two immediates: the attribute number
/// (ImmTyInterpAttr) and the channel index (ImmTyAttrChan).
///
/// An attribute number above 63 is reported with Error(); in that case
/// MatchOperand_Success is still returned but no operands are appended.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Text = Parser.getTok().getString();
  if (!Text.startswith("attr"))
    return MatchOperand_NoMatch;

  // The last two characters select the channel.
  const StringRef Chan = Text.take_back(2);
  const int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // What remains between "attr" and the channel suffix is the number.
  Text = Text.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Text.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  const SMLoc AttrLoc = Parser.getTok().getLoc();
  Parser.Lex();

  if (Attr > 63) {
    Error(AttrLoc, "out of bounds attr");
    return MatchOperand_Success;
  }

  const SMLoc ChanLoc = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, AttrLoc,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, ChanLoc,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
void AMDGPUAsmParser::errorExpTgt() {
Error(Parser.getTok().getLoc(), "invalid exp target");
}
/// Translate an export target name into its numeric value \p Val.
///
/// Mapping implemented here:
///   null            -> 9
///   mrtz            -> 8
///   mrt<N>          -> N        (error reported when N > 7)
///   pos<N>          -> 12 + N   (error reported when N > 3)
///   param<N>        -> 32 + N   (error reported when N >= 32)
///   invalid_target_<N> -> N     (error always reported)
///
/// Out-of-range numbers are reported via errorExpTgt() but still return
/// MatchOperand_Success. A non-numeric suffix yields MatchOperand_ParseFail
/// and an unrecognised name MatchOperand_NoMatch.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.consume_front("mrt")) {
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.consume_front("pos")) {
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 3)
      errorExpTgt();

    Val += 12;
    return MatchOperand_Success;
  }

  if (Str.consume_front("param")) {
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    Val += 32;
    return MatchOperand_Success;
  }

  if (Str.consume_front("invalid_target_")) {
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
/// Parse an export target name from the current token and append it as an
/// ImmTyExpTgt immediate. The token is consumed only on success.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  uint8_t Target;
  const StringRef TargetName = Parser.getTok().getString();

  const auto Status = parseExpTgtImpl(TargetName, Target);
  if (Status != MatchOperand_Success)
    return Status;

  const SMLoc TargetLoc = Parser.getTok().getLoc();
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, Target, TargetLoc,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}
/// Parse the operand of a send-message instruction, given either as a raw
/// 16-bit integer or as a `sendmsg(...)` construct, and append it as an
/// ImmTySendMsg immediate.
///
/// Validation problems are reported through Error() but an operand is still
/// created; for the symbolic form the immediate then holds only the fields
/// that were encoded before the first failing check.
///
/// \returns MatchOperand_NoMatch when the current token is neither an integer
/// nor an identifier (or the integer expression fails to parse),
/// MatchOperand_ParseFail on a syntax error inside `sendmsg(...)`,
/// MatchOperand_Success otherwise.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
using namespace llvm::AMDGPU::SendMsg;
int64_t Imm16Val = 0;
SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
case AsmToken::Integer:
// The operand can be an integer value.
if (getParser().parseAbsoluteExpression(Imm16Val))
return MatchOperand_NoMatch;
if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
Error(S, "invalid immediate: only 16-bit values are legal");
// Do not return an error code, but create an imm operand anyway and proceed
// to the next operand, if any. That avoids unnecessary error messages.
}
break;
case AsmToken::Identifier: {
OperandInfoTy Msg(ID_UNKNOWN_);
OperandInfoTy Operation(OP_UNKNOWN_);
int64_t StreamId = STREAM_ID_DEFAULT_;
if (parseSendMsgConstruct(Msg, Operation, StreamId))
return MatchOperand_ParseFail;
// Single-pass block: each failed check reports an error and uses 'break'
// to skip the remaining validation and encoding steps.
do {
// Validate and encode message ID.
if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
|| Msg.Id == ID_SYSMSG)) {
if (Msg.IsSymbolic)
Error(S, "invalid/unsupported symbolic name of message");
else
Error(S, "invalid/unsupported code of message");
break;
}
Imm16Val = (Msg.Id << ID_SHIFT_);
// Validate and encode operation ID.
if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
if (Operation.IsSymbolic)
Error(S, "invalid symbolic name of GS_OP");
else
Error(S, "invalid code of GS_OP: only 2-bit values are legal");
break;
}
// The NOP operation is accepted with GS_DONE only.
if (Operation.Id == OP_GS_NOP
&& Msg.Id != ID_GS_DONE) {
Error(S, "invalid GS_OP: NOP is for GS_DONE only");
break;
}
Imm16Val |= (Operation.Id << OP_SHIFT_);
}
if (Msg.Id == ID_SYSMSG) {
if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
if (Operation.IsSymbolic)
Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
else
Error(S, "invalid/unsupported code of SYSMSG_OP");
break;
}
Imm16Val |= (Operation.Id << OP_SHIFT_);
}
// Validate and encode stream ID.
// Only GS / GS_DONE with a non-NOP operation carry a stream id.
if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
Error(S, "invalid stream id: only 2-bit values are legal");
break;
}
Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
}
} while (false);
}
break;
}
// The operand is created on every path that reaches this point, including
// those that reported an error above.
Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
return MatchOperand_Success;
}
/// True when this operand is an immediate of kind ImmTySendMsg.
bool
AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//
/// Consume the current token if it is the identifier \p Id.
///
/// \returns true when the token was consumed, false when nothing changed.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  const bool Matches = getLexer().getKind() == AsmToken::Identifier &&
                       Parser.getTok().getString() == Id;
  if (Matches)
    Parser.Lex();
  return Matches;
}
/// Consume the current token if it is of kind \p Kind.
///
/// \returns true when the token was consumed, false when nothing changed.
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (getLexer().getKind() != Kind)
    return false;

  Parser.Lex();
  return true;
}
/// Consume a token of kind \p Kind, or report \p ErrMsg at the current token
/// when it is of a different kind.
///
/// \returns true when the token was consumed, false after reporting the error.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (trySkipToken(Kind))
    return true;

  Error(Parser.getTok().getLoc(), ErrMsg);
  return false;
}
/// Parse an absolute expression into \p Imm.
///
/// \returns true on success. Note this is the inverse of the convention used
/// by parseAbsoluteExpression(), which returns true on failure.
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  const bool Failed = getParser().parseAbsoluteExpression(Imm);
  return !Failed;
}
/// Parse a string literal token and return its contents in \p Val.
///
/// \returns true when a string was consumed; otherwise reports \p ErrMsg at
/// the current token and returns false.
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  const SMLoc Loc = Parser.getTok().getLoc();

  if (getLexer().getKind() != AsmToken::String) {
    Error(Loc, ErrMsg);
    return false;
  }

  Val = Parser.getTok().getStringContents();
  Parser.Lex();
  return true;
}
//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//
/// Pack the and/or/xor masks of a bitmask-permute swizzle into its encoding:
/// BITMASK_PERM_ENC combined with each mask shifted to its field position.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  unsigned Encoded = BITMASK_PERM_ENC;
  Encoded |= AndMask << BITMASK_AND_SHIFT;
  Encoded |= OrMask << BITMASK_OR_SHIFT;
  Encoded |= XorMask << BITMASK_XOR_SHIFT;
  return Encoded;
}
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
const unsigned MinVal,
const unsigned MaxVal,
const StringRef ErrMsg) {
for (unsigned i = 0; i < OpNum; ++i) {
if (!skipToken(AsmToken::Comma, "expected a comma")){
return false;
}
SMLoc ExprLoc = Parser.getTok().getLoc();
if (!parseExpr(Op[i])) {
return false;
}
if (Op[i] < MinVal || Op[i] > MaxVal) {
Error(ExprLoc, ErrMsg);
return false;
}
}
return true;
}
/// Parse the LANE_NUM lane ids of a quad-permute swizzle and encode them into
/// \p Imm. \p Imm is written only on success.
///
/// \returns true on success, false on failure.
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (!parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                            "expected a 2-bit lane id"))
    return false;

  // Each lane id occupies its own LANE_SHIFT-wide field.
  Imm = QUAD_PERM_ENC;
  for (unsigned LaneNo = 0; LaneNo < LANE_NUM; ++LaneNo)
    Imm |= Lane[LaneNo] << (LANE_SHIFT * LaneNo);

  return true;
}
/// Parse the group size and lane id of a broadcast swizzle and encode them as
/// a bitmask permute into \p Imm.
///
/// The group size must be a power of two in [2,32] and the lane id must be
/// below the group size.
///
/// \returns true on success, false on failure.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  const SMLoc StartLoc = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  const bool GotGroupSize =
      parseSwizzleOperands(1, &GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]");
  if (!GotGroupSize)
    return false;

  if (!isPowerOf2_64(GroupSize)) {
    Error(StartLoc, "group size must be a power of two");
    return false;
  }

  const bool GotLaneIdx =
      parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]");
  if (!GotLaneIdx)
    return false;

  Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
  return true;
}
/// Parse the group size of a reverse swizzle and encode it as a bitmask
/// permute into \p Imm. The group size must be a power of two in [2,32].
///
/// \returns true on success, false on failure.
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  const SMLoc StartLoc = Parser.getTok().getLoc();
  int64_t GroupSize;

  const bool GotGroupSize =
      parseSwizzleOperands(1, &GroupSize,
                           2, 32, "group size must be in the interval [2,32]");
  if (!GotGroupSize)
    return false;

  if (!isPowerOf2_64(GroupSize)) {
    Error(StartLoc, "group size must be a power of two");
    return false;
  }

  // The xor mask is GroupSize - 1.
  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}
/// Parse the group size of a swap swizzle and encode it as a bitmask permute
/// into \p Imm. The group size must be a power of two in [1,16].
///
/// \returns true on success, false on failure.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  const SMLoc StartLoc = Parser.getTok().getLoc();
  int64_t GroupSize;

  const bool GotGroupSize =
      parseSwizzleOperands(1, &GroupSize,
                           1, 16, "group size must be in the interval [1,16]");
  if (!GotGroupSize)
    return false;

  if (!isPowerOf2_64(GroupSize)) {
    Error(StartLoc, "group size must be a power of two");
    return false;
  }

  // The xor mask is the group size itself.
  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}
/// Parse the control string of a bitmask-permute swizzle and encode it into
/// \p Imm.
///
/// The string has BITMASK_WIDTH characters; the first character controls the
/// highest mask bit. Each character selects what happens to its bit:
///   '0' - no mask bit set
///   '1' - bit set in the or-mask
///   'p' - bit set in the and-mask
///   'i' - bit set in both the and-mask and the xor-mask
///
/// \returns true on success, false on failure.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  StringRef Ctl;
  const SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl))
    return false;

  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t Pos = 0; Pos < Ctl.size(); ++Pos) {
    const unsigned Mask = 1 << (BITMASK_WIDTH - 1 - Pos);
    const char Ch = Ctl[Pos];

    if (Ch == '0')
      continue;

    if (Ch == '1') {
      OrMask |= Mask;
    } else if (Ch == 'p') {
      AndMask |= Mask;
    } else if (Ch == 'i') {
      AndMask |= Mask;
      XorMask |= Mask;
    } else {
      Error(StrLoc, "invalid mask");
      return false;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
/// Parse a plain numeric swizzle offset into \p Imm; it must fit in 16
/// unsigned bits.
///
/// \returns true on success, false on failure.
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
  const SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm))
    return false;

  if (isUInt<16>(Imm))
    return true;

  Error(OffsetLoc, "expected a 16-bit offset");
  return false;
}
/// Parse the parenthesised part of `swizzle(<mode>, ...)`: the mode name, its
/// mode-specific arguments and the closing parenthesis. The encoded result is
/// stored in \p Imm.
///
/// \returns true on success, false on failure.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::LParen, "expected a left parentheses"))
    return false;

  const SMLoc ModeLoc = Parser.getTok().getLoc();
  bool Parsed = false;

  // Dispatch on the mode name; the first matching name is consumed.
  if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
    Parsed = parseSwizzleQuadPerm(Imm);
  } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
    Parsed = parseSwizzleBitmaskPerm(Imm);
  } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
    Parsed = parseSwizzleBroadcast(Imm);
  } else if (trySkipId(IdSymbolic[ID_SWAP])) {
    Parsed = parseSwizzleSwap(Imm);
  } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
    Parsed = parseSwizzleReverse(Imm);
  } else {
    Error(ModeLoc, "expected a swizzle mode");
  }

  // The closing parenthesis is only looked for when the mode parsed cleanly.
  if (!Parsed)
    return false;

  return skipToken(AsmToken::RParen, "expected a closing parentheses");
}
/// Parse the swizzle operand: `offset:<imm16>` or `offset:swizzle(...)`.
///
/// When the operand does not start with "offset", parsing is delegated to
/// parseOptionalOpr(). Once "offset" has been seen, an ImmTySwizzle operand
/// is appended even if the remainder fails to parse.
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  const SMLoc StartLoc = Parser.getTok().getLoc();
  int64_t Imm = 0;

  // Swizzle "offset" operand is optional.
  // If it is omitted, try parsing other optional operands.
  if (!trySkipId("offset"))
    return parseOptionalOpr(Operands);

  bool Parsed = false;
  if (skipToken(AsmToken::Colon, "expected a colon")) {
    Parsed = trySkipId("swizzle") ? parseSwizzleMacro(Imm)
                                  : parseSwizzleOffset(Imm);
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, StartLoc,
                                              AMDGPUOperand::ImmTySwizzle));

  if (Parsed)
    return MatchOperand_Success;
  return MatchOperand_ParseFail;
}
/// \returns true if this operand is an immediate tagged ImmTySwizzle.
bool
AMDGPUOperand::isSwizzle() const {
  const bool IsSwizzleImm = isImmTy(ImmTySwizzle);
  return IsSwizzleImm;
}
//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//
/// Parse the argument list of a "gpr_idx(" macro; the opening parenthesis has
/// already been consumed by the caller.
///
/// An immediately closing ')' yields OFF. Otherwise a comma-separated list of
/// mode names from IdSymbolic is read and each contributes the bit
/// (1 << ModeId). On a syntax error or a duplicate mode an error is reported
/// and the bits accumulated so far are returned.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen))
    return OFF;

  int64_t Modes = 0;

  for (;;) {
    const SMLoc ModeLoc = Parser.getTok().getLoc();

    // Find which symbolic mode name (if any) is at the current token.
    unsigned Bit = 0;
    for (unsigned Id = ID_MIN; Id <= ID_MAX; ++Id) {
      if (trySkipId(IdSymbolic[Id])) {
        Bit = 1 << Id;
        break;
      }
    }

    if (Bit == 0) {
      // Before the first mode a closing parenthesis would also be accepted.
      Error(ModeLoc, (Modes == 0)?
                     "expected a VGPR index mode or a closing parenthesis" :
                     "expected a VGPR index mode");
      return Modes;
    }

    if (Modes & Bit) {
      Error(ModeLoc, "duplicate VGPR index mode");
      return Modes;
    }
    Modes |= Bit;

    if (trySkipToken(AsmToken::RParen))
      return Modes;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return Modes;
  }
}
/// Parse a VGPR index mode operand: either the "gpr_idx(...)" macro or an
/// absolute expression.
///
/// An ImmTyGprIdxMode operand is appended and MatchOperand_Success returned
/// even when an error was reported for the value; MatchOperand_NoMatch is
/// returned only when the absolute expression itself cannot be parsed.
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
  const SMLoc StartLoc = Parser.getTok().getLoc();
  int64_t Imm = 0;

  const bool IsMacro = getLexer().getKind() == AsmToken::Identifier &&
                       Parser.getTok().getString() == "gpr_idx" &&
                       getLexer().peekTok().is(AsmToken::LParen);

  if (IsMacro) {
    Parser.Lex(); // gpr_idx
    Parser.Lex(); // (

    // If parse failed, trigger an error but do not return error code
    // to avoid excessive error messages.
    Imm = parseGPRIdxMacro();
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_NoMatch;
    if (Imm < 0 || !isUInt<4>(Imm))
      Error(StartLoc, "invalid immediate: only 4-bit values are legal");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, StartLoc,
                                              AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}
/// \returns true if this operand is an immediate tagged ImmTyGprIdxMode.
bool AMDGPUOperand::isGPRIdxMode() const {
  const bool IsGprIdxImm = isImmTy(ImmTyGprIdxMode);
  return IsGprIdxImm;
}
//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//
/// Parse a SOPP branch target.
///
/// A token of kind Integer is parsed as an absolute expression and appended
/// as an immediate operand. An Identifier token is turned into a symbol
/// reference expression operand and then consumed. Any other token kind is a
/// parse failure.
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
  const SMLoc StartLoc = Parser.getTok().getLoc();
  const auto Kind = getLexer().getKind();

  if (Kind == AsmToken::Integer) {
    int64_t Imm;
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_ParseFail;
    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, StartLoc));
    return MatchOperand_Success;
  }

  if (Kind == AsmToken::Identifier) {
    MCSymbol *Target =
        getContext().getOrCreateSymbol(Parser.getTok().getString());
    const auto *TargetRef = MCSymbolRefExpr::create(Target, getContext());
    Operands.push_back(AMDGPUOperand::CreateExpr(this, TargetRef, StartLoc));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_ParseFail;
}
//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//
/// Build the operand used when "glc" is not written: an ImmTyGLC immediate
/// with value 0 and an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  const int64_t Cleared = 0;
  return AMDGPUOperand::CreateImm(this, Cleared, SMLoc(),
                                  AMDGPUOperand::ImmTyGLC);
}
/// Build the operand used when "slc" is not written: an ImmTySLC immediate
/// with value 0 and an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  const int64_t Cleared = 0;
  return AMDGPUOperand::CreateImm(this, Cleared, SMLoc(),
                                  AMDGPUOperand::ImmTySLC);
}
/// Convert parsed MUBUF operands into MCInst operands.
///
/// Register operands and untyped immediates (an immediate soffset) are
/// appended in source order. Tokens produce no MCInst operand. All remaining
/// typed immediates are recorded and then emitted in the fixed order
/// offset, glc, slc, tfe (subject to the flags below).
///
/// \param IsAtomic        glc is hard-coded for these, so no glc operand is
///                        emitted.
/// \param IsAtomicReturn  the first MCInst operand is duplicated at the front
///                        ($vdata_in copied as $vdata). Requires \p IsAtomic.
/// \param IsLds           the matcher selected an lds opcode. If no 'lds'
///                        modifier was actually written, the opcode is
///                        replaced by its non-lds counterpart when one exists.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsAtomicReturn,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  assert(IsAtomicReturn ? IsAtomic : true);

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Accumulate rather than assign: 'lds' may be followed by other
    // modifiers or tokens, and those must not reset the flag.
    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
  if (IsAtomicReturn) {
    MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
    Inst.insert(I, *I);
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
}
/// Convert parsed MTBUF operands into MCInst operands.
///
/// Registers and untyped immediates (an immediate soffset) are appended in
/// source order, tokens are dropped, and the remaining typed immediates are
/// emitted afterwards in the order offset, format, glc, slc, tfe.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned Idx = 1, End = Operands.size(); Idx != End; ++Idx) {
    AMDGPUOperand &Opr = static_cast<AMDGPUOperand &>(*Operands[Idx]);

    if (Opr.isReg()) {
      // Register arguments go straight into the instruction.
      Opr.addRegOperands(Inst, 1);
    } else if (Opr.isImm() && Opr.getImmTy() == AMDGPUOperand::ImmTyNone) {
      // soffset written as an immediate.
      Opr.addImmOperands(Inst, 1);
    } else if (!Opr.isToken()) {
      // Tokens like 'offen' are sometimes hard-coded into the asm string and
      // have no MCInst operand; everything else is an optional modifier.
      assert(Opr.isImm());
      OptionalIdx[Opr.getImmTy()] = Idx;
    }
  }

  const AMDGPUOperand::ImmTy ModifierOrder[] = {
    AMDGPUOperand::ImmTyOffset,
    AMDGPUOperand::ImmTyFORMAT,
    AMDGPUOperand::ImmTyGLC,
    AMDGPUOperand::ImmTySLC,
    AMDGPUOperand::ImmTyTFE
  };
  for (AMDGPUOperand::ImmTy Ty : ModifierOrder)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, Ty);
}
//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//
/// Convert parsed MIMG operands into MCInst operands.
///
/// The defs are added first. For atomics the single def is added a second
/// time as the source. Remaining register operands are appended in order,
/// and immediate modifiers are emitted afterwards in the fixed order
/// dmask, unorm, glc, slc, r128/a16, tfe, lwe, da, d16.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  unsigned Next = 1;
  for (unsigned Def = 0; Def < Desc.getNumDefs(); ++Def, ++Next)
    static_cast<AMDGPUOperand &>(*Operands[Next]).addRegOperands(Inst, 1);

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    static_cast<AMDGPUOperand &>(*Operands[Next - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;
  for (const unsigned End = Operands.size(); Next != End; ++Next) {
    AMDGPUOperand &Opr = static_cast<AMDGPUOperand &>(*Operands[Next]);

    if (Opr.isReg()) {
      Opr.addRegOperands(Inst, 1);
      continue;
    }
    if (!Opr.isImmModifier())
      llvm_unreachable("unexpected operand type");
    OptionalIdx[Opr.getImmTy()] = Next;
  }

  const AMDGPUOperand::ImmTy ModifierOrder[] = {
    AMDGPUOperand::ImmTyDMask,
    AMDGPUOperand::ImmTyUNorm,
    AMDGPUOperand::ImmTyGLC,
    AMDGPUOperand::ImmTySLC,
    AMDGPUOperand::ImmTyR128A16,
    AMDGPUOperand::ImmTyTFE,
    AMDGPUOperand::ImmTyLWE,
    AMDGPUOperand::ImmTyDA,
    AMDGPUOperand::ImmTyD16
  };
  for (AMDGPUOperand::ImmTy Ty : ModifierOrder)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, Ty);
}
/// Atomic flavour of cvtMIMG(): forwards with IsAtomic set.
void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  const bool IsAtomic = true;
  cvtMIMG(Inst, Operands, IsAtomic);
}
//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//
/// \returns true if this is an immediate whose value fits in 8 unsigned bits.
bool AMDGPUOperand::isSMRDOffset8() const {
  if (!isImm())
    return false;
  return isUInt<8>(getImm());
}
/// \returns true if this is an immediate whose value fits in 20 unsigned bits.
bool AMDGPUOperand::isSMRDOffset20() const {
  if (!isImm())
    return false;
  return isUInt<20>(getImm());
}
/// \returns true if this is an immediate that fits in 32 unsigned bits but
/// not in 8.
bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  if (!isImm())
    return false;
  const int64_t Offset = getImm();
  return isUInt<32>(Offset) && !isUInt<8>(Offset);
}
/// Build the operand used when the 8-bit SMRD offset is omitted: an
/// ImmTyOffset immediate with value 0 and an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  const int64_t ZeroOffset = 0;
  return AMDGPUOperand::CreateImm(this, ZeroOffset, SMLoc(),
                                  AMDGPUOperand::ImmTyOffset);
}
/// Build the operand used when the 20-bit SMRD offset is omitted: an
/// ImmTyOffset immediate with value 0 and an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  const int64_t ZeroOffset = 0;
  return AMDGPUOperand::CreateImm(this, ZeroOffset, SMLoc(),
                                  AMDGPUOperand::ImmTyOffset);
}
/// Build the operand used when the SMRD literal offset is omitted: an
/// ImmTyOffset immediate with value 0 and an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  const int64_t ZeroOffset = 0;
  return AMDGPUOperand::CreateImm(this, ZeroOffset, SMLoc(),
                                  AMDGPUOperand::ImmTyOffset);
}
/// Build the operand used when the U12 offset is omitted: an ImmTyOffset
/// immediate with value 0 and an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
  const int64_t ZeroOffset = 0;
  return AMDGPUOperand::CreateImm(this, ZeroOffset, SMLoc(),
                                  AMDGPUOperand::ImmTyOffset);
}
/// Build the operand used when the S13 offset is omitted: an ImmTyOffset
/// immediate with value 0 and an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
  const int64_t ZeroOffset = 0;
  return AMDGPUOperand::CreateImm(this, ZeroOffset, SMLoc(),
                                  AMDGPUOperand::ImmTyOffset);
}
//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//
/// Convert an output-modifier multiplier written in assembly to the stored
/// value: 1 -> 0, 2 -> 1, 4 -> 2 (the value shifted right by one).
///
/// \param Mul in: the written multiplier; out: the converted value. Left
///            untouched when the multiplier is not accepted.
/// \returns true if \p Mul was 1, 2 or 4.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
/// Convert an output-modifier divisor written in assembly to the stored
/// value: 1 -> 0 and 2 -> 3.
///
/// \param Div in: the written divisor; out: the converted value. Left
///            untouched when the divisor is not accepted.
/// \returns true if \p Div was 1 or 2.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
/// Convert a parsed bound_ctrl value to the stored value: 0 -> 1 and
/// -1 -> 0.
///
/// \param BoundCtrl in: the parsed value; out: the converted value. Left
///                  untouched when the value is not accepted.
/// \returns true if \p BoundCtrl was 0 or -1.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  switch (BoundCtrl) {
  case 0:
    BoundCtrl = 1;
    return true;
  case -1:
    BoundCtrl = 0;
    return true;
  default:
    return false;
  }
}
// Table of optional operands tried by parseOptionalOpr(), in order.
// Each entry is {name, immediate type, is-bit flag, value converter}.
//
// Note: the order in this table matches the order of operands in AsmString.
// NOTE(review): "d16" is listed twice; parseOptionalOpr() stops at the first
// entry that does not return NoMatch, so the second entry looks unreachable
// and is presumably kept only to mirror an AsmString ordering - confirm.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",       AMDGPUOperand::ImmTyOffen,         true,  nullptr},
  {"idxen",       AMDGPUOperand::ImmTyIdxen,         true,  nullptr},
  {"addr64",      AMDGPUOperand::ImmTyAddr64,        true,  nullptr},
  {"offset0",     AMDGPUOperand::ImmTyOffset0,       false, nullptr},
  {"offset1",     AMDGPUOperand::ImmTyOffset1,       false, nullptr},
  {"gds",         AMDGPUOperand::ImmTyGDS,           true,  nullptr},
  {"lds",         AMDGPUOperand::ImmTyLDS,           true,  nullptr},
  {"offset",      AMDGPUOperand::ImmTyOffset,        false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset,    false, nullptr},
  {"dfmt",        AMDGPUOperand::ImmTyFORMAT,        false, nullptr},
  {"glc",         AMDGPUOperand::ImmTyGLC,           true,  nullptr},
  {"slc",         AMDGPUOperand::ImmTySLC,           true,  nullptr},
  {"tfe",         AMDGPUOperand::ImmTyTFE,           true,  nullptr},
  {"d16",         AMDGPUOperand::ImmTyD16,           true,  nullptr},
  {"high",        AMDGPUOperand::ImmTyHigh,          true,  nullptr},
  {"clamp",       AMDGPUOperand::ImmTyClampSI,       true,  nullptr},
  {"omod",        AMDGPUOperand::ImmTyOModSI,        false, ConvertOmodMul},
  {"unorm",       AMDGPUOperand::ImmTyUNorm,         true,  nullptr},
  {"da",          AMDGPUOperand::ImmTyDA,            true,  nullptr},
  {"r128",        AMDGPUOperand::ImmTyR128A16,       true,  nullptr},
  {"a16",         AMDGPUOperand::ImmTyR128A16,       true,  nullptr},
  {"lwe",         AMDGPUOperand::ImmTyLWE,           true,  nullptr},
  {"d16",         AMDGPUOperand::ImmTyD16,           true,  nullptr},
  {"dmask",       AMDGPUOperand::ImmTyDMask,         false, nullptr},
  {"row_mask",    AMDGPUOperand::ImmTyDppRowMask,    false, nullptr},
  {"bank_mask",   AMDGPUOperand::ImmTyDppBankMask,   false, nullptr},
  {"bound_ctrl",  AMDGPUOperand::ImmTyDppBoundCtrl,  false, ConvertBoundCtrl},
  {"dst_sel",     AMDGPUOperand::ImmTySdwaDstSel,    false, nullptr},
  {"src0_sel",    AMDGPUOperand::ImmTySdwaSrc0Sel,   false, nullptr},
  {"src1_sel",    AMDGPUOperand::ImmTySdwaSrc1Sel,   false, nullptr},
  {"dst_unused",  AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr",       AMDGPUOperand::ImmTyExpCompr,      true,  nullptr},
  {"vm",          AMDGPUOperand::ImmTyExpVM,         true,  nullptr},
  {"op_sel",      AMDGPUOperand::ImmTyOpSel,         false, nullptr},
  {"op_sel_hi",   AMDGPUOperand::ImmTyOpSelHi,       false, nullptr},
  {"neg_lo",      AMDGPUOperand::ImmTyNegLo,         false, nullptr},
  {"neg_hi",      AMDGPUOperand::ImmTyNegHi,         false, nullptr}
};
/// Parse one optional operand and, when it is the first optional operand of
/// the instruction, greedily parse up to MAX_OPR_LOOKAHEAD more.
///
/// \returns the result of the last parseOptionalOpr() call that was made.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  const unsigned NumBefore = Operands.size();
  assert(NumBefore > 0);

  OperandMatchResultTy Res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.
  const bool IsFirstOptional =
      NumBefore == 1 ||
      static_cast<AMDGPUOperand &>(*Operands[NumBefore - 1]).isRegKind();
  if (!IsFirstOptional)
    return Res;

  // We have parsed the first optional operand.
  // Parse as many operands as necessary to skip all mandatory operands.
  for (unsigned Extra = 0; Extra < MAX_OPR_LOOKAHEAD; ++Extra) {
    if (Res != MatchOperand_Success)
      break;
    if (getLexer().is(AsmToken::EndOfStatement))
      break;

    if (getLexer().is(AsmToken::Comma))
      Parser.Lex();
    Res = parseOptionalOpr(Operands);
  }
  return Res;
}
/// Try every entry of AMDGPUOptionalOperandTable in order at the current
/// token and return the first result that is not MatchOperand_NoMatch.
///
/// Bit-style entries go through parseNamedBit(); a few immediate types have
/// dedicated parsers; all remaining entries use parseIntWithPrefix().
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  for (const OptionalOperand &Entry : AMDGPUOptionalOperandTable) {
    OperandMatchResultTy Res;

    if (Entry.IsBit) {
      Res = parseNamedBit(Entry.Name, Operands, Entry.Type);
    } else {
      switch (Entry.Type) {
      case AMDGPUOperand::ImmTyOModSI:
        Res = parseOModOperand(Operands);
        break;
      case AMDGPUOperand::ImmTySdwaDstSel:
      case AMDGPUOperand::ImmTySdwaSrc0Sel:
      case AMDGPUOperand::ImmTySdwaSrc1Sel:
        Res = parseSDWASel(Operands, Entry.Name, Entry.Type);
        break;
      case AMDGPUOperand::ImmTySdwaDstUnused:
        Res = parseSDWADstUnused(Operands);
        break;
      case AMDGPUOperand::ImmTyOpSel:
      case AMDGPUOperand::ImmTyOpSelHi:
      case AMDGPUOperand::ImmTyNegLo:
      case AMDGPUOperand::ImmTyNegHi:
        Res = parseOperandArrayWithPrefix(Entry.Name, Operands, Entry.Type,
                                          Entry.ConvertResult);
        break;
      case AMDGPUOperand::ImmTyFORMAT:
        Res = parseDfmtNfmt(Operands);
        break;
      default:
        Res = parseIntWithPrefix(Entry.Name, Operands, Entry.Type,
                                 Entry.ConvertResult);
        break;
      }
    }

    // Success and hard failure both end the search.
    if (Res != MatchOperand_NoMatch)
      return Res;
  }
  return MatchOperand_NoMatch;
}
/// Parse an output modifier written as "mul:<n>" or "div:<n>".
///
/// The current token text selects the prefix and the matching converter
/// (ConvertOmodMul or ConvertOmodDiv). Any other token is not a match.
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  const StringRef Tok = Parser.getTok().getString();

  const bool IsMul = Tok == "mul";
  if (!IsMul && Tok != "div")
    return MatchOperand_NoMatch;

  return parseIntWithPrefix(IsMul ? "mul" : "div", Operands,
                            AMDGPUOperand::ImmTyOModSI,
                            IsMul ? ConvertOmodMul : ConvertOmodDiv);
}
/// Convert a VOP3 instruction that carries op_sel.
///
/// After the common cvtVOP3P() conversion, the op_sel bit at the position
/// just past the last present source (bit N, where N is the number of srcN
/// operands the opcode has) is moved into src0_modifiers as DST_OP_SEL.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  const int Opc = Inst.getOpcode();
  const int SrcNames[] = { AMDGPU::OpName::src0,
                           AMDGPU::OpName::src1,
                           AMDGPU::OpName::src2 };

  // Count the leading source operands this opcode actually has.
  int NumSrcs = 0;
  while (NumSrcs < 3 &&
         AMDGPU::getNamedOperandIdx(Opc, SrcNames[NumSrcs]) != -1)
    ++NumSrcs;
  assert(NumSrcs > 0);

  const int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  const unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  const bool DstBitSet = (OpSel & (1 << NumSrcs)) != 0;
  if (!DstBitSet)
    return;

  const int ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  const uint32_t OldMods = Inst.getOperand(ModIdx).getImm();
  Inst.getOperand(ModIdx).setImm(OldMods | SISrcMods::DST_OP_SEL);
}
/// Check whether operand \p OpNum of \p Desc is an input-modifiers operand
/// that is followed by an untied register-class operand.
///
/// \param Desc  instruction description to inspect.
/// \param OpNum index of the candidate modifiers operand.
/// \returns true only if all four conditions listed in the body hold.
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  // 1. This is not last operand. Checked first so that neither OpInfo[OpNum]
  //    nor OpInfo[OpNum + 1] is read outside the operand table.
  if (OpNum + 1 >= Desc.NumOperands)
    return false;

  // 2. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
/// Convert parsed operands of a VOP3 interpolation instruction.
///
/// Defs are added first. Each remaining parsed operand is then either added
/// as a source with FP input modifiers, added as a plain immediate
/// (interp slot / attribute / attribute channel), or recorded as an optional
/// modifier. Finally high, clamp and omod are appended, each only if the
/// opcode has the corresponding named operand.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  OptionalImmIndexMap OptionalIdx;

  unsigned Next = 1;
  for (unsigned Def = 0; Def < Desc.getNumDefs(); ++Def, ++Next)
    static_cast<AMDGPUOperand &>(*Operands[Next]).addRegOperands(Inst, 1);

  for (const unsigned End = Operands.size(); Next != End; ++Next) {
    AMDGPUOperand &Opr = static_cast<AMDGPUOperand &>(*Operands[Next]);

    // The next MCInst slot decides how the parsed operand is consumed.
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Opr.addRegOrImmWithFPInputModsOperands(Inst, 2);
      continue;
    }
    if (Opr.isInterpSlot() || Opr.isInterpAttr() || Opr.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Opr.Imm.Val));
      continue;
    }
    if (Opr.isImmModifier()) {
      OptionalIdx[Opr.getImmTy()] = Next;
      continue;
    }
    llvm_unreachable("unhandled operand type");
  }

  auto AddIfPresent = [&](int OpName, AMDGPUOperand::ImmTy Ty) {
    if (AMDGPU::getNamedOperandIdx(Opcode, OpName) != -1)
      addOptionalImmOperand(Inst, Operands, OptionalIdx, Ty);
  };
  AddIfPresent(AMDGPU::OpName::high, AMDGPUOperand::ImmTyHigh);
  AddIfPresent(AMDGPU::OpName::clamp, AMDGPUOperand::ImmTyClampSI);
  AddIfPresent(AMDGPU::OpName::omod, AMDGPUOperand::ImmTyOModSI);
}
// Convert parsed VOP3 operands into MCInst operands.
//
// Defs are added first, then the sources in parse order. Parsed immediate
// modifiers are not emitted in place: their indices into Operands are stored
// in OptionalIdx (an in/out parameter, so callers can emit further modifiers
// afterwards). clamp and omod are appended here, each only if the opcode has
// the corresponding named operand.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx) {
unsigned Opc = Inst.getOpcode();
// Operands[0] is skipped; conversion starts at index 1.
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
// This instruction has src modifiers
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Inst.getNumOperands() is the index of the next MCInst operand to fill;
// if that slot is a modifiers operand followed by an untied register
// operand, this parsed operand supplies both (2 MCInst operands).
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
} else if (Op.isImmModifier()) {
OptionalIdx[Op.getImmTy()] = I;
} else if (Op.isRegOrImm()) {
Op.addRegOrImmOperands(Inst, 1);
} else {
llvm_unreachable("unhandled operand type");
}
}
} else {
// No src modifiers
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
if (Op.isMod()) {
OptionalIdx[Op.getImmTy()] = I;
} else {
Op.addRegOrImmOperands(Inst, 1);
}
}
}
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
}
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
}
// Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
// it has src2 register operand that is tied to dst operand
// we don't allow modifiers for this operand in assembler so src2_modifiers
// should be 0.
if (Opc == AMDGPU::V_MAC_F32_e64_si ||
Opc == AMDGPU::V_MAC_F32_e64_vi ||
Opc == AMDGPU::V_MAC_F16_e64_vi ||
Opc == AMDGPU::V_FMAC_F32_e64_vi) {
// Insert a zero src2_modifiers at its named position, then a copy of
// operand 0 (the dst) immediately after it as src2.
auto it = Inst.begin();
std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
++it;
Inst.insert(it, Inst.getOperand(0)); // src2 = dst
}
}
/// Convenience overload of cvtVOP3() for callers that do not need the map of
/// parsed optional modifiers; a throwaway map is supplied.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap UnusedOptionalIdx;
  cvtVOP3(Inst, Operands, UnusedOptionalIdx);
}
// Convert parsed operands of a VOP3P / op_sel-carrying instruction.
//
// The operands are first converted as for a normal VOP3 instruction. Then
// op_sel, op_sel_hi, neg_lo and neg_hi are appended as MCInst operands (the
// latter three only if the opcode has them), and finally their per-source
// bits are OR-ed into the matching srcN_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
const OperandVector &Operands) {
OptionalImmIndexMap OptIdx;
const int Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
cvtVOP3(Inst, Operands, OptIdx);
// If the opcode has a vdst_in operand, it is filled with a copy of
// operand 0.
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
assert(!IsPacked);
Inst.addOperand(Inst.getOperand(0));
}
// FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
// instruction, and then figure out where to actually put the modifiers
// op_sel is appended unconditionally; the code below reads it back through
// its named operand index.
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
if (OpSelHiIdx != -1) {
// Default when op_sel_hi is not written: all bits set for packed
// instructions, zero otherwise.
int DefaultVal = IsPacked ? -1 : 0;
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
DefaultVal);
}
int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
if (NegLoIdx != -1) {
assert(IsPacked);
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
}
const int Ops[] = { AMDGPU::OpName::src0,
AMDGPU::OpName::src1,
AMDGPU::OpName::src2 };
const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers };
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
// Read back the values that were just appended; absent operands count as 0.
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
unsigned OpSelHi = 0;
unsigned NegLo = 0;
unsigned NegHi = 0;
if (OpSelHiIdx != -1) {
OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
}
if (NegLoIdx != -1) {
int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
NegLo = Inst.getOperand(NegLoIdx).getImm();
NegHi = Inst.getOperand(NegHiIdx).getImm();
}
// Bit J of each mask applies to source J. Stop at the first source the
// opcode does not have.
for (int J = 0; J < 3; ++J) {
int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
if (OpIdx == -1)
break;
uint32_t ModVal = 0;
if ((OpSel & (1 << J)) != 0)
ModVal |= SISrcMods::OP_SEL_0;
if ((OpSelHi & (1 << J)) != 0)
ModVal |= SISrcMods::OP_SEL_1;
if ((NegLo & (1 << J)) != 0)
ModVal |= SISrcMods::NEG;
if ((NegHi & (1 << J)) != 0)
ModVal |= SISrcMods::NEG_HI;
int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
// Merge with whatever modifier bits cvtVOP3() already stored.
Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
}
}
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
/// \returns true if this operand is an ImmTyDppCtrl immediate that fits in
/// 9 unsigned bits and whose value lies in one of the DppCtrl ranges
/// (quad_perm, row_shl, row_shr, row_ror) or equals one of the single-value
/// controls (wave shifts/rotates, row mirrors, broadcasts).
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  if (!(isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm())))
    return false;

  const int64_t Ctrl = getImm();
  auto InRange = [Ctrl](int64_t First, int64_t Last) {
    return Ctrl >= First && Ctrl <= Last;
  };

  if (InRange(DppCtrl::QUAD_PERM_FIRST, DppCtrl::QUAD_PERM_LAST) ||
      InRange(DppCtrl::ROW_SHL_FIRST, DppCtrl::ROW_SHL_LAST) ||
      InRange(DppCtrl::ROW_SHR_FIRST, DppCtrl::ROW_SHR_LAST) ||
      InRange(DppCtrl::ROW_ROR_FIRST, DppCtrl::ROW_ROR_LAST))
    return true;

  return Ctrl == DppCtrl::WAVE_SHL1 ||
         Ctrl == DppCtrl::WAVE_ROL1 ||
         Ctrl == DppCtrl::WAVE_SHR1 ||
         Ctrl == DppCtrl::WAVE_ROR1 ||
         Ctrl == DppCtrl::ROW_MIRROR ||
         Ctrl == DppCtrl::ROW_HALF_MIRROR ||
         Ctrl == DppCtrl::BCAST15 ||
         Ctrl == DppCtrl::BCAST31;
}
/// \returns true if this is an immediate representable in 16 bits, either
/// as a signed or as an unsigned value.
bool AMDGPUOperand::isS16Imm() const {
  if (!isImm())
    return false;
  const int64_t Val = getImm();
  return isInt<16>(Val) || isUInt<16>(Val);
}
/// \returns true if this is an immediate that fits in 16 unsigned bits.
bool AMDGPUOperand::isU16Imm() const {
  if (!isImm())
    return false;
  return isUInt<16>(getImm());
}
// Parse a DPP control operand and append it as an ImmTyDppCtrl immediate.
//
// Accepted forms:
//   row_mirror | row_half_mirror
//   quad_perm:[a,b,c,d]           each selector in 0..3
//   row_shl:n | row_shr:n | row_ror:n     n in 1..15
//   wave_shl:1 | wave_rol:1 | wave_shr:1 | wave_ror:1
//   row_bcast:15 | row_bcast:31
//
// Returns NoMatch if the current token is not an identifier or not one of
// the names above. Returns ParseFail for a recognized name with malformed or
// out-of-range arguments; no diagnostic is emitted by this function itself
// on those paths.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
using namespace AMDGPU::DPP;
SMLoc S = Parser.getTok().getLoc();
StringRef Prefix;
int64_t Int;
if (getLexer().getKind() == AsmToken::Identifier) {
Prefix = Parser.getTok().getString();
} else {
return MatchOperand_NoMatch;
}
// The two mirror controls take no argument.
if (Prefix == "row_mirror") {
Int = DppCtrl::ROW_MIRROR;
Parser.Lex();
} else if (Prefix == "row_half_mirror") {
Int = DppCtrl::ROW_HALF_MIRROR;
Parser.Lex();
} else {
// Check to prevent parseDPPCtrlOps from eating invalid tokens
if (Prefix != "quad_perm"
&& Prefix != "row_shl"
&& Prefix != "row_shr"
&& Prefix != "row_ror"
&& Prefix != "wave_shl"
&& Prefix != "wave_rol"
&& Prefix != "wave_shr"
&& Prefix != "wave_ror"
&& Prefix != "row_bcast") {
return MatchOperand_NoMatch;
}
// Consume the name; a ':' must follow (it is consumed in the branches
// below).
Parser.Lex();
if (getLexer().isNot(AsmToken::Colon))
return MatchOperand_ParseFail;
if (Prefix == "quad_perm") {
// quad_perm:[%d,%d,%d,%d]
Parser.Lex();
if (getLexer().isNot(AsmToken::LBrac))
return MatchOperand_ParseFail;
Parser.Lex();
// First selector occupies the two lowest bits.
if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
return MatchOperand_ParseFail;
// Remaining three selectors are packed two bits each at bit offsets
// 2, 4 and 6.
for (int i = 0; i < 3; ++i) {
if (getLexer().isNot(AsmToken::Comma))
return MatchOperand_ParseFail;
Parser.Lex();
int64_t Temp;
if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
return MatchOperand_ParseFail;
const int shift = i*2 + 2;
Int += (Temp << shift);
}
if (getLexer().isNot(AsmToken::RBrac))
return MatchOperand_ParseFail;
Parser.Lex();
} else {
// sel:%d
Parser.Lex();
if (getParser().parseAbsoluteExpression(Int))
return MatchOperand_ParseFail;
// Row shifts/rotates OR the amount into the base encoding; the wave
// and broadcast controls map to a single fixed encoding each. A value
// outside the accepted range for the given name falls through to the
// final else and fails.
if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
Int |= DppCtrl::ROW_SHL0;
} else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
Int |= DppCtrl::ROW_SHR0;
} else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
Int |= DppCtrl::ROW_ROR0;
} else if (Prefix == "wave_shl" && 1 == Int) {
Int = DppCtrl::WAVE_SHL1;
} else if (Prefix == "wave_rol" && 1 == Int) {
Int = DppCtrl::WAVE_ROL1;
} else if (Prefix == "wave_shr" && 1 == Int) {
Int = DppCtrl::WAVE_SHR1;
} else if (Prefix == "wave_ror" && 1 == Int) {
Int = DppCtrl::WAVE_ROR1;
} else if (Prefix == "row_bcast") {
if (Int == 15) {
Int = DppCtrl::BCAST15;
} else if (Int == 31) {
Int = DppCtrl::BCAST31;
} else {
return MatchOperand_ParseFail;
}
} else {
return MatchOperand_ParseFail;
}
}
}
Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
return MatchOperand_Success;
}
/// Build the operand used when "row_mask" is omitted: an ImmTyDppRowMask
/// immediate with value 0xf and an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  const int64_t DefaultMask = 0xf;
  return AMDGPUOperand::CreateImm(this, DefaultMask, SMLoc(),
                                  AMDGPUOperand::ImmTyDppRowMask);
}
/// Build the operand used when the endpgm immediate is omitted: an
/// ImmTyEndpgm immediate with value 0 and an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  const int64_t DefaultImm = 0;
  return AMDGPUOperand::CreateImm(this, DefaultImm, SMLoc(),
                                  AMDGPUOperand::ImmTyEndpgm);
}
/// Build the operand used when "bank_mask" is omitted: an ImmTyDppBankMask
/// immediate with value 0xf and an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  const int64_t DefaultMask = 0xf;
  return AMDGPUOperand::CreateImm(this, DefaultMask, SMLoc(),
                                  AMDGPUOperand::ImmTyDppBankMask);
}
/// Build the operand used when "bound_ctrl" is omitted: an
/// ImmTyDppBoundCtrl immediate with value 0 and an empty source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  const int64_t DefaultCtrl = 0;
  return AMDGPUOperand::CreateImm(this, DefaultCtrl, SMLoc(),
                                  AMDGPUOperand::ImmTyDppBoundCtrl);
}
// Convert parsed DPP operands into MCInst operands.
//
// Defs are added first. For each remaining parsed operand, a tied MCInst
// operand is materialized first if the next MCInst slot is tied; the parsed
// operand is then added as a source with FP input modifiers, as the dpp_ctrl
// immediate, or recorded as an optional modifier. Finally row_mask (default
// 0xf), bank_mask (default 0xf) and bound_ctrl are appended.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptionalIdx;
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
for (unsigned E = Operands.size(); I != E; ++I) {
// Inst.getNumOperands() is the index of the next MCInst operand to fill.
auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
MCOI::TIED_TO);
if (TiedTo != -1) {
assert((unsigned)TiedTo < Inst.getNumOperands());
// handle tied old or src2 for MAC instructions
Inst.addOperand(Inst.getOperand(TiedTo));
}
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
// VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
// Skip it.
continue;
// The 'if' below is not an 'else if', but the branch above always
// continues, so the effect is the same.
} if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegWithFPInputModsOperands(Inst, 2);
} else if (Op.isDPPCtrl()) {
Op.addImmOperands(Inst, 1);
} else if (Op.isImm()) {
// Handle optional arguments
OptionalIdx[Op.getImmTy()] = I;
} else {
llvm_unreachable("Invalid operand type");
}
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}
//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//
/// Parse an SDWA select operand of the form "<Prefix>:<NAME>", where NAME is
/// one of BYTE_0..BYTE_3, WORD_0, WORD_1 or DWORD.
///
/// \param Prefix operand name expected before the colon.
/// \param Type   immediate type given to the created operand.
/// \returns the parseStringWithPrefix() result if it is not Success;
/// ParseFail for an unknown select name (the name token is still consumed);
/// otherwise Success after appending the operand.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  const SMLoc StartLoc = Parser.getTok().getLoc();

  StringRef SelName;
  const OperandMatchResultTy PrefixRes =
      parseStringWithPrefix(Prefix, SelName);
  if (PrefixRes != MatchOperand_Success)
    return PrefixRes;

  // Sentinel returned for names that are not in the list.
  const int64_t Unknown = 0xffffffff;
  const int64_t Sel = StringSwitch<int64_t>(SelName)
                          .Case("BYTE_0", SdwaSel::BYTE_0)
                          .Case("BYTE_1", SdwaSel::BYTE_1)
                          .Case("BYTE_2", SdwaSel::BYTE_2)
                          .Case("BYTE_3", SdwaSel::BYTE_3)
                          .Case("WORD_0", SdwaSel::WORD_0)
                          .Case("WORD_1", SdwaSel::WORD_1)
                          .Case("DWORD", SdwaSel::DWORD)
                          .Default(Unknown);
  Parser.Lex(); // eat last token

  if (Sel == Unknown)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Sel, StartLoc, Type));
  return MatchOperand_Success;
}
/// Parse "dst_unused:<NAME>", where NAME is UNUSED_PAD, UNUSED_SEXT or
/// UNUSED_PRESERVE, and append it as an ImmTySdwaDstUnused immediate.
///
/// \returns the parseStringWithPrefix() result if it is not Success;
/// ParseFail for an unknown name (the name token is still consumed);
/// otherwise Success.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  const SMLoc StartLoc = Parser.getTok().getLoc();

  StringRef UnusedName;
  const OperandMatchResultTy PrefixRes =
      parseStringWithPrefix("dst_unused", UnusedName);
  if (PrefixRes != MatchOperand_Success)
    return PrefixRes;

  // Sentinel returned for names that are not in the list.
  const int64_t Unknown = 0xffffffff;
  const int64_t Unused =
      StringSwitch<int64_t>(UnusedName)
          .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
          .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
          .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
          .Default(Unknown);
  Parser.Lex(); // eat last token

  if (Unused == Unknown)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(
      this, Unused, StartLoc, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
/// SDWA conversion entry point for VOP1: forwards to cvtSDWA() with the VOP1
/// instruction type.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  const uint64_t InstType = SIInstrFlags::VOP1;
  cvtSDWA(Inst, Operands, InstType);
}
/// SDWA conversion entry point for VOP2: forwards to cvtSDWA() with the VOP2
/// instruction type.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  const uint64_t InstType = SIInstrFlags::VOP2;
  cvtSDWA(Inst, Operands, InstType);
}
/// SDWA conversion entry point for VOP2b: forwards to cvtSDWA() with the
/// VOP2 instruction type and VCC skipping enabled.
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  const uint64_t InstType = SIInstrFlags::VOP2;
  const bool SkipVcc = true;
  cvtSDWA(Inst, Operands, InstType, SkipVcc);
}
/// SDWA conversion entry point for VOPC: forwards to cvtSDWA() with the VOPC
/// instruction type; VCC skipping is enabled exactly when isVI() is true.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  const uint64_t InstType = SIInstrFlags::VOPC;
  const bool SkipVcc = isVI();
  cvtSDWA(Inst, Operands, InstType, SkipVcc);
}
/// Populate \p Inst from the parsed SDWA operand list.
///
/// \param Inst          Instruction being built; its opcode is already set.
/// \param Operands      Parsed operands. Index 0 is not converted here
///                      (presumably the mnemonic token - confirm against the
///                      parser).
/// \param BasicInstType One of SIInstrFlags::VOP1, VOP2 or VOPC; selects which
///                      optional SDWA immediates are appended.
/// \param skipVcc       When true, a "vcc" register operand in certain
///                      positions is dropped instead of being encoded.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  OptionalImmIndexMap OptionalIdx;

  // The operands right after index 0 are the instruction's defs; emit each of
  // them as a plain register operand.
  unsigned I = 1;
  for (unsigned DefsLeft = Desc.getNumDefs(); DefsLeft != 0; --DefsLeft) {
    static_cast<AMDGPUOperand &>(*Operands[I]).addRegOperands(Inst, 1);
    ++I;
  }

  bool SkippedVcc = false;
  for (const unsigned NumParsed = Operands.size(); I != NumParsed; ++I) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
    // Number of MCInst operands emitted so far; it identifies the slot that
    // the current parsed operand would occupy.
    const unsigned NumEmitted = Inst.getNumOperands();

    if (skipVcc && !SkippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa spell the carry as a "vcc"
      // token. Drop it when it is the 2nd operand
      // (e.g. v_add_i32_sdwa v1, vcc, v2, v3) or the 4th one
      // (v_addc_u32_sdwa v1, vcc, v2, v3, vcc). For VOPC the dropped vcc is
      // the very first operand. Two vcc operands in a row are never both
      // dropped.
      const bool AtVOP2VccSlot = BasicInstType == SIInstrFlags::VOP2 &&
                                 (NumEmitted == 1 || NumEmitted == 5);
      const bool AtVOPCVccSlot =
          BasicInstType == SIInstrFlags::VOPC && NumEmitted == 0;
      if (AtVOP2VccSlot || AtVOPCVccSlot) {
        SkippedVcc = true;
        continue;
      }
    }

    if (isRegOrImmWithInputMods(Desc, NumEmitted)) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Optional argument: only remember where it was parsed; it is emitted
      // in canonical order below.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  // v_nop_sdwa (vi/gfx9) takes no optional sdwa arguments at all.
  const bool IsSdwaNop = Opcode == AMDGPU::V_NOP_sdwa_gfx9 ||
                         Opcode == AMDGPU::V_NOP_sdwa_vi;
  if (!IsSdwaNop) {
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
    case SIInstrFlags::VOP2:
      // VOP1 and VOP2 share the same leading optional operands; VOP2
      // additionally carries src1_sel at the end.
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaDstUnused,
                            DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      if (BasicInstType == SIInstrFlags::VOP2) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      }
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // v_mac_{f16, f32} special case: src2 is a register operand tied to dst, so
  // a copy of operand 0 is inserted at the src2 position.
  if (Opcode == AMDGPU::V_MAC_F32_sdwa_vi ||
      Opcode == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto Src2Pos = std::next(
        Inst.begin(),
        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2));
    Inst.insert(Src2Pos, Inst.getOperand(0)); // src2 = dst
  }
}
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
// This function has to be defined after the auto-generated include above so
// that the MatchClassKind enum (the MCK_* values) is already declared.
//
// Tokens like "glc" are parsed as immediate operands in ParseOperand(), but
// MatchInstructionImpl() expects a token there and fails to validate the
// operand. This hook accepts an operand when the matcher asked for a given
// class and the parsed operand satisfies the corresponding predicate.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  AMDGPUOperand &Operand = static_cast<AMDGPUOperand &>(Op);

  bool Accepted;
  switch (Kind) {
  case MCK_addr64:
    Accepted = Operand.isAddr64();
    break;
  case MCK_gds:
    Accepted = Operand.isGDS();
    break;
  case MCK_lds:
    Accepted = Operand.isLDS();
    break;
  case MCK_glc:
    Accepted = Operand.isGLC();
    break;
  case MCK_idxen:
    Accepted = Operand.isIdxen();
    break;
  case MCK_offen:
    Accepted = Operand.isOffen();
    break;
  case MCK_SSrcB32:
    // Operands holding expression values report true for isToken, because a
    // token and an expression cannot be told apart at parse time.
    // MatchInstructionImpl() always tries to match such an operand as a
    // token first; when the expression's name is not a valid token that
    // match fails, so the operand has to be accepted here instead.
    Accepted = Operand.isSSrcB32();
    break;
  case MCK_SSrcF32:
    Accepted = Operand.isSSrcF32();
    break;
  case MCK_SoppBrTarget:
    Accepted = Operand.isSoppBrTarget();
    break;
  case MCK_VReg32OrOff:
    Accepted = Operand.isVReg32OrOff();
    break;
  case MCK_InterpSlot:
    Accepted = Operand.isInterpSlot();
    break;
  case MCK_Attr:
    Accepted = Operand.isInterpAttr();
    break;
  case MCK_AttrChan:
    Accepted = Operand.isAttrChan();
    break;
  default:
    // Any class not listed above is rejected by this hook.
    Accepted = false;
    break;
  }

  return Accepted ? Match_Success : Match_InvalidOperand;
}
//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
/// Parse the optional immediate operand of endpgm.
///
/// When parseExpr() does not yield a value the operand is taken to be 0. The
/// value must fit in 16 unsigned bits; otherwise an error is reported at the
/// operand's start location and MatchOperand_ParseFail is returned. On
/// success an ImmTyEndpgm immediate is appended to \p Operands.
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  const SMLoc StartLoc = Parser.getTok().getLoc();

  int64_t Value = 0;
  const bool HaveExpr = parseExpr(Value);
  if (!HaveExpr) {
    // The operand is optional; fall back to 0 when it is absent.
    Value = 0;
  }

  if (isUInt<16>(Value)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, Value, StartLoc,
                                                AMDGPUOperand::ImmTyEndpgm));
    return MatchOperand_Success;
  }

  Error(StartLoc, "expected a 16-bit value");
  return MatchOperand_ParseFail;
}
/// True when this operand is an immediate of type ImmTyEndpgm.
bool AMDGPUOperand::isEndpgm() const {
  return isImmTy(ImmTyEndpgm);
}