AMDGPU: Add VOP3P instruction format
Add a few non-VOP3P but packed-related instructions. Includes a hack with dummy operands for the benefit of the assembler.

llvm-svn: 296368
parent 10c7fb4187
commit 9be7b0d485
@@ -190,6 +190,12 @@ def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
   "Has i16/f16 instructions"
 >;
 
+def FeatureVOP3P : SubtargetFeature<"vop3p",
+  "HasVOP3PInsts",
+  "true",
+  "Has VOP3P packed instructions"
+>;
+
 def FeatureMovrel : SubtargetFeature<"movrel",
   "HasMovrel",
   "true",
@@ -400,7 +406,7 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
   FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
-  FeatureApertureRegs, FeatureGFX9Insts
+  FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P
   ]
 >;
 
@@ -575,7 +581,10 @@ def isCIVI : Predicate <
 
 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
 
-def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;
+def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
+  AssemblerPredicate<"Feature16BitInsts">;
+def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
+  AssemblerPredicate<"FeatureVOP3P">;
 
 def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
   AssemblerPredicate<"FeatureSDWA">;
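Note: the new feature bit is plumbed through AMDGPUSubtarget below and exposed as hasVOP3PInsts(). A minimal sketch of how target code can gate on it once this lands; the helper name and its use are illustrative, not part of this change:

    // Sketch only: hasVOP3PInsts() is real after this patch, the
    // canUsePackedMath wrapper is hypothetical.
    static bool canUsePackedMath(const AMDGPUSubtarget &ST) {
      // VOP3P packed f16/i16 instructions exist only when the subtarget
      // reports the new feature, i.e. GFX9 in this patch.
      return ST.hasVOP3PInsts();
    }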
@@ -117,6 +117,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     SGPRInitBug(false),
     HasSMemRealTime(false),
     Has16BitInsts(false),
+    HasVOP3PInsts(false),
     HasMovrel(false),
     HasVGPRIndexMode(false),
     HasScalarStores(false),
@@ -136,6 +136,7 @@ protected:
   bool SGPRInitBug;
   bool HasSMemRealTime;
   bool Has16BitInsts;
+  bool HasVOP3PInsts;
   bool HasMovrel;
   bool HasVGPRIndexMode;
   bool HasScalarStores;
@@ -216,6 +217,10 @@ public:
     return Has16BitInsts;
   }
 
+  bool hasVOP3PInsts() const {
+    return HasVOP3PInsts;
+  }
+
   bool hasHWFP64() const {
     return FP64;
   }
@@ -157,7 +157,11 @@ public:
     ImmTySendMsg,
     ImmTyInterpSlot,
     ImmTyInterpAttr,
-    ImmTyAttrChan
+    ImmTyAttrChan,
+    ImmTyOpSel,
+    ImmTyOpSelHi,
+    ImmTyNegLo,
+    ImmTyNegHi
   };
 
   struct TokOp {
@@ -294,6 +298,10 @@ public:
   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
+  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
+  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
+  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
+  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
 
   bool isMod() const {
     return isClampSI() || isOModSI();
@@ -313,6 +321,10 @@ public:
     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
   }
 
+  bool isSCSrcV2B16() const {
+    return isSCSrcB16();
+  }
+
   bool isSCSrcB32() const {
     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
   }
@@ -325,6 +337,10 @@ public:
     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
   }
 
+  bool isSCSrcV2F16() const {
+    return isSCSrcF16();
+  }
+
   bool isSCSrcF32() const {
     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
   }
@@ -341,6 +357,11 @@ public:
     return isSCSrcB16() || isLiteralImm(MVT::i16);
   }
 
+  bool isSSrcV2B16() const {
+    llvm_unreachable("cannot happen");
+    return isSSrcB16();
+  }
+
   bool isSSrcB64() const {
     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
     // See isVSrc64().
@@ -359,6 +380,11 @@ public:
     return isSCSrcB16() || isLiteralImm(MVT::f16);
   }
 
+  bool isSSrcV2F16() const {
+    llvm_unreachable("cannot happen");
+    return isSSrcF16();
+  }
+
   bool isVCSrcB32() const {
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
   }
@@ -371,6 +397,10 @@ public:
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
   }
 
+  bool isVCSrcV2B16() const {
+    return isVCSrcB16();
+  }
+
   bool isVCSrcF32() const {
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
   }
@@ -383,6 +413,10 @@ public:
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
   }
 
+  bool isVCSrcV2F16() const {
+    return isVCSrcF16();
+  }
+
   bool isVSrcB32() const {
     return isVCSrcF32() || isLiteralImm(MVT::i32);
   }
@@ -395,6 +429,11 @@ public:
     return isVCSrcF16() || isLiteralImm(MVT::i16);
   }
 
+  bool isVSrcV2B16() const {
+    llvm_unreachable("cannot happen");
+    return isVSrcB16();
+  }
+
   bool isVSrcF32() const {
     return isVCSrcF32() || isLiteralImm(MVT::f32);
   }
@@ -407,6 +446,11 @@ public:
     return isVCSrcF16() || isLiteralImm(MVT::f16);
   }
 
+  bool isVSrcV2F16() const {
+    llvm_unreachable("cannot happen");
+    return isVSrcF16();
+  }
+
   bool isKImmFP32() const {
     return isLiteralImm(MVT::f32);
   }
@@ -607,6 +651,10 @@ public:
     case ImmTyInterpSlot: OS << "InterpSlot"; break;
     case ImmTyInterpAttr: OS << "InterpAttr"; break;
     case ImmTyAttrChan: OS << "AttrChan"; break;
+    case ImmTyOpSel: OS << "OpSel"; break;
+    case ImmTyOpSelHi: OS << "OpSelHi"; break;
+    case ImmTyNegLo: OS << "NegLo"; break;
+    case ImmTyNegHi: OS << "NegHi"; break;
     }
   }
 
@@ -783,6 +831,8 @@ public:
     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
   };
 
+  typedef std::map<AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap;
+
   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                   const MCInstrInfo &MII,
                   const MCTargetOptions &Options)
@@ -881,10 +931,18 @@ public:
   //bool ProcessInstruction(MCInst &Inst);
 
   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
 
   OperandMatchResultTy
   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                      bool (*ConvertResult)(int64_t &) = nullptr);
 
+  OperandMatchResultTy parseOperandArrayWithPrefix(
+    const char *Prefix,
+    OperandVector &Operands,
+    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+    bool (*ConvertResult)(int64_t&) = nullptr);
+
   OperandMatchResultTy
   parseNamedBit(const char *Name, OperandVector &Operands,
                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
@@ -951,7 +1009,12 @@ public:
 
   void cvtId(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands);
+
+  void cvtVOP3Impl(MCInst &Inst,
+                   const OperandVector &Operands,
+                   OptionalImmIndexMap &OptionalIdx);
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
+  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
 
   void cvtMIMG(MCInst &Inst, const OperandVector &Operands);
   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
@@ -999,6 +1062,30 @@ static const fltSemantics *getFltSemantics(MVT VT) {
   return getFltSemantics(VT.getSizeInBits() / 8);
 }
 
+static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
+  switch (OperandType) {
+  case AMDGPU::OPERAND_REG_IMM_INT32:
+  case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+    return &APFloat::IEEEsingle();
+  case AMDGPU::OPERAND_REG_IMM_INT64:
+  case AMDGPU::OPERAND_REG_IMM_FP64:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+    return &APFloat::IEEEdouble();
+  case AMDGPU::OPERAND_REG_IMM_INT16:
+  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+    return &APFloat::IEEEhalf();
+  default:
+    llvm_unreachable("unsupported fp type");
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Operand
 //===----------------------------------------------------------------------===//
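Note: getOpFltSemantics keys literal conversion on the operand's declared type rather than its byte size, which is what lets the new packed 2x16-bit operands share IEEE-half semantics. A standalone sketch of the conversion it feeds (using LLVM's APFloat, as the parser does below; the helper name is illustrative):

    #include "llvm/ADT/APFloat.h"
    using namespace llvm;

    // Convert a token parsed as double to the operand's semantics,
    // roughly as addLiteralImmOperand does below (sketch, not the
    // exact call site).
    uint64_t convertLiteral(double V, const fltSemantics &Sem) {
      bool Lost;
      APFloat FPLiteral(V);
      FPLiteral.convert(Sem, APFloat::rmNearestTiesToEven, &Lost);
      return FPLiteral.bitcastToAPInt().getZExtValue();
    }
    // e.g. convertLiteral(1.0, APFloat::IEEEhalf()) == 0x3C00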
@@ -1044,7 +1131,7 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
 
   if (type.getScalarSizeInBits() == 16) {
     return AMDGPU::isInlinableLiteral16(
-      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
+      static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
   }
 
@@ -1136,13 +1223,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
   // Check that this operand accepts literals
   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
 
-  auto OpSize = AMDGPU::getOperandSize(InstDesc, OpNum); // expected operand size
+  APInt Literal(64, Val);
+  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
 
   if (Imm.IsFPImm) { // We got fp literal token
-    APInt Literal(64, Val);
-
-    switch (OpSize) {
-    case 8:
+    switch (OpTy) {
+    case AMDGPU::OPERAND_REG_IMM_INT64:
+    case AMDGPU::OPERAND_REG_IMM_FP64:
+    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+    case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                        AsmParser->hasInv2PiInlineImm())) {
         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
@@ -1166,17 +1255,32 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
       // unclear how we should encode them. This case should be checked earlier
       // in predicate methods (isLiteralImm())
       llvm_unreachable("fp literal in 64-bit integer instruction.");
-
-    case 4:
-    case 2: {
+    }
+    case AMDGPU::OPERAND_REG_IMM_INT32:
+    case AMDGPU::OPERAND_REG_IMM_FP32:
+    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+    case AMDGPU::OPERAND_REG_IMM_INT16:
+    case AMDGPU::OPERAND_REG_IMM_FP16:
+    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
       bool lost;
       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
       // Convert literal to single precision
-      FPLiteral.convert(*getFltSemantics(OpSize),
+      FPLiteral.convert(*getOpFltSemantics(OpTy),
                         APFloat::rmNearestTiesToEven, &lost);
       // We allow precision lost but not overflow or underflow. This should be
       // checked earlier in isLiteralImm()
-      Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+
+      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
+      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
+          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
+        ImmVal |= (ImmVal << 16);
+      }
+
+      Inst.addOperand(MCOperand::createImm(ImmVal));
       return;
     }
     default:
@@ -1189,8 +1293,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
   // We got int literal token.
   // Only sign extend inline immediates.
   // FIXME: No errors on truncation
-  switch (OpSize) {
-  case 4:
+  switch (OpTy) {
+  case AMDGPU::OPERAND_REG_IMM_INT32:
+  case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
     if (isInt<32>(Val) &&
         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                      AsmParser->hasInv2PiInlineImm())) {
@@ -1200,18 +1307,23 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
 
     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
     return;
-
-  case 8:
-    if (AMDGPU::isInlinableLiteral64(Val,
-                                     AsmParser->hasInv2PiInlineImm())) {
+  }
+  case AMDGPU::OPERAND_REG_IMM_INT64:
+  case AMDGPU::OPERAND_REG_IMM_FP64:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
+    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
       return;
     }
 
     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
     return;
-
-  case 2:
+  }
+  case AMDGPU::OPERAND_REG_IMM_INT16:
+  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
    if (isInt<16>(Val) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
@@ -1221,7 +1333,18 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
 
     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
     return;
   }
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+    auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
+    assert(AMDGPU::isInlinableLiteral16(LiteralVal,
+                                        AsmParser->hasInv2PiInlineImm()));
+
+    uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
+                      static_cast<uint32_t>(LiteralVal);
+    Inst.addOperand(MCOperand::createImm(ImmVal));
+    return;
+  }
   default:
     llvm_unreachable("invalid operand size");
   }
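Note: for the new V2INT16/V2FP16 cases both 16-bit halves of a packed constant must be the same inline immediate, so the parser simply replicates the low half into the high half. A standalone sketch of that replication, with an illustrative value:

    #include <cassert>
    #include <cstdint>

    int main() {
      // 1.0 in IEEE half precision is 0x3C00, an inlinable 16-bit immediate.
      uint16_t LiteralVal = 0x3C00;
      // Replicate the low 16 bits into the high 16 bits, as the code above does.
      uint32_t ImmVal = (uint32_t(LiteralVal) << 16) | uint32_t(LiteralVal);
      assert(ImmVal == 0x3C003C00); // <1.0, 1.0> as a packed v2f16 constant
    }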
@@ -2268,6 +2391,56 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
   return MatchOperand_Success;
 }
 
+OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
+  const char *Prefix,
+  OperandVector &Operands,
+  AMDGPUOperand::ImmTy ImmTy,
+  bool (*ConvertResult)(int64_t&)) {
+  StringRef Name = Parser.getTok().getString();
+  if (!Name.equals(Prefix))
+    return MatchOperand_NoMatch;
+
+  Parser.Lex();
+  if (getLexer().isNot(AsmToken::Colon))
+    return MatchOperand_ParseFail;
+
+  Parser.Lex();
+  if (getLexer().isNot(AsmToken::LBrac))
+    return MatchOperand_ParseFail;
+  Parser.Lex();
+
+  unsigned Val = 0;
+  SMLoc S = Parser.getTok().getLoc();
+
+  // FIXME: How to verify the number of elements matches the number of src
+  // operands?
+  for (int I = 0; I < 3; ++I) {
+    if (I != 0) {
+      if (getLexer().is(AsmToken::RBrac))
+        break;
+
+      if (getLexer().isNot(AsmToken::Comma))
+        return MatchOperand_ParseFail;
+      Parser.Lex();
+    }
+
+    if (getLexer().isNot(AsmToken::Integer))
+      return MatchOperand_ParseFail;
+
+    int64_t Op;
+    if (getParser().parseAbsoluteExpression(Op))
+      return MatchOperand_ParseFail;
+
+    if (Op != 0 && Op != 1)
+      return MatchOperand_ParseFail;
+    Val |= (Op << I);
+  }
+
+  Parser.Lex();
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
+  return MatchOperand_Success;
+}
+
 OperandMatchResultTy
 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
                                AMDGPUOperand::ImmTy ImmTy) {
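Note: this parser accepts the bracketed per-source arrays such as op_sel:[0,1,1] and folds them into a single immediate, one bit per source operand. A minimal standalone illustration of that folding:

    #include <cassert>

    int main() {
      // Tokens parsed between '[' and ']' for, say, "op_sel:[0,1,1]".
      int Elems[3] = {0, 1, 1};
      unsigned Val = 0;
      for (int I = 0; I < 3; ++I)
        Val |= unsigned(Elems[I]) << I; // bit I belongs to srcI
      assert(Val == 6); // 0b110: src1 and src2 selected, src0 not
    }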
@@ -2300,12 +2473,11 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
   return MatchOperand_Success;
 }
 
-typedef std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap;
-
-static void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands,
-                                  OptionalImmIndexMap& OptionalIdx,
-                                  AMDGPUOperand::ImmTy ImmT,
-                                  int64_t Default = 0) {
+static void addOptionalImmOperand(
+  MCInst& Inst, const OperandVector& Operands,
+  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
+  AMDGPUOperand::ImmTy ImmT,
+  int64_t Default = 0) {
   auto i = OptionalIdx.find(ImmT);
   if (i != OptionalIdx.end()) {
     unsigned Idx = i->second;
@@ -3214,6 +3386,10 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
   {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
+  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
+  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
+  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
+  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
 };
 
 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
@@ -3230,6 +3406,12 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operan
       res = parseSDWASel(Operands, Op.Name, Op.Type);
     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
       res = parseSDWADstUnused(Operands);
+    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
+               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
+               Op.Type == AMDGPUOperand::ImmTyNegLo ||
+               Op.Type == AMDGPUOperand::ImmTyNegHi) {
+      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
+                                        Op.ConvertResult);
     } else {
       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
     }
@@ -3285,8 +3467,8 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
 }
 
-void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
-  OptionalImmIndexMap OptionalIdx;
+void AMDGPUAsmParser::cvtVOP3Impl(MCInst &Inst, const OperandVector &Operands,
+                                  OptionalImmIndexMap &OptionalIdx) {
   unsigned I = 1;
   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
@@ -3303,6 +3485,12 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
       llvm_unreachable("unhandled operand type");
     }
   }
+}
+
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptionalIdx;
+
+  cvtVOP3Impl(Inst, Operands, OptionalIdx);
 
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
@@ -3327,6 +3515,74 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
   }
 }
 
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptIdx;
+
+  cvtVOP3Impl(Inst, Operands, OptIdx);
+
+  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
+  // instruction, and then figure out where to actually put the modifiers
+  int Opc = Inst.getOpcode();
+
+  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
+  }
+
+  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
+
+  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
+  if (NegLoIdx != -1) {
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
+  }
+
+  const int Ops[] = { AMDGPU::OpName::src0,
+                      AMDGPU::OpName::src1,
+                      AMDGPU::OpName::src2 };
+  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
+                         AMDGPU::OpName::src1_modifiers,
+                         AMDGPU::OpName::src2_modifiers };
+
+  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
+
+  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+  unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
+  unsigned NegLo = 0;
+  unsigned NegHi = 0;
+
+  if (NegLoIdx != -1) {
+    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
+    NegLo = Inst.getOperand(NegLoIdx).getImm();
+    NegHi = Inst.getOperand(NegHiIdx).getImm();
+  }
+
+  for (int J = 0; J < 3; ++J) {
+    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
+    if (OpIdx == -1)
+      break;
+
+    uint32_t ModVal = 0;
+
+    if ((OpSel & (1 << J)) != 0)
+      ModVal |= SISrcMods::OP_SEL_0;
+
+    if ((OpSelHi & (1 << J)) != 0)
+      ModVal |= SISrcMods::OP_SEL_1;
+
+    if ((NegLo & (1 << J)) != 0)
+      ModVal |= SISrcMods::NEG;
+
+    if ((NegHi & (1 << J)) != 0)
+      ModVal |= SISrcMods::NEG_HI;
+
+    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+
+    Inst.getOperand(ModIdx).setImm(ModVal);
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // dpp
 //===----------------------------------------------------------------------===//
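Note: cvtVOP3P first parses op_sel/op_sel_hi/neg_lo/neg_hi as whole-array immediates, then scatters bit J of each array into the srcJ_modifiers operand. A standalone sketch of that scatter step, with local constants mirroring the SISrcMods values this patch adds in SIDefines.h below:

    #include <cassert>
    #include <cstdint>

    // Mirrors the SISrcMods values added below (see SIDefines.h).
    enum : uint32_t { NEG = 1, ABS = 2, OP_SEL_0 = 4, OP_SEL_1 = 8, NEG_HI = ABS };

    int main() {
      // Hypothetical parse of op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[1,0].
      unsigned OpSel = 0b01, OpSelHi = 0b10, NegLo = 0b01, NegHi = 0b00;
      uint32_t Mods[2] = {};
      for (int J = 0; J < 2; ++J) {
        if (OpSel   & (1u << J)) Mods[J] |= OP_SEL_0;
        if (OpSelHi & (1u << J)) Mods[J] |= OP_SEL_1;
        if (NegLo   & (1u << J)) Mods[J] |= NEG;
        if (NegHi   & (1u << J)) Mods[J] |= NEG_HI;
      }
      assert(Mods[0] == (OP_SEL_0 | NEG)); // src0: low-half select plus neg_lo
      assert(Mods[1] == OP_SEL_1);         // src1: high-half select only
    }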
@@ -97,6 +97,14 @@ static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
   return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
 }
 
+static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
+                                           unsigned Imm,
+                                           uint64_t Addr,
+                                           const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
+}
+
 #define GET_SUBTARGETINFO_ENUM
 #include "AMDGPUGenSubtargetInfo.inc"
 #undef GET_SUBTARGETINFO_ENUM
@@ -264,6 +272,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
   return decodeSrcOp(OPW16, Val);
 }
 
+MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
+  return decodeSrcOp(OPWV216, Val);
+}
+
 MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
   // Some instructions have operand restrictions beyond what the encoding
   // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
@@ -424,6 +436,7 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
   case OPW64:
     return MCOperand::createImm(getInlineImmVal64(Imm));
   case OPW16:
+  case OPWV216:
     return MCOperand::createImm(getInlineImmVal16(Imm));
   default:
     llvm_unreachable("implement me");
@@ -437,6 +450,7 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
   default: // fall
   case OPW32:
   case OPW16:
+  case OPWV216:
     return VGPR_32RegClassID;
   case OPW64: return VReg_64RegClassID;
   case OPW128: return VReg_128RegClassID;
@@ -450,6 +464,7 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
   default: // fall
   case OPW32:
   case OPW16:
+  case OPWV216:
     return SGPR_32RegClassID;
   case OPW64: return SGPR_64RegClassID;
   case OPW128: return SGPR_128RegClassID;
@@ -463,6 +478,7 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
   default: // fall
   case OPW32:
   case OPW16:
+  case OPWV216:
     return TTMP_32RegClassID;
   case OPW64: return TTMP_64RegClassID;
   case OPW128: return TTMP_128RegClassID;
@@ -498,6 +514,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
   switch (Width) {
   case OPW32:
   case OPW16:
+  case OPWV216:
     return decodeSpecialReg32(Val);
   case OPW64:
     return decodeSpecialReg64(Val);
@@ -67,6 +67,7 @@ public:
   MCOperand decodeOperand_VS_32(unsigned Val) const;
   MCOperand decodeOperand_VS_64(unsigned Val) const;
   MCOperand decodeOperand_VSrc16(unsigned Val) const;
+  MCOperand decodeOperand_VSrcV216(unsigned Val) const;
 
   MCOperand decodeOperand_VReg_64(unsigned Val) const;
   MCOperand decodeOperand_VReg_96(unsigned Val) const;
@@ -85,6 +86,7 @@ public:
     OPW64,
     OPW128,
     OPW16,
+    OPWV216,
     OPW_LAST_,
     OPW_FIRST_ = OPW32
   };
@@ -375,6 +375,14 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
   O << formatHex(static_cast<uint64_t>(Imm));
 }
 
+void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  uint16_t Lo16 = static_cast<uint16_t>(Imm);
+  assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
+  printImmediate16(Lo16, STI, O);
+}
+
 void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
                                          const MCSubtargetInfo &STI,
                                          raw_ostream &O) {
@@ -489,6 +497,10 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
     case AMDGPU::OPERAND_REG_IMM_FP16:
       printImmediate16(Op.getImm(), STI, O);
       break;
+    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+      printImmediateV216(Op.getImm(), STI, O);
+      break;
     case MCOI::OPERAND_UNKNOWN:
     case MCOI::OPERAND_PCREL:
       O << formatDec(Op.getImm());
@@ -738,6 +750,71 @@ void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
   }
 }
 
+static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod) {
+  int DefaultValue = (Mod == SISrcMods::OP_SEL_1);
+
+  for (int I = 0; I < NumOps; ++I) {
+    if (!!(Ops[I] & Mod) != DefaultValue)
+      return false;
+  }
+
+  return true;
+}
+
+static void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
+                                raw_ostream &O) {
+  unsigned Opc = MI->getOpcode();
+  int NumOps = 0;
+  int Ops[3];
+
+  for (int OpName : { AMDGPU::OpName::src0_modifiers,
+                      AMDGPU::OpName::src1_modifiers,
+                      AMDGPU::OpName::src2_modifiers }) {
+    int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
+    if (Idx == -1)
+      break;
+
+    Ops[NumOps++] = MI->getOperand(Idx).getImm();
+  }
+
+  if (allOpsDefaultValue(Ops, NumOps, Mod))
+    return;
+
+  O << Name;
+  for (int I = 0; I < NumOps; ++I) {
+    if (I != 0)
+      O << ',';
+
+    O << !!(Ops[I] & Mod);
+  }
+
+  O << ']';
+}
+
+void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
+                                   const MCSubtargetInfo &STI,
+                                   raw_ostream &O) {
+  printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
+}
+
+void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
+}
+
+void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI,
+                                   raw_ostream &O) {
+  printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
+}
+
+void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI,
+                                   raw_ostream &O) {
+  printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
+}
+
 void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
                                         const MCSubtargetInfo &STI,
                                         raw_ostream &O) {
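Note: printPackedModifier suppresses the array when every source carries the default value, and the default differs per modifier: op_sel_hi defaults to all-ones (OP_SEL_1), the others to all-zeros. A standalone sketch of that default check, with local constants standing in for SISrcMods:

    #include <cassert>

    enum : int { NEG = 1, ABS = 2, OP_SEL_0 = 4, OP_SEL_1 = 8 };

    // Mirrors allOpsDefaultValue above: op_sel_hi is the one modifier whose
    // default is 1 for every source, so a fully-set array prints nothing.
    static bool allOpsDefaultValue(const int *Ops, int NumOps, int Mod) {
      int DefaultValue = (Mod == OP_SEL_1);
      for (int I = 0; I < NumOps; ++I)
        if (!!(Ops[I] & Mod) != DefaultValue)
          return false;
      return true;
    }

    int main() {
      int Defaults[2] = {OP_SEL_1, OP_SEL_1};
      assert(allOpsDefaultValue(Defaults, 2, OP_SEL_1)); // op_sel_hi:[1,1] omitted
      assert(allOpsDefaultValue(Defaults, 2, NEG));      // neg_lo:[0,0] omitted
      int Mixed[2] = {OP_SEL_0, 0};
      assert(!allOpsDefaultValue(Mixed, 2, OP_SEL_0));   // prints " op_sel:[1,0]"
    }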
@@ -90,6 +90,8 @@ private:
                         raw_ostream &O);
   void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
                         raw_ostream &O);
+  void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
+                          raw_ostream &O);
   void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
                         raw_ostream &O);
   void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
@@ -117,6 +119,14 @@ private:
                     const MCSubtargetInfo &STI, raw_ostream &O);
   void printSDWADstUnused(const MCInst *MI, unsigned OpNo,
                           const MCSubtargetInfo &STI, raw_ostream &O);
+  void printOpSel(const MCInst *MI, unsigned OpNo,
+                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printOpSelHi(const MCInst *MI, unsigned OpNo,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printNegLo(const MCInst *MI, unsigned OpNo,
+                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printNegHi(const MCInst *MI, unsigned OpNo,
+                  const MCSubtargetInfo &STI, raw_ostream &O);
   void printInterpSlot(const MCInst *MI, unsigned OpNo,
                        const MCSubtargetInfo &STI, raw_ostream &O);
   void printInterpAttr(const MCInst *MI, unsigned OpNo,
@@ -220,15 +220,35 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
     Imm = MO.getImm();
   }
 
-  switch (AMDGPU::getOperandSize(OpInfo)) {
-  case 4:
+  switch (OpInfo.OperandType) {
+  case AMDGPU::OPERAND_REG_IMM_INT32:
+  case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
     return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
-  case 8:
+
+  case AMDGPU::OPERAND_REG_IMM_INT64:
+  case AMDGPU::OPERAND_REG_IMM_FP64:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
     return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
-  case 2:
+
+  case AMDGPU::OPERAND_REG_IMM_INT16:
+  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
     // FIXME Is this correct? What do inline immediates do on SI for f16 src
     // which does not have f16 support?
     return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
+
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+    uint16_t Lo16 = static_cast<uint16_t>(Imm);
+    assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
+    uint32_t Encoding = getLit16Encoding(Lo16, STI);
+    assert(Encoding != 255 && "packed constants can only be inline immediates");
+    return Encoding;
+  }
  default:
    llvm_unreachable("invalid operand size");
  }
@@ -36,6 +36,7 @@ enum : uint64_t {
 
   // TODO: Should this be spilt into VOP3 a and b?
   VOP3 = 1 << 10,
+  VOP3P = 1 << 12,
 
   VINTRP = 1 << 13,
   SDWA = 1 << 14,
@@ -102,12 +103,14 @@ namespace AMDGPU {
     OPERAND_REG_INLINE_C_FP16,
     OPERAND_REG_INLINE_C_FP32,
     OPERAND_REG_INLINE_C_FP64,
+    OPERAND_REG_INLINE_C_V2FP16,
+    OPERAND_REG_INLINE_C_V2INT16,
 
     OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
     OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP16,
 
     OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
-    OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_FP64,
+    OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_V2INT16,
 
     OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
     OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
@@ -125,9 +128,12 @@ namespace AMDGPU {
 // NEG and SEXT share same bit-mask because they can't be set simultaneously.
 namespace SISrcMods {
   enum {
-   NEG = 1 << 0,   // Floating-point negate modifier
-   ABS = 1 << 1,   // Floating-point absolute modifier
-   SEXT = 1 << 0   // Integer sign-extend modifier
+   NEG = 1 << 0,   // Floating-point negate modifier
+   ABS = 1 << 1,   // Floating-point absolute modifier
+   SEXT = 1 << 0,  // Integer sign-extend modifier
+   NEG_HI = ABS,   // Floating-point negate high packed component modifier.
+   OP_SEL_0 = 1 << 2,
+   OP_SEL_1 = 1 << 3
   };
 }
 
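Note: for packed sources the four modifier arrays all land in the same per-source modifier field: bit 0 is neg (low half), bit 1 doubles as abs or neg_hi, and bits 2 and 3 are the half selects. A quick standalone check of that layout:

    #include <cassert>

    // The per-source modifier bits as laid out in SISrcMods above.
    enum : unsigned { NEG = 1u << 0, ABS = 1u << 1, SEXT = 1u << 0,
                      NEG_HI = ABS, OP_SEL_0 = 1u << 2, OP_SEL_1 = 1u << 3 };

    int main() {
      // A packed source with neg_lo and its low-half op_sel bit set:
      unsigned Mods = NEG | OP_SEL_0;
      assert(Mods == 0b0101u);
      // NEG_HI reuses the ABS bit; VOP3P ops have no abs modifier, so the
      // encodings cannot collide.
      assert(NEG_HI == ABS);
    }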
@@ -31,6 +31,7 @@ class InstSI <dag outs, dag ins, string asm = "",
   field bit VOP2 = 0;
   field bit VOPC = 0;
   field bit VOP3 = 0;
+  field bit VOP3P = 0;
   field bit VINTRP = 0;
   field bit SDWA = 0;
   field bit DPP = 0;
@@ -96,6 +97,7 @@ class InstSI <dag outs, dag ins, string asm = "",
   let TSFlags{8} = VOP2;
   let TSFlags{9} = VOPC;
   let TSFlags{10} = VOP3;
+  let TSFlags{12} = VOP3P;
 
   let TSFlags{13} = VINTRP;
   let TSFlags{14} = SDWA;
@@ -440,6 +440,14 @@ public:
     return get(Opcode).TSFlags & SIInstrFlags::DPP;
   }
 
+  static bool isVOP3P(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
+  }
+
+  bool isVOP3P(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
+  }
+
   static bool isScalarUnit(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
   }
@@ -458,6 +458,12 @@ class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
   let ParserMatchClass = MatchClass;
 }
 
+class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
+  OperandWithDefaultOps<i32, (ops (i32 0))> {
+  let PrintMethod = "print"#Name;
+  let ParserMatchClass = MatchClass;
+}
+
 let OperandType = "OPERAND_IMMEDIATE" in {
 
 def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
@@ -495,6 +501,11 @@ def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
 def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
 def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
 
+def op_sel : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
+def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
+def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
+def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
+
 def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
 
 def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
@@ -534,6 +545,7 @@ class FPInputModsMatchClass <int opSize> : AsmOperandClass {
   let ParserMethod = "parseRegOrImmWithFPInputMods";
   let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
 }
 
+def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
 def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
 def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@@ -586,6 +598,33 @@ def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
   let PrintMethod = "printOperandAndIntInputMods";
 }
 
+class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
+  let Name = "PackedFP"#opSize#"InputMods";
+  let ParserMethod = "parseRegOrImm";
+  let PredicateMethod = "isRegOrImm";
+  // let PredicateMethod = "isPackedFP"#opSize#"InputMods";
+}
+
+class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
+  let Name = "PackedInt"#opSize#"InputMods";
+  let ParserMethod = "parseRegOrImm";
+  let PredicateMethod = "isRegOrImm";
+  // let PredicateMethod = "isPackedInt"#opSize#"InputMods";
+}
+
+def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
+def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
+
+class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
+  // let PrintMethod = "printPackedFPInputMods";
+}
+
+class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
+  //let PrintMethod = "printPackedIntInputMods";
+}
+
+def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
+def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
+
 //===----------------------------------------------------------------------===//
 // Complex patterns
 //===----------------------------------------------------------------------===//
@@ -602,10 +641,13 @@ def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
 def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
 def VOP3Mods  : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
 def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
 
 // VOP3Mods, but the input source is known to never be NaN.
 def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
 
+def VOP3PMods  : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
+def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
+
 //===----------------------------------------------------------------------===//
 // SI assembler operands
 //===----------------------------------------------------------------------===//
@@ -729,12 +771,34 @@ class getVALUDstForVT<ValueType VT> {
 // instructions for the given VT.
 class getVOPSrc0ForVT<ValueType VT> {
   bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+             !if(!eq(VT.Value, v2f16.Value), 1,
              !if(!eq(VT.Value, f32.Value), 1,
              !if(!eq(VT.Value, f64.Value), 1,
-             0)));
-  RegisterOperand ret = !if(isFP,
-  !if(!eq(VT.Size, 64), VSrc_f64, !if(!eq(VT.Size, 16), VSrc_f16, VSrc_f32)),
-  !if(!eq(VT.Size, 64), VSrc_b64, !if(!eq(VT.Size, 16), VSrc_b16, VSrc_b32)));
+             0))));
+
+  RegisterOperand ret =
+    !if(isFP,
+       !if(!eq(VT.Size, 64),
+          VSrc_f64,
+          !if(!eq(VT.Value, f16.Value),
+             VSrc_f16,
+             !if(!eq(VT.Value, v2f16.Value),
+                VCSrc_v2f16,
+                VSrc_f32
+             )
+          )
+       ),
+       !if(!eq(VT.Size, 64),
+          VSrc_b64,
+          !if(!eq(VT.Value, i16.Value),
+             VSrc_b16,
+             !if(!eq(VT.Value, v2i16.Value),
+                VCSrc_v2b16,
+                VSrc_b32
+             )
+          )
+       )
+    );
 }
 
 // Returns the vreg register class to use for source operand given VT
@@ -748,25 +812,38 @@ class getVregSrcForVT<ValueType VT> {
 // given VT.
 class getVOP3SrcForVT<ValueType VT> {
   bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+             !if(!eq(VT.Value, v2f16.Value), 1,
              !if(!eq(VT.Value, f32.Value), 1,
              !if(!eq(VT.Value, f64.Value), 1,
-             0)));
+             0))));
   RegisterOperand ret =
   !if(!eq(VT.Size, 128),
      VSrc_128,
     !if(!eq(VT.Size, 64),
        !if(isFP,
          VCSrc_f64,
         VCSrc_b64),
       !if(!eq(VT.Value, i1.Value),
         SCSrc_b64,
         !if(isFP,
-          !if(!eq(VT.Size, 16), VCSrc_f16, VCSrc_f32),
-          !if(!eq(VT.Size, 16), VCSrc_b16, VCSrc_b32)
+          !if(!eq(VT.Value, f16.Value),
+             VCSrc_f16,
+             !if(!eq(VT.Value, v2f16.Value),
+                VCSrc_v2f16,
+                VCSrc_f32
+             )
+          ),
+          !if(!eq(VT.Value, i16.Value),
+             VCSrc_b16,
+             !if(!eq(VT.Value, v2i16.Value),
+                VCSrc_v2b16,
+                VCSrc_b32
+             )
+          )
         )
       )
    )
  );
 }
@@ -776,7 +853,8 @@ class isFloatType<ValueType SrcVT> {
   bit ret =
   !if(!eq(SrcVT.Value, f16.Value), 1,
   !if(!eq(SrcVT.Value, f32.Value), 1,
   !if(!eq(SrcVT.Value, f64.Value), 1,
-  0)));
+  !if(!eq(SrcVT.Value, v2f16.Value), 1,
+  0))));
 }
 
 class isIntType<ValueType SrcVT> {
@@ -787,6 +865,23 @@ class isIntType<ValueType SrcVT> {
   0)));
 }
 
+class isPackedType<ValueType SrcVT> {
+  bit ret =
+    !if(!eq(SrcVT.Value, v2i16.Value), 1,
+      !if(!eq(SrcVT.Value, v2f16.Value), 1, 0)
+    );
+}
+
+// Float or packed int
+class isModifierType<ValueType SrcVT> {
+  bit ret =
+    !if(!eq(SrcVT.Value, f16.Value), 1,
+    !if(!eq(SrcVT.Value, f32.Value), 1,
+    !if(!eq(SrcVT.Value, f64.Value), 1,
+    !if(!eq(SrcVT.Value, v2f16.Value), 1,
+    !if(!eq(SrcVT.Value, v2i16.Value), 1,
+    0)))));
+}
+
 // Return type of input modifiers operand for specified input operand
 class getSrcMod <ValueType VT> {
@@ -794,6 +889,7 @@ class getSrcMod <ValueType VT> {
   !if(!eq(VT.Value, f32.Value), 1,
   !if(!eq(VT.Value, f64.Value), 1,
   0)));
+  bit isPacked = isPackedType<VT>.ret;
   Operand ret = !if(!eq(VT.Size, 64),
       !if(isFP, FP64InputMods, Int64InputMods),
       !if(isFP,
@@ -824,8 +920,8 @@ class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
 // Returns the input arguments for VOP3 instructions for the given SrcVT.
 class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs,
-                bit HasModifiers, Operand Src0Mod, Operand Src1Mod,
-                Operand Src2Mod> {
+                bit HasModifiers, bit HasOMod,
+                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
 
   dag ret =
     !if (!eq(NumSrcArgs, 0),
@@ -844,9 +940,13 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
       !if (!eq(NumSrcArgs, 2),
         !if (!eq(HasModifiers, 1),
           // VOP 2 with modifiers
-          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
-               Src1Mod:$src1_modifiers, Src1RC:$src1,
-               clampmod:$clamp, omod:$omod)
+          !if( !eq(HasOMod, 1),
+            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                 Src1Mod:$src1_modifiers, Src1RC:$src1,
+                 clampmod:$clamp, omod:$omod),
+            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                 Src1Mod:$src1_modifiers, Src1RC:$src1,
+                 clampmod:$clamp))
         /* else */,
           // VOP2 without modifiers
           (ins Src0RC:$src0, Src1RC:$src1)
@@ -854,16 +954,57 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
       /* NumSrcArgs == 3 */,
       !if (!eq(HasModifiers, 1),
         // VOP3 with modifiers
-        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
-             Src1Mod:$src1_modifiers, Src1RC:$src1,
-             Src2Mod:$src2_modifiers, Src2RC:$src2,
-             clampmod:$clamp, omod:$omod)
+        !if (!eq(HasOMod, 1),
+          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+               Src1Mod:$src1_modifiers, Src1RC:$src1,
+               Src2Mod:$src2_modifiers, Src2RC:$src2,
+               clampmod:$clamp, omod:$omod),
+          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+               Src1Mod:$src1_modifiers, Src1RC:$src1,
+               Src2Mod:$src2_modifiers, Src2RC:$src2,
+               clampmod:$clamp))
      /* else */,
        // VOP3 without modifiers
        (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
      /* endif */ ))));
 }
 
+/// XXX - src1 may only allow VGPRs?
+
+// The modifiers (except clamp) are dummy operands for the benefit of
+// printing and parsing. They defer their values to looking at the
+// srcN_modifiers for what to print.
+class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
+                   RegisterOperand Src2RC, int NumSrcArgs,
+                   bit HasClamp,
+                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+  dag ret = !if (!eq(NumSrcArgs, 2),
+    !if (HasClamp,
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           clampmod:$clamp,
+           op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+           neg_lo:$neg_lo, neg_hi:$neg_hi),
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+           neg_lo:$neg_lo, neg_hi:$neg_hi)),
+    // else NumSrcArgs == 3
+    !if (HasClamp,
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           Src2Mod:$src2_modifiers, Src2RC:$src2,
+           clampmod:$clamp,
+           op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+           neg_lo:$neg_lo, neg_hi:$neg_hi),
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           Src2Mod:$src2_modifiers, Src2RC:$src2,
+           op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+           neg_lo:$neg_lo, neg_hi:$neg_hi))
+  );
+}
+
 class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
                  bit HasModifiers, Operand Src0Mod, Operand Src1Mod> {
@@ -947,7 +1088,8 @@ class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
 
 // Returns the assembly string for the inputs and outputs of a VOP3
 // instruction.
-class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
+class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers,
+                bit HasOMod, ValueType DstVT = i32> {
   string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
   string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
   string src1 = !if(!eq(NumSrcArgs, 1), "",
@@ -957,7 +1099,26 @@ class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =
   string ret =
   !if(!eq(HasModifiers, 0),
     getAsm32<HasDst, NumSrcArgs, DstVT>.ret,
-    dst#", "#src0#src1#src2#"$clamp"#"$omod");
+    dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
 }
 
+// Returns the assembly string for the inputs and outputs of a VOP3P
+// instruction.
+class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
+                   bit HasClamp, ValueType DstVT = i32> {
+  string dst = " $vdst";
+  string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+  string src1 = !if(!eq(NumSrcArgs, 1), "",
+                !if(!eq(NumSrcArgs, 2), " $src1",
+                                        " $src1,"));
+  string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
+
+  string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
+  string clamp = !if(HasClamp, "$clamp", "");
+
+  // Each modifier is printed as an array of bits for each operand, so
+  // all operands are printed as part of src0_modifiers.
+  string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
+}
+
 class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
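Note: for a two-source packed op with modifiers and clamp, getAsmVOP3P concatenates to a single format string. A quick check of what that expansion looks like, with plain C++ standing in for TableGen's # operator:

    #include <iostream>
    #include <string>

    int main() {
      // getAsmVOP3P with HasDst=1, NumSrcArgs=2, HasModifiers=1, HasClamp=1.
      std::string dst = " $vdst";
      std::string src0 = "$src0,", src1 = " $src1", src2 = "";
      std::string mods = "$neg_lo$neg_hi", clamp = "$clamp";
      std::cout << dst + ", " + src0 + src1 + src2
                   + "$op_sel$op_sel_hi" + mods + clamp << '\n';
      // -> " $vdst, $src0, $src1$op_sel$op_sel_hi$neg_lo$neg_hi$clamp"
    }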
@@ -1069,7 +1230,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
   field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
 
   // TODO: Modifiers logic is somewhat adhoc here, to be refined later
-  field bit HasModifiers = isFloatType<Src0VT>.ret;
+  field bit HasModifiers = isModifierType<Src0VT>.ret;
 
   field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
   field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
@@ -1083,13 +1244,20 @@ class VOPProfile <list<ValueType> _ArgVT> {
   field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
   field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
 
-  field bit HasOMod = HasModifiers;
   field bit HasClamp = HasModifiers;
   field bit HasSDWAClamp = HasSrc0;
   field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
 
+  field bit IsPacked = isPackedType<Src0VT>.ret;
+  field bit HasOpSel = IsPacked;
+  field bit HasOMod = !if(HasOpSel, 0, HasModifiers);
+
   field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
 
+  field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
+  field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
+  field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
+
   field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
 
   // VOP3b instructions are a special case with a second explicit
@@ -1101,7 +1269,12 @@ class VOPProfile <list<ValueType> _ArgVT> {
 
   field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
   field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
-                             HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+                             HasModifiers, HasOMod, Src0Mod, Src1Mod,
+                             Src2Mod>.ret;
+  field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
+                                   NumSrcArgs, HasClamp,
+                                   Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
+
   field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
                                HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
   field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
@@ -1109,7 +1282,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
                                  DstVT>.ret;
 
   field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
-  field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+  field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret;
+  field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
   field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
   field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
 }
@@ -1130,6 +1304,13 @@ def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
 def VOP_I16_I16_I16_I16 : VOPProfile <[i32, i32, i32, i32, untyped]>;
 def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
 
+def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
+def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
+def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
+
+def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
+def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
+
 def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
 
 def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
@@ -616,6 +616,12 @@ def : BitConvert <i32, f32, VGPR_32>;
 def : BitConvert <f32, i32, VGPR_32>;
 def : BitConvert <i32, f32, SReg_32>;
 def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <v2i16, i32, SReg_32>;
+def : BitConvert <i32, v2i16, SReg_32>;
+def : BitConvert <v2f16, i32, SReg_32>;
+def : BitConvert <i32, v2f16, SReg_32>;
+def : BitConvert <v2i16, v2f16, SReg_32>;
+def : BitConvert <v2f16, v2i16, SReg_32>;
 
 // 64-bit bitcast
 def : BitConvert <i64, f64, VReg_64>;
@@ -133,7 +133,7 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
 // TODO: Do we need to set DwarfRegAlias on register tuples?
 
 // SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
               (add (sequence "SGPR%u", 0, 103))> {
   // Give all SGPR classes higher priority than VGPR classes, because
   // we want to spill SGPRs to VGPRs.
@@ -184,7 +184,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
                (add (decimate (shl SGPR_32, 15), 4))]>;
 
 // Trap handler TMP 32-bit registers
-def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
               (add (sequence "TTMP%u", 0, 11))> {
   let isAllocatable = 0;
 }
@@ -202,7 +202,8 @@ def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
                (add (decimate (shl TTMP_32, 3), 4))]>;
 
 // VGPR 32-bit registers
-def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+// i16/f16 only on VI+
+def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
               (add (sequence "VGPR%u", 0, 255))> {
   let AllocationPriority = 1;
   let Size = 32;
@@ -263,7 +264,7 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
 
 // Subset of SReg_32 without M0 for SMRD instructions and alike.
 // See comments in SIInstructions.td for more info.
-def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
   (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
    TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
    SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
@@ -276,7 +277,7 @@ def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
 }
 
 // Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
   (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI)> {
   let AllocationPriority = 7;
 }
@@ -372,7 +373,7 @@ def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
   let Size = 32;
 }
 
-def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
             (add VGPR_32, SReg_32)> {
   let isAllocatable = 0;
 }
@@ -423,6 +424,18 @@ multiclass SIRegOperand <string rc, string MatchName, string opType> {
       let OperandType = opType#"_FP64";
       let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
     }
+
+    def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+      let OperandType = opType#"_V2INT16";
+      let ParserMatchClass = RegImmMatcher<MatchName#"V2B16">;
+      let DecoderMethod = "decodeOperand_VSrcV216";
+    }
+
+    def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+      let OperandType = opType#"_V2FP16";
+      let ParserMatchClass = RegImmMatcher<MatchName#"V2F16">;
+      let DecoderMethod = "decodeOperand_VSrcV216";
+    }
   }
 }
@@ -438,6 +438,11 @@ let Defs = [SCC] in {
 def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
 } // End Defs = [SCC]
 
+let SubtargetPredicate = isGFX9 in {
+  def S_PACK_LL_B32_B16 : SOP2_32<"s_pack_ll_b32_b16">;
+  def S_PACK_LH_B32_B16 : SOP2_32<"s_pack_lh_b32_b16">;
+  def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">;
+}
+
 //===----------------------------------------------------------------------===//
 // SOPK Instructions
@@ -1207,6 +1212,9 @@ def S_BFE_U64_vi : SOP2_Real_vi <0x27, S_BFE_U64>;
 def S_BFE_I64_vi : SOP2_Real_vi <0x28, S_BFE_I64>;
 def S_CBRANCH_G_FORK_vi : SOP2_Real_vi <0x29, S_CBRANCH_G_FORK>;
 def S_ABSDIFF_I32_vi : SOP2_Real_vi <0x2a, S_ABSDIFF_I32>;
+def S_PACK_LL_B32_B16_vi : SOP2_Real_vi <0x32, S_PACK_LL_B32_B16>;
+def S_PACK_LH_B32_B16_vi : SOP2_Real_vi <0x33, S_PACK_LH_B32_B16>;
+def S_PACK_HH_B32_B16_vi : SOP2_Real_vi <0x34, S_PACK_HH_B32_B16>;
 
 def S_MOVK_I32_vi : SOPK_Real_vi <0x00, S_MOVK_I32>;
 def S_CMOVK_I32_vi : SOPK_Real_vi <0x01, S_CMOVK_I32>;
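Note: the three new scalar pack ops are plain SOP2s; this change only declares them and does not spell out their semantics. Going by the names, they combine 16-bit halves of the two source registers roughly as sketched below (presumed behavior, worth checking against the ISA manual):

    #include <cassert>
    #include <cstdint>

    // Presumed semantics, inferred from the names: ll = low halves of both
    // sources, lh = low of src0 / high of src1, hh = high halves of both.
    static uint32_t s_pack_ll(uint32_t S0, uint32_t S1) {
      return (S1 << 16) | (S0 & 0xffff);
    }
    static uint32_t s_pack_lh(uint32_t S0, uint32_t S1) {
      return (S1 & 0xffff0000) | (S0 & 0xffff);
    }
    static uint32_t s_pack_hh(uint32_t S0, uint32_t S1) {
      return (S1 & 0xffff0000) | (S0 >> 16);
    }

    int main() {
      assert(s_pack_ll(0x00001111, 0x00002222) == 0x22221111);
      assert(s_pack_lh(0x00001111, 0x22220000) == 0x22221111);
      assert(s_pack_hh(0x11110000, 0x22220000) == 0x22221111);
    }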
@@ -564,6 +564,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;

@@ -682,6 +683,14 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
         Val == 0x3118; // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
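
In other words, a 32-bit packed literal is inlinable only when both 16-bit halves hold the same inlinable value; VOP3P has no 32-bit literal slot to fall back on, which is why 65 (outside the -16..64 integer inline range) is rejected in the error tests below while 0xffff assembles to the inline constant -1. A self-contained usage sketch of the splitting logic (the 16-bit predicate is a stub for illustration; the real isInlinableLiteral16 also accepts the fp16 inline constants):

#include <cassert>
#include <cstdint>

// Stub standing in for AMDGPU::isInlinableLiteral16: accepts the signed
// integers -16..64 (fp16 inline constants omitted for brevity).
static bool isInlinableLiteral16Stub(int16_t V) { return V >= -16 && V <= 64; }

static bool isInlinableLiteralV216Sketch(int32_t Literal) {
  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16Stub(Lo16);
}

int main() {
  assert(isInlinableLiteralV216Sketch(0x00000000));                       // 0 in both halves
  assert(isInlinableLiteralV216Sketch(static_cast<int32_t>(0xffffffff))); // -1 in both halves
  assert(!isInlinableLiteralV216Sketch(0x00000001));                      // halves differ (0 vs 1)
  assert(!isInlinableLiteralV216Sketch(0x00410041));                      // 65 > 64, out of range
}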

bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.

@@ -301,6 +301,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return 2;

  default:

@@ -323,6 +325,9 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isUniformMMO(const MachineMemOperand *MMO);

/// \returns The encoding that will be used for \p ByteOffset in the SMRD
@@ -237,7 +237,7 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
                       src0_sel:$src0_sel);

  let Asm32 = getAsm32<1, 1>.ret;
  let Asm64 = getAsm64<1, 1, 0>.ret;
  let Asm64 = getAsm64<1, 1, 0, 1>.ret;
  let AsmDPP = getAsmDPP<1, 1, 0>.ret;
  let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;

@@ -182,7 +182,7 @@ def VOP_MADMK_F32 : VOP_MADMK <f32>;
class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
  let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
                       HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
                       HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    VGPR_32:$src2, // stub argument

@@ -194,6 +194,7 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);
  let Asm32 = getAsm32<1, 2, vt>.ret;
  let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret;
  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
  let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret;
  let HasSrc2 = 0;

@@ -204,13 +205,13 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
def VOP_MAC_F16 : VOP_MAC <f16> {
  // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
  // 'not a string initializer' error.
  let Asm64 = getAsm64<1, 2, HasModifiers, f16>.ret;
  let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f16>.ret;
}

def VOP_MAC_F32 : VOP_MAC <f32> {
  // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
  // 'not a string initializer' error.
  let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
  let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f32>.ret;
}

// Write out to vcc or arbitrary SGPR.
@@ -29,6 +29,26 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
                                        ret1));
}

class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
  list<dag> ret3 = [(set P.DstVT:$vdst,
    (node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
                                    (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
          (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
          (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];

  list<dag> ret2 = [(set P.DstVT:$vdst,
    (node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
                          (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
          (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];

  list<dag> ret1 = [(set P.DstVT:$vdst,
    (node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];

  list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
                  !if(!eq(P.NumSrcArgs, 2), ret2,
                                            ret1));
}

class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
  list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
  list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
@@ -263,6 +283,10 @@ defm: Tenary_i16_Pats<mul, add, V_MAD_I16, sext>;

} // End Predicates = [isVI]

let SubtargetPredicate = isGFX9 in {
def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16>>;
}

//===----------------------------------------------------------------------===//
// Target

@@ -449,3 +473,5 @@ defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>;
defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>;
defm V_TRIG_PREOP_F64 : VOP3_Real_vi <0x292>;

defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
@@ -0,0 +1,82 @@
//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP3P Classes
//===----------------------------------------------------------------------===//

class VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
  VOP3P_Pseudo<OpName, P,
    !if(P.HasModifiers, getVOP3PModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
>;

// Non-packed instructions that use the VOP3P encoding, i.e. instructions
// where omod/abs are used.
class VOP3_VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
  VOP3P_Pseudo<OpName, P,
    !if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
>;

let isCommutable = 1 in {
def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;
def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;
def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;
def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;

def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;

def V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
}

def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;

// XXX - Commutable?
def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
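
For the mix opcodes the VOP3P modifier bits are reused rather than packed: the usual reading is that op_sel_hi marks each source as either a full f32 or an f16 half (the half picked by op_sel), feeding a single f32 multiply-add. A rough C++ model under that assumption, not taken from this patch (v_mad_mixlo/hi would additionally write only one half of the destination, which is omitted here):

#include <cmath>
#include <cstdint>
#include <cstring>

// Minimal IEEE fp16 -> fp32 helper (zero handled; denormals, Inf and
// NaN deliberately ignored for brevity).
static float HalfToFloat(uint16_t H) {
  uint32_t Sign = H >> 15, Exp = (H >> 10) & 0x1f, Man = H & 0x3ff;
  if ((H & 0x7fffu) == 0)
    return Sign ? -0.0f : 0.0f;
  float V = std::ldexp(1.0f + Man / 1024.0f, int(Exp) - 15);
  return Sign ? -V : V;
}

// Assumed semantics: op_sel_hi[i] = 1 reads source i as the f16 half
// selected by op_sel[i]; op_sel_hi[i] = 0 reads it as a full f32.
static float mad_mix_f32(const uint32_t Src[3], const bool OpSel[3],
                         const bool OpSelHi[3]) {
  float S[3];
  for (int I = 0; I < 3; ++I) {
    if (OpSelHi[I])
      S[I] = HalfToFloat(OpSel[I] ? Src[I] >> 16 : Src[I] & 0xffffu);
    else
      std::memcpy(&S[I], &Src[I], sizeof(float)); // plain f32 bitcast
  }
  return std::fma(S[0], S[1], S[2]); // d = s0 * s1 + s2, computed in f32
}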

multiclass VOP3P_Real_vi<bits<10> op> {
  def _vi : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP3Pe <op, !cast<VOP3P_Pseudo>(NAME).Pfl> {
    let AssemblerPredicates = [HasVOP3PInsts];
    let DecoderNamespace = "VI";
  }
}

defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>;
defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>;
defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x384>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>;
defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>;
defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>;

defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>;
defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>;
defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>;
defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>;
defm V_PK_ADD_F16 : VOP3P_Real_vi <0x38f>;
defm V_PK_MUL_F16 : VOP3P_Real_vi <0x390>;
defm V_PK_MIN_F16 : VOP3P_Real_vi <0x391>;
defm V_PK_MAX_F16 : VOP3P_Real_vi <0x392>;

defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x3a0>;
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
@@ -68,8 +68,9 @@ class VOP3Common <dag outs, dag ins, string asm = "",
  let hasPostISelHook = 1;
}

class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3Only = 0> :
  InstSI <P.Outs64, P.Ins64, "", pattern>,
class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
                   bit VOP3Only = 0, bit isVOP3P = 0> :
  InstSI <P.Outs64, !if(!and(isVOP3P, P.IsPacked), P.InsVOP3P, P.Ins64), "", pattern>,
  VOP <opName>,
  SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
  MnemonicAlias<opName#"_e64", opName> {

@@ -79,7 +80,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3On
  let UseNamedOperandTable = 1;

  string Mnemonic = opName;
  string AsmOperands = P.Asm64;
  string AsmOperands = !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64);

  let Size = 8;
  let mayLoad = 0;

@@ -106,18 +107,24 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3On
  let AsmVariantName = AMDGPUAsmVariants.VOP3;
  let AsmMatchConverter =
    !if(!eq(VOP3Only,1),
        "cvtVOP3",
        !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
        !if(!eq(P.HasModifiers, 1), "cvtVOP3_2_mod", ""));

  VOPProfile Pfl = P;
}

class VOP3P_Pseudo <string opName, VOPProfile P, list<dag> pattern = []> :
  VOP3_Pseudo<opName, P, pattern, 1, 1> {
  let VOP3P = 1;
}

class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

@@ -131,6 +138,11 @@ class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
  let TSFlags = ps.TSFlags;
}

// XXX - Is there any reason to distinguish this from regular VOP3
// here?
class VOP3P_Real<VOP3P_Pseudo ps, int EncodingFamily> :
  VOP3_Real<ps, EncodingFamily>;

class VOP3a<VOPProfile P> : Enc64 {
  bits<2> src0_modifiers;
  bits<9> src0;

@@ -198,6 +210,42 @@ class VOP3be <VOPProfile P> : Enc64 {
  let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}

class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
  bits<8> vdst;
  // neg, neg_hi, op_sel and op_sel_hi are all packed into srcN_modifiers.
  bits<4> src0_modifiers;
  bits<9> src0;
  bits<4> src1_modifiers;
  bits<9> src1;
  bits<4> src2_modifiers;
  bits<9> src2;
  bits<1> clamp;

  let Inst{7-0} = vdst;
  let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
  let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
  let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2

  let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
  let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
  let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)

  let Inst{14} = !if(P.HasOpSel, src2_modifiers{3}, 0); // op_sel_hi(2)

  let Inst{15} = !if(P.HasClamp, clamp{0}, 0);

  let Inst{25-16} = op;
  let Inst{31-26} = 0x34; // encoding
  let Inst{40-32} = !if(P.HasSrc0, src0, 0);
  let Inst{49-41} = !if(P.HasSrc1, src1, 0);
  let Inst{58-50} = !if(P.HasSrc2, src2, 0);
  let Inst{59} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel_hi(0)
  let Inst{60} = !if(P.HasOpSel, src1_modifiers{3}, 0); // op_sel_hi(1)
  let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo)
  let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo)
  let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
}
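
The field layout can be cross-checked against the assembler tests added below. A small C++ sketch that packs the fields for v_pk_add_f16 v1, 0, v2 (opcode 0x38f; src0 is the inline constant 0, operand encoding 128; src1 is v2, operand encoding 256 + 2; default op_sel_hi of [1,1] with src2 unused) and reproduces the expected byte sequence from the mc test:

#include <cassert>
#include <cstdint>

// Packs a VOP3Pe word from the fields defined above. SrcN are 9-bit
// operand encodings (VGPR n = 256 + n, inline constant 0 = 128).
static uint64_t encodeVOP3P(unsigned Op, unsigned VDst, unsigned Src0,
                            unsigned Src1, unsigned Src2, unsigned OpSelHi0,
                            unsigned OpSelHi1, unsigned OpSelHi2) {
  uint64_t Inst = 0;
  Inst |= uint64_t(VDst) << 0;      // Inst{7-0}   = vdst
  Inst |= uint64_t(OpSelHi2) << 14; // Inst{14}    = op_sel_hi(2)
  Inst |= uint64_t(Op) << 16;       // Inst{25-16} = op
  Inst |= uint64_t(0x34) << 26;     // Inst{31-26} = VOP3P encoding
  Inst |= uint64_t(Src0) << 32;     // Inst{40-32} = src0
  Inst |= uint64_t(Src1) << 41;     // Inst{49-41} = src1
  Inst |= uint64_t(Src2) << 50;     // Inst{58-50} = src2
  Inst |= uint64_t(OpSelHi0) << 59; // Inst{59}    = op_sel_hi(0)
  Inst |= uint64_t(OpSelHi1) << 60; // Inst{60}    = op_sel_hi(1)
  return Inst;                      // neg/op_sel/clamp left at 0
}

int main() {
  // v_pk_add_f16 v1, 0, v2 with the default op_sel_hi:[1,1].
  uint64_t Inst = encodeVOP3P(0x38f, 1, 128, 256 + 2, 0, 1, 1, 0);
  const uint8_t Want[8] = {0x01, 0x00, 0x8f, 0xd3, 0x80, 0x04, 0x02, 0x18};
  for (int I = 0; I < 8; ++I)
    assert(uint8_t(Inst >> (8 * I)) == Want[I]); // matches the mc test below
}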

class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be<P> {
  let Inst{25-17} = op;
}

@@ -349,3 +397,4 @@ include "VOPCInstructions.td"
include "VOP1Instructions.td"
include "VOP2Instructions.td"
include "VOP3Instructions.td"
include "VOP3PInstructions.td"
@@ -0,0 +1,22 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s

v_pk_add_f16 v1, -17, v2
// GFX9: :19: error: invalid operand for instruction

v_pk_add_f16 v1, 65, v2
// GFX9: :18: error: invalid operand for instruction

v_pk_add_f16 v1, 64.0, v2
// GFX9: :18: error: invalid operand for instruction

v_pk_add_f16 v1, -0.15915494, v2
// GFX9: :19: error: invalid operand for instruction

v_pk_add_f16 v1, -0.0, v2
// GFX9: :19: error: invalid operand for instruction

v_pk_add_f16 v1, -32768, v2
// GFX9: :19: error: invalid operand for instruction

v_pk_add_f16 v1, 32767, v2
// GFX9: :18: error: invalid operand for instruction
@@ -0,0 +1,112 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s | FileCheck -check-prefix=GFX9 %s

v_pk_add_f16 v1, 0, v2
// GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18]

v_pk_add_f16 v1, 0.0, v2
// GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18]

v_pk_add_f16 v1, v2, 0
// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18]

v_pk_add_f16 v1, v2, 0.0
// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18]

v_pk_add_f16 v1, 1.0, v2
// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18]

v_pk_add_f16 v1, -1.0, v2
// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18]

v_pk_add_f16 v1, -0.5, v2
// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18]

v_pk_add_f16 v1, 0.5, v2
// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18]

v_pk_add_f16 v1, 2.0, v2
// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18]

v_pk_add_f16 v1, -2.0, v2
// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18]

v_pk_add_f16 v1, 4.0, v2
// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18]

v_pk_add_f16 v1, -4.0, v2
// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18]

v_pk_add_f16 v1, 0.15915494, v2
// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18]

v_pk_add_f16 v1, -1, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]

v_pk_add_f16 v1, -2, v2
// GFX9: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc2,0x04,0x02,0x18]

v_pk_add_f16 v1, -3, v2
// GFX9: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc3,0x04,0x02,0x18]

v_pk_add_f16 v1, -16, v2
// GFX9: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xd0,0x04,0x02,0x18]

v_pk_add_f16 v1, 1, v2
// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18]

v_pk_add_f16 v1, 2, v2
// GFX9: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x82,0x04,0x02,0x18]

v_pk_add_f16 v1, 3, v2
// GFX9: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x83,0x04,0x02,0x18]

v_pk_add_f16 v1, 4, v2
// GFX9: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x84,0x04,0x02,0x18]

v_pk_add_f16 v1, 15, v2
// GFX9: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x8f,0x04,0x02,0x18]

v_pk_add_f16 v1, 16, v2
// GFX9: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x90,0x04,0x02,0x18]

v_pk_add_f16 v1, 63, v2
// GFX9: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xbf,0x04,0x02,0x18]

v_pk_add_f16 v1, 64, v2
// GFX9: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc0,0x04,0x02,0x18]

v_pk_add_f16 v1, 0x0001, v2
// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18]

v_pk_add_f16 v1, 0xffff, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]

v_pk_add_f16 v1, 0x3c00, v2
// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18]

v_pk_add_f16 v1, 0xbc00, v2
// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18]

v_pk_add_f16 v1, 0x3800, v2
// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18]

v_pk_add_f16 v1, 0xb800, v2
// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18]

v_pk_add_f16 v1, 0x4000, v2
// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18]

v_pk_add_f16 v1, 0xc000, v2
// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18]

v_pk_add_f16 v1, 0x4400, v2
// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18]

v_pk_add_f16 v1, 0xc400, v2
// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18]

v_pk_add_f16 v1, 0x3118, v2
// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18]

v_pk_add_f16 v1, 65535, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
@@ -0,0 +1,113 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s

// GFX9: 31: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel

// GFX9: 32: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:

// GFX9: 33: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[

// GFX9: 33: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[]

// GFX9: 34: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[,]

// XXGFX9: 34: error: failed parsing operand.
// v_pk_add_u16 v1, v2, v3 op_sel:[0]

// GFX9: 35: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[0,]

// XXGFX9: 36: error: failed parsing operand.
// v_pk_add_u16 v1, v2, v3 op_sel:[,0]

// GFX9: 36: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[0,2]

// GFX9: 35: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[2,0]

// GFX9: 33: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[-1,0]

// GFX9: 35: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[0,-1]

// GFX9: 40: error: not a valid operand.
v_pk_add_u16 v1, v2, v3 op_sel:[0,0,0,0]

// XXGFX9: invalid operand for instruction
v_pk_add_u16 v1, v2, v3 neg_lo:[0,0]

//
// Regular modifiers on packed instructions
//

// FIXME: should be invalid operand for instruction
// GFX9: :18: error: not a valid operand.
v_pk_add_f16 v1, |v2|, v3

// GFX9: :21: error: not a valid operand.
v_pk_add_f16 v1, abs(v2), v3

// GFX9: :22: error: not a valid operand.
v_pk_add_f16 v1, v2, |v3|

// GFX9: :25: error: not a valid operand.
v_pk_add_f16 v1, v2, abs(v3)

// GFX9: :19: error: invalid operand for instruction
v_pk_add_f16 v1, -v2, v3

// GFX9: :23: error: invalid operand for instruction
v_pk_add_f16 v1, v2, -v3

// GFX9: :21: error: not a valid operand.
v_pk_add_u16 v1, abs(v2), v3

// GFX9: :19: error: invalid operand for instruction
v_pk_add_u16 v1, -v2, v3

//
// Packed operands on the non-packed VOP3P instructions
//

// GFX9: invalid operand for instruction
v_mad_mix_f32 v1, v2, v3, v4 op_sel:[0,0,0]

// GFX9: invalid operand for instruction
v_mad_mix_f32 v1, v2, v3, v4 op_sel_hi:[0,0,0]

// GFX9: invalid operand for instruction
v_mad_mix_f32 v1, v2, v3, v4 neg_lo:[0,0,0]

// GFX9: invalid operand for instruction
v_mad_mix_f32 v1, v2, v3, v4 neg_hi:[0,0,0]

// GFX9: invalid operand for instruction
v_mad_mixlo_f16 v1, v2, v3, v4 op_sel:[0,0,0]

// GFX9: invalid operand for instruction
v_mad_mixlo_f16 v1, v2, v3, v4 op_sel_hi:[0,0,0]

// GFX9: invalid operand for instruction
v_mad_mixlo_f16 v1, v2, v3, v4 neg_lo:[0,0,0]

// GFX9: invalid operand for instruction
v_mad_mixlo_f16 v1, v2, v3, v4 neg_hi:[0,0,0]

// GFX9: invalid operand for instruction
v_mad_mixhi_f16 v1, v2, v3, v4 op_sel:[0,0,0]

// GFX9: invalid operand for instruction
v_mad_mixhi_f16 v1, v2, v3, v4 op_sel_hi:[0,0,0]

// GFX9: invalid operand for instruction
v_mad_mixhi_f16 v1, v2, v3, v4 neg_lo:[0,0,0]

// GFX9: invalid operand for instruction
v_mad_mixhi_f16 v1, v2, v3, v4 neg_hi:[0,0,0]
@@ -0,0 +1,216 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s | FileCheck -check-prefix=GFX9 %s

//
// Test op_sel/op_sel_hi
//

v_pk_add_u16 v1, v2, v3
// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]

v_pk_add_u16 v1, v2, v3 op_sel:[0,0]
// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]

v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,1]
// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]

v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]

v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x00]

v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x00]

v_pk_add_u16 v1, v2, v3 op_sel:[1,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x18]

v_pk_add_u16 v1, v2, v3 op_sel:[0,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x18]

v_pk_add_u16 v1, v2, v3 op_sel:[1,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x18,0x8a,0xd3,0x02,0x07,0x02,0x18]

v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x10]

v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x08]

v_pk_add_u16 v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x18,0x8a,0xd3,0x02,0x07,0x02,0x18]

v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x08]

v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x10]

v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x10]

v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x08]
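
These cases follow from the modifier semantics: for each source, op_sel picks which 16-bit half feeds the low lane of the operation and op_sel_hi which half feeds the high lane, so the [1,1] op_sel_hi default (lo-to-lo, hi-to-hi) assembles to the same bytes as no modifier at all, as the tests above show. A compact C++ model of that selection, under the stated assumption:

#include <cassert>
#include <cstdint>

// Returns the two 16-bit lanes actually consumed from a packed source:
// op_sel chooses the half feeding the low lane, op_sel_hi the half
// feeding the high lane (assumed semantics, matching the [1,1] default).
static uint32_t applyOpSel(uint32_t Src, bool OpSel, bool OpSelHi) {
  uint16_t Lo = OpSel ? Src >> 16 : Src & 0xffffu;
  uint16_t Hi = OpSelHi ? Src >> 16 : Src & 0xffffu;
  return uint32_t(Hi) << 16 | Lo;
}

int main() {
  uint32_t V = 0xBBBB1111; // hi half 0xBBBB, lo half 0x1111
  assert(applyOpSel(V, false, true) == 0xBBBB1111);  // default: unchanged
  assert(applyOpSel(V, true, true) == 0xBBBBBBBB);   // op_sel:1 duplicates hi
  assert(applyOpSel(V, false, false) == 0x11111111); // op_sel_hi:0 duplicates lo
  assert(applyOpSel(V, true, false) == 0x1111BBBB);  // both halves swapped
}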

//
// Test src2 op_sel/op_sel_hi
//

v_pk_fma_f16 v8, v0, s0, v1
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]

v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]

v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]

v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]

v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x8e,0xd3,0x00,0x01,0x04,0x04]

v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x8e,0xd3,0x00,0x01,0x04,0x04]

//
// Test neg_lo/neg_hi
//

v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0xfc]

v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x8e,0xd3,0x00,0x01,0x04,0x1c]

v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x8e,0xd3,0x00,0x01,0x04,0xfc]

v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x3c]

v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x5c]

v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x9c]

v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x8e,0xd3,0x00,0x01,0x04,0x1c]

v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x8e,0xd3,0x00,0x01,0x04,0x1c]

v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x8e,0xd3,0x00,0x01,0x04,0x1c]
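
neg_lo and neg_hi are per-lane sign toggles: neg_lo negates the low f16 lane of the given source and neg_hi the high lane, a reading consistent with the VOP3Pe encoding above, where neg (lo) lands in bits 61-63 and neg_hi in bits 8-10. A one-line C++ model under that assumption:

#include <cstdint>

// Flips the f16 sign bit of the low and/or high lane of a packed source.
// Assumed neg_lo/neg_hi semantics: a sign flip only, no other change.
static uint32_t applyNeg(uint32_t Src, bool NegLo, bool NegHi) {
  if (NegLo) Src ^= 0x00008000u; // sign bit of the low f16 lane
  if (NegHi) Src ^= 0x80000000u; // sign bit of the high f16 lane
  return Src;
}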

// Test clamp
v_pk_fma_f16 v8, v0, s0, v1 clamp
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 clamp ; encoding: [0x08,0xc0,0x8e,0xd3,0x00,0x01,0x04,0x1c]

v_pk_add_u16 v1, v2, v3 clamp
// GFX9: v_pk_add_u16 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x8a,0xd3,0x02,0x07,0x02,0x18]

v_pk_min_i16 v0, v1, v2 clamp
// GFX9: v_pk_min_i16 v0, v1, v2 clamp ; encoding: [0x00,0x80,0x88,0xd3,0x01,0x05,0x02,0x18]

//
// Instruction tests:
//

v_pk_mul_lo_u16 v0, v1, v2
// GFX9: v_pk_mul_lo_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x81,0xd3,0x01,0x05,0x02,0x18]

v_pk_add_i16 v0, v1, v2
// GFX9: v_pk_add_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x82,0xd3,0x01,0x05,0x02,0x18]

v_pk_sub_i16 v0, v1, v2
// GFX9: v_pk_sub_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x83,0xd3,0x01,0x05,0x02,0x18]

v_pk_lshlrev_b16 v0, v1, v2
// GFX9: v_pk_lshlrev_b16 v0, v1, v2 ; encoding: [0x00,0x00,0x84,0xd3,0x01,0x05,0x02,0x18]

v_pk_lshrrev_b16 v0, v1, v2
// GFX9: v_pk_lshrrev_b16 v0, v1, v2 ; encoding: [0x00,0x00,0x85,0xd3,0x01,0x05,0x02,0x18]

v_pk_ashrrev_i16 v0, v1, v2
// GFX9: v_pk_ashrrev_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x86,0xd3,0x01,0x05,0x02,0x18]

v_pk_max_i16 v0, v1, v2
// GFX9: v_pk_max_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x87,0xd3,0x01,0x05,0x02,0x18]

v_pk_min_i16 v0, v1, v2
// GFX9: v_pk_min_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x88,0xd3,0x01,0x05,0x02,0x18]

v_pk_add_u16 v0, v1, v2
// GFX9: v_pk_add_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18]

v_pk_max_u16 v0, v1, v2
// GFX9: v_pk_max_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18]

v_pk_min_u16 v0, v1, v2
// GFX9: v_pk_min_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18]

v_pk_fma_f16 v0, v1, v2, v3
// GFX9: v_pk_fma_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x8e,0xd3,0x01,0x05,0x0e,0x1c]

v_pk_add_f16 v0, v1, v2
// GFX9: v_pk_add_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18]

v_pk_mul_f16 v0, v1, v2
// GFX9: v_pk_mul_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x90,0xd3,0x01,0x05,0x02,0x18]

v_pk_min_f16 v0, v1, v2
// GFX9: v_pk_min_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x91,0xd3,0x01,0x05,0x02,0x18]

v_pk_max_f16 v0, v1, v2
// GFX9: v_pk_max_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x92,0xd3,0x01,0x05,0x02,0x18]

v_mad_mix_f32 v0, v1, v2, v3
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]

v_mad_mixlo_f16 v0, v1, v2, v3
// GFX9: v_mad_mixlo_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa1,0xd3,0x01,0x05,0x0e,0x04]

v_mad_mixhi_f16 v0, v1, v2, v3
// GFX9: v_mad_mixhi_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa2,0xd3,0x01,0x05,0x0e,0x04]

//
// Regular source modifiers on non-packed instructions
//

v_mad_mix_f32 v0, abs(v1), v2, v3
// GFX9: v_mad_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04]

v_mad_mix_f32 v0, v1, abs(v2), v3
// GFX9: v_mad_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04]

v_mad_mix_f32 v0, v1, v2, abs(v3)
// GFX9: v_mad_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04]

v_mad_mix_f32 v0, -v1, v2, v3
// GFX9: v_mad_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24]

v_mad_mix_f32 v0, v1, -v2, v3
// GFX9: v_mad_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44]

v_mad_mix_f32 v0, v1, v2, -v3
// GFX9: v_mad_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84]

v_mad_mix_f32 v0, -abs(v1), v2, v3
// GFX9: v_mad_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24]

v_mad_mix_f32 v0, v1, -abs(v2), v3
// GFX9: v_mad_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44]

v_mad_mix_f32 v0, v1, v2, -abs(v3)
// GFX9: v_mad_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84]

v_mad_mixlo_f16 v0, abs(v1), -v2, abs(v3)
// GFX9: v_mad_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44]

v_mad_mixhi_f16 v0, -v1, abs(v2), -abs(v3)
// GFX9: v_mad_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4]