AMDGPU: Add VOP3P instruction format

Also add a few non-VOP3P instructions related to packed operations.

Includes a hack with dummy operands for the benefit of the assembler.

llvm-svn: 296368
Matt Arsenault 2017-02-27 18:49:11 +00:00
parent 10c7fb4187
commit 9be7b0d485
27 changed files with 1342 additions and 86 deletions
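For orientation (not part of the patch): VOP3P instructions perform packed 16-bit math, treating each 32-bit VGPR as two independent 16-bit lanes. A minimal C++ sketch of what v_pk_add_f16 computes, with a hypothetical addF16 standing in for a real half-precision add and all modifiers ignored:

#include <cstdint>

// Each 32-bit register holds two 16-bit lanes; the low and high halves
// are operated on independently. addF16 is a placeholder for a real
// IEEE-half add.
uint32_t pk_add_f16(uint32_t A, uint32_t B,
                    uint16_t (*addF16)(uint16_t, uint16_t)) {
  uint16_t LoA = A & 0xffff, HiA = A >> 16;
  uint16_t LoB = B & 0xffff, HiB = B >> 16;
  return static_cast<uint32_t>(addF16(LoA, LoB)) |
         (static_cast<uint32_t>(addF16(HiA, HiB)) << 16);
}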


@ -190,6 +190,12 @@ def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
"Has i16/f16 instructions"
>;
def FeatureVOP3P : SubtargetFeature<"vop3p",
"HasVOP3PInsts",
"true",
"Has VOP3P packed instructions"
>;
def FeatureMovrel : SubtargetFeature<"movrel",
"HasMovrel",
"true",
@ -400,7 +406,7 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P
]
>;
@ -575,7 +581,10 @@ def isCIVI : Predicate <
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
AssemblerPredicate<"Feature16BitInsts">;
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<"FeatureVOP3P">;
def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
AssemblerPredicate<"FeatureSDWA">;


@ -117,6 +117,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
SGPRInitBug(false),
HasSMemRealTime(false),
Has16BitInsts(false),
HasVOP3PInsts(false),
HasMovrel(false),
HasVGPRIndexMode(false),
HasScalarStores(false),


@ -136,6 +136,7 @@ protected:
bool SGPRInitBug;
bool HasSMemRealTime;
bool Has16BitInsts;
bool HasVOP3PInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
bool HasScalarStores;
@ -216,6 +217,10 @@ public:
return Has16BitInsts;
}
bool hasVOP3PInsts() const {
return HasVOP3PInsts;
}
bool hasHWFP64() const {
return FP64;
}


@ -157,7 +157,11 @@ public:
ImmTySendMsg,
ImmTyInterpSlot,
ImmTyInterpAttr,
ImmTyAttrChan
ImmTyAttrChan,
ImmTyOpSel,
ImmTyOpSelHi,
ImmTyNegLo,
ImmTyNegHi
};
struct TokOp {
@ -294,6 +298,10 @@ public:
bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
bool isOpSel() const { return isImmTy(ImmTyOpSel); }
bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
bool isNegLo() const { return isImmTy(ImmTyNegLo); }
bool isNegHi() const { return isImmTy(ImmTyNegHi); }
bool isMod() const {
return isClampSI() || isOModSI();
@ -313,6 +321,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
}
bool isSCSrcV2B16() const {
return isSCSrcB16();
}
bool isSCSrcB32() const {
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
}
@ -325,6 +337,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
}
bool isSCSrcV2F16() const {
return isSCSrcF16();
}
bool isSCSrcF32() const {
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
}
@ -341,6 +357,11 @@ public:
return isSCSrcB16() || isLiteralImm(MVT::i16);
}
bool isSSrcV2B16() const {
llvm_unreachable("cannot happen");
return isSSrcB16();
}
bool isSSrcB64() const {
// TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
// See isVSrc64().
@ -359,6 +380,11 @@ public:
return isSCSrcB16() || isLiteralImm(MVT::f16);
}
bool isSSrcV2F16() const {
llvm_unreachable("cannot happen");
return isSSrcF16();
}
bool isVCSrcB32() const {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
@ -371,6 +397,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
}
bool isVCSrcV2B16() const {
return isVCSrcB16();
}
bool isVCSrcF32() const {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
}
@ -383,6 +413,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
}
bool isVCSrcV2F16() const {
return isVCSrcF16();
}
bool isVSrcB32() const {
return isVCSrcF32() || isLiteralImm(MVT::i32);
}
@ -395,6 +429,11 @@ public:
return isVCSrcF16() || isLiteralImm(MVT::i16);
}
bool isVSrcV2B16() const {
llvm_unreachable("cannot happen");
return isVSrcB16();
}
bool isVSrcF32() const {
return isVCSrcF32() || isLiteralImm(MVT::f32);
}
@ -407,6 +446,11 @@ public:
return isVCSrcF16() || isLiteralImm(MVT::f16);
}
bool isVSrcV2F16() const {
llvm_unreachable("cannot happen");
return isVSrcF16();
}
bool isKImmFP32() const {
return isLiteralImm(MVT::f32);
}
@ -607,6 +651,10 @@ public:
case ImmTyInterpSlot: OS << "InterpSlot"; break;
case ImmTyInterpAttr: OS << "InterpAttr"; break;
case ImmTyAttrChan: OS << "AttrChan"; break;
case ImmTyOpSel: OS << "OpSel"; break;
case ImmTyOpSelHi: OS << "OpSelHi"; break;
case ImmTyNegLo: OS << "NegLo"; break;
case ImmTyNegHi: OS << "NegHi"; break;
}
}
@ -783,6 +831,8 @@ public:
Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
};
typedef std::map<AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap;
AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
@ -881,10 +931,18 @@ public:
//bool ProcessInstruction(MCInst &Inst);
OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
OperandMatchResultTy
parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
bool (*ConvertResult)(int64_t &) = nullptr);
OperandMatchResultTy parseOperandArrayWithPrefix(
const char *Prefix,
OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
bool (*ConvertResult)(int64_t&) = nullptr);
OperandMatchResultTy
parseNamedBit(const char *Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
@ -951,7 +1009,12 @@ public:
void cvtId(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3Impl(MCInst &Inst,
const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
void cvtMIMG(MCInst &Inst, const OperandVector &Operands);
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
@ -999,6 +1062,30 @@ static const fltSemantics *getFltSemantics(MVT VT) {
return getFltSemantics(VT.getSizeInBits() / 8);
}
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
switch (OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
return &APFloat::IEEEsingle();
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
return &APFloat::IEEEdouble();
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return &APFloat::IEEEhalf();
default:
llvm_unreachable("unsupported fp type");
}
}
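The new getOpFltSemantics() lets addLiteralImmOperand() convert a parsed literal to exactly the width the operand type demands. A standalone sketch of that conversion using LLVM's APFloat, with the same calls the hunk below makes (precision loss is tolerated; overflow is rejected earlier by isLiteralImm()):

// Val is the 64-bit bit image of the parsed FP literal token.
bool Lost;
APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Val));
FPLiteral.convert(APFloat::IEEEhalf(),   // semantics for *_FP16 / *_V2FP16
                  APFloat::rmNearestTiesToEven, &Lost);
// bitcastToAPInt() now yields the 16-bit pattern to encode.
uint16_t Bits = FPLiteral.bitcastToAPInt().getZExtValue();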
//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//
@ -1044,7 +1131,7 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
if (type.getScalarSizeInBits() == 16) {
return AMDGPU::isInlinableLiteral16(
static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
AsmParser->hasInv2PiInlineImm());
}
@ -1136,13 +1223,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
// Check that this operand accepts literals
assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
auto OpSize = AMDGPU::getOperandSize(InstDesc, OpNum); // expected operand size
APInt Literal(64, Val);
uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
if (Imm.IsFPImm) { // We got fp literal token
APInt Literal(64, Val);
switch (OpSize) {
case 8:
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
@ -1166,17 +1255,32 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
// unclear how we should encode them. This case should be checked earlier
// in predicate methods (isLiteralImm())
llvm_unreachable("fp literal in 64-bit integer instruction.");
case 4:
case 2: {
}
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
FPLiteral.convert(*getFltSemantics(OpSize),
FPLiteral.convert(*getOpFltSemantics(OpTy),
APFloat::rmNearestTiesToEven, &lost);
// We allow precision lost but not overflow or underflow. This should be
// checked earlier in isLiteralImm()
Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
ImmVal |= (ImmVal << 16);
}
Inst.addOperand(MCOperand::createImm(ImmVal));
return;
}
default:
@ -1189,8 +1293,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
// We got int literal token.
// Only sign extend inline immediates.
// FIXME: No errors on truncation
switch (OpSize) {
case 4:
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
if (isInt<32>(Val) &&
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@ -1200,18 +1307,23 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
return;
case 8:
if (AMDGPU::isInlinableLiteral64(Val,
AsmParser->hasInv2PiInlineImm())) {
}
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
return;
}
Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
return;
case 2:
}
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
if (isInt<16>(Val) &&
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@ -1221,7 +1333,18 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
return;
}
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
assert(AMDGPU::isInlinableLiteral16(LiteralVal,
AsmParser->hasInv2PiInlineImm()));
uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
static_cast<uint32_t>(LiteralVal);
Inst.addOperand(MCOperand::createImm(ImmVal));
return;
}
default:
llvm_unreachable("invalid operand size");
}
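In both the FP and integer paths above, a packed (V2INT16/V2FP16) operand takes a single 16-bit inline value and replicates it into both halves. A worked example:

// Illustration: the half-precision constant 1.0 (0x3c00) replicated into
// both lanes of a packed operand, as the V2INT16/V2FP16 cases above do.
uint32_t Lo = 0x3c00;               // 1.0 in IEEE half
uint32_t Packed = (Lo << 16) | Lo;  // 0x3c003c00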
@ -2268,6 +2391,56 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
return MatchOperand_Success;
}
OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
const char *Prefix,
OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy,
bool (*ConvertResult)(int64_t&)) {
StringRef Name = Parser.getTok().getString();
if (!Name.equals(Prefix))
return MatchOperand_NoMatch;
Parser.Lex();
if (getLexer().isNot(AsmToken::Colon))
return MatchOperand_ParseFail;
Parser.Lex();
if (getLexer().isNot(AsmToken::LBrac))
return MatchOperand_ParseFail;
Parser.Lex();
unsigned Val = 0;
SMLoc S = Parser.getTok().getLoc();
// FIXME: How to verify the number of elements matches the number of src
// operands?
for (int I = 0; I < 3; ++I) {
if (I != 0) {
if (getLexer().is(AsmToken::RBrac))
break;
if (getLexer().isNot(AsmToken::Comma))
return MatchOperand_ParseFail;
Parser.Lex();
}
if (getLexer().isNot(AsmToken::Integer))
return MatchOperand_ParseFail;
int64_t Op;
if (getParser().parseAbsoluteExpression(Op))
return MatchOperand_ParseFail;
if (Op != 0 && Op != 1)
return MatchOperand_ParseFail;
Val |= (Op << I);
}
Parser.Lex();
Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
return MatchOperand_Success;
}
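parseOperandArrayWithPrefix() accepts syntax such as op_sel:[0,1] or neg_lo:[1,0,1] and folds element I into bit I of the resulting immediate. A standalone sketch of that packing (hypothetical helper, same logic as the loop above):

// "op_sel:[1,0,1]" parses to Elems = {1, 0, 1} and packs to 0b101 = 5.
unsigned packBitArray(const int *Elems, int N) {
  unsigned Val = 0;
  for (int I = 0; I < N; ++I)
    Val |= static_cast<unsigned>(Elems[I]) << I; // each element is 0 or 1
  return Val;
}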
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy) {
@ -2300,12 +2473,11 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
return MatchOperand_Success;
}
typedef std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap;
static void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands,
OptionalImmIndexMap& OptionalIdx,
AMDGPUOperand::ImmTy ImmT,
int64_t Default = 0) {
static void addOptionalImmOperand(
MCInst& Inst, const OperandVector& Operands,
AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
AMDGPUOperand::ImmTy ImmT,
int64_t Default = 0) {
auto i = OptionalIdx.find(ImmT);
if (i != OptionalIdx.end()) {
unsigned Idx = i->second;
@ -3214,6 +3386,10 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
{"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
{"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
{"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
{"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
{"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
{"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
@ -3230,6 +3406,12 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operan
res = parseSDWASel(Operands, Op.Name, Op.Type);
} else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
res = parseSDWADstUnused(Operands);
} else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
Op.Type == AMDGPUOperand::ImmTyNegLo ||
Op.Type == AMDGPUOperand::ImmTyNegHi) {
res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
Op.ConvertResult);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
}
@ -3285,8 +3467,8 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
&& Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptionalIdx;
void AMDGPUAsmParser::cvtVOP3Impl(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx) {
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
@ -3303,6 +3485,12 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
llvm_unreachable("unhandled operand type");
}
}
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptionalIdx;
cvtVOP3Impl(Inst, Operands, OptionalIdx);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
@ -3327,6 +3515,74 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
}
}
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptIdx;
cvtVOP3Impl(Inst, Operands, OptIdx);
// FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
// instruction, and then figure out where to actually put the modifiers
int Opc = Inst.getOpcode();
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
}
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
if (NegLoIdx != -1) {
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
}
const int Ops[] = { AMDGPU::OpName::src0,
AMDGPU::OpName::src1,
AMDGPU::OpName::src2 };
const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers };
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
unsigned NegLo = 0;
unsigned NegHi = 0;
if (NegLoIdx != -1) {
int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
NegLo = Inst.getOperand(NegLoIdx).getImm();
NegHi = Inst.getOperand(NegHiIdx).getImm();
}
for (int J = 0; J < 3; ++J) {
int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
if (OpIdx == -1)
break;
uint32_t ModVal = 0;
if ((OpSel & (1 << J)) != 0)
ModVal |= SISrcMods::OP_SEL_0;
if ((OpSelHi & (1 << J)) != 0)
ModVal |= SISrcMods::OP_SEL_1;
if ((NegLo & (1 << J)) != 0)
ModVal |= SISrcMods::NEG;
if ((NegHi & (1 << J)) != 0)
ModVal |= SISrcMods::NEG_HI;
int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
Inst.getOperand(ModIdx).setImm(ModVal);
}
}
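cvtVOP3P() thereby folds the four instruction-level masks into the per-source srcN_modifiers operands that the encoder reads. A worked example under the SISrcMods bit assignments from SIDefines.h (a sketch, not part of the patch):

// For "op_sel:[1,0] op_sel_hi:[1,1]" on a two-source instruction, the
// loop above sees OpSel = 0b01, OpSelHi = 0b11, NegLo = NegHi = 0:
unsigned Src0Mods = SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1; // J = 0
unsigned Src1Mods = SISrcMods::OP_SEL_1;                       // J = 1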
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//


@ -97,6 +97,14 @@ static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}
static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
unsigned Imm,
uint64_t Addr,
const void *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM
@ -264,6 +272,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
return decodeSrcOp(OPW16, Val);
}
MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
return decodeSrcOp(OPWV216, Val);
}
MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
// Some instructions have operand restrictions beyond what the encoding
// allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
@ -424,6 +436,7 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
case OPW64:
return MCOperand::createImm(getInlineImmVal64(Imm));
case OPW16:
case OPWV216:
return MCOperand::createImm(getInlineImmVal16(Imm));
default:
llvm_unreachable("implement me");
@ -437,6 +450,7 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
default: // fall
case OPW32:
case OPW16:
case OPWV216:
return VGPR_32RegClassID;
case OPW64: return VReg_64RegClassID;
case OPW128: return VReg_128RegClassID;
@ -450,6 +464,7 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
default: // fall
case OPW32:
case OPW16:
case OPWV216:
return SGPR_32RegClassID;
case OPW64: return SGPR_64RegClassID;
case OPW128: return SGPR_128RegClassID;
@ -463,6 +478,7 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
default: // fall
case OPW32:
case OPW16:
case OPWV216:
return TTMP_32RegClassID;
case OPW64: return TTMP_64RegClassID;
case OPW128: return TTMP_128RegClassID;
@ -498,6 +514,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
switch (Width) {
case OPW32:
case OPW16:
case OPWV216:
return decodeSpecialReg32(Val);
case OPW64:
return decodeSpecialReg64(Val);


@ -67,6 +67,7 @@ public:
MCOperand decodeOperand_VS_32(unsigned Val) const;
MCOperand decodeOperand_VS_64(unsigned Val) const;
MCOperand decodeOperand_VSrc16(unsigned Val) const;
MCOperand decodeOperand_VSrcV216(unsigned Val) const;
MCOperand decodeOperand_VReg_64(unsigned Val) const;
MCOperand decodeOperand_VReg_96(unsigned Val) const;
@ -85,6 +86,7 @@ public:
OPW64,
OPW128,
OPW16,
OPWV216,
OPW_LAST_,
OPW_FIRST_ = OPW32
};


@ -375,6 +375,14 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
O << formatHex(static_cast<uint64_t>(Imm));
}
void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
uint16_t Lo16 = static_cast<uint16_t>(Imm);
assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
printImmediate16(Lo16, STI, O);
}
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@ -489,6 +497,10 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_IMM_FP16:
printImmediate16(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
printImmediateV216(Op.getImm(), STI, O);
break;
case MCOI::OPERAND_UNKNOWN:
case MCOI::OPERAND_PCREL:
O << formatDec(Op.getImm());
@ -738,6 +750,71 @@ void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
}
}
static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod) {
int DefaultValue = (Mod == SISrcMods::OP_SEL_1);
for (int I = 0; I < NumOps; ++I) {
if (!!(Ops[I] & Mod) != DefaultValue)
return false;
}
return true;
}
static void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
raw_ostream &O) {
unsigned Opc = MI->getOpcode();
int NumOps = 0;
int Ops[3];
for (int OpName : { AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers }) {
int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
if (Idx == -1)
break;
Ops[NumOps++] = MI->getOperand(Idx).getImm();
}
if (allOpsDefaultValue(Ops, NumOps, Mod))
return;
O << Name;
for (int I = 0; I < NumOps; ++I) {
if (I != 0)
O << ',';
O << !!(Ops[I] & Mod);
}
O << ']';
}
void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
}
void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
}
void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
}
void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
}
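printPackedModifier() suppresses a modifier whose sources all carry the default value, which is 1 for op_sel_hi and 0 for everything else. Two illustrative cases:

// src0_modifiers = 0, src1_modifiers = OP_SEL_0
//   -> prints " op_sel:[0,1]" (src1 deviates from the default 0).
// src0_modifiers = src1_modifiers = OP_SEL_1
//   -> prints nothing for op_sel_hi (every source at its default of 1).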
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {


@ -90,6 +90,8 @@ private:
raw_ostream &O);
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
@ -117,6 +119,14 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printSDWADstUnused(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printOpSel(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printOpSelHi(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printNegLo(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printNegHi(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpSlot(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpAttr(const MCInst *MI, unsigned OpNo,


@ -220,15 +220,35 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
Imm = MO.getImm();
}
switch (AMDGPU::getOperandSize(OpInfo)) {
case 4:
switch (OpInfo.OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
case 8:
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
case 2:
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
// FIXME Is this correct? What do inline immediates do on SI for f16 src
// which does not have f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
uint16_t Lo16 = static_cast<uint16_t>(Imm);
assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
uint32_t Encoding = getLit16Encoding(Lo16, STI);
assert(Encoding != 255 && "packed constants can only be inline immediates");
return Encoding;
}
default:
llvm_unreachable("invalid operand size");
}
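So a packed constant must itself be a 16-bit inline constant replicated into both halves; it then borrows the scalar 16-bit encoding. A sketch (the code 242 for 1.0 is the standard inline-constant source encoding, stated here as an illustration):

uint32_t Imm = 0x3c003c00;                  // 1.0h in both halves
uint16_t Lo16 = static_cast<uint16_t>(Imm); // 0x3c00
uint32_t Enc = getLit16Encoding(Lo16, STI); // inline code for 1.0 (242)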


@ -36,6 +36,7 @@ enum : uint64_t {
// TODO: Should this be split into VOP3 a and b?
VOP3 = 1 << 10,
VOP3P = 1 << 12,
VINTRP = 1 << 13,
SDWA = 1 << 14,
@ -102,12 +103,14 @@ namespace AMDGPU {
OPERAND_REG_INLINE_C_FP16,
OPERAND_REG_INLINE_C_FP32,
OPERAND_REG_INLINE_C_FP64,
OPERAND_REG_INLINE_C_V2FP16,
OPERAND_REG_INLINE_C_V2INT16,
OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP16,
OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_FP64,
OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_V2INT16,
OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
@ -125,9 +128,12 @@ namespace AMDGPU {
// NEG and SEXT share same bit-mask because they can't be set simultaneously.
namespace SISrcMods {
enum {
NEG = 1 << 0, // Floating-point negate modifier
ABS = 1 << 1, // Floating-point absolute modifier
SEXT = 1 << 0 // Integer sign-extend modifier
NEG = 1 << 0, // Floating-point negate modifier
ABS = 1 << 1, // Floating-point absolute modifier
SEXT = 1 << 0, // Integer sign-extend modifier
NEG_HI = ABS, // Floating-point negate high packed component modifier.
OP_SEL_0 = 1 << 2,
OP_SEL_1 = 1 << 3
};
}
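NEG_HI deliberately shares its bit with ABS: packed VOP3P sources have no floating-point |abs| modifier, so the bit is reused to negate the high lane. Composing a source modifier (sketch):

unsigned Mods = SISrcMods::NEG        // negate the low lane
              | SISrcMods::NEG_HI     // negate the high lane (aliases ABS)
              | SISrcMods::OP_SEL_0;  // use this source's high half for the
                                      // low-lane operation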


@ -31,6 +31,7 @@ class InstSI <dag outs, dag ins, string asm = "",
field bit VOP2 = 0;
field bit VOPC = 0;
field bit VOP3 = 0;
field bit VOP3P = 0;
field bit VINTRP = 0;
field bit SDWA = 0;
field bit DPP = 0;
@ -96,6 +97,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{8} = VOP2;
let TSFlags{9} = VOPC;
let TSFlags{10} = VOP3;
let TSFlags{12} = VOP3P;
let TSFlags{13} = VINTRP;
let TSFlags{14} = SDWA;


@ -440,6 +440,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::DPP;
}
static bool isVOP3P(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
}
bool isVOP3P(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
}
static bool isScalarUnit(const MachineInstr &MI) {
return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
}


@ -458,6 +458,12 @@ class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
let ParserMatchClass = MatchClass;
}
class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
OperandWithDefaultOps<i32, (ops (i32 0))> {
let PrintMethod = "print"#Name;
let ParserMatchClass = MatchClass;
}
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
@ -495,6 +501,11 @@ def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
def op_sel : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
@ -534,6 +545,7 @@ class FPInputModsMatchClass <int opSize> : AsmOperandClass {
let ParserMethod = "parseRegOrImmWithFPInputMods";
let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}
def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@ -586,6 +598,33 @@ def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "PackedFP"#opSize#"InputMods";
let ParserMethod = "parseRegOrImm";
let PredicateMethod = "isRegOrImm";
// let PredicateMethod = "isPackedFP"#opSize#"InputMods";
}
class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "PackedInt"#opSize#"InputMods";
let ParserMethod = "parseRegOrImm";
let PredicateMethod = "isRegOrImm";
// let PredicateMethod = "isPackedInt"#opSize#"InputMods";
}
def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
// let PrintMethod = "printPackedFPInputMods";
}
class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
//let PrintMethod = "printPackedIntInputMods";
}
def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
//===----------------------------------------------------------------------===//
// Complex patterns
@ -602,10 +641,13 @@ def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
@ -729,12 +771,34 @@ class getVALUDstForVT<ValueType VT> {
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, v2f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0)));
RegisterOperand ret = !if(isFP,
!if(!eq(VT.Size, 64), VSrc_f64, !if(!eq(VT.Size, 16), VSrc_f16, VSrc_f32)),
!if(!eq(VT.Size, 64), VSrc_b64, !if(!eq(VT.Size, 16), VSrc_b16, VSrc_b32)));
0))));
RegisterOperand ret =
!if(isFP,
!if(!eq(VT.Size, 64),
VSrc_f64,
!if(!eq(VT.Value, f16.Value),
VSrc_f16,
!if(!eq(VT.Value, v2f16.Value),
VCSrc_v2f16,
VSrc_f32
)
)
),
!if(!eq(VT.Size, 64),
VSrc_b64,
!if(!eq(VT.Value, i16.Value),
VSrc_b16,
!if(!eq(VT.Value, v2i16.Value),
VCSrc_v2b16,
VSrc_b32
)
)
)
);
}
// Returns the vreg register class to use for source operand given VT
@ -748,25 +812,38 @@ class getVregSrcForVT<ValueType VT> {
// given VT.
class getVOP3SrcForVT<ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, v2f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0)));
0))));
RegisterOperand ret =
!if(!eq(VT.Size, 128),
VSrc_128,
!if(!eq(VT.Size, 64),
VSrc_128,
!if(!eq(VT.Size, 64),
!if(isFP,
VCSrc_f64,
VCSrc_b64),
VCSrc_f64,
VCSrc_b64),
!if(!eq(VT.Value, i1.Value),
SCSrc_b64,
!if(isFP,
!if(!eq(VT.Size, 16), VCSrc_f16, VCSrc_f32),
!if(!eq(VT.Size, 16), VCSrc_b16, VCSrc_b32)
)
)
)
);
SCSrc_b64,
!if(isFP,
!if(!eq(VT.Value, f16.Value),
VCSrc_f16,
!if(!eq(VT.Value, v2f16.Value),
VCSrc_v2f16,
VCSrc_f32
)
),
!if(!eq(VT.Value, i16.Value),
VCSrc_b16,
!if(!eq(VT.Value, v2i16.Value),
VCSrc_v2b16,
VCSrc_b32
)
)
)
)
)
);
}
// Returns 1 if the source arguments have modifiers, 0 if they do not.
@ -776,7 +853,8 @@ class isFloatType<ValueType SrcVT> {
!if(!eq(SrcVT.Value, f16.Value), 1,
!if(!eq(SrcVT.Value, f32.Value), 1,
!if(!eq(SrcVT.Value, f64.Value), 1,
0)));
!if(!eq(SrcVT.Value, v2f16.Value), 1,
0))));
}
class isIntType<ValueType SrcVT> {
@ -787,6 +865,23 @@ class isIntType<ValueType SrcVT> {
0)));
}
class isPackedType<ValueType SrcVT> {
bit ret =
!if(!eq(SrcVT.Value, v2i16.Value), 1,
!if(!eq(SrcVT.Value, v2f16.Value), 1, 0)
);
}
// Float or packed int
class isModifierType<ValueType SrcVT> {
bit ret =
!if(!eq(SrcVT.Value, f16.Value), 1,
!if(!eq(SrcVT.Value, f32.Value), 1,
!if(!eq(SrcVT.Value, f64.Value), 1,
!if(!eq(SrcVT.Value, v2f16.Value), 1,
!if(!eq(SrcVT.Value, v2i16.Value), 1,
0)))));
}
// Return type of input modifiers operand for specified input operand
class getSrcMod <ValueType VT> {
@ -794,6 +889,7 @@ class getSrcMod <ValueType VT> {
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0)));
bit isPacked = isPackedType<VT>.ret;
Operand ret = !if(!eq(VT.Size, 64),
!if(isFP, FP64InputMods, Int64InputMods),
!if(isFP,
@ -824,8 +920,8 @@ class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
bit HasModifiers, Operand Src0Mod, Operand Src1Mod,
Operand Src2Mod> {
bit HasModifiers, bit HasOMod,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret =
!if (!eq(NumSrcArgs, 0),
@ -844,9 +940,13 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if (!eq(NumSrcArgs, 2),
!if (!eq(HasModifiers, 1),
// VOP 2 with modifiers
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp, omod:$omod)
!if( !eq(HasOMod, 1),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp, omod:$omod),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp))
/* else */,
// VOP2 without modifiers
(ins Src0RC:$src0, Src1RC:$src1)
@ -854,16 +954,57 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
/* NumSrcArgs == 3 */,
!if (!eq(HasModifiers, 1),
// VOP3 with modifiers
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
clampmod:$clamp, omod:$omod)
!if (!eq(HasOMod, 1),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
clampmod:$clamp, omod:$omod),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
clampmod:$clamp))
/* else */,
// VOP3 without modifiers
(ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
/* endif */ ))));
}
/// XXX - src1 may only allow VGPRs?
// The modifiers (except clamp) are dummy operands for the benefit of
// printing and parsing. They defer their values to looking at the
// srcN_modifiers for what to print.
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
bit HasClamp,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret = !if (!eq(NumSrcArgs, 2),
!if (HasClamp,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp,
op_sel:$op_sel, op_sel_hi:$op_sel_hi,
neg_lo:$neg_lo, neg_hi:$neg_hi),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
op_sel:$op_sel, op_sel_hi:$op_sel_hi,
neg_lo:$neg_lo, neg_hi:$neg_hi)),
// else NumSrcArgs == 3
!if (HasClamp,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
clampmod:$clamp,
op_sel:$op_sel, op_sel_hi:$op_sel_hi,
neg_lo:$neg_lo, neg_hi:$neg_hi),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
op_sel:$op_sel, op_sel_hi:$op_sel_hi,
neg_lo:$neg_lo, neg_hi:$neg_hi))
);
}
class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
bit HasModifiers, Operand Src0Mod, Operand Src1Mod> {
@ -947,7 +1088,8 @@ class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers,
bit HasOMod, ValueType DstVT = i32> {
string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
@ -957,7 +1099,26 @@ class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =
string ret =
!if(!eq(HasModifiers, 0),
getAsm32<HasDst, NumSrcArgs, DstVT>.ret,
dst#", "#src0#src1#src2#"$clamp"#"$omod");
dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
}
// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
bit HasClamp, ValueType DstVT = i32> {
string dst = " $vdst";
string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
!if(!eq(NumSrcArgs, 2), " $src1",
" $src1,"));
string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
string clamp = !if(HasClamp, "$clamp", "");
// Each modifier is printed as an array of bits for each operand, so
// all operands are printed as part of src0_modifiers.
string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
}
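With every modifier printed as a bit array hanging off src0_modifiers, a fully decorated two-source VOP3P instruction comes out in this shape (illustrative operand values, in the order getAsmVOP3P builds them):

v_pk_add_f16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[0,1] clamp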
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
@ -1069,7 +1230,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
// TODO: Modifiers logic is somewhat adhoc here, to be refined later
field bit HasModifiers = isFloatType<Src0VT>.ret;
field bit HasModifiers = isModifierType<Src0VT>.ret;
field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
@ -1083,13 +1244,20 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
field bit HasOMod = HasModifiers;
field bit HasClamp = HasModifiers;
field bit HasSDWAClamp = HasSrc0;
field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
field bit IsPacked = isPackedType<Src0VT>.ret;
field bit HasOpSel = IsPacked;
field bit HasOMod = !if(HasOpSel, 0, HasModifiers);
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
// VOP3b instructions are a special case with a second explicit
@ -1101,7 +1269,12 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
HasModifiers, HasOMod, Src0Mod, Src1Mod,
Src2Mod>.ret;
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp,
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
@ -1109,7 +1282,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
DstVT>.ret;
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret;
field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
}
@ -1130,6 +1304,13 @@ def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
def VOP_I16_I16_I16_I16 : VOPProfile <[i32, i32, i32, i32, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;


@ -616,6 +616,12 @@ def : BitConvert <i32, f32, VGPR_32>;
def : BitConvert <f32, i32, VGPR_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <f32, i32, SReg_32>;
def : BitConvert <v2i16, i32, SReg_32>;
def : BitConvert <i32, v2i16, SReg_32>;
def : BitConvert <v2f16, i32, SReg_32>;
def : BitConvert <i32, v2f16, SReg_32>;
def : BitConvert <v2i16, v2f16, SReg_32>;
def : BitConvert <v2f16, v2i16, SReg_32>;
// 64-bit bitcast
def : BitConvert <i64, f64, VReg_64>;


@ -133,7 +133,7 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
// TODO: Do we need to set DwarfRegAlias on register tuples?
// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "SGPR%u", 0, 103))> {
// Give all SGPR classes higher priority than VGPR classes, because
// we want to spill SGPRs to VGPRs.
@ -184,7 +184,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
(add (decimate (shl SGPR_32, 15), 4))]>;
// Trap handler TMP 32-bit registers
def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
(add (sequence "TTMP%u", 0, 11))> {
let isAllocatable = 0;
}
@ -202,7 +202,8 @@ def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
(add (decimate (shl TTMP_32, 3), 4))]>;
// VGPR 32-bit registers
def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
// i16/f16 only on VI+
def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "VGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
@ -263,7 +264,7 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
@ -276,7 +277,7 @@ def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
}
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 7;
}
@ -372,7 +373,7 @@ def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
let Size = 32;
}
def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, SReg_32)> {
let isAllocatable = 0;
}
@ -423,6 +424,18 @@ multiclass SIRegOperand <string rc, string MatchName, string opType> {
let OperandType = opType#"_FP64";
let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
}
def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
let OperandType = opType#"_V2INT16";
let ParserMatchClass = RegImmMatcher<MatchName#"V2B16">;
let DecoderMethod = "decodeOperand_VSrcV216";
}
def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
let OperandType = opType#"_V2FP16";
let ParserMatchClass = RegImmMatcher<MatchName#"V2F16">;
let DecoderMethod = "decodeOperand_VSrcV216";
}
}
}


@ -438,6 +438,11 @@ let Defs = [SCC] in {
def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
} // End Defs = [SCC]
let SubtargetPredicate = isGFX9 in {
def S_PACK_LL_B32_B16 : SOP2_32<"s_pack_ll_b32_b16">;
def S_PACK_LH_B32_B16 : SOP2_32<"s_pack_lh_b32_b16">;
def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">;
}
//===----------------------------------------------------------------------===//
// SOPK Instructions
@ -1207,6 +1212,9 @@ def S_BFE_U64_vi : SOP2_Real_vi <0x27, S_BFE_U64>;
def S_BFE_I64_vi : SOP2_Real_vi <0x28, S_BFE_I64>;
def S_CBRANCH_G_FORK_vi : SOP2_Real_vi <0x29, S_CBRANCH_G_FORK>;
def S_ABSDIFF_I32_vi : SOP2_Real_vi <0x2a, S_ABSDIFF_I32>;
def S_PACK_LL_B32_B16_vi : SOP2_Real_vi <0x32, S_PACK_LL_B32_B16>;
def S_PACK_LH_B32_B16_vi : SOP2_Real_vi <0x33, S_PACK_LH_B32_B16>;
def S_PACK_HH_B32_B16_vi : SOP2_Real_vi <0x34, S_PACK_HH_B32_B16>;
def S_MOVK_I32_vi : SOPK_Real_vi <0x00, S_MOVK_I32>;
def S_CMOVK_I32_vi : SOPK_Real_vi <0x01, S_CMOVK_I32>;


@ -564,6 +564,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return true;
default:
return false;
@ -682,6 +683,14 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
Val == 0x3118; // 1/2pi
}
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
assert(HasInv2Pi);
int16_t Lo16 = static_cast<int16_t>(Literal);
int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
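A few concrete values (illustrative calls; HasInv2Pi is true on gfx9):

isInlinableLiteralV216(0x3c003c00, true); // true:  1.0h replicated
isInlinableLiteralV216(0x00000000, true); // true:  0 replicated
isInlinableLiteralV216(0x3c000000, true); // false: halves differ
isInlinableLiteralV216(0x4c004c00, true); // false: 16.0h is not inlinable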
bool isUniformMMO(const MachineMemOperand *MMO) {
const Value *Ptr = MMO->getValue();
// UndefValue means this is a load of a kernel input. These are uniform.


@ -301,6 +301,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return 2;
default:
@ -323,6 +325,9 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
bool isUniformMMO(const MachineMemOperand *MMO);
/// \returns The encoding that will be used for \p ByteOffset in the SMRD


@ -237,7 +237,7 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
src0_sel:$src0_sel);
let Asm32 = getAsm32<1, 1>.ret;
let Asm64 = getAsm64<1, 1, 0>.ret;
let Asm64 = getAsm64<1, 1, 0, 1>.ret;
let AsmDPP = getAsmDPP<1, 1, 0>.ret;
let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;


@ -182,7 +182,7 @@ def VOP_MADMK_F32 : VOP_MADMK <f32>;
class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
VGPR_32:$src2, // stub argument
@ -194,6 +194,7 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm32 = getAsm32<1, 2, vt>.ret;
let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret;
let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret;
let HasSrc2 = 0;
@ -204,13 +205,13 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
def VOP_MAC_F16 : VOP_MAC <f16> {
// FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
// 'not a string initializer' error.
let Asm64 = getAsm64<1, 2, HasModifiers, f16>.ret;
let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f16>.ret;
}
def VOP_MAC_F32 : VOP_MAC <f32> {
// FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
// 'not a string initializer' error.
let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f32>.ret;
}
// Write out to vcc or arbitrary SGPR.


@ -29,6 +29,26 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
ret1));
}
class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
(node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
(node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
(P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
(node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
ret1));
}
class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
@ -263,6 +283,10 @@ defm: Tenary_i16_Pats<mul, add, V_MAD_I16, sext>;
} // End Predicates = [isVI]
let SubtargetPredicate = isGFX9 in {
def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16>>;
}
//===----------------------------------------------------------------------===//
// Target
@ -449,3 +473,5 @@ defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>;
defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>;
defm V_TRIG_PREOP_F64 : VOP3_Real_vi <0x292>;
defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;


@ -0,0 +1,82 @@
//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// VOP3P Classes
//===----------------------------------------------------------------------===//
class VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
VOP3P_Pseudo<OpName, P,
!if(P.HasModifiers, getVOP3PModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
>;
// Non-packed instructions that use the VOP3P encoding. i.e. where
// omod/abs are used.
class VOP3_VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
VOP3P_Pseudo<OpName, P,
!if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
>;
let isCommutable = 1 in {
def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;
def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;
def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;
def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;
def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
}
def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
// XXX - Commutable?
def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
multiclass VOP3P_Real_vi<bits<10> op> {
def _vi : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Pe <op, !cast<VOP3P_Pseudo>(NAME).Pfl> {
let AssemblerPredicates = [HasVOP3PInsts];
let DecoderNamespace = "VI";
}
}
defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>;
defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>;
defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x384>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>;
defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>;
defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>;
defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>;
defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>;
defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>;
defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>;
defm V_PK_ADD_F16 : VOP3P_Real_vi <0x38f>;
defm V_PK_MUL_F16 : VOP3P_Real_vi <0x390>;
defm V_PK_MIN_F16 : VOP3P_Real_vi <0x391>;
defm V_PK_MAX_F16 : VOP3P_Real_vi <0x392>;
defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x3a0>;
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;


@ -68,8 +68,9 @@ class VOP3Common <dag outs, dag ins, string asm = "",
let hasPostISelHook = 1;
}
class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3Only = 0> :
InstSI <P.Outs64, P.Ins64, "", pattern>,
class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
bit VOP3Only = 0, bit isVOP3P = 0> :
InstSI <P.Outs64, !if(!and(isVOP3P, P.IsPacked), P.InsVOP3P, P.Ins64), "", pattern>,
VOP <opName>,
SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
MnemonicAlias<opName#"_e64", opName> {
@ -79,7 +80,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3On
let UseNamedOperandTable = 1;
string Mnemonic = opName;
string AsmOperands = P.Asm64;
string AsmOperands = !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64);
let Size = 8;
let mayLoad = 0;
@ -106,18 +107,24 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3On
let AsmVariantName = AMDGPUAsmVariants.VOP3;
let AsmMatchConverter =
!if(!eq(VOP3Only,1),
"cvtVOP3",
!if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
!if(!eq(P.HasModifiers, 1), "cvtVOP3_2_mod", ""));
VOPProfile Pfl = P;
}
class VOP3P_Pseudo <string opName, VOPProfile P, list<dag> pattern = []> :
VOP3_Pseudo<opName, P, pattern, 1, 1> {
let VOP3P = 1;
}
class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
let isPseudo = 0;
let isCodeGenOnly = 0;
let UseNamedOperandTable = 1;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
@@ -131,6 +138,11 @@ class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
let TSFlags = ps.TSFlags;
}
// XXX - Is there any reason to distinguish this from regular VOP3
// here?
class VOP3P_Real<VOP3P_Pseudo ps, int EncodingFamily> :
VOP3_Real<ps, EncodingFamily>;
class VOP3a<VOPProfile P> : Enc64 {
bits<2> src0_modifiers;
bits<9> src0;
@@ -198,6 +210,42 @@ class VOP3be <VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
bits<8> vdst;
// neg, neg_hi, op_sel and op_sel_hi are packed into srcN_modifiers
bits<4> src0_modifiers;
bits<9> src0;
bits<4> src1_modifiers;
bits<9> src1;
bits<4> src2_modifiers;
bits<9> src2;
bits<1> clamp;
let Inst{7-0} = vdst;
let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
let Inst{14} = !if(P.HasOpSel, src2_modifiers{3}, 0); // op_sel_hi(2)
let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
let Inst{25-16} = op;
let Inst{31-26} = 0x34; // encoding
let Inst{40-32} = !if(P.HasSrc0, src0, 0);
let Inst{49-41} = !if(P.HasSrc1, src1, 0);
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
let Inst{59} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel_hi(0)
let Inst{60} = !if(P.HasOpSel, src1_modifiers{3}, 0); // op_sel_hi(1)
let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo)
let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo)
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
}
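// Worked example, taken from the MC tests below: v_pk_add_u16 v1, v2, v3
// op_sel:[1,0] encodes as [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x18]:
//   byte 0     0x01    vdst = v1
//   byte 1     0x08    bit 11 set = op_sel(0)
//   bytes 2-3  0xd38a  op = 0x38a in Inst{25-16}, 0x34 in Inst{31-26}
//   bytes 4-6          src0 = v2 (0x102) and src1 = v3 (0x103) in the
//                      9-bit source fields; src2 unused
//   byte 7     0x18    bits 59-60 set = the default op_sel_hi:[1,1]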
class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be<P> {
let Inst{25-17} = op;
}
@@ -349,3 +397,4 @@ include "VOPCInstructions.td"
include "VOP1Instructions.td"
include "VOP2Instructions.td"
include "VOP3Instructions.td"
include "VOP3PInstructions.td"


@@ -0,0 +1,22 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s
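// These packed f16 operands have no 32-bit literal slot, so only inline
// constants are accepted: integers in -16..64 and the small set of inline
// FP values. Everything below falls outside that set and should be rejected.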
v_pk_add_f16 v1, -17, v2
// GFX9: :19: error: invalid operand for instruction
v_pk_add_f16 v1, 65, v2
// GFX9: :18: error: invalid operand for instruction
v_pk_add_f16 v1, 64.0, v2
// GFX9: :18: error: invalid operand for instruction
v_pk_add_f16 v1, -0.15915494, v2
// GFX9: :19: error: invalid operand for instruction
v_pk_add_f16 v1, -0.0, v2
// GFX9: :19: error: invalid operand for instruction
v_pk_add_f16 v1, -32768, v2
// GFX9: :19: error: invalid operand for instruction
v_pk_add_f16 v1, 32767, v2
// GFX9: :18: error: invalid operand for instruction


@@ -0,0 +1,112 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s | FileCheck -check-prefix=GFX9 %s
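// Inline constants on packed f16 operands: an integer, a decimal FP form,
// and the raw half-precision bit pattern (e.g. 1.0 vs 0x3c00) should all
// assemble to the same inline-constant encoding.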
v_pk_add_f16 v1, 0, v2
// GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18]
v_pk_add_f16 v1, 0.0, v2
// GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18]
v_pk_add_f16 v1, v2, 0
// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18]
v_pk_add_f16 v1, v2, 0.0
// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18]
v_pk_add_f16 v1, 1.0, v2
// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18]
v_pk_add_f16 v1, -1.0, v2
// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18]
v_pk_add_f16 v1, -0.5, v2
// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18]
v_pk_add_f16 v1, 0.5, v2
// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18]
v_pk_add_f16 v1, 2.0, v2
// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18]
v_pk_add_f16 v1, -2.0, v2
// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18]
v_pk_add_f16 v1, 4.0, v2
// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18]
v_pk_add_f16 v1, -4.0, v2
// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18]
v_pk_add_f16 v1, 0.15915494, v2
// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18]
v_pk_add_f16 v1, -1, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
v_pk_add_f16 v1, -2, v2
// GFX9: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc2,0x04,0x02,0x18]
v_pk_add_f16 v1, -3, v2
// GFX9: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc3,0x04,0x02,0x18]
v_pk_add_f16 v1, -16, v2
// GFX9: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xd0,0x04,0x02,0x18]
v_pk_add_f16 v1, 1, v2
// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18]
v_pk_add_f16 v1, 2, v2
// GFX9: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x82,0x04,0x02,0x18]
v_pk_add_f16 v1, 3, v2
// GFX9: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x83,0x04,0x02,0x18]
v_pk_add_f16 v1, 4, v2
// GFX9: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x84,0x04,0x02,0x18]
v_pk_add_f16 v1, 15, v2
// GFX9: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x8f,0x04,0x02,0x18]
v_pk_add_f16 v1, 16, v2
// GFX9: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x90,0x04,0x02,0x18]
v_pk_add_f16 v1, 63, v2
// GFX9: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xbf,0x04,0x02,0x18]
v_pk_add_f16 v1, 64, v2
// GFX9: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc0,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x0001, v2
// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xffff, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x3c00, v2
// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xbc00, v2
// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x3800, v2
// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xb800, v2
// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x4000, v2
// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xc000, v2
// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x4400, v2
// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xc400, v2
// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x3118, v2
// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18]
v_pk_add_f16 v1, 65535, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]


@@ -0,0 +1,113 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s
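// op_sel/op_sel_hi/neg_lo/neg_hi take a bracketed list of 0 or 1 flags,
// one per source operand; malformed or mismatched lists should fail to parse.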
// GFX9: 31: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel
// GFX9: 32: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:
// GFX9: 33: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[
// GFX9: 33: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[]
// GFX9: 34: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[,]
// XXGFX9: 34: error: failed parsing operand.
// v_pk_add_u16 v1, v2, v3 op_sel:[0]
// GFX9: 35: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[0,]
// XXGFX9: 36: error: failed parsing operand.
// v_pk_add_u16 v1, v2, v3 op_sel:[,0]
// GFX9: 36: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[0,2]
// GFX9: 35: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[2,0]
// GFX9: 33: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[-1,0]
// GFX9: 35: error: failed parsing operand.
v_pk_add_u16 v1, v2, v3 op_sel:[0,-1]
// GFX9: 40: error: not a valid operand.
v_pk_add_u16 v1, v2, v3 op_sel:[0,0,0,0]
// XXGFX9: invalid operand for instruction
v_pk_add_u16 v1, v2, v3 neg_lo:[0,0]
//
// Regular modifiers on packed instructions
//
// FIXME: should be invalid operand for instruction
// GFX9: :18: error: not a valid operand.
v_pk_add_f16 v1, |v2|, v3
// GFX9: :21: error: not a valid operand.
v_pk_add_f16 v1, abs(v2), v3
// GFX9: :22: error: not a valid operand.
v_pk_add_f16 v1, v2, |v3|
// GFX9: :25: error: not a valid operand.
v_pk_add_f16 v1, v2, abs(v3)
// GFX9: :19: error: invalid operand for instruction
v_pk_add_f16 v1, -v2, v3
// GFX9: :23: error: invalid operand for instruction
v_pk_add_f16 v1, v2, -v3
// GFX9: :21: error: not a valid operand.
v_pk_add_u16 v1, abs(v2), v3
// GFX9: :19: error: invalid operand for instruction
v_pk_add_u16 v1, -v2, v3
//
// Packed operands on the non-packed VOP3P instructions
//
// GFX9: invalid operand for instruction
v_mad_mix_f32 v1, v2, v3, v4 op_sel:[0,0,0]
// GFX9: invalid operand for instruction
v_mad_mix_f32 v1, v2, v3, v4 op_sel_hi:[0,0,0]
// GFX9: invalid operand for instruction
v_mad_mix_f32 v1, v2, v3, v4 neg_lo:[0,0,0]
// GFX9: invalid operand for instruction
v_mad_mix_f32 v1, v2, v3, v4 neg_hi:[0,0,0]
// GFX9: invalid operand for instruction
v_mad_mixlo_f16 v1, v2, v3, v4 op_sel:[0,0,0]
// GFX9: invalid operand for instruction
v_mad_mixlo_f16 v1, v2, v3, v4 op_sel_hi:[0,0,0]
// GFX9: invalid operand for instruction
v_mad_mixlo_f16 v1, v2, v3, v4 neg_lo:[0,0,0]
// GFX9: invalid operand for instruction
v_mad_mixlo_f16 v1, v2, v3, v4 neg_hi:[0,0,0]
// GFX9: invalid operand for instruction
v_mad_mixhi_f16 v1, v2, v3, v4 op_sel:[0,0,0]
// GFX9: invalid operand for instruction
v_mad_mixhi_f16 v1, v2, v3, v4 op_sel_hi:[0,0,0]
// GFX9: invalid operand for instruction
v_mad_mixhi_f16 v1, v2, v3, v4 neg_lo:[0,0,0]
// GFX9: invalid operand for instruction
v_mad_mixhi_f16 v1, v2, v3, v4 neg_hi:[0,0,0]

llvm/test/MC/AMDGPU/vop3p.s

@@ -0,0 +1,216 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s | FileCheck -check-prefix=GFX9 %s
//
// Test op_sel/op_sel_hi
//
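// op_sel_hi defaults to all ones (Inst{59-60}, plus Inst{14} for src2), so
// the default forms below round-trip without the modifier being printed.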
v_pk_add_u16 v1, v2, v3
// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]
v_pk_add_u16 v1, v2, v3 op_sel:[0,0]
// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]
v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,1]
// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]
v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]
v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x00]
v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x00]
v_pk_add_u16 v1, v2, v3 op_sel:[1,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x18]
v_pk_add_u16 v1, v2, v3 op_sel:[0,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x18]
v_pk_add_u16 v1, v2, v3 op_sel:[1,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x18,0x8a,0xd3,0x02,0x07,0x02,0x18]
v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x10]
v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x08]
v_pk_add_u16 v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x18,0x8a,0xd3,0x02,0x07,0x02,0x18]
v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x08]
v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x10]
v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x10]
v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x08]
//
// Test src2 op_sel/op_sel_hi
//
v_pk_fma_f16 v8, v0, s0, v1
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x8e,0xd3,0x00,0x01,0x04,0x04]
v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x8e,0xd3,0x00,0x01,0x04,0x04]
//
// Test neg_lo/neg_hi
//
v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0xfc]
v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x8e,0xd3,0x00,0x01,0x04,0x1c]
v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x8e,0xd3,0x00,0x01,0x04,0xfc]
v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x3c]
v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x5c]
v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x9c]
v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x8e,0xd3,0x00,0x01,0x04,0x1c]
v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x8e,0xd3,0x00,0x01,0x04,0x1c]
v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1]
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x8e,0xd3,0x00,0x01,0x04,0x1c]
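// Per the encodings above, neg_lo occupies Inst{61-63} (top bits of the
// last byte) and neg_hi Inst{8-10} (low bits of the second byte).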
// Test clamp
v_pk_fma_f16 v8, v0, s0, v1 clamp
// GFX9: v_pk_fma_f16 v8, v0, s0, v1 clamp ; encoding: [0x08,0xc0,0x8e,0xd3,0x00,0x01,0x04,0x1c]
v_pk_add_u16 v1, v2, v3 clamp
// GFX9: v_pk_add_u16 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x8a,0xd3,0x02,0x07,0x02,0x18]
v_pk_min_i16 v0, v1, v2 clamp
// GFX9: v_pk_min_i16 v0, v1, v2 clamp ; encoding: [0x00,0x80,0x88,0xd3,0x01,0x05,0x02,0x18]
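// clamp is Inst{15}, the top bit of the second byte (the 0x80 above).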
//
// Instruction tests:
//
v_pk_mul_lo_u16 v0, v1, v2
// GFX9: v_pk_mul_lo_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x81,0xd3,0x01,0x05,0x02,0x18]
v_pk_add_i16 v0, v1, v2
// GFX9: v_pk_add_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x82,0xd3,0x01,0x05,0x02,0x18]
v_pk_sub_i16 v0, v1, v2
// GFX9: v_pk_sub_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x83,0xd3,0x01,0x05,0x02,0x18]
v_pk_lshlrev_b16 v0, v1, v2
// GFX9: v_pk_lshlrev_b16 v0, v1, v2 ; encoding: [0x00,0x00,0x84,0xd3,0x01,0x05,0x02,0x18]
v_pk_lshrrev_b16 v0, v1, v2
// GFX9: v_pk_lshrrev_b16 v0, v1, v2 ; encoding: [0x00,0x00,0x85,0xd3,0x01,0x05,0x02,0x18]
v_pk_ashrrev_i16 v0, v1, v2
// GFX9: v_pk_ashrrev_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x86,0xd3,0x01,0x05,0x02,0x18]
v_pk_max_i16 v0, v1, v2
// GFX9: v_pk_max_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x87,0xd3,0x01,0x05,0x02,0x18]
v_pk_min_i16 v0, v1, v2
// GFX9: v_pk_min_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x88,0xd3,0x01,0x05,0x02,0x18]
v_pk_add_u16 v0, v1, v2
// GFX9: v_pk_add_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18]
v_pk_max_u16 v0, v1, v2
// GFX9: v_pk_max_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18]
v_pk_min_u16 v0, v1, v2
// GFX9: v_pk_min_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18]
v_pk_fma_f16 v0, v1, v2, v3
// GFX9: v_pk_fma_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x8e,0xd3,0x01,0x05,0x0e,0x1c]
v_pk_add_f16 v0, v1, v2
// GFX9: v_pk_add_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18]
v_pk_mul_f16 v0, v1, v2
// GFX9: v_pk_mul_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x90,0xd3,0x01,0x05,0x02,0x18]
v_pk_min_f16 v0, v1, v2
// GFX9: v_pk_min_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x91,0xd3,0x01,0x05,0x02,0x18]
v_pk_max_f16 v0, v1, v2
// GFX9: v_pk_max_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x92,0xd3,0x01,0x05,0x02,0x18]
v_mad_mix_f32 v0, v1, v2, v3
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
v_mad_mixlo_f16 v0, v1, v2, v3
// GFX9: v_mad_mixlo_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa1,0xd3,0x01,0x05,0x0e,0x04]
v_mad_mixhi_f16 v0, v1, v2, v3
// GFX9: v_mad_mixhi_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa2,0xd3,0x01,0x05,0x0e,0x04]
//
// Regular source modifiers on non-packed instructions
//
v_mad_mix_f32 v0, abs(v1), v2, v3
// GFX9: v_mad_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04]
v_mad_mix_f32 v0, v1, abs(v2), v3
// GFX9: v_mad_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04]
v_mad_mix_f32 v0, v1, v2, abs(v3)
// GFX9: v_mad_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04]
v_mad_mix_f32 v0, -v1, v2, v3
// GFX9: v_mad_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24]
v_mad_mix_f32 v0, v1, -v2, v3
// GFX9: v_mad_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44]
v_mad_mix_f32 v0, v1, v2, -v3
// GFX9: v_mad_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84]
v_mad_mix_f32 v0, -abs(v1), v2, v3
// GFX9: v_mad_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24]
v_mad_mix_f32 v0, v1, -abs(v2), v3
// GFX9: v_mad_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44]
v_mad_mix_f32 v0, v1, v2, -abs(v3)
// GFX9: v_mad_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84]
v_mad_mixlo_f16 v0, abs(v1), -v2, abs(v3)
// GFX9: v_mad_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44]
v_mad_mixhi_f16 v0, -v1, abs(v2), -abs(v3)
// GFX9: v_mad_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4]
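// For the mad_mix instructions the regular modifiers reuse the packed
// modifier bits: neg maps to the neg_lo positions (Inst{61-63}) and abs to
// the neg_hi positions (Inst{8-10}), as the encodings above show.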