2016-09-19 22:39:49 +08:00
|
|
|
//===-- VOPInstructions.td - Vector Instruction Definitions ---------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// dummies for outer let
|
|
|
|
// Declares, without assigning any value, the set of fields listed below.
class LetDummies {
  // Single-bit instruction properties.
  bit isCommutable;
  bit isConvertibleToThreeAddress;
  bit isMoveImm;
  bit isReMaterializable;
  bit isAsCheapAsAMove;
  bit VOPAsmPrefer32Bit;

  // Predicate and operand-constraint strings.
  Predicate SubtargetPredicate;
  string Constraints;
  string DisableEncoding;

  // Scheduling and implicit-register lists.
  list<SchedReadWrite> SchedRW;
  list<Register> Uses;
  list<Register> Defs;
}
|
|
|
|
|
|
|
|
// Mixin that stores the `opName` template argument in the `OpName` field.
class VOP<string opName> {
  string OpName = opName;
}
|
|
|
|
|
|
|
|
// Shared base for VOP instructions: forwards all template arguments to InstSI
// and applies the default settings below.
class VOPAnyCommon<dag outs, dag ins, string asm, list<dag> pattern>
    : InstSI<outs, ins, asm, pattern> {
  // Instruction-kind flag and named-operand table generation.
  let VALU = 1;
  let UseNamedOperandTable = 1;

  // EXEC is listed as an implicit use.
  let Uses = [EXEC];

  // mayLoad / mayStore / hasSideEffects are all cleared.
  let hasSideEffects = 0;
  let mayStore = 0;
  let mayLoad = 0;
}
|
|
|
|
|
2018-03-26 21:56:53 +08:00
|
|
|
// Generic VOP pseudo instruction.
//
//   opName  - base mnemonic; stored in `Mnemonic` and passed to VOP<>.
//   suffix  - appended to opName to form the SIMCInstr / MnemonicAlias name.
//   P       - profile, kept in `Pfl`.
//   outs, ins, asm, pattern - forwarded unchanged to InstSI.
class VOP_Pseudo<string opName, string suffix, VOPProfile P, dag outs, dag ins,
                 string asm, list<dag> pattern>
    : InstSI<outs, ins, asm, pattern>,
      VOP<opName>,
      SIMCInstr<opName#suffix, SIEncodingFamily.NONE>,
      MnemonicAlias<opName#suffix, opName> {
  // Data carried along with the pseudo.
  string Mnemonic = opName;
  VOPProfile Pfl = P;
  string AsmOperands; // Declared only; no value is assigned in this class.

  // Marked as a codegen-only pseudo with a named operand table.
  let UseNamedOperandTable = 1;
  let isCodeGenOnly = 1;
  let isPseudo = 1;
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// Common settings for VOP3 instructions built on VOPAnyCommon.
//
//   HasMods  - when equal to 1, "cvtVOP3" is used as the asm match converter.
//   VOP3Only - accepted but not referenced inside this class body.
class VOP3Common<dag outs, dag ins, string asm = "",
                 list<dag> pattern = [], bit HasMods = 0,
                 bit VOP3Only = 0>
    : VOPAnyCommon<outs, ins, asm, pattern> {
  int Size = 8;

  let VOP3 = 1;
  let isCodeGenOnly = 0;

  // Assembler configuration.
  let AsmVariantName = AMDGPUAsmVariants.VOP3;
  let AsmMatchConverter = !if(!eq(HasMods,1), "cvtVOP3", "");

  // Complex patterns give VOP3 patterns a very high complexity rating, yet
  // standalone patterns are almost always the preferred match. A large
  // negative adjustment pulls the complexity down to zero or below so that
  // VOP3 patterns get lower priority.
  let AddedComplexity = -1000;

  // SGPRs may be allowed when there are multiple operands, so a post-isel
  // hook is needed to insert copies and avoid violating constant bus
  // requirements.
  let hasPostISelHook = 1;
}
|
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
// VOP3 pseudo instruction (name suffix "_e64").
//
// The input operand list and the asm operand string are both chosen from the
// profile P with the same priority:
//   1. isVop3OpSel                -> P.InsVOP3OpSel / P.AsmVOP3OpSel
//   2. isVOP3P and P.IsPacked     -> P.InsVOP3P     / P.AsmVOP3P
//   3. otherwise                  -> P.Ins64        / P.Asm64
//
// VOP3Only is accepted but not referenced inside this class body.
class VOP3_Pseudo<string opName, VOPProfile P, list<dag> pattern = [],
                  bit VOP3Only = 0, bit isVOP3P = 0, bit isVop3OpSel = 0>
    : VOP_Pseudo<opName, "_e64", P, P.Outs64,
                 !if(isVop3OpSel,
                     P.InsVOP3OpSel,
                     !if(!and(isVOP3P, P.IsPacked), P.InsVOP3P, P.Ins64)),
                 "", pattern> {
  // Instruction-kind flags.
  let VALU = 1;
  let VOP3 = 1;
  let VOP3_OPSEL = isVop3OpSel;
  let IsPacked = P.IsPacked;

  // Clamp capabilities copied from the profile.
  let FPClamp = P.HasFPClamp;
  let IntClamp = P.HasIntClamp;
  let ClampLo = P.HasClampLo;
  let ClampHi = P.HasClampHi;

  // Generic instruction properties.
  let Size = 8;
  let Uses = [EXEC];
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let SubtargetPredicate = isGCN;

  // SGPRs may be allowed when there are multiple operands, so a post-isel
  // hook is needed to insert copies and avoid violating constant bus
  // requirements.
  let hasPostISelHook = 1;

  // Complex patterns give VOP3 patterns a very high complexity rating, yet
  // standalone patterns are almost always the preferred match. A large
  // negative adjustment pulls the complexity down to zero or below so that
  // VOP3 patterns get lower priority.
  let AddedComplexity = -1000;

  // Assembler configuration.
  let AsmVariantName = AMDGPUAsmVariants.VOP3;
  let AsmOperands = !if(isVop3OpSel,
                        P.AsmVOP3OpSel,
                        !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64));
  // "cvtVOP3P" for VOP3P; otherwise "cvtVOP3" when the profile has modifiers,
  // omod or integer clamp; otherwise no converter.
  let AsmMatchConverter =
      !if(isVOP3P,
          "cvtVOP3P",
          !if(!or(P.HasModifiers, !or(P.HasOMod, P.HasIntClamp)),
              "cvtVOP3",
              ""));
}
|
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
// VOP3P pseudo: a VOP3_Pseudo instantiated with VOP3Only = 1 and isVOP3P = 1,
// additionally setting the VOP3P flag.
class VOP3P_Pseudo<string opName, VOPProfile P, list<dag> pattern = []>
    : VOP3_Pseudo<opName, P, pattern, 1, 1> {
  let VOP3P = 1;
}
|
|
|
|
|
2018-03-26 21:56:53 +08:00
|
|
|
// Real (encodable) VOP3 instruction derived from the pseudo `ps`.
//
//   ps             - pseudo supplying operand lists, mnemonic, asm operands
//                    and the flags copied below.
//   EncodingFamily - passed to SIMCInstr together with ps.PseudoInstr.
//
// Each field is assigned exactly once. Earlier revisions assigned Constraints
// and DisableEncoding twice and set UseNamedOperandTable to 1 before
// overwriting it with the pseudo's value; the resulting values are unchanged.
class VOP3_Real<VOP_Pseudo ps, int EncodingFamily>
    : InstSI<ps.OutOperandList, ps.InOperandList,
             ps.Mnemonic # ps.AsmOperands, []>,
      SIMCInstr<ps.PseudoInstr, EncodingFamily> {
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // Copy relevant pseudo op flags.
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let AsmVariantName = ps.AsmVariantName;
  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses = ps.Uses;
  let Defs = ps.Defs;

  VOPProfile Pfl = ps.Pfl;
}
|
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
// XXX - Is there any reason to distinguish this from regular VOP3
|
|
|
|
// here?
|
2018-03-26 21:56:53 +08:00
|
|
|
// Real VOP3P instruction; adds nothing on top of VOP3_Real.
class VOP3P_Real<VOP_Pseudo ps, int EncodingFamily>
    : VOP3_Real<ps, EncodingFamily>;
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// 64-bit VOP3a encoding fields shared by the SI and VI variants. Every
// operand-dependent field is emitted as 0 when the profile P says the
// corresponding operand or modifier is absent.
class VOP3a<VOPProfile P> : Enc64 {
  bits<4> src0_modifiers;
  bits<9> src0;
  bits<3> src1_modifiers;
  bits<9> src1;
  bits<3> src2_modifiers;
  bits<9> src2;
  bits<1> clamp;
  bits<2> omod;

  let Inst{31-26} = 0x34; //encoding

  // Source operands.
  let Inst{40-32} = !if(P.HasSrc0, src0, 0);
  let Inst{49-41} = !if(P.HasSrc1, src1, 0);
  let Inst{58-50} = !if(P.HasSrc2, src2, 0);

  // Bit 1 of each srcN_modifiers goes to Inst{8..10}.
  let Inst{8}  = !if(P.HasSrc0Mods, src0_modifiers{1}, 0);
  let Inst{9}  = !if(P.HasSrc1Mods, src1_modifiers{1}, 0);
  let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);

  // Bit 0 of each srcN_modifiers goes to Inst{61..63}.
  let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0);
  let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0);
  let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);

  // Output modifier.
  let Inst{60-59} = !if(P.HasOMod, omod, 0);
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// VOP3a with a 9-bit opcode in Inst{25-17} and the clamp bit in Inst{11}.
class VOP3a_si<bits<9> op, VOPProfile P> : VOP3a<P> {
  let Inst{11}    = !if(P.HasClamp, clamp{0}, 0);
  let Inst{25-17} = op;
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// VOP3a with a 10-bit opcode in Inst{25-16} and the clamp bit in Inst{15}.
class VOP3a_vi<bits<10> op, VOPProfile P> : VOP3a<P> {
  let Inst{15}    = !if(P.HasClamp, clamp{0}, 0);
  let Inst{25-16} = op;
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// VOP3a_si plus an 8-bit vdst in Inst{7-0}; emitted as 0 unless P.EmitDst.
class VOP3e_si<bits<9> op, VOPProfile P> : VOP3a_si<op, P> {
  bits<8> vdst;

  let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// VOP3a_vi plus an 8-bit vdst in Inst{7-0}; emitted as 0 unless P.EmitDst.
class VOP3e_vi<bits<10> op, VOPProfile P> : VOP3a_vi<op, P> {
  bits<8> vdst;

  let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
}
|
|
|
|
|
2017-07-21 21:54:11 +08:00
|
|
|
// VOP3e_vi extended with four extra bits in Inst{14-11}:
//   Inst{11..13} take bit 2 of src0/src1/src2_modifiers (when that source
//   exists), and Inst{14} takes bit 3 of src0_modifiers (when P.HasDst).
class VOP3OpSel_gfx9<bits<10> op, VOPProfile P> : VOP3e_vi<op, P> {
  // Per-source bits.
  let Inst{11} = !if(P.HasSrc0, src0_modifiers{2}, 0);
  let Inst{12} = !if(P.HasSrc1, src1_modifiers{2}, 0);
  let Inst{13} = !if(P.HasSrc2, src2_modifiers{2}, 0);

  // Destination bit, carried in src0_modifiers{3}.
  let Inst{14} = !if(P.HasDst, src0_modifiers{3}, 0);
}
|
|
|
|
|
2017-08-07 21:14:12 +08:00
|
|
|
// NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa
|
|
|
|
// VOP3e_vi variant with attr / attrchan / high fields. It overrides the
// inherited Inst{40-32} and Inst{49-41} layout: attr, attrchan and high fill
// Inst{40-32}, while src0 and its modifier bits are placed in the positions
// the base class uses for src1.
class VOP3Interp_vi<bits<10> op, VOPProfile P> : VOP3e_vi<op, P> {
  bits<2> attrchan;
  bits<6> attr;
  bits<1> high;

  // Inst{40-32}: attribute selection and the optional `high` bit.
  let Inst{37-32} = attr;
  let Inst{39-38} = attrchan;
  let Inst{40}    = !if(P.HasHigh, high, 0);

  // src0 goes into Inst{49-41}.
  let Inst{49-41} = src0;

  // src0's modifier bits go into Inst{9} and Inst{62}.
  let Inst{9}  = !if(P.HasSrc0Mods, src0_modifiers{1}, 0);
  let Inst{62} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0);

  // The base-class src0 modifier positions are forced to zero.
  let Inst{8}  = 0; // No modifiers for src0
  let Inst{61} = 0;
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// 64-bit VOP3b encoding fields shared by the SI and VI variants: a vector
// destination (vdst) plus a scalar destination (sdst). Source, modifier and
// omod fields are emitted as 0 when the profile P lacks them.
class VOP3be<VOPProfile P> : Enc64 {
  bits<8> vdst;
  bits<2> src0_modifiers;
  bits<9> src0;
  bits<2> src1_modifiers;
  bits<9> src1;
  bits<2> src2_modifiers;
  bits<9> src2;
  bits<7> sdst;
  bits<2> omod;

  let Inst{31-26} = 0x34; //encoding

  // Destinations (unconditional).
  let Inst{7-0}  = vdst;
  let Inst{14-8} = sdst;

  // Source operands.
  let Inst{40-32} = !if(P.HasSrc0, src0, 0);
  let Inst{49-41} = !if(P.HasSrc1, src1, 0);
  let Inst{58-50} = !if(P.HasSrc2, src2, 0);

  // Output modifier.
  let Inst{60-59} = !if(P.HasOMod, omod, 0);

  // Bit 0 of each srcN_modifiers.
  let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0);
  let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0);
  let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
|
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
// 64-bit VOP3P encoding. neg, neg_hi, op_sel and op_sel_hi are all carried
// in the 4-bit srcN_modifiers operands and scattered into the instruction
// word as shown below:
//   srcN_modifiers{0} = neg (lo)
//   srcN_modifiers{1} = neg_hi
//   srcN_modifiers{2} = op_sel
//   srcN_modifiers{3} = op_sel_hi
class VOP3Pe<bits<10> op, VOPProfile P> : Enc64 {
  bits<8> vdst;
  // neg, neg_hi, op_sel put in srcN_modifiers
  bits<4> src0_modifiers;
  bits<9> src0;
  bits<4> src1_modifiers;
  bits<9> src1;
  bits<4> src2_modifiers;
  bits<9> src2;
  bits<1> clamp;

  // Opcode and encoding marker.
  let Inst{25-16} = op;
  let Inst{31-26} = 0x34; //encoding

  // Destination, clamp and source operands.
  let Inst{7-0}   = vdst;
  let Inst{15}    = !if(P.HasClamp, clamp{0}, 0);
  let Inst{40-32} = !if(P.HasSrc0, src0, 0);
  let Inst{49-41} = !if(P.HasSrc1, src1, 0);
  let Inst{58-50} = !if(P.HasSrc2, src2, 0);

  // neg_hi
  let Inst{8}  = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
  let Inst{9}  = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
  let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2

  // op_sel
  let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0)
  let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1)
  let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2)

  // op_sel_hi (src2's bit is in the low dword, src0/src1's in the high one)
  let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, 0); // op_sel_hi(2)
  let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, 0); // op_sel_hi(0)
  let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, 0); // op_sel_hi(1)

  // neg (lo)
  let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo)
  let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo)
  let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// VOP3be with a 9-bit opcode placed in Inst{25-17}.
class VOP3be_si<bits<9> op, VOPProfile P> : VOP3be<P> {
  let Inst{25-17} = op;
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// VOP3be with a 10-bit opcode in Inst{25-16} and an added clamp bit in
// Inst{15} (0 unless P.HasClamp).
class VOP3be_vi<bits<10> op, VOPProfile P> : VOP3be<P> {
  bits<1> clamp;

  let Inst{15}    = !if(P.HasClamp, clamp{0}, 0);
  let Inst{25-16} = op;
}
|
|
|
|
|
2016-12-22 20:57:41 +08:00
|
|
|
// Named integer constants for SDWA operand values.
def SDWA {
  // sdwa_sel values
  int BYTE_0 = 0;
  int BYTE_1 = 1;
  int BYTE_2 = 2;
  int BYTE_3 = 3;
  int WORD_0 = 4;
  int WORD_1 = 5;
  int DWORD  = 6;

  // dst_unused values
  int UNUSED_PAD      = 0;
  int UNUSED_SEXT     = 1;
  int UNUSED_PRESERVE = 2;
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// SDWA encoding: fills Inst{63-32} with the source, selector, modifier,
// destination-control and clamp fields. Each field is emitted as 0 when the
// profile P reports that it does not apply.
class VOP_SDWAe<VOPProfile P> : Enc64 {
  bits<8> src0;
  bits<3> src0_sel;
  bits<2> src0_modifiers; // float: {abs,neg}, int {sext}
  bits<3> src1_sel;
  bits<2> src1_modifiers;
  bits<3> dst_sel;
  bits<2> dst_unused;
  bits<1> clamp;

  // src0 register.
  let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);

  // Destination selection / unused-bits handling and clamp.
  let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, 0);
  let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, 0);
  let Inst{45}    = !if(P.HasSDWAClamp, clamp{0}, 0);

  // src0 selector and modifiers (integer: one bit; float: two bits).
  let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, 0);
  let Inst{51}    = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
  let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);

  // src1 selector and modifiers (integer: one bit; float: two bits).
  let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, 0);
  let Inst{59}    = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
  let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
}
|
|
|
|
|
2017-06-21 16:53:38 +08:00
|
|
|
// GFX9 adds two features to SDWA:
|
|
|
|
// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD.
|
|
|
|
// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather
|
|
|
|
// than VGPRs (at most 1 can be an SGPR);
|
|
|
|
// b. OMOD is the standard output modifier (result *2, *4, /2)
|
|
|
|
// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This
|
|
|
|
// replaces OMOD and the dest fields with SD and SDST (SGPR destination)
|
|
|
|
// field.
|
|
|
|
// a. When SD=1, the SDST is used as the destination for the compare result;
|
|
|
|
// b. When SD=0, VCC is used.
|
|
|
|
//
|
|
|
|
// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA
|
|
|
|
|
2017-05-23 18:08:55 +08:00
|
|
|
// gfx9 SDWA basic encoding
|
|
|
|
// Fields common to both gfx9 SDWA encodings. src0 is 9 bits wide: the low
// eight go to Inst{39-32} and bit 8 goes to Inst{55}. Inst{63} is fixed at 0
// here and is meant to be set by subclasses.
class VOP_SDWA9e<VOPProfile P> : Enc64 {
  bits<9> src0; // {src0_sgpr{0}, src0{7-0}}
  bits<3> src0_sel;
  bits<2> src0_modifiers; // float: {abs,neg}, int {sext}
  bits<3> src1_sel;
  bits<2> src1_modifiers;
  bits<1> src1_sgpr;

  // src0 register (low 8 bits) and its ninth bit.
  let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
  let Inst{55}    = !if(P.HasSrc0, src0{8}, 0);

  // src0 selector and modifiers (integer: one bit; float: two bits).
  let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, 0);
  let Inst{51}    = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
  let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);

  // src1 selector and modifiers (integer: one bit; float: two bits).
  let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, 0);
  let Inst{59}    = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
  let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);

  let Inst{63} = 0; // src1_sgpr - should be specified in subclass
}
|
|
|
|
|
|
|
|
// gfx9 SDWA-A
|
|
|
|
// gfx9 SDWA-A: adds dst_sel, dst_unused, clamp and omod in Inst{47-40} on top
// of VOP_SDWA9e. Each field is 0 when the profile P does not enable it.
class VOP_SDWA9Ae<VOPProfile P> : VOP_SDWA9e<P> {
  bits<3> dst_sel;
  bits<2> dst_unused;
  bits<1> clamp;
  bits<2> omod;

  // Destination controls.
  let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, 0);
  let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, 0);

  // Clamp and output modifier.
  let Inst{45}    = !if(P.HasSDWAClamp, clamp{0}, 0);
  let Inst{47-46} = !if(P.HasSDWAOMod, omod{1-0}, 0);
}
|
|
|
|
|
|
|
|
// gfx9 SDWA-B
|
|
|
|
// gfx9 SDWA-B: places the 8-bit sdst operand in Inst{47-40} on top of
// VOP_SDWA9e (low seven bits in Inst{46-40}, top bit in Inst{47}); all zero
// unless P.EmitDst.
class VOP_SDWA9Be<VOPProfile P> : VOP_SDWA9e<P> {
  bits<8> sdst; // {vcc_sdst{0}, sdst{6-0}}

  let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, 0);
  let Inst{47}    = !if(P.EmitDst, sdst{7}, 0);
}
|
|
|
|
|
2016-12-22 20:57:41 +08:00
|
|
|
// SDWA pseudo instruction (name suffix "_sdwa").
//
// Operand lists come from P.OutsSDWA / P.InsSDWA. Two asm operand strings
// are kept, one from P.AsmSDWA and one from P.AsmSDWA9. When the profile does
// not have HasExtSDWA, the predicates become DisableInst and the asm variant
// becomes AMDGPUAsmVariants.Disable.
class VOP_SDWA_Pseudo<string opName, VOPProfile P, list<dag> pattern=[]>
    : InstSI<P.OutsSDWA, P.InsSDWA, "", pattern>,
      VOP<opName>,
      SIMCInstr<opName#"_sdwa", SIEncodingFamily.NONE>,
      MnemonicAlias<opName#"_sdwa", opName> {
  // Data carried along with the pseudo.
  string Mnemonic = opName;
  string AsmOperands = P.AsmSDWA;
  string AsmOperands9 = P.AsmSDWA9;
  VOPProfile Pfl = P;

  // Codegen-only pseudo with a named operand table.
  let isPseudo = 1;
  let isCodeGenOnly = 1;
  let UseNamedOperandTable = 1;

  // Instruction-kind flags.
  let VALU = 1;
  let SDWA = 1;

  // Generic instruction properties.
  let Size = 8;
  let Uses = [EXEC];
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // Availability and assembler / disassembler configuration.
  let SubtargetPredicate = !if(P.HasExtSDWA, HasSDWA, DisableInst);
  let AssemblerPredicate = !if(P.HasExtSDWA, HasSDWA, DisableInst);
  let AsmVariantName = !if(P.HasExtSDWA, AMDGPUAsmVariants.SDWA,
                                         AMDGPUAsmVariants.Disable);
  let DecoderNamespace = "SDWA";
}
|
|
|
|
|
|
|
|
// Real (encodable) SDWA instruction derived from the pseudo `ps`, using
// ps.AsmOperands for the asm string and the SIEncodingFamily.SDWA family.
//
// Each field is assigned exactly once. Earlier revisions assigned Constraints
// and DisableEncoding twice with the same value; the result is unchanged.
class VOP_SDWA_Real<VOP_SDWA_Pseudo ps>
    : InstSI<ps.OutOperandList, ps.InOperandList,
             ps.Mnemonic # ps.AsmOperands, []>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SDWA> {
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // Implicit registers, scheduling info and side effects from the pseudo.
  let Defs = ps.Defs;
  let Uses = ps.Uses;
  let SchedRW = ps.SchedRW;
  let hasSideEffects = ps.hasSideEffects;

  // Operand constraints from the pseudo.
  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  // Copy relevant pseudo op flags
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AssemblerPredicate = ps.AssemblerPredicate;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let AsmVariantName = ps.AsmVariantName;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let DecoderNamespace = ps.DecoderNamespace;
  let TSFlags = ps.TSFlags;
}
|
|
|
|
|
2017-06-21 16:53:38 +08:00
|
|
|
// Real (encodable) gfx9 SDWA instruction derived from the pseudo `ps`, using
// ps.AsmOperands9 for the asm string and the SIEncodingFamily.SDWA9 family.
//
// Unlike the remaining flags, the predicates, asm variant and decoder
// namespace are not copied from the pseudo: they are recomputed from
// ps.Pfl.HasExtSDWA9, falling back to DisableInst /
// AMDGPUAsmVariants.Disable when that is not set.
//
// Each field is assigned exactly once. Earlier revisions assigned Constraints
// and DisableEncoding twice with the same value; the result is unchanged.
class VOP_SDWA9_Real<VOP_SDWA_Pseudo ps>
    : InstSI<ps.OutOperandList, ps.InOperandList,
             ps.Mnemonic # ps.AsmOperands9, []>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SDWA9> {
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // Implicit registers, scheduling info and side effects from the pseudo.
  let Defs = ps.Defs;
  let Uses = ps.Uses;
  let SchedRW = ps.SchedRW;
  let hasSideEffects = ps.hasSideEffects;

  // Operand constraints from the pseudo.
  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  // gfx9-specific availability and assembler / disassembler configuration.
  let SubtargetPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA9, DisableInst);
  let AssemblerPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA9, DisableInst);
  let AsmVariantName = !if(ps.Pfl.HasExtSDWA9, AMDGPUAsmVariants.SDWA9,
                                               AMDGPUAsmVariants.Disable);
  let DecoderNamespace = "SDWA9";

  // Copy relevant pseudo op flags
  let AsmMatchConverter = ps.AsmMatchConverter;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let TSFlags = ps.TSFlags;
}
|
|
|
|
|
|
|
|
// DPP encoding: fills Inst{63-32} with src0, the DPP control fields and the
// source modifier bits. src0 and the modifier bits are 0 when the profile P
// lacks them; the control and mask fields are always emitted.
class VOP_DPPe<VOPProfile P> : Enc64 {
  bits<2> src0_modifiers;
  bits<8> src0;
  bits<2> src1_modifiers;
  bits<9> dpp_ctrl;
  bits<1> bound_ctrl;
  bits<4> bank_mask;
  bits<4> row_mask;

  // src0 register.
  let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);

  // DPP control and lane masks.
  let Inst{48-40} = dpp_ctrl;
  let Inst{51}    = bound_ctrl;
  let Inst{59-56} = bank_mask;
  let Inst{63-60} = row_mask;

  // Source modifiers.
  let Inst{52} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg
  let Inst{53} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs
  let Inst{54} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // src1_neg
  let Inst{55} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // src1_abs
}
|
|
|
|
|
|
|
|
// DPP instruction: operand lists come from P.OutsDPP / P.InsDPP, the asm
// string is OpName followed by P.AsmDPP, and the encoding is VOP_DPPe<P>.
//
// When the profile does not have HasExtDPP, the assembler predicate becomes
// DisableInst and the asm variant becomes AMDGPUAsmVariants.Disable.
class VOP_DPP<string OpName, VOPProfile P>
    : InstSI<P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, []>,
      VOP_DPPe<P> {
  // Generic instruction properties.
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let UseNamedOperandTable = 1;

  // Instruction-kind flags and size.
  let VALU = 1;
  let DPP = 1;
  let Size = 8;

  // Availability and assembler / disassembler configuration.
  let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
  let SubtargetPredicate = HasDPP;
  let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst);
  let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
                                        AMDGPUAsmVariants.Disable);
  let DecoderNamespace = "DPP";

  // A DPP instruction may leave some lanes of its output unwritten (if
  // bound_ctrl=1 is set, or any bits in bank_mask or row_mask aren't set), so
  // the destination register may be both defined and modified. Tying the
  // destination to the first source would be too restrictive, so a pseudo
  // "old" source is tied to the destination instead; "old" defines the value
  // that unwritten lanes get. "$old" is excluded from the encoding. Both
  // settings apply only when the profile has source arguments.
  // (Rationale from llvm-svn 310283, https://reviews.llvm.org/D34716.)
  let Constraints = !if(P.NumSrcArgs, "$old = $vdst", "");
  let DisableEncoding = !if(P.NumSrcArgs, "$old", "");
}
|
|
|
|
|
2018-09-21 18:31:22 +08:00
|
|
|
// Yields, in `ret`, the NumOperands value of the type profile of `Op` after
// casting `Op` to an SDNode.
class getNumNodeArgs<SDPatternOperator Op> {
  SDNode N = !cast<SDNode>(Op);
  SDTypeProfile TP = N.TypeProfile;
  int ret = TP.NumOperands;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Builds, in `ret`, a PatFrag that wraps `Op` and matches only when
// N->isDivergent() holds. The operand list and the wrapped fragment use one
// operand when Op has one, two when it has two, and three otherwise.
class getDivergentFrag<SDPatternOperator Op> {
  int NumSrcArgs = getNumNodeArgs<Op>.ret;

  PatFrag ret = PatFrag<
    // Operand list.
    !if(!eq(NumSrcArgs, 1),
        (ops node:$src0),
        !if(!eq(NumSrcArgs, 2),
            (ops node:$src0, node:$src1),
            (ops node:$src0, node:$src1, node:$src2))),
    // Wrapped fragment.
    !if(!eq(NumSrcArgs, 1),
        (Op $src0),
        !if(!eq(NumSrcArgs, 2),
            (Op $src0, $src1),
            (Op $src0, $src1, $src2))),
    // Predicate.
    [{ return N->isDivergent(); }]
  >;
}
|
|
|
|
|
|
|
|
// Generates, in `ret`, a one-element pattern list for `Op` from the 32-bit
// operand lists of profile P.
//
//   Operator - the divergent-only PatFrag for Op.
//   Ins      - P.Ins32 with `ins` replaced by Operator and the Src0/Src1
//              register classes replaced by their value types.
//   Outs     - P.Outs32 with `outs` replaced by `set` and the destination
//              register class replaced by its value type.
class VOPPatGen<SDPatternOperator Op, VOPProfile P> {
  PatFrag Operator = getDivergentFrag<Op>.ret;

  dag Ins = !foreach(tmp, P.Ins32,
                     !subst(ins, Operator,
                            !subst(P.Src0RC32, P.Src0VT,
                                   !subst(P.Src1RC32, P.Src1VT, tmp))));

  dag Outs = !foreach(tmp, P.Outs32,
                      !subst(outs, set,
                             !subst(P.DstRC, P.DstVT, tmp)));

  list<dag> ret = [!con(Outs, (set Ins))];
}
|
|
|
|
|
|
|
|
// Yields the generated pattern list for `Op` unless P.NeedPatGen is
// PatGenMode.NoPattern, in which case `ret` is the empty list.
class VOPPatOrNull<SDPatternOperator Op, VOPProfile P> {
  list<dag> ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern),
                      VOPPatGen<Op, P>.ret,
                      []);
}
|
|
|
|
|
2018-10-01 19:06:35 +08:00
|
|
|
// Yields the divergent-only PatFrag for `Op` when P.NeedPatGen is
// PatGenMode.Pattern and `Op` is an SDNode; in every other case `ret` is
// `Op` itself.
class DivergentFragOrOp<SDPatternOperator Op, VOPProfile P> {
  SDPatternOperator ret =
      !if(!eq(P.NeedPatGen,PatGenMode.Pattern),
          !if(!isa<SDNode>(Op), getDivergentFrag<Op>.ret, Op),
          Op);
}
|
|
|
|
|
2016-09-19 22:39:49 +08:00
|
|
|
include "VOPCInstructions.td"
|
2016-09-23 17:08:07 +08:00
|
|
|
include "VOP1Instructions.td"
|
|
|
|
include "VOP2Instructions.td"
|
2016-09-20 18:41:16 +08:00
|
|
|
include "VOP3Instructions.td"
|
2017-02-28 02:49:11 +08:00
|
|
|
include "VOP3PInstructions.td"
|