[AMDGPU] Assembler: basic support for SDWA instructions

Support for SDWA instructions for VOP1 and VOP2 encoding.
Not done yet:
  - converters to support optional operands and modifiers
  - VOPC
  - sext() modifier
  - intrinsics
  - VOP2b (see vop_dpp.s)
  - V_MAC_F32 (see vop_dpp.s)

Differential Revision: http://reviews.llvm.org/D19360

llvm-svn: 267553
This commit is contained in:
Sam Kolton 2016-04-26 13:33:56 +00:00
parent 323ab3975b
commit 3025e7f25f
9 changed files with 452 additions and 56 deletions

View File

@ -74,6 +74,8 @@ public:
ImmTyDppRowMask,
ImmTyDppBankMask,
ImmTyDppBoundCtrl,
ImmTySdwaSel,
ImmTySdwaDstUnused,
ImmTyDMask,
ImmTyUNorm,
ImmTyDA,
@ -253,6 +255,14 @@ public:
return isImmTy(ImmTyDppBoundCtrl);
}
/// Reports whether this operand is an immediate tagged ImmTySdwaSel.
bool isSDWASel() const {
  const bool HasSelTag = isImmTy(ImmTySdwaSel);
  return HasSelTag;
}
/// Reports whether this operand is an immediate tagged ImmTySdwaDstUnused.
bool isSDWADstUnused() const {
  const bool HasDstUnusedTag = isImmTy(ImmTySdwaDstUnused);
  return HasDstUnusedTag;
}
void setModifiers(unsigned Mods) {
assert(isReg() || (isImm() && Imm.Modifiers == 0));
if (isReg())
@ -522,6 +532,7 @@ public:
OperandMatchResultTy parseOptionalOps(
const ArrayRef<OptionalOperand> &OptionalOps,
OperandVector &Operands);
OperandMatchResultTy parseStringWithPrefix(const char *Prefix, StringRef &Value);
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
@ -569,6 +580,9 @@ public:
void cvtDPP_mod(MCInst &Inst, const OperandVector &Operands);
void cvtDPP_nomod(MCInst &Inst, const OperandVector &Operands);
void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool HasMods);
OperandMatchResultTy parseSDWASel(OperandVector &Operands);
OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
};
struct OptionalOperand {
@ -1396,6 +1410,30 @@ AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
return MatchOperand_NoMatch;
}
/// Recognises the token sequence `<Prefix> ':' <identifier>`.
///
/// \param Prefix  identifier that must appear first.
/// \param Value   receives the text of the identifier after the colon.
///
/// \returns MatchOperand_NoMatch (nothing consumed) when the current token is
///          not the identifier \p Prefix; MatchOperand_ParseFail when the
///          prefix was consumed but the colon or the following identifier is
///          missing; MatchOperand_Success otherwise.
///
/// On success the value identifier is still the current token: it is read
/// into \p Value but not lexed past, so the caller has to consume it.
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(const char *Prefix, StringRef &Value) {
  // Bail out without touching the token stream unless we are looking at the
  // expected prefix identifier.
  if (getLexer().isNot(AsmToken::Identifier) ||
      Parser.getTok().getString() != Prefix)
    return MatchOperand_NoMatch;

  Parser.Lex(); // consume the prefix
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex(); // consume ':'
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;

  Value = Parser.getTok().getString();
  return MatchOperand_Success;
}
//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//
@ -2296,6 +2334,80 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}
//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//
/// Parses one SDWA data-select operand of the form `dst_sel:<NAME>`,
/// `src0_sel:<NAME>` or `src1_sel:<NAME>`.
///
/// <NAME> is translated to an immediate: BYTE_0..BYTE_3 -> 0..3,
/// WORD_0 -> 4, WORD_1 -> 5, DWORD -> 6. Whichever prefix matched, the
/// resulting operand carries the same ImmTySdwaSel tag.
///
/// \returns MatchOperand_NoMatch when none of the prefixes is present,
///          MatchOperand_ParseFail on a malformed operand or unknown <NAME>,
///          MatchOperand_Success after appending the operand to \p Operands.
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands) {
  const SMLoc StartLoc = Parser.getTok().getLoc();
  StringRef SelName;

  // Try the accepted prefixes in order; stop at the first one that either
  // matches or fails after having consumed input.
  static const char *const Prefixes[] = {"dst_sel", "src0_sel", "src1_sel"};
  AMDGPUAsmParser::OperandMatchResultTy Status = MatchOperand_NoMatch;
  for (const char *Prefix : Prefixes) {
    Status = parseStringWithPrefix(Prefix, SelName);
    if (Status != MatchOperand_NoMatch)
      break;
  }
  if (Status != MatchOperand_Success)
    return Status;

  // Sentinel returned by the switch for names that are not selectors.
  const int64_t UnknownSel = 0xffffffff;
  const int64_t SelCode = StringSwitch<int64_t>(SelName)
                              .Case("BYTE_0", 0)
                              .Case("BYTE_1", 1)
                              .Case("BYTE_2", 2)
                              .Case("BYTE_3", 3)
                              .Case("WORD_0", 4)
                              .Case("WORD_1", 5)
                              .Case("DWORD", 6)
                              .Default(UnknownSel);

  // The selector name is consumed even when it turns out to be unknown.
  Parser.Lex();
  if (SelCode == UnknownSel)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(SelCode, StartLoc,
                                              AMDGPUOperand::ImmTySdwaSel));
  return MatchOperand_Success;
}
/// Parses an SDWA `dst_unused:<NAME>` operand.
///
/// <NAME> is translated to an immediate: UNUSED_PAD -> 0, UNUSED_SEXT -> 1,
/// UNUSED_PRESERVE -> 2, and the operand is tagged ImmTySdwaDstUnused.
///
/// \returns whatever parseStringWithPrefix reported when it did not succeed,
///          MatchOperand_ParseFail for an unknown <NAME>, and
///          MatchOperand_Success after appending the operand to \p Operands.
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  const SMLoc StartLoc = Parser.getTok().getLoc();
  StringRef ModeName;

  const AMDGPUAsmParser::OperandMatchResultTy Status =
      parseStringWithPrefix("dst_unused", ModeName);
  if (Status != MatchOperand_Success)
    return Status;

  // Sentinel returned by the switch for names that are not valid modes.
  const int64_t UnknownMode = 0xffffffff;
  const int64_t ModeCode = StringSwitch<int64_t>(ModeName)
                               .Case("UNUSED_PAD", 0)
                               .Case("UNUSED_SEXT", 1)
                               .Case("UNUSED_PRESERVE", 2)
                               .Default(UnknownMode);

  // The mode name is consumed even when it turns out to be unknown.
  Parser.Lex();
  if (ModeCode == UnknownMode)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(
      ModeCode, StartLoc, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {

View File

@ -282,6 +282,8 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
O << "_e64 ";
else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::DPP)
O << "_dpp ";
else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SDWA)
O << "_sdwa ";
else
O << "_e32 ";
@ -479,6 +481,51 @@ void AMDGPUInstPrinter::printBoundCtrlOperand(const MCInst *MI, unsigned OpNo,
}
}
void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
switch (Imm) {
case 0: O << "BYTE_0"; break;
case 1: O << "BYTE_1"; break;
case 2: O << "BYTE_2"; break;
case 3: O << "BYTE_3"; break;
case 4: O << "WORD_0"; break;
case 5: O << "WORD_1"; break;
case 6: O << "DWORD"; break;
default: llvm_unreachable("Invalid SDWA data select operand");
}
}
/// Emits the literal `dst_sel:` followed by the selector name that
/// printSDWASel produces for operand \p OpNo.
void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
                                        raw_ostream &O) {
  O << "dst_sel:";            // operand prefix
  printSDWASel(MI, OpNo, O);  // symbolic selector value
}
/// Emits the literal `src0_sel:` followed by the selector name that
/// printSDWASel produces for operand \p OpNo.
void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
                                         raw_ostream &O) {
  O << "src0_sel:";           // operand prefix
  printSDWASel(MI, OpNo, O);  // symbolic selector value
}
/// Emits the literal `src1_sel:` followed by the selector name that
/// printSDWASel produces for operand \p OpNo.
void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
                                         raw_ostream &O) {
  O << "src1_sel:";           // operand prefix
  printSDWASel(MI, OpNo, O);  // symbolic selector value
}
void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
O << "dst_unused:";
unsigned Imm = MI->getOperand(OpNo).getImm();
switch (Imm) {
case 0: O << "UNUSED_PAD"; break;
case 1: O << "UNUSED_SEXT"; break;
case 2: O << "UNUSED_PRESERVE"; break;
default: llvm_unreachable("Invalid SDWA dest_unused operand");
}
}
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();

View File

@ -67,6 +67,11 @@ private:
void printRowMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printBankMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printBoundCtrlOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSDWADstSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSDWASrc1Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSDWADstUnused(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,

View File

@ -29,17 +29,18 @@ enum {
VOP2 = 1 << 11,
VOP3 = 1 << 12,
VOPC = 1 << 13,
DPP = 1 << 14,
SDWA = 1 << 14,
DPP = 1 << 15,
MUBUF = 1 << 15,
MTBUF = 1 << 16,
SMRD = 1 << 17,
DS = 1 << 18,
MIMG = 1 << 19,
FLAT = 1 << 20,
WQM = 1 << 21,
VGPRSpill = 1 << 22,
VOPAsmPrefer32Bit = 1 << 23
MUBUF = 1 << 16,
MTBUF = 1 << 17,
SMRD = 1 << 18,
DS = 1 << 19,
MIMG = 1 << 20,
FLAT = 1 << 21,
WQM = 1 << 22,
VGPRSpill = 1 << 23,
VOPAsmPrefer32Bit = 1 << 24
};
}

View File

@ -31,6 +31,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> VOP2 = 0;
field bits<1> VOP3 = 0;
field bits<1> VOPC = 0;
field bits<1> SDWA = 0;
field bits<1> DPP = 0;
field bits<1> MUBUF = 0;
@ -64,17 +65,18 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{11} = VOP2;
let TSFlags{12} = VOP3;
let TSFlags{13} = VOPC;
let TSFlags{14} = DPP;
let TSFlags{14} = SDWA;
let TSFlags{15} = DPP;
let TSFlags{15} = MUBUF;
let TSFlags{16} = MTBUF;
let TSFlags{17} = SMRD;
let TSFlags{18} = DS;
let TSFlags{19} = MIMG;
let TSFlags{20} = FLAT;
let TSFlags{21} = WQM;
let TSFlags{22} = VGPRSpill;
let TSFlags{23} = VOPAsmPrefer32Bit;
let TSFlags{16} = MUBUF;
let TSFlags{17} = MTBUF;
let TSFlags{18} = SMRD;
let TSFlags{19} = DS;
let TSFlags{20} = MIMG;
let TSFlags{21} = FLAT;
let TSFlags{22} = WQM;
let TSFlags{23} = VGPRSpill;
let TSFlags{24} = VOPAsmPrefer32Bit;
let SchedRW = [Write32Bit];

View File

@ -576,6 +576,22 @@ class DPPOptionalMatchClass <string OpName>: AsmOperandClass {
let IsOptional = 1;
}
// Assembler operand class for SDWA data-select operands. Matching uses the
// isSDWASel predicate, parsing is delegated to parseSDWASel, and the operand
// is rendered through addImmOperands. The operand is marked optional.
def SDWASelMatchClass : AsmOperandClass {
let Name = "SDWASel";
let PredicateMethod = "isSDWASel";
let ParserMethod = "parseSDWASel";
let RenderMethod = "addImmOperands";
let IsOptional = 1;
}
// Assembler operand class for the SDWA dst_unused operand. Matching uses the
// isSDWADstUnused predicate, parsing is delegated to parseSDWADstUnused, and
// the operand is rendered through addImmOperands. The operand is marked
// optional.
def SDWADstUnusedMatchClass : AsmOperandClass {
let Name = "SDWADstUnused";
let PredicateMethod = "isSDWADstUnused";
let ParserMethod = "parseSDWADstUnused";
let RenderMethod = "addImmOperands";
let IsOptional = 1;
}
class OptionalImmAsmOperand <string OpName> : AsmOperandClass {
let Name = "Imm"#OpName;
let PredicateMethod = "isImm";
@ -737,11 +753,31 @@ def bound_ctrl : Operand <i1> {
let ParserMatchClass = DPPOptionalMatchClass<"BoundCtrl">;
}
// SDWA destination data-select operand (i32). Printed by printSDWADstSel and
// matched through SDWASelMatchClass.
def dst_sel : Operand <i32> {
let PrintMethod = "printSDWADstSel";
let ParserMatchClass = SDWASelMatchClass;
}
// SDWA source-0 data-select operand (i32). Printed by printSDWASrc0Sel and
// matched through SDWASelMatchClass.
def src0_sel : Operand <i32> {
let PrintMethod = "printSDWASrc0Sel";
let ParserMatchClass = SDWASelMatchClass;
}
// SDWA source-1 data-select operand (i32). Printed by printSDWASrc1Sel and
// matched through SDWASelMatchClass.
def src1_sel : Operand <i32> {
let PrintMethod = "printSDWASrc1Sel";
let ParserMatchClass = SDWASelMatchClass;
}
// i16 operand printed by printHwreg and matched through HwregMatchClass.
def hwreg : Operand <i16> {
let PrintMethod = "printHwreg";
let ParserMatchClass = HwregMatchClass;
}
// SDWA dst_unused operand (i32). Printed by printSDWADstUnused and matched
// through SDWADstUnusedMatchClass.
def dst_unused : Operand <i32> {
let PrintMethod = "printSDWADstUnused";
let ParserMatchClass = SDWADstUnusedMatchClass;
}
} // End OperandType = "OPERAND_IMMEDIATE"
@ -1316,16 +1352,11 @@ class getVOPSrc0ForVT<ValueType VT> {
RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32);
}
// Returns the register class to use for source 1 of VOP[12C] for the
// given VT.
class getVOPSrc1ForVT<ValueType VT> {
// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT> {
RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
}
// Returns the register class to use for DPP source operands.
class getDPPSrcForVT<ValueType VT> {
RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
}
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
@ -1431,7 +1462,40 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
/* endif */)));
}
class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
// Builds the input operand list (`ret`) for an SDWA instruction.
//   Src0RC / Src1RC - register classes used for $src0 / $src1.
//   NumSrcArgs      - 0 yields an empty list, 1 yields the one-source form,
//                     any other value yields the two-source form.
//   HasModifiers    - when 1, adds $src0_modifiers / $src1_modifiers and
//                     $clamp ahead of the SDWA selector operands.
// Every non-empty form ends with dst_sel, dst_unused and src0_sel; the
// two-source forms additionally end with src1_sel.
class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
bit HasModifiers> {
dag ret = !if (!eq(NumSrcArgs, 0),
// VOP1 without input operands (V_NOP)
(ins),
!if (!eq(NumSrcArgs, 1),
!if (!eq(HasModifiers, 1),
// VOP1_SDWA with modifiers
(ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel)
/* else */,
// VOP1_SDWA without modifiers
(ins Src0RC:$src0, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel)
/* endif */)
/* NumSrcArgs == 2 */,
!if (!eq(HasModifiers, 1),
// VOP2_SDWA with modifiers
(ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel)
/* else */,
// VOP2_SDWA without modifiers
(ins Src0RC:$src0, Src1RC:$src1,
dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel)
/* endif */)));
}
// Outs for DPP and SDWA
class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
dag ret = !if(HasDst,
!if(!eq(DstVT.Size, 1),
(outs DstRCDPP:$sdst), // sdst for VOPC
@ -1484,20 +1548,41 @@ class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =
string ret = dst#args#" $dpp_ctrl $row_mask $bank_mask $bound_ctrl";
}
class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
// Builds the assembly operand string (`ret`) for an SDWA instruction as the
// concatenation dst # args # sdwa.
//   dst  - "$sdst" when DstVT.Size is 1, "$vdst" for any other destination,
//          empty when HasDst is 0.
//   args - when HasModifiers is 0, taken from getAsm32<0, NumSrcArgs, DstVT>;
//          otherwise built from $src0_modifiers / $src1_modifiers and
//          terminated with ", $clamp".
//   sdwa - the trailing selector operands: empty for 0 sources, without
//          $src1_sel for 1 source, with $src1_sel otherwise.
class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
string dst = !if(HasDst,
!if(!eq(DstVT.Size, 1),
"$sdst",
"$vdst"),
""); // use $sdst for VOPC
// A single source gets no trailing comma; otherwise a comma separates it from
// the operand that follows.
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
// Empty for one source; trailing comma only when NumSrcArgs is neither 1 nor 2.
string src1 = !if(!eq(NumSrcArgs, 1), "",
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
" $src1_modifiers,"));
string args = !if(!eq(HasModifiers, 0),
getAsm32<0, NumSrcArgs, DstVT>.ret,
", "#src0#src1#", $clamp");
string sdwa = !if(!eq(NumSrcArgs, 0),
"",
!if(!eq(NumSrcArgs, 1),
" $dst_sel $dst_unused $src0_sel",
" $dst_sel $dst_unused $src0_sel $src1_sel"
)
);
string ret = dst#args#sdwa;
}
// Function that checks if instruction supports DPP and SDWA
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
ValueType Src1VT = i32> {
bit ret = !if(!eq(NumSrcArgs, 3),
0, // NumSrcArgs == 3 - No DPP for VOP3
!if(!eq(DstVT.Size, 1),
0, // No DPP for VOPC
!if(!eq(DstVT.Size, 64),
0, // 64-bit dst - No DPP for 64-bit operands
0, // NumSrcArgs == 3 - No DPP or SDWA for VOP3
!if(!eq(DstVT.Size, 64),
0, // 64-bit dst - No DPP or SDWA for 64-bit operands
!if(!eq(Src0VT.Size, 64),
0, // 64-bit src0
!if(!eq(Src0VT.Size, 64),
0, // 64-bit src0
!if(!eq(Src0VT.Size, 64),
0, // 64-bit src2
1
)
0, // 64-bit src2
1
)
)
)
@ -1514,41 +1599,47 @@ class VOPProfile <list<ValueType> _ArgVT> {
field ValueType Src2VT = ArgVT[3];
field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
field RegisterOperand DstRCSDWA = getVALUDstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
field RegisterClass Src0DPP = getDPPSrcForVT<Src0VT>.ret;
field RegisterClass Src1DPP = getDPPSrcForVT<Src1VT>.ret;
field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret;
field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret;
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
field bit HasDst32 = HasDst;
field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
field bit HasModifiers = hasModifiers<Src0VT>.ret;
field bit HasDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
// VOP3b instructions are a special case with a second explicit
// output. This is manually overridden for them.
field dag Outs32 = Outs;
field dag Outs64 = Outs;
field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
field dag OutsSDWA = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasModifiers>.ret;
field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs, HasModifiers>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, HasModifiers>.ret;
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
}
class VOP_NO_DPP <VOPProfile p> : VOPProfile <p.ArgVT> {
let HasDPP = 0;
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
let HasExt = 0;
}
// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
@ -1659,12 +1750,12 @@ def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32imm:$imm);
field string Asm32 = "$vdst, $src0, $src1, $imm";
field bit HasDPP = 0;
field bit HasExt = 0;
}
def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins32 = (ins VCSrc_32:$src0, u32imm:$imm, VGPR_32:$src1);
field string Asm32 = "$vdst, $src0, $imm, $src1";
field bit HasDPP = 0;
field bit HasExt = 0;
}
def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
@ -1675,9 +1766,15 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let InsSDWA = (ins InputModsNoDefault:$src0_modifiers, Src0RC32:$src0,
InputModsNoDefault:$src1_modifiers, Src1RC32:$src1,
VGPR_32:$src2, // stub argument
ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm32 = getAsm32<1, 2, f32>.ret;
let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret;
let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
@ -1787,13 +1884,37 @@ multiclass VOP1_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
class VOP1_DPP <vop1 op, string opName, VOPProfile p> :
VOP1_DPPe <op.VI>,
VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
let AssemblerPredicates = !if(p.HasDPP, [isVI], [DisableInst]);
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
let DecoderNamespace = "DPP";
let DisableDecoder = DisableVIDecoder;
let src0_modifiers = !if(p.HasModifiers, ?, 0);
let src1_modifiers = 0;
}
// Supplies fixed values for SDWA encoding fields that profile `p` does not
// use; `?` leaves a field unset so it is taken from the instruction's
// operands instead.
//   - No sources (NumSrcArgs == 0): src0 = 0, src0_sel = 6, src1_sel = 6,
//     src1_modifiers = 0.
//   - One source: src1_sel = 6, src1_modifiers = 0.
//   - No modifiers (HasModifiers == 0): src0_modifiers, src1_modifiers and
//     clamp are 0.
//   - No destination (HasDst == 0): dst_sel = 6, dst_unused = 0.
// The selector value 6 is the one the assembler and printer associate with
// the name DWORD; dst_unused 0 is the one associated with UNUSED_PAD.
class SDWADisableFields <VOPProfile p> {
bits<8> src0 = !if(!eq(p.NumSrcArgs, 0), 0, ?);
bits<3> src0_sel = !if(!eq(p.NumSrcArgs, 0), 6, ?);
bits<3> src0_modifiers = !if(p.HasModifiers, ?, 0);
bits<3> src1_sel = !if(!eq(p.NumSrcArgs, 0), 6,
!if(!eq(p.NumSrcArgs, 1), 6,
?));
bits<3> src1_modifiers = !if(!eq(p.NumSrcArgs, 0), 0,
!if(!eq(p.NumSrcArgs, 1), 0,
!if(p.HasModifiers, ?, 0)));
bits<3> dst_sel = !if(p.HasDst, ?, 6);
bits<2> dst_unused = !if(p.HasDst, ?, 0);
bits<1> clamp = !if(p.HasModifiers, ?, 0);
}
// SDWA form of a VOP1 instruction. Combines the VOP1 SDWA encoding for
// op.VI, the operand lists and asm string taken from profile `p`
// (OutsSDWA / InsSDWA / AsmSDWA, no selection pattern), and the field
// defaults from SDWADisableFields.
class VOP1_SDWA <vop1 op, string opName, VOPProfile p> :
VOP1_SDWAe <op.VI>,
VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
SDWADisableFields <p> {
// Assembled only under isVI, and only when the profile sets HasExt;
// otherwise the DisableInst predicate is used.
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
let DecoderNamespace = "SDWA";
let DisableDecoder = DisableVIDecoder;
}
multiclass VOP1SI_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
string asm = opName#p.Asm32> {
@ -1851,13 +1972,22 @@ multiclass VOP2_m <vop2 op, string opName, VOPProfile p, list <dag> pattern,
class VOP2_DPP <vop2 op, string opName, VOPProfile p> :
VOP2_DPPe <op.VI>,
VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
let AssemblerPredicates = !if(p.HasDPP, [isVI], [DisableInst]);
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
let DecoderNamespace = "DPP";
let DisableDecoder = DisableVIDecoder;
let src0_modifiers = !if(p.HasModifiers, ?, 0);
let src1_modifiers = !if(p.HasModifiers, ?, 0);
}
// SDWA form of a VOP2 instruction. Combines the VOP2 SDWA encoding for
// op.VI, the operand lists and asm string taken from profile `p`
// (OutsSDWA / InsSDWA / AsmSDWA, no selection pattern), and the field
// defaults from SDWADisableFields.
class VOP2_SDWA <vop2 op, string opName, VOPProfile p> :
VOP2_SDWAe <op.VI>,
VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
SDWADisableFields <p> {
// Assembled only under isVI, and only when the profile sets HasExt;
// otherwise the DisableInst predicate is used.
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
let DecoderNamespace = "SDWA";
let DisableDecoder = DisableVIDecoder;
}
class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
@ -2089,6 +2219,8 @@ multiclass VOP1_Helper <vop1 op, string opName, VOPProfile p, list<dag> pat32,
p.HasModifiers>;
def _dpp : VOP1_DPP <op, opName, p>;
def _sdwa : VOP1_SDWA <op, opName, p>;
}
multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
@ -2122,6 +2254,8 @@ multiclass VOP2_Helper <vop2 op, string opName, VOPProfile p, list<dag> pat32,
revOp, p.HasModifiers>;
def _dpp : VOP2_DPP <op, opName, p>;
def _sdwa : VOP2_SDWA <op, opName, p>;
}
multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,

View File

@ -1394,13 +1394,13 @@ defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
VOP_F32_F32, int_amdgcn_frexp_mant
>;
let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NO_DPP<VOP_NONE>>;
defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}
let Uses = [M0, EXEC] in {
defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_NO_DPP<VOP_I32_I32>>;
defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_NO_DPP<VOP_I32_I32>>;
defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_NO_DPP<VOP_I32_I32>>;
defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_NO_EXT<VOP_I32_I32>>;
defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_NO_EXT<VOP_I32_I32>>;
defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
} // End Uses = [M0, EXEC]
// These instruction only exist on SI and CI

View File

@ -225,6 +225,61 @@ class VOP2_DPPe <bits<6> op> : VOP_DPPe {
let Inst{31} = 0x0; //encoding
}
// Common base for SDWA instruction definitions: forwards outs/ins/asm/pattern
// to VOPAnyCommon, sets the SDWA flag and fixes the instruction size at 8.
// NOTE(review): the HasMods parameter is not referenced in this body.
class VOP_SDWA <dag outs, dag ins, string asm, list<dag> pattern, bit HasMods = 0> :
VOPAnyCommon <outs, ins, asm, pattern> {
let SDWA = 1;
let Size = 8;
}
// Encoding fields shared by all SDWA instructions (derived from Enc64).
// Places the SDWA-specific fields in Inst{61-32}; Inst{31-0} is filled in by
// the VOP1/VOP2/VOPC subclasses. Bits 47-46, 55-54 and 63-62 are not assigned
// here.
class VOP_SDWAe : Enc64 {
bits<8> src0;
bits<3> src0_sel;
bits<3> src0_modifiers; // {abs,neg,sext}
bits<3> src1_sel;
bits<3> src1_modifiers;
bits<3> dst_sel;
bits<2> dst_unused;
bits<1> clamp;
let Inst{39-32} = src0;
let Inst{42-40} = dst_sel;
let Inst{44-43} = dst_unused;
let Inst{45} = clamp;
let Inst{50-48} = src0_sel;
let Inst{53-51} = src0_modifiers;
let Inst{58-56} = src1_sel;
let Inst{61-59} = src1_modifiers;
}
// Low 32 bits of the VOP1 SDWA encoding: the constant 0xf9 in Inst{8-0},
// the 8-bit opcode in Inst{16-9}, vdst in Inst{24-17} and the constant 0x3f
// in Inst{31-25}.
class VOP1_SDWAe <bits<8> op> : VOP_SDWAe {
bits<8> vdst;
let Inst{8-0} = 0xf9; // sdwa
let Inst{16-9} = op;
let Inst{24-17} = vdst;
let Inst{31-25} = 0x3f; // encoding
}
// Low 32 bits of the VOP2 SDWA encoding: the constant 0xf9 in Inst{8-0},
// src1 in Inst{16-9}, vdst in Inst{24-17}, the 6-bit opcode in Inst{30-25}
// and a zero bit in Inst{31}.
class VOP2_SDWAe <bits<6> op> : VOP_SDWAe {
bits<8> vdst;
bits<8> src1;
let Inst{8-0} = 0xf9; // sdwa
let Inst{16-9} = src1;
let Inst{24-17} = vdst;
let Inst{30-25} = op;
let Inst{31} = 0x0; // encoding
}
// Low 32 bits of the VOPC SDWA encoding: the constant 0xf9 in Inst{8-0},
// src1 in Inst{16-9}, the 8-bit opcode in Inst{24-17} and the constant 0x3e
// in Inst{31-25}. No vdst field is declared here.
class VOPC_SDWAe <bits<8> op> : VOP_SDWAe {
bits<8> src1;
let Inst{8-0} = 0xf9; // sdwa
let Inst{16-9} = src1;
let Inst{24-17} = op;
let Inst{31-25} = 0x3e; // encoding
}
class EXPe_vi : EXPe {
let Inst{31-26} = 0x31; //encoding
}

View File

@ -0,0 +1,40 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI
// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI
// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI
// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI
// ToDo: converters
// ToDo: VOPC
// ToDo: VOP2b (see vop_dpp.s)
// ToDo: V_MAC_F32 (see vop_dpp.s)
// ToDo: sext()
// ToDo: intrinsics
// NOSICI: error:
// VI: v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x02,0x10,0x06,0x06]
v_mov_b32 v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD
// NOSICI: error:
// VI: v_mov_b32_sdwa v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x06,0x7e,0x04,0x11,0x05,0x06]
v_mov_b32 v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1
// NOSICI: error:
// VI: v_mov_b32_sdwa v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0 ; encoding: [0xf9,0x02,0x1e,0x7e,0x63,0x0a,0x04,0x06]
v_mov_b32 v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0
// NOSICI: error:
// VI: v_min_u32_sdwa v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 ; encoding: [0xf9,0x02,0x84,0x1d,0x0d,0x0b,0x03,0x02]
v_min_u32 v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2
// NOSICI: error:
// VI: v_min_u32_sdwa v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1 ; encoding: [0xf9,0x02,0xfe,0x1d,0x04,0x04,0x02,0x05]
v_min_u32 v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1
// NOSICI: error:
// VI: v_min_u32_sdwa v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x90,0x1d,0xc8,0x05,0x01,0x06]
v_min_u32 v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
// NOSICI: error:
// VI: v_min_u32_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x1c,0x01,0x06,0x00,0x06]
v_min_u32 v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD