forked from OSchip/llvm-project
[AMDGPU] Assembler: basic support for SDWA instructions
Support for SDWA instructions for VOP1 and VOP2 encoding. Not done yet: - converters to support optional operands and modifiers - VOPC - sext() modifier - intrinsics - VOP2b (see vop_dpp.s) - V_MAC_F32 (see vop_dpp.s) Differential Revision: http://reviews.llvm.org/D19360 llvm-svn: 267553
This commit is contained in:
parent
323ab3975b
commit
3025e7f25f
|
@ -74,6 +74,8 @@ public:
|
|||
ImmTyDppRowMask,
|
||||
ImmTyDppBankMask,
|
||||
ImmTyDppBoundCtrl,
|
||||
ImmTySdwaSel,
|
||||
ImmTySdwaDstUnused,
|
||||
ImmTyDMask,
|
||||
ImmTyUNorm,
|
||||
ImmTyDA,
|
||||
|
@ -253,6 +255,14 @@ public:
|
|||
return isImmTy(ImmTyDppBoundCtrl);
|
||||
}
|
||||
|
||||
bool isSDWASel() const {
|
||||
return isImmTy(ImmTySdwaSel);
|
||||
}
|
||||
|
||||
bool isSDWADstUnused() const {
|
||||
return isImmTy(ImmTySdwaDstUnused);
|
||||
}
|
||||
|
||||
void setModifiers(unsigned Mods) {
|
||||
assert(isReg() || (isImm() && Imm.Modifiers == 0));
|
||||
if (isReg())
|
||||
|
@ -522,6 +532,7 @@ public:
|
|||
OperandMatchResultTy parseOptionalOps(
|
||||
const ArrayRef<OptionalOperand> &OptionalOps,
|
||||
OperandVector &Operands);
|
||||
OperandMatchResultTy parseStringWithPrefix(const char *Prefix, StringRef &Value);
|
||||
|
||||
|
||||
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
|
||||
|
@ -569,6 +580,9 @@ public:
|
|||
void cvtDPP_mod(MCInst &Inst, const OperandVector &Operands);
|
||||
void cvtDPP_nomod(MCInst &Inst, const OperandVector &Operands);
|
||||
void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool HasMods);
|
||||
|
||||
OperandMatchResultTy parseSDWASel(OperandVector &Operands);
|
||||
OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
|
||||
};
|
||||
|
||||
struct OptionalOperand {
|
||||
|
@ -1396,6 +1410,30 @@ AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
|
|||
return MatchOperand_NoMatch;
|
||||
}
|
||||
|
||||
/// Tries to match `<Prefix>:<identifier>` at the current lexer position.
///
/// \param Prefix  identifier expected before the colon.
/// \param Value   on success, receives the text of the identifier that
///                follows the colon.
/// \returns MatchOperand_NoMatch, with nothing consumed, when the current
///          token is not an identifier equal to \p Prefix;
///          MatchOperand_ParseFail when the prefix was consumed but it is not
///          followed by ':' and then an identifier;
///          MatchOperand_Success otherwise.
///
/// The identifier after the colon is only read, not consumed, so on success
/// the caller still has to lex past it.
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(const char *Prefix, StringRef &Value) {
  // Bail out without touching the token stream unless we are looking at
  // exactly the requested prefix identifier.
  const bool AtPrefix = !getLexer().isNot(AsmToken::Identifier) &&
                        !(Parser.getTok().getString() != Prefix);
  if (!AtPrefix)
    return MatchOperand_NoMatch;
  Parser.Lex(); // consume the prefix

  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;
  Parser.Lex(); // consume ':'

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;

  Value = Parser.getTok().getString();
  return MatchOperand_Success;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ds
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -2296,6 +2334,80 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands,
|
|||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// sdwa
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Parses one SDWA selector operand written as `dst_sel:<NAME>`,
/// `src0_sel:<NAME>` or `src1_sel:<NAME>`, where <NAME> is one of BYTE_0..3,
/// WORD_0, WORD_1 or DWORD.
///
/// On success an ImmTySdwaSel immediate holding the selector's numeric value
/// (0..6) is appended to \p Operands. If none of the three prefixes matches,
/// MatchOperand_NoMatch is returned; a malformed operand or an unknown
/// selector name yields MatchOperand_ParseFail.
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands) {
  const SMLoc StartLoc = Parser.getTok().getLoc();
  StringRef SelName;

  // Try the accepted prefixes in order; stop at the first attempt that either
  // succeeds or fails hard. Only NoMatch moves on to the next prefix.
  static const char *const Prefixes[] = {"dst_sel", "src0_sel", "src1_sel"};
  AMDGPUAsmParser::OperandMatchResultTy Status = MatchOperand_NoMatch;
  for (const char *Prefix : Prefixes) {
    Status = parseStringWithPrefix(Prefix, SelName);
    if (Status != MatchOperand_NoMatch)
      break;
  }
  if (Status != MatchOperand_Success)
    return Status;

  // Sentinel returned by the switch for an unrecognised selector name.
  const int64_t UnknownSel = 0xffffffff;
  const int64_t SelValue = StringSwitch<int64_t>(SelName)
                               .Case("BYTE_0", 0)
                               .Case("BYTE_1", 1)
                               .Case("BYTE_2", 2)
                               .Case("BYTE_3", 3)
                               .Case("WORD_0", 4)
                               .Case("WORD_1", 5)
                               .Case("DWORD", 6)
                               .Default(UnknownSel);

  // The selector name token is consumed even when it turns out to be invalid.
  Parser.Lex();

  if (SelValue == UnknownSel)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(SelValue, StartLoc,
                                              AMDGPUOperand::ImmTySdwaSel));
  return MatchOperand_Success;
}
|
||||
|
||||
/// Parses the SDWA `dst_unused:<NAME>` operand, where <NAME> is UNUSED_PAD,
/// UNUSED_SEXT or UNUSED_PRESERVE.
///
/// On success an ImmTySdwaDstUnused immediate holding the numeric value
/// (0..2) is appended to \p Operands. Any non-success result of the prefix
/// parse is returned unchanged; an unknown name yields
/// MatchOperand_ParseFail.
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  const SMLoc StartLoc = Parser.getTok().getLoc();
  StringRef PolicyName;

  const AMDGPUAsmParser::OperandMatchResultTy Status =
      parseStringWithPrefix("dst_unused", PolicyName);
  if (Status != MatchOperand_Success)
    return Status;

  // Sentinel returned by the switch for an unrecognised name.
  const int64_t UnknownPolicy = 0xffffffff;
  const int64_t PolicyValue = StringSwitch<int64_t>(PolicyName)
                                  .Case("UNUSED_PAD", 0)
                                  .Case("UNUSED_SEXT", 1)
                                  .Case("UNUSED_PRESERVE", 2)
                                  .Default(UnknownPolicy);

  // The name token is consumed even when it turns out to be invalid.
  Parser.Lex();

  if (PolicyValue == UnknownPolicy)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(
      PolicyValue, StartLoc, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
|
||||
|
||||
|
||||
/// Force static initialization.
|
||||
extern "C" void LLVMInitializeAMDGPUAsmParser() {
|
||||
|
|
|
@ -282,6 +282,8 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
|
|||
O << "_e64 ";
|
||||
else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::DPP)
|
||||
O << "_dpp ";
|
||||
else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SDWA)
|
||||
O << "_sdwa ";
|
||||
else
|
||||
O << "_e32 ";
|
||||
|
||||
|
@ -479,6 +481,51 @@ void AMDGPUInstPrinter::printBoundCtrlOperand(const MCInst *MI, unsigned OpNo,
|
|||
}
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
unsigned Imm = MI->getOperand(OpNo).getImm();
|
||||
switch (Imm) {
|
||||
case 0: O << "BYTE_0"; break;
|
||||
case 1: O << "BYTE_1"; break;
|
||||
case 2: O << "BYTE_2"; break;
|
||||
case 3: O << "BYTE_3"; break;
|
||||
case 4: O << "WORD_0"; break;
|
||||
case 5: O << "WORD_1"; break;
|
||||
case 6: O << "DWORD"; break;
|
||||
default: llvm_unreachable("Invalid SDWA data select operand");
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
O << "dst_sel:";
|
||||
printSDWASel(MI, OpNo, O);
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
O << "src0_sel:";
|
||||
printSDWASel(MI, OpNo, O);
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
O << "src1_sel:";
|
||||
printSDWASel(MI, OpNo, O);
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
O << "dst_unused:";
|
||||
unsigned Imm = MI->getOperand(OpNo).getImm();
|
||||
switch (Imm) {
|
||||
case 0: O << "UNUSED_PAD"; break;
|
||||
case 1: O << "UNUSED_SEXT"; break;
|
||||
case 2: O << "UNUSED_PRESERVE"; break;
|
||||
default: llvm_unreachable("Invalid SDWA dest_unused operand");
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &O) {
|
||||
unsigned Imm = MI->getOperand(OpNum).getImm();
|
||||
|
|
|
@ -67,6 +67,11 @@ private:
|
|||
void printRowMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printBankMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printBoundCtrlOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printSDWADstSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printSDWASrc1Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printSDWADstUnused(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
|
||||
|
|
|
@ -29,17 +29,18 @@ enum {
|
|||
VOP2 = 1 << 11,
|
||||
VOP3 = 1 << 12,
|
||||
VOPC = 1 << 13,
|
||||
DPP = 1 << 14,
|
||||
SDWA = 1 << 14,
|
||||
DPP = 1 << 15,
|
||||
|
||||
MUBUF = 1 << 15,
|
||||
MTBUF = 1 << 16,
|
||||
SMRD = 1 << 17,
|
||||
DS = 1 << 18,
|
||||
MIMG = 1 << 19,
|
||||
FLAT = 1 << 20,
|
||||
WQM = 1 << 21,
|
||||
VGPRSpill = 1 << 22,
|
||||
VOPAsmPrefer32Bit = 1 << 23
|
||||
MUBUF = 1 << 16,
|
||||
MTBUF = 1 << 17,
|
||||
SMRD = 1 << 18,
|
||||
DS = 1 << 19,
|
||||
MIMG = 1 << 20,
|
||||
FLAT = 1 << 21,
|
||||
WQM = 1 << 22,
|
||||
VGPRSpill = 1 << 23,
|
||||
VOPAsmPrefer32Bit = 1 << 24
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
|
|||
field bits<1> VOP2 = 0;
|
||||
field bits<1> VOP3 = 0;
|
||||
field bits<1> VOPC = 0;
|
||||
field bits<1> SDWA = 0;
|
||||
field bits<1> DPP = 0;
|
||||
|
||||
field bits<1> MUBUF = 0;
|
||||
|
@ -64,17 +65,18 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
|
|||
let TSFlags{11} = VOP2;
|
||||
let TSFlags{12} = VOP3;
|
||||
let TSFlags{13} = VOPC;
|
||||
let TSFlags{14} = DPP;
|
||||
let TSFlags{14} = SDWA;
|
||||
let TSFlags{15} = DPP;
|
||||
|
||||
let TSFlags{15} = MUBUF;
|
||||
let TSFlags{16} = MTBUF;
|
||||
let TSFlags{17} = SMRD;
|
||||
let TSFlags{18} = DS;
|
||||
let TSFlags{19} = MIMG;
|
||||
let TSFlags{20} = FLAT;
|
||||
let TSFlags{21} = WQM;
|
||||
let TSFlags{22} = VGPRSpill;
|
||||
let TSFlags{23} = VOPAsmPrefer32Bit;
|
||||
let TSFlags{16} = MUBUF;
|
||||
let TSFlags{17} = MTBUF;
|
||||
let TSFlags{18} = SMRD;
|
||||
let TSFlags{19} = DS;
|
||||
let TSFlags{20} = MIMG;
|
||||
let TSFlags{21} = FLAT;
|
||||
let TSFlags{22} = WQM;
|
||||
let TSFlags{23} = VGPRSpill;
|
||||
let TSFlags{24} = VOPAsmPrefer32Bit;
|
||||
|
||||
let SchedRW = [Write32Bit];
|
||||
|
||||
|
|
|
@ -576,6 +576,22 @@ class DPPOptionalMatchClass <string OpName>: AsmOperandClass {
|
|||
let IsOptional = 1;
|
||||
}
|
||||
|
||||
// Assembler operand class for SDWA selector operands. It is optional, is
// parsed by parseSDWASel, recognised by isSDWASel and rendered through
// addImmOperands.
def SDWASelMatchClass : AsmOperandClass {
  let Name = "SDWASel";
  let IsOptional = 1;
  let ParserMethod = "parseSDWASel";
  let PredicateMethod = "isSDWASel";
  let RenderMethod = "addImmOperands";
}
|
||||
|
||||
// Assembler operand class for the SDWA dst_unused operand. It is optional, is
// parsed by parseSDWADstUnused, recognised by isSDWADstUnused and rendered
// through addImmOperands.
def SDWADstUnusedMatchClass : AsmOperandClass {
  let Name = "SDWADstUnused";
  let IsOptional = 1;
  let ParserMethod = "parseSDWADstUnused";
  let PredicateMethod = "isSDWADstUnused";
  let RenderMethod = "addImmOperands";
}
|
||||
|
||||
class OptionalImmAsmOperand <string OpName> : AsmOperandClass {
|
||||
let Name = "Imm"#OpName;
|
||||
let PredicateMethod = "isImm";
|
||||
|
@ -737,11 +753,31 @@ def bound_ctrl : Operand <i1> {
|
|||
let ParserMatchClass = DPPOptionalMatchClass<"BoundCtrl">;
|
||||
}
|
||||
|
||||
// SDWA destination selector operand; printed by printSDWADstSel.
def dst_sel : Operand <i32> {
  let ParserMatchClass = SDWASelMatchClass;
  let PrintMethod = "printSDWADstSel";
}
|
||||
|
||||
// SDWA source-0 selector operand; printed by printSDWASrc0Sel.
def src0_sel : Operand <i32> {
  let ParserMatchClass = SDWASelMatchClass;
  let PrintMethod = "printSDWASrc0Sel";
}
|
||||
|
||||
// SDWA source-1 selector operand; printed by printSDWASrc1Sel.
def src1_sel : Operand <i32> {
  let ParserMatchClass = SDWASelMatchClass;
  let PrintMethod = "printSDWASrc1Sel";
}
|
||||
|
||||
// 16-bit operand matched through HwregMatchClass and printed by printHwreg.
def hwreg : Operand <i16> {
  let ParserMatchClass = HwregMatchClass;
  let PrintMethod = "printHwreg";
}
|
||||
|
||||
// SDWA dst_unused operand; printed by printSDWADstUnused.
def dst_unused : Operand <i32> {
  let ParserMatchClass = SDWADstUnusedMatchClass;
  let PrintMethod = "printSDWADstUnused";
}
|
||||
|
||||
} // End OperandType = "OPERAND_IMMEDIATE"
|
||||
|
||||
|
||||
|
@ -1316,16 +1352,11 @@ class getVOPSrc0ForVT<ValueType VT> {
|
|||
RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32);
|
||||
}
|
||||
|
||||
// Returns the register class to use for source 1 of VOP[12C] for the
|
||||
// given VT.
|
||||
class getVOPSrc1ForVT<ValueType VT> {
|
||||
// Returns the vreg register class to use for source operand given VT
|
||||
class getVregSrcForVT<ValueType VT> {
|
||||
RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
|
||||
}
|
||||
|
||||
// Returns the register class to use for DPP source operands.
|
||||
class getDPPSrcForVT<ValueType VT> {
|
||||
RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
|
||||
}
|
||||
|
||||
// Returns the register class to use for sources of VOP3 instructions for the
|
||||
// given VT.
|
||||
|
@ -1431,7 +1462,40 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
|
|||
/* endif */)));
|
||||
}
|
||||
|
||||
class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
|
||||
// Builds the input operand list (`ins` dag) for the SDWA form of an
// instruction.
//
//   Src0RC, Src1RC - register classes used for $src0 and $src1.
//   NumSrcArgs     - number of source operands. 0 gives an empty list, 1 gives
//                    the one-source (VOP1) list, and every other value falls
//                    through to the two-source (VOP2) list.
//   HasModifiers   - when 1, each source is preceded by its input-modifier
//                    operand and a $clamp operand is added before the
//                    selectors.
class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
                  bit HasModifiers> {

  dag ret = !if (!eq(NumSrcArgs, 0),
               // VOP1 without input operands (V_NOP)
               (ins),
               !if (!eq(NumSrcArgs, 1),
                  !if (!eq(HasModifiers, 1),
                     // VOP1_SDWA with modifiers
                     (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
                          ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                          src0_sel:$src0_sel)
                  /* else */,
                     // VOP1_SDWA without modifiers
                     (ins Src0RC:$src0, dst_sel:$dst_sel, dst_unused:$dst_unused,
                          src0_sel:$src0_sel)
                  /* endif */)
              /* NumSrcArgs == 2 */,
                  !if (!eq(HasModifiers, 1),
                     // VOP2_SDWA with modifiers
                     (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
                          InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
                          ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                          src0_sel:$src0_sel, src1_sel:$src1_sel)
                  /* else */,
                     // VOP2_SDWA without modifiers
                     (ins Src0RC:$src0, Src1RC:$src1,
                          dst_sel:$dst_sel, dst_unused:$dst_unused,
                          src0_sel:$src0_sel, src1_sel:$src1_sel)
                  /* endif */)));
}
|
||||
|
||||
// Outs for DPP and SDWA
|
||||
class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
|
||||
dag ret = !if(HasDst,
|
||||
!if(!eq(DstVT.Size, 1),
|
||||
(outs DstRCDPP:$sdst), // sdst for VOPC
|
||||
|
@ -1484,20 +1548,41 @@ class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =
|
|||
string ret = dst#args#" $dpp_ctrl $row_mask $bank_mask $bound_ctrl";
|
||||
}
|
||||
|
||||
class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
|
||||
// Assembles the asm operand string for the SDWA form of an instruction:
// destination, then sources (with modifiers and $clamp when HasModifiers is
// set, otherwise the getAsm32 source string), then the SDWA selector
// operands appropriate for the number of sources.
class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasModifiers,
                  ValueType DstVT = i32> {
  // Destination: empty without a dst, $sdst for a 1-bit result (the VOPC
  // case), $vdst otherwise.
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"),
                   "");

  // Source strings used only on the HasModifiers path. A trailing comma is
  // emitted whenever another source follows.
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1 = !if(!eq(NumSrcArgs, 1),
                    "",
                    !if(!eq(NumSrcArgs, 2),
                        " $src1_modifiers",
                        " $src1_modifiers,"));

  string args = !if(!eq(HasModifiers, 0),
                    getAsm32<0, NumSrcArgs, DstVT>.ret,
                    ", "#src0#src1#", $clamp");

  // Selector operands: none without sources, no $src1_sel with one source.
  string sdwa = !if(!eq(NumSrcArgs, 0),
                    "",
                    !if(!eq(NumSrcArgs, 1),
                        " $dst_sel $dst_unused $src0_sel",
                        " $dst_sel $dst_unused $src0_sel $src1_sel"));

  string ret = dst#args#sdwa;
}
|
||||
|
||||
// Function that checks if instruction supports DPP and SDWA
|
||||
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
|
||||
ValueType Src1VT = i32> {
|
||||
bit ret = !if(!eq(NumSrcArgs, 3),
|
||||
0, // NumSrcArgs == 3 - No DPP for VOP3
|
||||
!if(!eq(DstVT.Size, 1),
|
||||
0, // No DPP for VOPC
|
||||
!if(!eq(DstVT.Size, 64),
|
||||
0, // 64-bit dst - No DPP for 64-bit operands
|
||||
0, // NumSrcArgs == 3 - No DPP or SDWA for VOP3
|
||||
!if(!eq(DstVT.Size, 64),
|
||||
0, // 64-bit dst - No DPP or SDWA for 64-bit operands
|
||||
!if(!eq(Src0VT.Size, 64),
|
||||
0, // 64-bit src0
|
||||
!if(!eq(Src0VT.Size, 64),
|
||||
0, // 64-bit src0
|
||||
!if(!eq(Src0VT.Size, 64),
|
||||
0, // 64-bit src2
|
||||
1
|
||||
)
|
||||
0, // 64-bit src2
|
||||
1
|
||||
)
|
||||
)
|
||||
)
|
||||
|
@ -1514,41 +1599,47 @@ class VOPProfile <list<ValueType> _ArgVT> {
|
|||
field ValueType Src2VT = ArgVT[3];
|
||||
field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
|
||||
field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
|
||||
field RegisterOperand DstRCSDWA = getVALUDstForVT<DstVT>.ret;
|
||||
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
|
||||
field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
|
||||
field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
|
||||
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
|
||||
field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
|
||||
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
|
||||
field RegisterClass Src0DPP = getDPPSrcForVT<Src0VT>.ret;
|
||||
field RegisterClass Src1DPP = getDPPSrcForVT<Src1VT>.ret;
|
||||
|
||||
field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
|
||||
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
|
||||
field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret;
|
||||
field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret;
|
||||
|
||||
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
|
||||
field bit HasDst32 = HasDst;
|
||||
field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
|
||||
field bit HasModifiers = hasModifiers<Src0VT>.ret;
|
||||
|
||||
field bit HasDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
|
||||
|
||||
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
|
||||
|
||||
field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
|
||||
|
||||
// VOP3b instructions are a special case with a second explicit
|
||||
// output. This is manually overridden for them.
|
||||
field dag Outs32 = Outs;
|
||||
field dag Outs64 = Outs;
|
||||
field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
|
||||
field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
|
||||
field dag OutsSDWA = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
|
||||
|
||||
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
|
||||
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
|
||||
HasModifiers>.ret;
|
||||
field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs, HasModifiers>.ret;
|
||||
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, HasModifiers>.ret;
|
||||
|
||||
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
|
||||
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
|
||||
field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
|
||||
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
|
||||
}
|
||||
|
||||
class VOP_NO_DPP <VOPProfile p> : VOPProfile <p.ArgVT> {
|
||||
let HasDPP = 0;
|
||||
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
|
||||
let HasExt = 0;
|
||||
}
|
||||
|
||||
// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
|
||||
|
@ -1659,12 +1750,12 @@ def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
|
|||
def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
|
||||
field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32imm:$imm);
|
||||
field string Asm32 = "$vdst, $src0, $src1, $imm";
|
||||
field bit HasDPP = 0;
|
||||
field bit HasExt = 0;
|
||||
}
|
||||
def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
|
||||
field dag Ins32 = (ins VCSrc_32:$src0, u32imm:$imm, VGPR_32:$src1);
|
||||
field string Asm32 = "$vdst, $src0, $imm, $src1";
|
||||
field bit HasDPP = 0;
|
||||
field bit HasExt = 0;
|
||||
}
|
||||
def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
|
||||
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
|
||||
|
@ -1675,9 +1766,15 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
|
|||
VGPR_32:$src2, // stub argument
|
||||
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
|
||||
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
|
||||
let InsSDWA = (ins InputModsNoDefault:$src0_modifiers, Src0RC32:$src0,
|
||||
InputModsNoDefault:$src1_modifiers, Src1RC32:$src1,
|
||||
VGPR_32:$src2, // stub argument
|
||||
ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel, src1_sel:$src1_sel);
|
||||
let Asm32 = getAsm32<1, 2, f32>.ret;
|
||||
let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
|
||||
let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret;
|
||||
let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
|
||||
}
|
||||
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
|
||||
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
|
||||
|
@ -1787,13 +1884,37 @@ multiclass VOP1_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
|
|||
class VOP1_DPP <vop1 op, string opName, VOPProfile p> :
|
||||
VOP1_DPPe <op.VI>,
|
||||
VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
|
||||
let AssemblerPredicates = !if(p.HasDPP, [isVI], [DisableInst]);
|
||||
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
|
||||
let DecoderNamespace = "DPP";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
let src0_modifiers = !if(p.HasModifiers, ?, 0);
|
||||
let src1_modifiers = 0;
|
||||
}
|
||||
|
||||
// Supplies fixed values for the SDWA encoding fields that a given profile
// does not use. A field set to `?` is left uninitialized by this class.
class SDWADisableFields <VOPProfile p> {
  // Destination: without a dst, dst_sel is fixed to 6 and dst_unused to 0.
  bits<3> dst_sel    = !if(p.HasDst, ?, 6);
  bits<2> dst_unused = !if(p.HasDst, ?, 0);
  bits<1> clamp      = !if(p.HasModifiers, ?, 0);

  // Source 0: fixed (src0 = 0, src0_sel = 6) only when there are no sources.
  bits<8> src0           = !if(!eq(p.NumSrcArgs, 0), 0, ?);
  bits<3> src0_sel       = !if(!eq(p.NumSrcArgs, 0), 6, ?);
  bits<3> src0_modifiers = !if(p.HasModifiers, ?, 0);

  // Source 1: fixed (src1_sel = 6, src1_modifiers = 0) with zero or one
  // source; with more sources the modifiers still depend on HasModifiers.
  bits<3> src1_sel = !if(!eq(p.NumSrcArgs, 0),
                         6,
                         !if(!eq(p.NumSrcArgs, 1),
                             6,
                             ?));
  bits<3> src1_modifiers = !if(!eq(p.NumSrcArgs, 0),
                               0,
                               !if(!eq(p.NumSrcArgs, 1),
                                   0,
                                   !if(p.HasModifiers, ?, 0)));
}
|
||||
|
||||
// SDWA variant of a VOP1 instruction: combines the VOP1 SDWA encoding, the
// SDWA operand lists / asm string taken from profile `p`, and the fixed
// values for unused encoding fields.
class VOP1_SDWA <vop1 op, string opName, VOPProfile p> :
  VOP1_SDWAe <op.VI>,
  VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
  SDWADisableFields <p> {
  let DecoderNamespace = "SDWA";
  let DisableDecoder = DisableVIDecoder;
  // Profiles with HasExt = 0 get the DisableInst predicate instead of isVI.
  let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
}
|
||||
|
||||
multiclass VOP1SI_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
|
||||
string asm = opName#p.Asm32> {
|
||||
|
||||
|
@ -1851,13 +1972,22 @@ multiclass VOP2_m <vop2 op, string opName, VOPProfile p, list <dag> pattern,
|
|||
class VOP2_DPP <vop2 op, string opName, VOPProfile p> :
|
||||
VOP2_DPPe <op.VI>,
|
||||
VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
|
||||
let AssemblerPredicates = !if(p.HasDPP, [isVI], [DisableInst]);
|
||||
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
|
||||
let DecoderNamespace = "DPP";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
let src0_modifiers = !if(p.HasModifiers, ?, 0);
|
||||
let src1_modifiers = !if(p.HasModifiers, ?, 0);
|
||||
}
|
||||
|
||||
// SDWA variant of a VOP2 instruction: combines the VOP2 SDWA encoding, the
// SDWA operand lists / asm string taken from profile `p`, and the fixed
// values for unused encoding fields.
class VOP2_SDWA <vop2 op, string opName, VOPProfile p> :
  VOP2_SDWAe <op.VI>,
  VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
  SDWADisableFields <p> {
  let DecoderNamespace = "SDWA";
  let DisableDecoder = DisableVIDecoder;
  // Profiles with HasExt = 0 get the DisableInst predicate instead of isVI.
  let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
}
|
||||
|
||||
class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
|
||||
|
||||
bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
|
||||
|
@ -2089,6 +2219,8 @@ multiclass VOP1_Helper <vop1 op, string opName, VOPProfile p, list<dag> pat32,
|
|||
p.HasModifiers>;
|
||||
|
||||
def _dpp : VOP1_DPP <op, opName, p>;
|
||||
|
||||
def _sdwa : VOP1_SDWA <op, opName, p>;
|
||||
}
|
||||
|
||||
multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
|
||||
|
@ -2122,6 +2254,8 @@ multiclass VOP2_Helper <vop2 op, string opName, VOPProfile p, list<dag> pat32,
|
|||
revOp, p.HasModifiers>;
|
||||
|
||||
def _dpp : VOP2_DPP <op, opName, p>;
|
||||
|
||||
def _sdwa : VOP2_SDWA <op, opName, p>;
|
||||
}
|
||||
|
||||
multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
|
||||
|
|
|
@ -1394,13 +1394,13 @@ defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
|
|||
VOP_F32_F32, int_amdgcn_frexp_mant
|
||||
>;
|
||||
let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
|
||||
defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NO_DPP<VOP_NONE>>;
|
||||
defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
|
||||
}
|
||||
|
||||
let Uses = [M0, EXEC] in {
|
||||
defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_NO_DPP<VOP_I32_I32>>;
|
||||
defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_NO_DPP<VOP_I32_I32>>;
|
||||
defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_NO_DPP<VOP_I32_I32>>;
|
||||
defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_NO_EXT<VOP_I32_I32>>;
|
||||
defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_NO_EXT<VOP_I32_I32>>;
|
||||
defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
|
||||
} // End Uses = [M0, EXEC]
|
||||
|
||||
// These instruction only exist on SI and CI
|
||||
|
|
|
@ -225,6 +225,61 @@ class VOP2_DPPe <bits<6> op> : VOP_DPPe {
|
|||
let Inst{31} = 0x0; //encoding
|
||||
}
|
||||
|
||||
// Common base for SDWA instruction definitions: sets the SDWA flag and an
// 8-byte instruction size. The HasMods parameter is accepted but not
// referenced in this body.
class VOP_SDWA <dag outs, dag ins, string asm, list<dag> pattern,
                bit HasMods = 0> :
  VOPAnyCommon <outs, ins, asm, pattern> {
  let Size = 8;
  let SDWA = 1;
}
|
||||
|
||||
// Upper 32 bits (Inst{63-32}) of the 64-bit SDWA encoding. Bits 47-46,
// 55-54 and 63-62 are not assigned by this class.
class VOP_SDWAe : Enc64 {
  bits<8> src0;
  bits<3> src0_sel;
  bits<3> src0_modifiers; // {abs,neg,sext}
  bits<3> src1_sel;
  bits<3> src1_modifiers;
  bits<3> dst_sel;
  bits<2> dst_unused;
  bits<1> clamp;

  // Source 0 register, selector and modifiers.
  let Inst{39-32} = src0;
  let Inst{50-48} = src0_sel;
  let Inst{53-51} = src0_modifiers;

  // Source 1 selector and modifiers.
  let Inst{58-56} = src1_sel;
  let Inst{61-59} = src1_modifiers;

  // Destination controls and clamp.
  let Inst{42-40} = dst_sel;
  let Inst{44-43} = dst_unused;
  let Inst{45} = clamp;
}
|
||||
|
||||
// Lower 32 bits of a VOP1 instruction in SDWA form.
class VOP1_SDWAe <bits<8> op> : VOP_SDWAe {
  bits<8> vdst;

  let Inst{31-25} = 0x3f; // encoding
  let Inst{24-17} = vdst;
  let Inst{16-9} = op;
  let Inst{8-0} = 0xf9; // sdwa
}
|
||||
|
||||
// Lower 32 bits of a VOP2 instruction in SDWA form.
class VOP2_SDWAe <bits<6> op> : VOP_SDWAe {
  bits<8> vdst;
  bits<8> src1;

  let Inst{31} = 0x0; // encoding
  let Inst{30-25} = op;
  let Inst{24-17} = vdst;
  let Inst{16-9} = src1;
  let Inst{8-0} = 0xf9; // sdwa
}
|
||||
|
||||
// Lower 32 bits of a VOPC instruction in SDWA form.
class VOPC_SDWAe <bits<8> op> : VOP_SDWAe {
  bits<8> src1;

  let Inst{31-25} = 0x3e; // encoding
  let Inst{24-17} = op;
  let Inst{16-9} = src1;
  let Inst{8-0} = 0xf9; // sdwa
}
|
||||
|
||||
// EXPe with the encoding field Inst{31-26} set to 0x31.
class EXPe_vi : EXPe {
  let Inst{31-26} = 0x31; // encoding
}
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI
|
||||
// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI
|
||||
|
||||
// ToDo: converters
|
||||
// ToDo: VOPC
|
||||
// ToDo: VOP2b (see vop_dpp.s)
|
||||
// ToDo: V_MAC_F32 (see vop_dpp.s)
|
||||
// ToDo: sext()
|
||||
// ToDo: intrinsics
|
||||
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x02,0x10,0x06,0x06]
|
||||
v_mov_b32 v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_mov_b32_sdwa v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x06,0x7e,0x04,0x11,0x05,0x06]
|
||||
v_mov_b32 v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_mov_b32_sdwa v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0 ; encoding: [0xf9,0x02,0x1e,0x7e,0x63,0x0a,0x04,0x06]
|
||||
v_mov_b32 v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_min_u32_sdwa v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 ; encoding: [0xf9,0x02,0x84,0x1d,0x0d,0x0b,0x03,0x02]
|
||||
v_min_u32 v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_min_u32_sdwa v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1 ; encoding: [0xf9,0x02,0xfe,0x1d,0x04,0x04,0x02,0x05]
|
||||
v_min_u32 v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_min_u32_sdwa v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x90,0x1d,0xc8,0x05,0x01,0x06]
|
||||
v_min_u32 v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_min_u32_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x1c,0x01,0x06,0x00,0x06]
|
||||
v_min_u32 v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
Loading…
Reference in New Issue