forked from OSchip/llvm-project
[AMDGPU] Add intrinsics for tbuffer load and store
Intrinsic already existed for llvm.SI.tbuffer.store. Needed tbuffer.load, and also re-implemented the intrinsic as llvm.amdgcn.tbuffer.*. Added CodeGen tests for the 2 new variants. Left the original llvm.SI.tbuffer.store implementation in place to avoid issues with existing code. Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, tpr. Differential Revision: https://reviews.llvm.org/D30687 llvm-svn: 306031
This commit is contained in:
parent
9bdb460f64
commit
70e8bc1bf3
|
@ -475,6 +475,33 @@ class AMDGPUBufferStore : Intrinsic <
|
|||
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
|
||||
def int_amdgcn_buffer_store : AMDGPUBufferStore;
|
||||
|
||||
// llvm.amdgcn.tbuffer.load: returns one overloaded value and takes, in order,
// rsrc, vindex, voffset, soffset, offset, dfmt, nfmt, glc and slc.
// NOTE(review): the trailing property list is empty, so no memory behaviour is
// declared on this intrinsic here; confirm whether a read-only property was
// intended.
def int_amdgcn_tbuffer_load : Intrinsic <
|
||||
[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
|
||||
[llvm_v4i32_ty, // rsrc(SGPR)
|
||||
llvm_i32_ty, // vindex(VGPR)
|
||||
llvm_i32_ty, // voffset(VGPR)
|
||||
llvm_i32_ty, // soffset(SGPR)
|
||||
llvm_i32_ty, // offset(imm)
|
||||
llvm_i32_ty, // dfmt(imm)
|
||||
llvm_i32_ty, // nfmt(imm)
|
||||
llvm_i1_ty, // glc(imm)
|
||||
llvm_i1_ty], // slc(imm)
|
||||
[]>;
|
||||
|
||||
// llvm.amdgcn.tbuffer.store: returns nothing and takes, in order, the
// overloaded vdata value followed by rsrc, vindex, voffset, soffset, offset,
// dfmt, nfmt, glc and slc (the same address/format operands as the load form).
def int_amdgcn_tbuffer_store : Intrinsic <
|
||||
[],
|
||||
[llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
|
||||
llvm_v4i32_ty, // rsrc(SGPR)
|
||||
llvm_i32_ty, // vindex(VGPR)
|
||||
llvm_i32_ty, // voffset(VGPR)
|
||||
llvm_i32_ty, // soffset(SGPR)
|
||||
llvm_i32_ty, // offset(imm)
|
||||
llvm_i32_ty, // dfmt(imm)
|
||||
llvm_i32_ty, // nfmt(imm)
|
||||
llvm_i1_ty, // glc(imm)
|
||||
llvm_i1_ty], // slc(imm)
|
||||
[]>;
|
||||
|
||||
class AMDGPUBufferAtomic : Intrinsic <
|
||||
[llvm_i32_ty],
|
||||
[llvm_i32_ty, // vdata(VGPR)
|
||||
|
|
|
@ -3664,6 +3664,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
NODE_NAME_CASE(STORE_MSKOR)
|
||||
NODE_NAME_CASE(LOAD_CONSTANT)
|
||||
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
|
||||
NODE_NAME_CASE(TBUFFER_STORE_FORMAT_X3)
|
||||
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
|
||||
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
|
||||
NODE_NAME_CASE(ATOMIC_INC)
|
||||
NODE_NAME_CASE(ATOMIC_DEC)
|
||||
|
|
|
@ -403,6 +403,8 @@ enum NodeType : unsigned {
|
|||
STORE_MSKOR,
|
||||
LOAD_CONSTANT,
|
||||
TBUFFER_STORE_FORMAT,
|
||||
TBUFFER_STORE_FORMAT_X3,
|
||||
TBUFFER_LOAD_FORMAT,
|
||||
ATOMIC_CMP_SWAP,
|
||||
ATOMIC_INC,
|
||||
ATOMIC_DEC,
|
||||
|
|
|
@ -152,6 +152,8 @@ public:
|
|||
ImmTyExpTgt,
|
||||
ImmTyExpCompr,
|
||||
ImmTyExpVM,
|
||||
ImmTyDFMT,
|
||||
ImmTyNFMT,
|
||||
ImmTyHwreg,
|
||||
ImmTyOff,
|
||||
ImmTySendMsg,
|
||||
|
@ -294,6 +296,8 @@ public:
|
|||
bool isGLC() const { return isImmTy(ImmTyGLC); }
|
||||
bool isSLC() const { return isImmTy(ImmTySLC); }
|
||||
bool isTFE() const { return isImmTy(ImmTyTFE); }
|
||||
// True for a dfmt immediate that fits the 4-bit MTBUF dfmt encoding field
// (Inst{22-19}); larger values cannot be represented and are rejected.
bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<4>(getImm()); }
|
||||
// True for an nfmt immediate that fits the 3-bit MTBUF nfmt encoding field
// (Inst{25-23}); larger values cannot be represented and are rejected.
bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<3>(getImm()); }
|
||||
bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
|
||||
bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
|
||||
bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
|
||||
|
@ -638,6 +642,8 @@ public:
|
|||
case ImmTyGLC: OS << "GLC"; break;
|
||||
case ImmTySLC: OS << "SLC"; break;
|
||||
case ImmTyTFE: OS << "TFE"; break;
|
||||
case ImmTyDFMT: OS << "DFMT"; break;
|
||||
case ImmTyNFMT: OS << "NFMT"; break;
|
||||
case ImmTyClampSI: OS << "ClampSI"; break;
|
||||
case ImmTyOModSI: OS << "OModSI"; break;
|
||||
case ImmTyDppCtrl: OS << "DppCtrl"; break;
|
||||
|
@ -1033,6 +1039,8 @@ public:
|
|||
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
|
||||
void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
|
||||
void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
|
||||
void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
|
||||
|
||||
AMDGPUOperand::Ptr defaultGLC() const;
|
||||
AMDGPUOperand::Ptr defaultSLC() const;
|
||||
AMDGPUOperand::Ptr defaultTFE() const;
|
||||
|
@ -3820,6 +3828,44 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
|
|||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
|
||||
}
|
||||
|
||||
// Converts the parsed operand list of an MTBUF instruction into MCInst
// operands. Operand 0 is skipped; registers and untyped immediates are added
// in the order they were parsed, tokens are ignored, and every typed immediate
// is remembered by its ImmTy so it can be emitted afterwards in the fixed
// order offset, dfmt, nfmt, glc, slc, tfe.
// NOTE(review): what addOptionalImmOperand emits for an operand that was not
// written in the source is not visible here; confirm against its definition.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
|
||||
OptionalImmIndexMap OptionalIdx;
|
||||
|
||||
for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
|
||||
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
|
||||
|
||||
// Add the register arguments
|
||||
if (Op.isReg()) {
|
||||
Op.addRegOperands(Inst, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle the case where soffset is an immediate
|
||||
if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
|
||||
Op.addImmOperands(Inst, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle tokens like 'offen' which are sometimes hard-coded into the
|
||||
// asm string. There are no MCInst operands for these.
|
||||
if (Op.isToken()) {
|
||||
continue;
|
||||
}
|
||||
assert(Op.isImm());
|
||||
|
||||
// Handle optional arguments
|
||||
// Record where this typed immediate sits in Operands, keyed by its type.
|
||||
OptionalIdx[Op.getImmTy()] = i;
|
||||
}
|
||||
|
||||
// Emit the typed immediates in a fixed order, independent of source order.
addOptionalImmOperand(Inst, Operands, OptionalIdx,
|
||||
AMDGPUOperand::ImmTyOffset);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// mimg
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -4000,6 +4046,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
|
|||
{"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
|
||||
{"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
|
||||
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
|
||||
{"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr},
|
||||
{"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr},
|
||||
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
|
||||
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
|
||||
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
|
||||
|
|
|
@ -57,6 +57,11 @@ class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
|
|||
string OpName = NAME # suffix;
|
||||
}
|
||||
|
||||
class MTBUFAddr64Table <bit is_addr64, string suffix = ""> {
|
||||
bit IsAddr64 = is_addr64;
|
||||
string OpName = NAME # suffix;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MTBUF classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -78,14 +83,31 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
|
|||
let EXP_CNT = 1;
|
||||
let MTBUF = 1;
|
||||
let Uses = [EXEC];
|
||||
|
||||
let hasSideEffects = 0;
|
||||
let SchedRW = [WriteVMEM];
|
||||
|
||||
let AsmMatchConverter = "cvtMtbuf";
|
||||
|
||||
bits<1> offen = 0;
|
||||
bits<1> idxen = 0;
|
||||
bits<1> addr64 = 0;
|
||||
bits<1> has_vdata = 1;
|
||||
bits<1> has_vaddr = 1;
|
||||
bits<1> has_glc = 1;
|
||||
bits<1> glc_value = 0; // the value for glc if no such operand
|
||||
bits<4> dfmt_value = 1; // the value for dfmt if no such operand
|
||||
bits<3> nfmt_value = 0; // the value for nfmt if no such operand
|
||||
bits<1> has_srsrc = 1;
|
||||
bits<1> has_soffset = 1;
|
||||
bits<1> has_offset = 1;
|
||||
bits<1> has_slc = 1;
|
||||
bits<1> has_tfe = 1;
|
||||
bits<1> has_dfmt = 1;
|
||||
bits<1> has_nfmt = 1;
|
||||
}
|
||||
|
||||
class MTBUF_Real <MTBUF_Pseudo ps> :
|
||||
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
|
||||
Enc64 {
|
||||
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
|
||||
|
||||
let isPseudo = 0;
|
||||
let isCodeGenOnly = 0;
|
||||
|
@ -97,57 +119,168 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
|
|||
let DisableEncoding = ps.DisableEncoding;
|
||||
let TSFlags = ps.TSFlags;
|
||||
|
||||
bits<8> vdata;
|
||||
bits<12> offset;
|
||||
bits<1> offen;
|
||||
bits<1> idxen;
|
||||
bits<1> glc;
|
||||
bits<1> addr64;
|
||||
bits<4> dfmt;
|
||||
bits<3> nfmt;
|
||||
bits<8> vaddr;
|
||||
bits<7> srsrc;
|
||||
bits<1> slc;
|
||||
bits<1> tfe;
|
||||
bits<8> soffset;
|
||||
|
||||
let Inst{11-0} = offset;
|
||||
let Inst{12} = offen;
|
||||
let Inst{13} = idxen;
|
||||
let Inst{14} = glc;
|
||||
let Inst{22-19} = dfmt;
|
||||
let Inst{25-23} = nfmt;
|
||||
let Inst{31-26} = 0x3a; //encoding
|
||||
let Inst{39-32} = vaddr;
|
||||
let Inst{47-40} = vdata;
|
||||
let Inst{52-48} = srsrc{6-2};
|
||||
let Inst{54} = slc;
|
||||
let Inst{55} = tfe;
|
||||
let Inst{63-56} = soffset;
|
||||
bits<1> glc;
|
||||
bits<4> dfmt;
|
||||
bits<3> nfmt;
|
||||
bits<8> vaddr;
|
||||
bits<8> vdata;
|
||||
bits<7> srsrc;
|
||||
bits<1> slc;
|
||||
bits<1> tfe;
|
||||
bits<8> soffset;
|
||||
}
|
||||
|
||||
class MTBUF_Load_Pseudo <string opName, RegisterClass regClass> : MTBUF_Pseudo <
|
||||
opName, (outs regClass:$dst),
|
||||
(ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
|
||||
i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc,
|
||||
i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
|
||||
" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"#
|
||||
" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> {
|
||||
class getMTBUFInsDA<list<RegisterClass> vdataList,
|
||||
list<RegisterClass> vaddrList=[]> {
|
||||
RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList));
|
||||
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
|
||||
dag InsNoData = !if(!empty(vaddrList),
|
||||
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
|
||||
offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe),
|
||||
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
|
||||
offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe)
|
||||
);
|
||||
dag InsData = !if(!empty(vaddrList),
|
||||
(ins vdataClass:$vdata, SReg_128:$srsrc,
|
||||
SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
|
||||
slc:$slc, tfe:$tfe),
|
||||
(ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
|
||||
SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
|
||||
slc:$slc, tfe:$tfe)
|
||||
);
|
||||
dag ret = !if(!empty(vdataList), InsNoData, InsData);
|
||||
}
|
||||
|
||||
class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[]> {
|
||||
dag ret =
|
||||
!if(!eq(addrKind, BUFAddrKind.Offset), getMTBUFInsDA<vdataList>.ret,
|
||||
!if(!eq(addrKind, BUFAddrKind.OffEn), getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
|
||||
!if(!eq(addrKind, BUFAddrKind.IdxEn), getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
|
||||
!if(!eq(addrKind, BUFAddrKind.BothEn), getMTBUFInsDA<vdataList, [VReg_64]>.ret,
|
||||
!if(!eq(addrKind, BUFAddrKind.Addr64), getMTBUFInsDA<vdataList, [VReg_64]>.ret,
|
||||
(ins))))));
|
||||
}
|
||||
|
||||
class getMTBUFAsmOps<int addrKind> {
|
||||
string Pfx =
|
||||
!if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $dfmt, $nfmt, $soffset",
|
||||
!if(!eq(addrKind, BUFAddrKind.OffEn),
|
||||
"$vaddr, $srsrc, $dfmt, $nfmt, $soffset offen",
|
||||
!if(!eq(addrKind, BUFAddrKind.IdxEn),
|
||||
"$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen",
|
||||
!if(!eq(addrKind, BUFAddrKind.BothEn),
|
||||
"$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen offen",
|
||||
!if(!eq(addrKind, BUFAddrKind.Addr64),
|
||||
"$vaddr, $srsrc, $dfmt, $nfmt, $soffset addr64",
|
||||
"")))));
|
||||
string ret = Pfx # "$offset";
|
||||
}
|
||||
|
||||
class MTBUF_SetupAddr<int addrKind> {
|
||||
bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1,
|
||||
!if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
|
||||
|
||||
bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1,
|
||||
!if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
|
||||
|
||||
bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0);
|
||||
|
||||
bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1);
|
||||
}
|
||||
|
||||
class MTBUF_Load_Pseudo <string opName,
|
||||
int addrKind,
|
||||
RegisterClass vdataClass,
|
||||
list<dag> pattern=[],
|
||||
// Workaround bug bz30254
|
||||
int addrKindCopy = addrKind>
|
||||
: MTBUF_Pseudo<opName,
|
||||
(outs vdataClass:$vdata),
|
||||
getMTBUFIns<addrKindCopy>.ret,
|
||||
" $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
|
||||
pattern>,
|
||||
MTBUF_SetupAddr<addrKindCopy> {
|
||||
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
|
||||
let mayLoad = 1;
|
||||
let mayStore = 0;
|
||||
}
|
||||
|
||||
class MTBUF_Store_Pseudo <string opName, RegisterClass regClass> : MTBUF_Pseudo <
|
||||
opName, (outs),
|
||||
(ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
|
||||
i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr,
|
||||
SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
|
||||
" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"#
|
||||
" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> {
|
||||
multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
|
||||
ValueType load_vt = i32,
|
||||
SDPatternOperator ld = null_frag> {
|
||||
|
||||
def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
|
||||
[(set load_vt:$vdata,
|
||||
(ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$dfmt,
|
||||
i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
|
||||
MTBUFAddr64Table<0>;
|
||||
|
||||
def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
|
||||
[(set load_vt:$vdata,
|
||||
(ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
|
||||
i8:$dfmt, i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
|
||||
MTBUFAddr64Table<1>;
|
||||
|
||||
def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
|
||||
def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
|
||||
def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
|
||||
|
||||
let DisableWQM = 1 in {
|
||||
def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
|
||||
def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
|
||||
def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
|
||||
def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
|
||||
}
|
||||
}
|
||||
|
||||
class MTBUF_Store_Pseudo <string opName,
|
||||
int addrKind,
|
||||
RegisterClass vdataClass,
|
||||
list<dag> pattern=[],
|
||||
// Workaround bug bz30254
|
||||
int addrKindCopy = addrKind,
|
||||
RegisterClass vdataClassCopy = vdataClass>
|
||||
: MTBUF_Pseudo<opName,
|
||||
(outs),
|
||||
getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
|
||||
" $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
|
||||
pattern>,
|
||||
MTBUF_SetupAddr<addrKindCopy> {
|
||||
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
|
||||
let mayLoad = 0;
|
||||
let mayStore = 1;
|
||||
}
|
||||
|
||||
multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
|
||||
ValueType store_vt = i32,
|
||||
SDPatternOperator st = null_frag> {
|
||||
|
||||
def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
|
||||
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
|
||||
i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
|
||||
i1:$slc, i1:$tfe))]>,
|
||||
MTBUFAddr64Table<0>;
|
||||
|
||||
def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
|
||||
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
|
||||
i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
|
||||
i1:$slc, i1:$tfe))]>,
|
||||
MTBUFAddr64Table<1>;
|
||||
|
||||
def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
|
||||
def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
|
||||
def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
|
||||
|
||||
let DisableWQM = 1 in {
|
||||
def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
|
||||
def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
|
||||
def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
|
||||
def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MUBUF classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -676,14 +809,14 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
|
|||
// MTBUF Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0, "tbuffer_load_format_x", []>;
|
||||
//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <1, "tbuffer_load_format_xy", []>;
|
||||
//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <2, "tbuffer_load_format_xyz", []>;
|
||||
def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Pseudo <"tbuffer_load_format_xyzw", VReg_128>;
|
||||
def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Pseudo <"tbuffer_store_format_x", VGPR_32>;
|
||||
def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Pseudo <"tbuffer_store_format_xy", VReg_64>;
|
||||
def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Pseudo <"tbuffer_store_format_xyz", VReg_128>;
|
||||
def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Pseudo <"tbuffer_store_format_xyzw", VReg_128>;
|
||||
defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>;
|
||||
defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>;
|
||||
defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_128>;
|
||||
defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>;
|
||||
defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>;
|
||||
defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>;
|
||||
defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>;
|
||||
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
|
||||
|
||||
} // End let SubtargetPredicate = isGCN
|
||||
|
||||
|
@ -1093,22 +1226,98 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OF
|
|||
// MTBUF Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// TBUFFER_STORE_FORMAT_*, addr64=0
|
||||
class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF_Pseudo opcode> : Pat<
|
||||
(SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
|
||||
i32:$soffset, imm:$inst_offset, imm:$dfmt,
|
||||
imm:$nfmt, imm:$offen, imm:$idxen,
|
||||
imm:$glc, imm:$slc, imm:$tfe),
|
||||
(opcode
|
||||
$vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
|
||||
(as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
|
||||
(as_i1imm $slc), (as_i1imm $tfe), $soffset)
|
||||
>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// tbuffer_load/store_format patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
|
||||
def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
|
||||
def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
|
||||
def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
|
||||
multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||
string opcode> {
|
||||
def : Pat<
|
||||
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
|
||||
imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
|
||||
>;
|
||||
|
||||
def : Pat<
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
|
||||
imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
|
||||
>;
|
||||
|
||||
def : Pat<
|
||||
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
|
||||
>;
|
||||
|
||||
def : Pat<
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
$rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
|
||||
>;
|
||||
}
|
||||
|
||||
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, i32, "TBUFFER_LOAD_FORMAT_X">;
|
||||
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2i32, "TBUFFER_LOAD_FORMAT_XY">;
|
||||
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4i32, "TBUFFER_LOAD_FORMAT_XYZW">;
|
||||
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, f32, "TBUFFER_LOAD_FORMAT_X">;
|
||||
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
|
||||
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;
|
||||
|
||||
multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||
string opcode> {
|
||||
def : Pat<
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
|
||||
imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (as_i8imm $dfmt),
|
||||
(as_i8imm $nfmt), (as_i1imm $glc),
|
||||
(as_i1imm $slc), 0)
|
||||
>;
|
||||
|
||||
def : Pat<
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
|
||||
imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (as_i8imm $dfmt),
|
||||
(as_i8imm $nfmt), (as_i1imm $glc),
|
||||
(as_i1imm $slc), 0)
|
||||
>;
|
||||
|
||||
def : Pat<
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (as_i8imm $dfmt),
|
||||
(as_i8imm $nfmt), (as_i1imm $glc),
|
||||
(as_i1imm $slc), 0)
|
||||
>;
|
||||
|
||||
def : Pat<
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
|
||||
imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
|
||||
$vdata,
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
$rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
|
||||
>;
|
||||
}
|
||||
|
||||
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, i32, "TBUFFER_STORE_FORMAT_X">;
|
||||
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2i32, "TBUFFER_STORE_FORMAT_XY">;
|
||||
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4i32, "TBUFFER_STORE_FORMAT_XYZ">;
|
||||
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4i32, "TBUFFER_STORE_FORMAT_XYZW">;
|
||||
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, f32, "TBUFFER_STORE_FORMAT_X">;
|
||||
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY">;
|
||||
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4f32, "TBUFFER_STORE_FORMAT_XYZ">;
|
||||
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;
|
||||
|
||||
} // End let Predicates = [isGCN]
|
||||
|
||||
|
@ -1224,21 +1433,44 @@ def BUFFER_WBINVL1_si : MUBUF_Real_si <0x71, BUFFER_WBINVL1>;
|
|||
|
||||
class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
|
||||
MTBUF_Real<ps>,
|
||||
Enc64,
|
||||
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
|
||||
let AssemblerPredicate=isSICI;
|
||||
let DecoderNamespace="SICI";
|
||||
|
||||
bits<1> addr64;
|
||||
let Inst{15} = addr64;
|
||||
let Inst{11-0} = !if(ps.has_offset, offset, ?);
|
||||
let Inst{12} = ps.offen;
|
||||
let Inst{13} = ps.idxen;
|
||||
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
|
||||
let Inst{15} = ps.addr64;
|
||||
let Inst{18-16} = op;
|
||||
let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
|
||||
let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
|
||||
let Inst{31-26} = 0x3a; //encoding
|
||||
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
|
||||
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
|
||||
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
|
||||
let Inst{54} = !if(ps.has_slc, slc, ?);
|
||||
let Inst{55} = !if(ps.has_tfe, tfe, ?);
|
||||
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
|
||||
}
|
||||
|
||||
def TBUFFER_LOAD_FORMAT_XYZW_si : MTBUF_Real_si <3, TBUFFER_LOAD_FORMAT_XYZW>;
|
||||
def TBUFFER_STORE_FORMAT_X_si : MTBUF_Real_si <4, TBUFFER_STORE_FORMAT_X>;
|
||||
def TBUFFER_STORE_FORMAT_XY_si : MTBUF_Real_si <5, TBUFFER_STORE_FORMAT_XY>;
|
||||
def TBUFFER_STORE_FORMAT_XYZ_si : MTBUF_Real_si <6, TBUFFER_STORE_FORMAT_XYZ>;
|
||||
def TBUFFER_STORE_FORMAT_XYZW_si : MTBUF_Real_si <7, TBUFFER_STORE_FORMAT_XYZW>;
|
||||
multiclass MTBUF_Real_AllAddr_si<bits<3> op> {
|
||||
def _OFFSET_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
|
||||
def _ADDR64_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
|
||||
def _OFFEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
|
||||
def _IDXEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
|
||||
def _BOTHEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
|
||||
}
|
||||
|
||||
defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_si <0>;
|
||||
defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_si <1>;
|
||||
//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_si <2>;
|
||||
defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_si <3>;
|
||||
defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_si <4>;
|
||||
defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_si <5>;
|
||||
defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_si <6>;
|
||||
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CI
|
||||
|
@ -1350,16 +1582,39 @@ def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>;
|
|||
|
||||
class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> :
|
||||
MTBUF_Real<ps>,
|
||||
Enc64,
|
||||
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
|
||||
let AssemblerPredicate=isVI;
|
||||
let DecoderNamespace="VI";
|
||||
|
||||
let Inst{11-0} = !if(ps.has_offset, offset, ?);
|
||||
let Inst{12} = ps.offen;
|
||||
let Inst{13} = ps.idxen;
|
||||
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
|
||||
let Inst{18-15} = op;
|
||||
let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
|
||||
let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
|
||||
let Inst{31-26} = 0x3a; //encoding
|
||||
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
|
||||
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
|
||||
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
|
||||
let Inst{54} = !if(ps.has_slc, slc, ?);
|
||||
let Inst{55} = !if(ps.has_tfe, tfe, ?);
|
||||
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
|
||||
}
|
||||
|
||||
def TBUFFER_LOAD_FORMAT_XYZW_vi : MTBUF_Real_vi <3, TBUFFER_LOAD_FORMAT_XYZW>;
|
||||
def TBUFFER_STORE_FORMAT_X_vi : MTBUF_Real_vi <4, TBUFFER_STORE_FORMAT_X>;
|
||||
def TBUFFER_STORE_FORMAT_XY_vi : MTBUF_Real_vi <5, TBUFFER_STORE_FORMAT_XY>;
|
||||
def TBUFFER_STORE_FORMAT_XYZ_vi : MTBUF_Real_vi <6, TBUFFER_STORE_FORMAT_XYZ>;
|
||||
def TBUFFER_STORE_FORMAT_XYZW_vi : MTBUF_Real_vi <7, TBUFFER_STORE_FORMAT_XYZW>;
|
||||
multiclass MTBUF_Real_AllAddr_vi<bits<4> op> {
|
||||
def _OFFSET_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
|
||||
def _OFFEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
|
||||
def _IDXEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
|
||||
def _BOTHEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
|
||||
}
|
||||
|
||||
defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_vi <0>;
|
||||
defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_vi <1>;
|
||||
//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <2>;
|
||||
defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <3>;
|
||||
defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_vi <4>;
|
||||
defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_vi <5>;
|
||||
defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <6>;
|
||||
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <7>;
|
||||
|
|
|
@ -231,6 +231,24 @@ void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
|
|||
O << " vm";
|
||||
}
|
||||
|
||||
// Prints the dfmt operand as " dfmt:" followed by the value written by
// printU8ImmDecOperand. Nothing is printed when the immediate is zero.
// STI is not used in this body.
void AMDGPUInstPrinter::printDFMT(const MCInst *MI, unsigned OpNo,
|
||||
const MCSubtargetInfo &STI,
|
||||
raw_ostream &O) {
|
||||
if (MI->getOperand(OpNo).getImm()) {
|
||||
O << " dfmt:";
|
||||
printU8ImmDecOperand(MI, OpNo, O);
|
||||
}
|
||||
}
|
||||
|
||||
// Prints the nfmt operand as " nfmt:" followed by the value written by
// printU8ImmDecOperand. Nothing is printed when the immediate is zero.
// STI is not used in this body.
void AMDGPUInstPrinter::printNFMT(const MCInst *MI, unsigned OpNo,
|
||||
const MCSubtargetInfo &STI,
|
||||
raw_ostream &O) {
|
||||
if (MI->getOperand(OpNo).getImm()) {
|
||||
O << " nfmt:";
|
||||
printU8ImmDecOperand(MI, OpNo, O);
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
|
||||
const MCRegisterInfo &MRI) {
|
||||
switch (RegNo) {
|
||||
|
|
|
@ -88,6 +88,10 @@ private:
|
|||
const MCSubtargetInfo &STI, raw_ostream &O);
|
||||
void printExpVM(const MCInst *MI, unsigned OpNo,
|
||||
const MCSubtargetInfo &STI, raw_ostream &O);
|
||||
void printDFMT(const MCInst *MI, unsigned OpNo,
|
||||
const MCSubtargetInfo &STI, raw_ostream &O);
|
||||
void printNFMT(const MCInst *MI, unsigned OpNo,
|
||||
const MCSubtargetInfo &STI, raw_ostream &O);
|
||||
|
||||
void printRegOperand(unsigned RegNo, raw_ostream &O);
|
||||
void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
|
||||
|
|
|
@ -3295,6 +3295,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
SelectionDAG &DAG) const {
|
||||
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
SDLoc DL(Op);
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
||||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_atomic_inc:
|
||||
case Intrinsic::amdgcn_atomic_dec: {
|
||||
|
@ -3320,7 +3322,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Op.getOperand(5), // glc
|
||||
Op.getOperand(6) // slc
|
||||
};
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
|
||||
|
@ -3335,6 +3336,29 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
|
||||
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
|
||||
}
|
||||
case Intrinsic::amdgcn_tbuffer_load: {
  // Lower llvm.amdgcn.tbuffer.load to a TBUFFER_LOAD_FORMAT memory node.
  // Operand 1 holds the intrinsic ID, so the intrinsic's own arguments start
  // at operand 2 and are forwarded unchanged, in declaration order.
  SDValue Ops[] = {
    Op.getOperand(0),  // Chain
    Op.getOperand(2),  // rsrc
    Op.getOperand(3),  // vindex
    Op.getOperand(4),  // voffset
    Op.getOperand(5),  // soffset
    Op.getOperand(6),  // offset
    Op.getOperand(7),  // dfmt
    Op.getOperand(8),  // nfmt
    Op.getOperand(9),  // glc
    Op.getOperand(10)  // slc
  };

  // The memory operand has to describe the value being loaded, i.e. the
  // intrinsic's result type. Operand 2 is the rsrc descriptor, which is always
  // v4i32, so using its type would report a 16-byte, 16-byte-aligned access
  // even for the i32/f32 and v2i32/v2f32 overloads.
  EVT VT = Op.getValueType();

  MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo(),
    MachineMemOperand::MOLoad,
    VT.getStoreSize(), VT.getStoreSize());
  return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
                                 Op->getVTList(), Ops, VT, MMO);
}
|
||||
// Basic sample.
|
||||
case Intrinsic::amdgcn_image_sample:
|
||||
case Intrinsic::amdgcn_image_sample_cl:
|
||||
|
@ -3400,10 +3424,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
|
||||
SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
SDLoc DL(Op);
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_exp: {
|
||||
|
@ -3470,33 +3494,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain,
|
||||
Op.getOperand(2), Op.getOperand(3));
|
||||
}
|
||||
case AMDGPUIntrinsic::SI_tbuffer_store: {
|
||||
SDValue Ops[] = {
|
||||
Chain,
|
||||
Op.getOperand(2),
|
||||
Op.getOperand(3),
|
||||
Op.getOperand(4),
|
||||
Op.getOperand(5),
|
||||
Op.getOperand(6),
|
||||
Op.getOperand(7),
|
||||
Op.getOperand(8),
|
||||
Op.getOperand(9),
|
||||
Op.getOperand(10),
|
||||
Op.getOperand(11),
|
||||
Op.getOperand(12),
|
||||
Op.getOperand(13),
|
||||
Op.getOperand(14)
|
||||
};
|
||||
|
||||
EVT VT = Op.getOperand(3).getValueType();
|
||||
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
MachinePointerInfo(),
|
||||
MachineMemOperand::MOStore,
|
||||
VT.getStoreSize(), 4);
|
||||
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
|
||||
Op->getVTList(), Ops, VT, MMO);
|
||||
}
|
||||
case AMDGPUIntrinsic::AMDGPU_kill: {
|
||||
SDValue Src = Op.getOperand(2);
|
||||
if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Src)) {
|
||||
|
@ -3512,7 +3509,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
}
|
||||
case Intrinsic::amdgcn_s_barrier: {
|
||||
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
|
||||
const MachineFunction &MF = DAG.getMachineFunction();
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
|
||||
if (WGSize <= ST.getWavefrontSize())
|
||||
|
@ -3521,6 +3517,76 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
}
|
||||
return SDValue();
|
||||
};
|
||||
case AMDGPUIntrinsic::SI_tbuffer_store: {
  // Legacy llvm.SI.tbuffer.store. Operand 1 is the intrinsic ID; the rest are
  //   2 rsrc, 3 vdata, 4 num_channels, 5 vaddr, 6 soffset, 7 inst_offset,
  //   8 dfmt, 9 nfmt, 10 offen, 11 idxen, 12 glc, 13 slc, 14 tfe.
  // The single vaddr is routed to either vindex or voffset, depending on which
  // enable flag is set; the other one becomes a zero constant.
  const ConstantSDNode *OffsetEnable = cast<ConstantSDNode>(Op.getOperand(10));
  const ConstantSDNode *IndexEnable = cast<ConstantSDNode>(Op.getOperand(11));
  SDValue Addr = Op.getOperand(5);

  SDValue ZeroImm = DAG.getTargetConstant(0, DL, MVT::i32);

  assert(!(OffsetEnable->isOne() && IndexEnable->isOne()) &&
         "Legacy intrinsic doesn't support both offset and index - use new version");

  SDValue IndexArg = ZeroImm;
  if (IndexEnable->isOne())
    IndexArg = Addr;

  SDValue OffsetArg = ZeroImm;
  if (OffsetEnable->isOne())
    OffsetArg = Addr;

  // A three-channel store is emitted as its own node kind.
  const ConstantSDNode *ChannelCount = cast<ConstantSDNode>(Op.getOperand(4));
  unsigned NodeOpc = AMDGPUISD::TBUFFER_STORE_FORMAT;
  if (ChannelCount->getZExtValue() == 3)
    NodeOpc = AMDGPUISD::TBUFFER_STORE_FORMAT_X3;

  SDValue StoreOps[] = {
    Chain,
    Op.getOperand(3),  // vdata
    Op.getOperand(2),  // rsrc
    IndexArg,          // vindex
    OffsetArg,         // voffset
    Op.getOperand(6),  // soffset
    Op.getOperand(7),  // inst_offset
    Op.getOperand(8),  // dfmt
    Op.getOperand(9),  // nfmt
    Op.getOperand(12), // glc
    Op.getOperand(13)  // slc
  };

  // tfe is not forwarded to the node, so only a zero value is accepted.
  const ConstantSDNode *TFEFlag = cast<ConstantSDNode>(Op.getOperand(14));
  assert(TFEFlag->getZExtValue() == 0 &&
         "Value of tfe other than zero is unsupported");

  // The memory operand is sized from the stored data (operand 3 here).
  EVT DataVT = Op.getOperand(3).getValueType();
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
    MachinePointerInfo(),
    MachineMemOperand::MOStore,
    DataVT.getStoreSize(), 4);
  return DAG.getMemIntrinsicNode(NodeOpc, DL,
                                 Op->getVTList(), StoreOps, DataVT, StoreMMO);
}
|
||||
|
||||
case Intrinsic::amdgcn_tbuffer_store: {
  // Lower llvm.amdgcn.tbuffer.store to a TBUFFER_STORE_FORMAT memory node.
  // Operand 1 holds the intrinsic ID, so the intrinsic's own arguments start
  // at operand 2 and are forwarded unchanged, in declaration order.
  SDValue Ops[] = {
    Chain,
    Op.getOperand(2),  // vdata
    Op.getOperand(3),  // rsrc
    Op.getOperand(4),  // vindex
    Op.getOperand(5),  // voffset
    Op.getOperand(6),  // soffset
    Op.getOperand(7),  // offset
    Op.getOperand(8),  // dfmt
    Op.getOperand(9),  // nfmt
    Op.getOperand(10), // glc
    Op.getOperand(11)  // slc
  };

  // The memory operand has to describe the value being stored. For this
  // intrinsic vdata is operand 2; operand 3 is the rsrc descriptor (always
  // v4i32), whose type would report a 16-byte store for every overload.
  EVT VT = Op.getOperand(2).getValueType();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo(),
    MachineMemOperand::MOStore,
    VT.getStoreSize(), 4);
  return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
                                 Op->getVTList(), Ops, VT, MMO);
}
|
||||
|
||||
default:
|
||||
return Op;
|
||||
}
|
||||
|
|
|
@ -41,25 +41,41 @@ def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
|
|||
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
|
||||
>;
|
||||
|
||||
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
|
||||
SDTypeProfile<0, 13,
|
||||
[SDTCisVT<0, v4i32>, // rsrc(SGPR)
|
||||
SDTCisVT<1, iAny>, // vdata(VGPR)
|
||||
SDTCisVT<2, i32>, // num_channels(imm)
|
||||
SDTCisVT<3, i32>, // vaddr(VGPR)
|
||||
def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT",
|
||||
SDTypeProfile<1, 9,
|
||||
[ // vdata
|
||||
SDTCisVT<1, v4i32>, // rsrc
|
||||
SDTCisVT<2, i32>, // vindex(VGPR)
|
||||
SDTCisVT<3, i32>, // voffset(VGPR)
|
||||
SDTCisVT<4, i32>, // soffset(SGPR)
|
||||
SDTCisVT<5, i32>, // inst_offset(imm)
|
||||
SDTCisVT<5, i32>, // offset(imm)
|
||||
SDTCisVT<6, i32>, // dfmt(imm)
|
||||
SDTCisVT<7, i32>, // nfmt(imm)
|
||||
SDTCisVT<8, i32>, // offen(imm)
|
||||
SDTCisVT<9, i32>, // idxen(imm)
|
||||
SDTCisVT<10, i32>, // glc(imm)
|
||||
SDTCisVT<11, i32>, // slc(imm)
|
||||
SDTCisVT<12, i32> // tfe(imm)
|
||||
SDTCisVT<8, i32>, // glc(imm)
|
||||
SDTCisVT<9, i32> // slc(imm)
|
||||
]>,
|
||||
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]
|
||||
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
|
||||
>;
|
||||
|
||||
// Type profile for the tbuffer store nodes: zero results and ten operands.
// Operand 0 (vdata) carries no type constraint; operands 1-9 are constrained
// below. Referenced by SItbuffer_store and SItbuffer_store_x3.
def SDTtbuffer_store : SDTypeProfile<0, 10,
|
||||
[ // vdata
|
||||
SDTCisVT<1, v4i32>, // rsrc
|
||||
SDTCisVT<2, i32>, // vindex(VGPR)
|
||||
SDTCisVT<3, i32>, // voffset(VGPR)
|
||||
SDTCisVT<4, i32>, // soffset(SGPR)
|
||||
SDTCisVT<5, i32>, // offset(imm)
|
||||
SDTCisVT<6, i32>, // dfmt(imm)
|
||||
SDTCisVT<7, i32>, // nfmt(imm)
|
||||
SDTCisVT<8, i32>, // glc(imm)
|
||||
SDTCisVT<9, i32> // slc(imm)
|
||||
]>;
|
||||
|
||||
// DAG node for AMDGPUISD::TBUFFER_STORE_FORMAT, typed by SDTtbuffer_store.
// Flagged as a possible store that carries a memory operand and a chain.
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
|
||||
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
|
||||
// DAG node for AMDGPUISD::TBUFFER_STORE_FORMAT_X3, with the same type profile
// and flags as SItbuffer_store. The legacy llvm.SI.tbuffer.store lowering
// selects this opcode when num_channels is 3.
def SItbuffer_store_x3 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_X3",
|
||||
SDTtbuffer_store,
|
||||
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
|
||||
|
||||
def SDTBufferLoad : SDTypeProfile<1, 5,
|
||||
[ // vdata
|
||||
SDTCisVT<1, v4i32>, // rsrc
|
||||
|
@ -547,6 +563,9 @@ def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
|
|||
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
|
||||
def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
|
||||
|
||||
// Named operands for the tbuffer dfmt / nfmt immediates, declared through
// NamedOperandU8 with their own match classes ("DFMT" and "NFMT").
def DFMT : NamedOperandU8<"DFMT", NamedMatchClass<"DFMT">>;
|
||||
def NFMT : NamedOperandU8<"NFMT", NamedMatchClass<"NFMT">>;
|
||||
|
||||
def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
|
||||
|
||||
def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
|
||||
|
||||
;CHECK-LABEL: {{^}}test1:
|
||||
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
|
||||
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:32 glc slc
|
||||
define amdgpu_vs void @test1(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
|
@ -11,8 +11,38 @@ define amdgpu_vs void @test1(i32 %a1, i32 %vaddr) {
|
|||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}test1_idx:
|
||||
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offset:32 glc slc
|
||||
define amdgpu_vs void @test1_idx(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 0, i32 1, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}test1_scalar_offset:
|
||||
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, {{s[0-9]+}} idxen offset:32 glc slc
|
||||
define amdgpu_vs void @test1_scalar_offset(i32 %a1, i32 %vaddr, i32 inreg %soffset) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
i32 4, i32 %vaddr, i32 %soffset, i32 32, i32 14, i32 4, i32 0, i32 1, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}test1_no_glc_slc:
|
||||
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:32
|
||||
define amdgpu_vs void @test1_no_glc_slc(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 0,
|
||||
i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}test2:
|
||||
;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
|
||||
;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 offen offset:24 glc slc
|
||||
define amdgpu_vs void @test2(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
|
@ -22,7 +52,7 @@ define amdgpu_vs void @test2(i32 %a1, i32 %vaddr) {
|
|||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}test3:
|
||||
;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
|
||||
;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:11, nfmt:4, 0 offen offset:16 glc slc
|
||||
define amdgpu_vs void @test3(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
|
||||
|
@ -32,7 +62,7 @@ define amdgpu_vs void @test3(i32 %a1, i32 %vaddr) {
|
|||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}test4:
|
||||
;CHECK: tbuffer_store_format_x {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
|
||||
;CHECK: tbuffer_store_format_x {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:4, nfmt:4, 0 offen offset:8 glc slc
|
||||
define amdgpu_vs void @test4(i32 %vdata, i32 %vaddr) {
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
|
||||
i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_load:
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:3, 0 glc
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0 slc
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0
|
||||
; GCN: s_waitcnt
|
||||
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(<4 x i32> inreg) {
|
||||
main_body:
|
||||
%vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 4, i1 0, i1 0)
|
||||
%vdata_glc = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 3, i1 1, i1 0)
|
||||
%vdata_slc = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 1, i1 0, i1 1)
|
||||
%vdata_f32 = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 1, i1 0, i1 0)
|
||||
%vdata.f = bitcast <4 x i32> %vdata to <4 x float>
|
||||
%vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
|
||||
%vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
|
||||
%r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %vdata.f, 0
|
||||
%r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
|
||||
%r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
|
||||
%r3 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r2, <4 x float> %vdata_f32, 3
|
||||
ret {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r3
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_load_immoffs:
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offset:42
|
||||
define amdgpu_vs <4 x float> @tbuffer_load_immoffs(<4 x i32> inreg) {
|
||||
main_body:
|
||||
%vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 42, i32 14, i32 4, i1 0, i1 0)
|
||||
%vdata.f = bitcast <4 x i32> %vdata to <4 x float>
|
||||
ret <4 x float> %vdata.f
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_load_immoffs_large:
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:2, 61 offset:4095
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:3, {{s[0-9]+}} offset:73
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, {{s[0-9]+}} offset:1
|
||||
; GCN: s_waitcnt
|
||||
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
|
||||
%vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 61, i32 4095, i32 15, i32 2, i1 0, i1 0)
|
||||
%vdata_glc = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 %soffs, i32 73, i32 14, i32 3, i1 0, i1 0)
|
||||
%vdata_slc = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 %soffs, i32 1, i32 13, i32 4, i1 0, i1 0)
|
||||
%vdata.f = bitcast <4 x i32> %vdata to <4 x float>
|
||||
%vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
|
||||
%vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
|
||||
%r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %vdata.f, 0
|
||||
%r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
|
||||
%r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
|
||||
ret {<4 x float>, <4 x float>, <4 x float>} %r2
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_load_idx:
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen
|
||||
define amdgpu_vs <4 x float> @tbuffer_load_idx(<4 x i32> inreg, i32 %vindex) {
|
||||
main_body:
|
||||
%vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 0, i32 0, i32 0, i32 14, i32 4, i1 0, i1 0)
|
||||
%vdata.f = bitcast <4 x i32> %vdata to <4 x float>
|
||||
ret <4 x float> %vdata.f
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_load_ofs:
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen
|
||||
define amdgpu_vs <4 x float> @tbuffer_load_ofs(<4 x i32> inreg, i32 %voffs) {
|
||||
main_body:
|
||||
%vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %voffs, i32 0, i32 0, i32 14, i32 4, i1 0, i1 0)
|
||||
%vdata.f = bitcast <4 x i32> %vdata to <4 x float>
|
||||
ret <4 x float> %vdata.f
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_load_ofs_imm:
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:52
|
||||
define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(<4 x i32> inreg, i32 %voffs) {
|
||||
main_body:
|
||||
%vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %voffs, i32 0, i32 52, i32 14, i32 4, i1 0, i1 0)
|
||||
%vdata.f = bitcast <4 x i32> %vdata to <4 x float>
|
||||
ret <4 x float> %vdata.f
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_load_both:
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen
|
||||
define amdgpu_vs <4 x float> @tbuffer_load_both(<4 x i32> inreg, i32 %vindex, i32 %voffs) {
|
||||
main_body:
|
||||
%vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 %voffs, i32 0, i32 0, i32 14, i32 4, i1 0, i1 0)
|
||||
%vdata.f = bitcast <4 x i32> %vdata to <4 x float>
|
||||
ret <4 x float> %vdata.f
|
||||
}
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}buffer_load_xy:
|
||||
; GCN: tbuffer_load_format_xy {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0
|
||||
define amdgpu_vs <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
|
||||
%vdata = call <2 x i32> @llvm.amdgcn.tbuffer.load.v2i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0, i32 13, i32 4, i1 0, i1 0)
|
||||
%vdata.f = bitcast <2 x i32> %vdata to <2 x float>
|
||||
ret <2 x float> %vdata.f
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}buffer_load_x:
|
||||
; GCN: tbuffer_load_format_x {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0
|
||||
define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
|
||||
%vdata = call i32 @llvm.amdgcn.tbuffer.load.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0, i32 13, i32 4, i1 0, i1 0)
|
||||
%vdata.f = bitcast i32 %vdata to float
|
||||
ret float %vdata.f
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
|
||||
declare <2 x i32> @llvm.amdgcn.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
|
||||
declare <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
|
||||
declare <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
|
|
@ -0,0 +1,110 @@
|
|||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_store:
|
||||
; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:12, nfmt:2, 0
|
||||
; GCN: tbuffer_store_format_xyzw v[4:7], off, s[0:3], dfmt:13, nfmt:3, 0 glc
|
||||
; GCN: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0 slc
|
||||
; GCN: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0
|
||||
define amdgpu_ps void @tbuffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
|
||||
main_body:
|
||||
%in1 = bitcast <4 x float> %1 to <4 x i32>
|
||||
%in2 = bitcast <4 x float> %2 to <4 x i32>
|
||||
%in3 = bitcast <4 x float> %3 to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 12, i32 2, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in2, <4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 13, i32 3, i1 1, i1 0)
|
||||
call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 4, i1 0, i1 1)
|
||||
call void @llvm.amdgcn.tbuffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 4, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_store_immoffs:
|
||||
; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, 0 offset:42
|
||||
define amdgpu_ps void @tbuffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
|
||||
main_body:
|
||||
%in1 = bitcast <4 x float> %1 to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 0, i32 0, i32 42, i32 5, i32 7, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_store_scalar_and_imm_offs:
|
||||
; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, {{s[0-9]+}} offset:42
|
||||
define amdgpu_ps void @tbuffer_store_scalar_and_imm_offs(<4 x i32> inreg, <4 x float> %vdata, i32 inreg %soffset) {
|
||||
main_body:
|
||||
%in1 = bitcast <4 x float> %vdata to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 0, i32 %soffset, i32 42, i32 5, i32 7, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}buffer_store_idx:
|
||||
; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:2, 0 idxen
|
||||
define amdgpu_ps void @buffer_store_idx(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex) {
|
||||
main_body:
|
||||
%in1 = bitcast <4 x float> %vdata to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 %vindex, i32 0, i32 0, i32 0, i32 15, i32 2, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}buffer_store_ofs:
|
||||
; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:3, nfmt:7, 0 offen
|
||||
define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float> %vdata, i32 %voffset) {
|
||||
main_body:
|
||||
%in1 = bitcast <4 x float> %vdata to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 %voffset, i32 0, i32 0, i32 3, i32 7, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}buffer_store_both:
|
||||
; GCN: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], dfmt:6, nfmt:4, 0 idxen offen
|
||||
define amdgpu_ps void @buffer_store_both(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex, i32 %voffset) {
|
||||
main_body:
|
||||
%in1 = bitcast <4 x float> %vdata to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 6, i32 4, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Ideally, the register allocator would avoid the wait here
|
||||
;
|
||||
; GCN-LABEL: {{^}}buffer_store_wait:
|
||||
; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:3, 0 idxen
|
||||
; GCN: s_waitcnt expcnt(0)
|
||||
; GCN: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], dfmt:16, nfmt:2, 0 idxen
|
||||
define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex.1, i32 %vindex.2, i32 %vindex.3) {
|
||||
main_body:
|
||||
%in1 = bitcast <4 x float> %vdata to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 %vindex.1, i32 0, i32 0, i32 0, i32 15, i32 3, i1 0, i1 0)
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %vindex.2, i32 0, i1 0, i1 0)
|
||||
%data.i = bitcast <4 x float> %data to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %data.i, <4 x i32> %0, i32 %vindex.3, i32 0, i32 0, i32 0, i32 16, i32 2, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}buffer_store_x1:
|
||||
; GCN: tbuffer_store_format_x v0, v1, s[0:3], dfmt:13, nfmt:7, 0 idxen
|
||||
define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
|
||||
main_body:
|
||||
%data.i = bitcast float %data to i32
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 %data.i, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 13, i32 7, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}buffer_store_x2:
|
||||
; GCN: tbuffer_store_format_xy v[0:1], v2, s[0:3], dfmt:1, nfmt:2, 0 idxen
|
||||
define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %vindex) {
|
||||
main_body:
|
||||
%data.i = bitcast <2 x float> %data to <2 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.v2i32(<2 x i32> %data.i, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
|
||||
declare void @llvm.amdgcn.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
|
||||
declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
|
||||
declare void @llvm.amdgcn.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
|
||||
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
|
@ -26,11 +26,11 @@ main_body:
|
|||
%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp7, i32 1
|
||||
%tmp10 = insertelement <4 x i32> %tmp9, i32 undef, i32 2
|
||||
%tmp11 = insertelement <4 x i32> %tmp10, i32 undef, i32 3
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %tmp11, i32 4, i32 undef, i32 %arg, i32 0, i32 14, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %tmp11, <4 x i32> undef, i32 undef, i32 0, i32 %arg, i32 0, i32 14, i32 4, i1 1, i1 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
|
|
@ -11,13 +11,13 @@ define amdgpu_vs void @test1(i32 %v) #0 {
|
|||
|
||||
store i32 %v, i32 addrspace(3)* %p0
|
||||
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %v, i32 1, i32 undef, i32 undef, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 %v, <4 x i32> undef, i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i1 1, i1 0)
|
||||
|
||||
%w = load i32, i32 addrspace(3)* %p0
|
||||
store i32 %w, i32 addrspace(3)* %p1
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
|
|
@ -62,7 +62,8 @@ main_body:
|
|||
%tmp2 = shl i32 %6, 2
|
||||
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
|
||||
%tmp4 = add i32 %6, 16
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp3, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
%tmp1.4xi32 = bitcast <16 x i8> %tmp1 to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -80,7 +81,8 @@ main_body:
|
|||
%tmp2 = shl i32 %6, 2
|
||||
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
|
||||
%tmp4 = add i32 %6, 16
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp3, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
%tmp1.4xi32 = bitcast <16 x i8> %tmp1 to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -175,6 +177,6 @@ define amdgpu_kernel void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
|
|||
}
|
||||
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
|
||||
|
||||
attributes #0 = { nounwind readonly }
|
||||
|
|
|
@ -25,29 +25,29 @@ main_body:
|
|||
%array_vector10 = insertelement <4 x float> %array_vector9, float 0.000000e+00, i32 2
|
||||
%array_vector11 = insertelement <4 x float> %array_vector10, float undef, i32 3
|
||||
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> undef, i32 undef, i32 4864, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %tmp3, i32 1, i32 36, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 36, i32 4, i32 4, i1 1, i1 1)
|
||||
%bc = bitcast <4 x float> %array_vector3 to <4 x i32>
|
||||
%tmp4 = extractelement <4 x i32> %bc, i32 undef
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %tmp4, i32 1, i32 48, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp4, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 48, i32 4, i32 4, i1 1, i1 1)
|
||||
%bc49 = bitcast <4 x float> %array_vector11 to <4 x i32>
|
||||
%tmp5 = extractelement <4 x i32> %bc49, i32 undef
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %tmp5, i32 1, i32 72, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp5, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 72, i32 4, i32 4, i1 1, i1 1)
|
||||
%array_vector21 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %tmp, i32 1
|
||||
%array_vector22 = insertelement <4 x float> %array_vector21, float undef, i32 2
|
||||
%array_vector23 = insertelement <4 x float> %array_vector22, float undef, i32 3
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 undef, i32 1, i32 28, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 28, i32 4, i32 4, i1 1, i1 1)
|
||||
%bc52 = bitcast <4 x float> %array_vector23 to <4 x i32>
|
||||
%tmp6 = extractelement <4 x i32> %bc52, i32 undef
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %tmp6, i32 1, i32 64, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 undef, i32 1, i32 20, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 undef, i32 1, i32 56, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 undef, i32 1, i32 92, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp6, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 64, i32 4, i32 4, i1 1, i1 1)
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 20, i32 4, i32 4, i1 1, i1 1)
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 56, i32 4, i32 4, i1 1, i1 1)
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 92, i32 4, i32 4, i1 1, i1 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #2
|
||||
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #3
|
||||
declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #3
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="tonga" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s
|
||||
|
||||
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
|
||||
declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #2
|
||||
|
||||
|
@ -258,9 +258,8 @@ define amdgpu_kernel void @reorder_global_offsets_addr64_soffset0(i32 addrspace(
|
|||
; %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
|
||||
|
||||
; %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
; call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
; i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
|
||||
; i32 1, i32 0)
|
||||
; call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %vdata, <4 x i32> undef,
|
||||
; i32 %vaddr, i32 0, i32 0, i32 32, i32 14, i32 4, i1 1, i1 1)
|
||||
|
||||
; %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
|
||||
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICI %s
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Test for dfmt and nfmt (tbuffer only)
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
tbuffer_load_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1
|
||||
// SICI: tbuffer_load_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x78,0xe9,0x00,0x01,0x01,0x01]
|
||||
// VI: tbuffer_load_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x78,0xe9,0x00,0x01,0x01,0x01]
|
||||
|
||||
tbuffer_load_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
|
||||
// SICI: tbuffer_load_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x79,0xe9,0x00,0x01,0x01,0x01]
|
||||
// VI: tbuffer_load_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x78,0xe9,0x00,0x01,0x01,0x01]
|
||||
|
||||
tbuffer_load_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1
|
||||
// SICI: tbuffer_load_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7b,0xe9,0x00,0x01,0x01,0x01]
|
||||
// VI: tbuffer_load_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x79,0xe9,0x00,0x01,0x01,0x01]
|
||||
|
||||
tbuffer_store_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1
|
||||
// SICI: tbuffer_store_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7c,0xe9,0x00,0x01,0x01,0x01]
|
||||
// VI: tbuffer_store_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7a,0xe9,0x00,0x01,0x01,0x01]
|
||||
|
||||
tbuffer_store_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
|
||||
// SICI: tbuffer_store_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7d,0xe9,0x00,0x01,0x01,0x01]
|
||||
// VI: tbuffer_store_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7a,0xe9,0x00,0x01,0x01,0x01]
|
||||
|
||||
tbuffer_store_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1
|
||||
// SICI: tbuffer_store_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7f,0xe9,0x00,0x01,0x01,0x01]
|
||||
// VI: tbuffer_store_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x01,0x01,0x01]
|
||||
|
||||
tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1
|
||||
// SICI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1 ; encoding: [0x00,0x00,0x7f,0xe9,0x00,0x01,0x1d,0x71]
|
||||
// VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1 ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x01,0x1d,0x71]
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
# RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s -check-prefix=VI
|
||||
|
||||
# VI: tbuffer_load_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x78,0xe9,0x00,0x01,0x01,0x01]
|
||||
0x00 0x00 0x78 0xe9 0x00 0x01 0x01 0x01
|
||||
|
||||
# VI: tbuffer_load_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x78,0xe9,0x00,0x01,0x01,0x01]
|
||||
0x00 0x80 0x78 0xe9 0x00 0x01 0x01 0x01
|
||||
|
||||
# VI: tbuffer_load_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x79,0xe9,0x00,0x01,0x01,0x01]
|
||||
0x00 0x80 0x79 0xe9 0x00 0x01 0x01 0x01
|
||||
|
||||
# VI: tbuffer_store_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7a,0xe9,0x00,0x01,0x01,0x01]
|
||||
0x00 0x00 0x7a 0xe9 0x00 0x01 0x01 0x01
|
||||
|
||||
# VI: tbuffer_store_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7a,0xe9,0x00,0x01,0x01,0x01]
|
||||
0x00 0x80 0x7a 0xe9 0x00 0x01 0x01 0x01
|
||||
|
||||
# VI: tbuffer_store_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x01,0x01,0x01]
|
||||
0x00 0x80 0x7b 0xe9 0x00 0x01 0x01 0x01
|
||||
|
||||
# VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1 ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x01,0x1d,0x71]
|
||||
0x00 0x80 0x7b 0xe9 0x00 0x01 0x1d 0x71
|
Loading…
Reference in New Issue