AMDGPU: Select MIMG instructions manually in SITargetLowering

Summary:
The TableGen patterns for the image intrinsics are hitting their limits: for
D16 we already have to manually pre-lower the packing of data values, and we
will eventually have to do the same for A16.

Since there is already some custom C++ code anyway, it is arguably easier
to just do everything in C++, now that we can use the beefed-up generic
tables backend of TableGen to provide all the required metadata and map
intrinsics to corresponding opcodes. With this approach, all image
intrinsic lowering happens in SITargetLowering::lowerImage. That code is
dense due to all the cases that it handles, but it should still be easier
to follow than what we had before, by virtue of it all being done in a
single location, and by virtue of not relying on the TableGen pattern
magic that very few people really understand.
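
To make the table-driven flow concrete, here is a condensed sketch (illustration
only, not part of the patch; the wrapper function and its parameters are
hypothetical) of how lowerImage consults the generated tables. The helpers and
structs (getImageDimIntrinsicInfo, getMIMGBaseOpcodeInfo, getMIMGDimInfo,
getMIMGOpcode) are the ones introduced by the searchable-table changes below:

  // Sketch only: map an image intrinsic ID to a concrete MIMG opcode via the
  // generated tables. lowerImage does this inline rather than in a helper.
  static int selectMIMGOpcodeSketch(unsigned IntrinsicID, bool IsGfx8Plus,
                                    unsigned NumVDataDwords,
                                    unsigned NumVAddrDwords) {
    using namespace llvm::AMDGPU;
    // Intrinsic -> base opcode and dimension metadata.
    const ImageDimIntrinsicInfo *Intr = getImageDimIntrinsicInfo(IntrinsicID);
    if (!Intr)
      return -1; // not a dimension-aware image intrinsic
    const MIMGBaseOpcodeInfo *BaseOpcode =
        getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
    const MIMGDimInfo *DimInfo = getMIMGDimInfo(Intr->Dim);
    // lowerImage uses BaseOpcode and DimInfo to compute dmask, the number of
    // address words, and the flag operands; here we only pick the final
    // instruction for the chosen encoding and operand sizes.
    (void)BaseOpcode;
    (void)DimInfo;
    int Opcode = -1;
    if (IsGfx8Plus)
      Opcode = getMIMGOpcode(Intr->BaseOpcode, MIMGEncGfx8,
                             NumVDataDwords, NumVAddrDwords);
    if (Opcode == -1)
      Opcode = getMIMGOpcode(Intr->BaseOpcode, MIMGEncGfx6,
                             NumVDataDwords, NumVAddrDwords);
    return Opcode;
  }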

This means that we will have MachineSDNodes with MIMG instructions
during DAG combining, but that seems alright: previously we had
intrinsic nodes instead, but those are similarly opaque to the generic
CodeGen infrastructure, and the final pattern matching just did a 1:1
translation to machine instructions anyway. If anything, the fact that
we now merge the address words into a vector before DAG combine should
be an advantage.
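
As a small illustration of that last point (a hypothetical standalone sketch;
the real code is the getBuildDwordsVector helper added alongside lowerImage
below), three 32-bit address words get padded up to the next supported vector
width, so later combines see one vector operand instead of loose scalars:

  // Hypothetical example: pack three f32 address words (s, t, slice) into the
  // v4f32 vaddr operand that the selected MIMG MachineSDNode consumes.
  static SDValue packAddrWordsExample(SelectionDAG &DAG, const SDLoc &DL,
                                      SDValue S, SDValue T, SDValue Slice) {
    SDValue Elts[] = {S, T, Slice, DAG.getUNDEF(MVT::f32)}; // pad 3 -> 4 dwords
    return DAG.getBuildVector(MVT::v4f32, DL, Elts);
  }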

Change-Id: I417f26bd88f54ce9781c1668acc01f3f99774de6

Reviewers: arsenm, rampitec, rtaylor, tstellar

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D48017

llvm-svn: 335228
Author: Nicolai Haehnle
Date:   2018-06-21 13:36:57 +00:00
Parent: 0ab200b6c9
Commit: 7a9c03f484
10 changed files with 389 additions and 240 deletions

@@ -606,6 +606,7 @@ class arglistconcat<list<list<AMDGPUArg>> arglists, int shift = 0> {
// Represent texture/image types / dimensionality.
class AMDGPUDimProps<string name, list<string> coord_names, list<string> slice_names> {
AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME);
string Name = name; // e.g. "2darraymsaa"
bit DA = 0; // DA bit in MIMG encoding
@@ -617,6 +618,9 @@ class AMDGPUDimProps<string name, list<string> coord_names, list<string> slice_n
makeArgList<!listconcat(!foreach(name, coord_names, "d" # name # "dh"),
!foreach(name, coord_names, "d" # name # "dv")),
llvm_anyfloat_ty>.ret;
bits<8> NumCoords = !size(CoordSliceArgs);
bits<8> NumGradients = !size(GradientArgs);
}
def AMDGPUDim1D : AMDGPUDimProps<"1d", ["s"], []>;
@@ -867,22 +871,18 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
}
defm int_amdgcn_image_getlod : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>;
}
//////////////////////////////////////////////////////////////////////////
// getresinfo intrinsics (separate due to D16)
//////////////////////////////////////////////////////////////////////////
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimGetResInfoIntrinsics = {
//////////////////////////////////////////////////////////////////////////
// getresinfo intrinsics
//////////////////////////////////////////////////////////////////////////
foreach dim = AMDGPUDims.All in {
def !strconcat("int_amdgcn_image_getresinfo_", dim.Name)
: AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>;
}
}
//////////////////////////////////////////////////////////////////////////
// gather4 intrinsics
//////////////////////////////////////////////////////////////////////////
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimGatherIntrinsics = {
//////////////////////////////////////////////////////////////////////////
// gather4 intrinsics
//////////////////////////////////////////////////////////////////////////
foreach sample = AMDGPUSampleVariantsNoGradients in {
foreach dim = [AMDGPUDim2D, AMDGPUDimCube, AMDGPUDim2DArray] in {
def int_amdgcn_image_gather4 # sample.LowerCaseMod # _ # dim.Name:

@@ -28,10 +28,9 @@ using namespace llvm;
namespace llvm {
namespace AMDGPU {
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
}
}

@@ -69,6 +69,13 @@ struct D16ImageDimIntrinsic {
};
const D16ImageDimIntrinsic *lookupD16ImageDimIntrinsic(unsigned Intr);
struct ImageDimIntrinsicInfo {
unsigned Intr;
unsigned BaseOpcode;
MIMGDim Dim;
};
const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);
} // end AMDGPU namespace
} // End llvm namespace

@@ -28,8 +28,6 @@ def RsrcIntrinsics : GenericTable {
foreach intr = !listconcat(AMDGPUBufferIntrinsics,
AMDGPUImageIntrinsics,
AMDGPUImageDimIntrinsics,
AMDGPUImageDimGatherIntrinsics,
AMDGPUImageDimGetResInfoIntrinsics,
AMDGPUImageDimAtomicIntrinsics) in {
def : RsrcIntrinsic<!cast<AMDGPURsrcIntrinsic>(intr)>;
}
@@ -91,22 +89,3 @@ def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
foreach intr = AMDGPUImageDimAtomicIntrinsics in
def : SourceOfDivergence<intr>;
class D16ImageDimIntrinsic<AMDGPUImageDimIntrinsic intr> {
Intrinsic Intr = intr;
code D16HelperIntr =
!cast<code>("AMDGPUIntrinsic::SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name);
}
def D16ImageDimIntrinsics : GenericTable {
let FilterClass = "D16ImageDimIntrinsic";
let Fields = ["Intr", "D16HelperIntr"];
let PrimaryKey = ["Intr"];
let PrimaryKeyName = "lookupD16ImageDimIntrinsic";
}
foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
AMDGPUImageDimGatherIntrinsics) in {
def : D16ImageDimIntrinsic<intr>;
}

@@ -27,6 +27,10 @@ def MIMGEncoding : GenericEnum {
// vdata/vaddr size.
class MIMGBaseOpcode {
MIMGBaseOpcode BaseOpcode = !cast<MIMGBaseOpcode>(NAME);
bit Store = 0;
bit Atomic = 0;
bit AtomicX2 = 0; // (f)cmpswap
bit Sampler = 0;
bits<8> NumExtraArgs = 0;
bit Gradients = 0;
bit Coordinates = 1;
@@ -41,14 +45,29 @@ def MIMGBaseOpcode : GenericEnum {
def MIMGBaseOpcodesTable : GenericTable {
let FilterClass = "MIMGBaseOpcode";
let CppTypeName = "MIMGBaseOpcodeInfo";
let Fields = ["BaseOpcode", "NumExtraArgs", "Gradients", "Coordinates",
"LodOrClampOrMip", "HasD16"];
let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
"NumExtraArgs", "Gradients", "Coordinates", "LodOrClampOrMip",
"HasD16"];
GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
let PrimaryKey = ["BaseOpcode"];
let PrimaryKeyName = "getMIMGBaseOpcodeInfo";
}
def MIMGDim : GenericEnum {
let FilterClass = "AMDGPUDimProps";
}
def MIMGDimInfoTable : GenericTable {
let FilterClass = "AMDGPUDimProps";
let CppTypeName = "MIMGDimInfo";
let Fields = ["Dim", "NumCoords", "NumGradients", "DA"];
GenericEnum TypeOf_Dim = MIMGDim;
let PrimaryKey = ["Dim"];
let PrimaryKeyName = "getMIMGDimInfo";
}
class mimg <bits<7> si, bits<7> vi = si> {
field bits<7> SI = si;
field bits<7> VI = vi;
@@ -188,6 +207,7 @@ multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
multiclass MIMG_Store <bits<7> op, string asm, bit has_d16, bit mip = 0> {
def "" : MIMGBaseOpcode {
let Store = 1;
let LodOrClampOrMip = mip;
let HasD16 = has_d16;
}
@@ -263,7 +283,10 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimg op, string asm,
}
multiclass MIMG_Atomic <mimg op, string asm, bit isCmpSwap = 0> { // 64-bit atomics
def "" : MIMGBaseOpcode;
def "" : MIMGBaseOpcode {
let Atomic = 1;
let AtomicX2 = isCmpSwap;
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
// _V* variants have different dst size, but the size is encoded implicitly,
@@ -309,6 +332,7 @@ multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm, RegisterClass dst_rc
class MIMG_Sampler_BaseOpcode<AMDGPUSampleVariant sample>
: MIMGBaseOpcode {
let Sampler = 1;
let NumExtraArgs = !size(sample.ExtraAddrArgs);
let Gradients = sample.Gradients;
let LodOrClampOrMip = !ne(sample.LodOrClamp, "");
@@ -458,188 +482,30 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, AMDGPUSample_c_cd_cl_o>;
//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
/********** ============================== **********/
/********** Dimension-aware image patterns **********/
/********** ============================== **********/
/********** ========================================= **********/
/********** Table of dimension-aware image intrinsics **********/
/********** ========================================= **********/
class getDwordsType<int dwords> {
int NumDwords = dwords;
string suffix = !if(!lt(dwords, 1), ?,
!if(!eq(dwords, 1), "_V1",
!if(!eq(dwords, 2), "_V2",
!if(!le(dwords, 4), "_V4",
!if(!le(dwords, 8), "_V8",
!if(!le(dwords, 16), "_V16", ?))))));
ValueType VT = !if(!lt(dwords, 1), ?,
!if(!eq(dwords, 1), f32,
!if(!eq(dwords, 2), v2f32,
!if(!le(dwords, 4), v4f32,
!if(!le(dwords, 8), v8f32,
!if(!le(dwords, 16), v16f32, ?))))));
RegisterClass VReg = !if(!lt(dwords, 1), ?,
!if(!eq(dwords, 1), VGPR_32,
!if(!eq(dwords, 2), VReg_64,
!if(!le(dwords, 4), VReg_128,
!if(!le(dwords, 8), VReg_256,
!if(!le(dwords, 16), VReg_512, ?))))));
class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
Intrinsic Intr = I;
MIMGBaseOpcode BaseOpcode = !cast<MIMGBaseOpcode>(!strconcat("IMAGE_", I.P.OpMod));
AMDGPUDimProps Dim = I.P.Dim;
}
class makeRegSequence_Fold<int i, dag d> {
int idx = i;
dag lhs = d;
}
def ImageDimIntrinsicTable : GenericTable {
let FilterClass = "ImageDimIntrinsicInfo";
let Fields = ["Intr", "BaseOpcode", "Dim"];
GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
GenericEnum TypeOf_Dim = MIMGDim;
// Generate a dag node which returns a vector register of class RC into which
// the source operands given by names have been inserted (assuming that each
// name corresponds to an operand whose size is equal to a subregister).
class makeRegSequence<ValueType vt, RegisterClass RC, list<string> names> {
dag ret =
!if(!eq(!size(names), 1),
!dag(COPY_TO_REGCLASS, [?, RC], [names[0], ?]),
!foldl(makeRegSequence_Fold<0, (vt (IMPLICIT_DEF))>, names, f, name,
makeRegSequence_Fold<
!add(f.idx, 1),
!con((INSERT_SUBREG f.lhs),
!dag(INSERT_SUBREG, [?, !cast<SubRegIndex>("sub"#f.idx)],
[name, ?]))>).lhs);
}
class ImageDimPattern<AMDGPUImageDimIntrinsic I,
string dop, ValueType dty, bit d16,
string suffix = ""> : GCNPat<(undef), (undef)> {
list<AMDGPUArg> AddrArgs = I.P.AddrDefaultArgs;
getDwordsType AddrDwords = getDwordsType<!size(AddrArgs)>;
MIMG MI =
!cast<MIMG>(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
// DAG fragment to match data arguments (vdata for store/atomic, dmask
// for non-atomic).
dag MatchDataDag =
!con(!dag(I, !foreach(arg, I.P.DataArgs, dty),
!foreach(arg, I.P.DataArgs, arg.Name)),
!if(I.P.IsAtomic, (I), (I i32:$dmask)));
// DAG fragment to match vaddr arguments.
dag MatchAddrDag = !dag(I, !foreach(arg, AddrArgs, arg.Type.VT),
!foreach(arg, AddrArgs, arg.Name));
// DAG fragment to match sampler resource and unorm arguments.
dag MatchSamplerDag = !if(I.P.IsSample, (I v4i32:$sampler, i1:$unorm), (I));
// DAG node that generates the MI vdata for store/atomic
getDwordsType DataDwords = getDwordsType<!size(I.P.DataArgs)>;
dag GenDataDag =
!if(I.P.IsAtomic, (MI makeRegSequence<DataDwords.VT, DataDwords.VReg,
!foreach(arg, I.P.DataArgs, arg.Name)>.ret),
!if(!size(I.P.DataArgs), (MI $vdata), (MI)));
// DAG node that generates the MI vaddr
dag GenAddrDag = makeRegSequence<AddrDwords.VT, AddrDwords.VReg,
!foreach(arg, AddrArgs, arg.Name)>.ret;
// DAG fragments that generate various inline flags
dag GenDmask =
!if(I.P.IsAtomic, (MI !add(!shl(1, DataDwords.NumDwords), -1)),
(MI (as_i32imm $dmask)));
dag GenGLC =
!if(I.P.IsAtomic, (MI 1),
(MI (bitextract_imm<0> $cachepolicy)));
dag MatchIntrinsic = !con(MatchDataDag,
MatchAddrDag,
(I v8i32:$rsrc),
MatchSamplerDag,
(I 0/*texfailctrl*/,
i32:$cachepolicy));
let PatternToMatch =
!if(!size(I.RetTypes), (dty MatchIntrinsic), MatchIntrinsic);
bit IsCmpSwap = !and(I.P.IsAtomic, !eq(!size(I.P.DataArgs), 2));
dag ImageInstruction =
!con(GenDataDag,
(MI GenAddrDag),
(MI $rsrc),
!if(I.P.IsSample, (MI $sampler), (MI)),
GenDmask,
!if(I.P.IsSample, (MI (as_i1imm $unorm)), (MI 1)),
GenGLC,
(MI (bitextract_imm<1> $cachepolicy),
0, /* r128 */
0, /* tfe */
0 /*(as_i1imm $lwe)*/,
{ I.P.Dim.DA }),
!if(MI.BaseOpcode.HasD16, (MI d16), (MI)));
let ResultInstrs = [
!if(IsCmpSwap, (EXTRACT_SUBREG ImageInstruction, sub0), ImageInstruction)
];
let PrimaryKey = ["Intr"];
let PrimaryKeyName = "getImageDimIntrinsicInfo";
let PrimaryKeyEarlyOut = 1;
}
foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
AMDGPUImageDimGetResInfoIntrinsics) in {
def intr#_pat_v1 : ImageDimPattern<intr, "_V1", f32, 0>;
def intr#_pat_v2 : ImageDimPattern<intr, "_V2", v2f32, 0>;
def intr#_pat_v4 : ImageDimPattern<intr, "_V4", v4f32, 0>;
}
multiclass ImageDimD16Helper<AMDGPUImageDimIntrinsic I,
AMDGPUImageDimIntrinsic d16helper> {
let SubtargetPredicate = HasUnpackedD16VMem in {
def _unpacked_v1 : ImageDimPattern<I, "_V1", f16, 1>;
def _unpacked_v2 : ImageDimPattern<d16helper, "_V2", v2i32, 1>;
def _unpacked_v4 : ImageDimPattern<d16helper, "_V4", v4i32, 1>;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
def _packed_v1 : ImageDimPattern<I, "_V1", f16, 1>;
def _packed_v2 : ImageDimPattern<I, "_V1", v2f16, 1>;
def _packed_v4 : ImageDimPattern<I, "_V2", v4f16, 1>;
} // End HasPackedD16VMem.
}
foreach intr = AMDGPUImageDimIntrinsics in {
def intr#_d16helper_profile : AMDGPUDimProfileCopy<intr.P> {
let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty);
let DataArgs = !foreach(arg, intr.P.DataArgs, AMDGPUArg<llvm_any_ty, arg.Name>);
}
let TargetPrefix = "SI", isTarget = 1 in
def int_SI_image_d16helper_ # intr.P.OpMod # intr.P.Dim.Name :
AMDGPUImageDimIntrinsic<!cast<AMDGPUDimProfile>(intr#"_d16helper_profile"),
intr.IntrProperties, intr.Properties>;
defm intr#_d16 :
ImageDimD16Helper<
intr, !cast<AMDGPUImageDimIntrinsic>(
"int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name)>;
}
foreach intr = AMDGPUImageDimGatherIntrinsics in {
def intr#_pat3 : ImageDimPattern<intr, "_V4", v4f32, 0>;
def intr#_d16helper_profile : AMDGPUDimProfileCopy<intr.P> {
let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty);
let DataArgs = !foreach(arg, intr.P.DataArgs, AMDGPUArg<llvm_any_ty, arg.Name>);
}
let TargetPrefix = "SI", isTarget = 1 in
def int_SI_image_d16helper_ # intr.P.OpMod # intr.P.Dim.Name :
AMDGPUImageDimIntrinsic<!cast<AMDGPUDimProfile>(intr#"_d16helper_profile"),
intr.IntrProperties, intr.Properties>;
let SubtargetPredicate = HasUnpackedD16VMem in {
def intr#_unpacked_v4 :
ImageDimPattern<!cast<AMDGPUImageDimIntrinsic>(
"int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name),
"_V4", v4i32, 1>;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
def intr#_packed_v4 : ImageDimPattern<intr, "_V2", v4f16, 1>;
} // End HasPackedD16VMem.
}
foreach intr = AMDGPUImageDimAtomicIntrinsics in {
def intr#_pat1 : ImageDimPattern<intr, "_V1", i32, 0>;
AMDGPUImageDimAtomicIntrinsics) in {
def : ImageDimIntrinsicInfo<intr>;
}
/********** ======================= **********/

@@ -4516,6 +4516,245 @@ static SDValue emitRemovedIntrinsicError(SelectionDAG &DAG, const SDLoc &DL,
return DAG.getUNDEF(VT);
}
static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
ArrayRef<SDValue> Elts) {
assert(!Elts.empty());
MVT Type;
unsigned NumElts;
if (Elts.size() == 1) {
Type = MVT::f32;
NumElts = 1;
} else if (Elts.size() == 2) {
Type = MVT::v2f32;
NumElts = 2;
} else if (Elts.size() <= 4) {
Type = MVT::v4f32;
NumElts = 4;
} else if (Elts.size() <= 8) {
Type = MVT::v8f32;
NumElts = 8;
} else {
assert(Elts.size() <= 16);
Type = MVT::v16f32;
NumElts = 16;
}
SmallVector<SDValue, 16> VecElts(NumElts);
for (unsigned i = 0; i < Elts.size(); ++i) {
SDValue Elt = Elts[i];
if (Elt.getValueType() != MVT::f32)
Elt = DAG.getBitcast(MVT::f32, Elt);
VecElts[i] = Elt;
}
for (unsigned i = Elts.size(); i < NumElts; ++i)
VecElts[i] = DAG.getUNDEF(MVT::f32);
if (NumElts == 1)
return VecElts[0];
return DAG.getBuildVector(Type, DL, VecElts);
}
static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
SDValue *GLC, SDValue *SLC) {
auto CachePolicyConst = dyn_cast<ConstantSDNode>(CachePolicy.getNode());
if (!CachePolicyConst)
return false;
uint64_t Value = CachePolicyConst->getZExtValue();
SDLoc DL(CachePolicy);
if (GLC) {
*GLC = DAG.getTargetConstant((Value & 0x1) ? 1 : 0, DL, MVT::i32);
Value &= ~(uint64_t)0x1;
}
if (SLC) {
*SLC = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
Value &= ~(uint64_t)0x2;
}
return Value == 0;
}
SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;
SDValue VData;
int NumVDataDwords;
unsigned AddrIdx; // Index of first address argument
unsigned DMask;
if (BaseOpcode->Atomic) {
VData = Op.getOperand(2);
bool Is64Bit = VData.getValueType() == MVT::i64;
if (BaseOpcode->AtomicX2) {
SDValue VData2 = Op.getOperand(3);
VData = DAG.getBuildVector(Is64Bit ? MVT::v2i64 : MVT::v2i32, DL,
{VData, VData2});
if (Is64Bit)
VData = DAG.getBitcast(MVT::v4i32, VData);
ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
DMask = Is64Bit ? 0xf : 0x3;
NumVDataDwords = Is64Bit ? 4 : 2;
AddrIdx = 4;
} else {
DMask = Is64Bit ? 0x3 : 0x1;
NumVDataDwords = Is64Bit ? 2 : 1;
AddrIdx = 3;
}
} else {
unsigned DMaskIdx;
if (BaseOpcode->Store) {
VData = Op.getOperand(2);
MVT StoreVT = VData.getSimpleValueType();
if (StoreVT.getScalarType() == MVT::f16) {
if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS ||
!BaseOpcode->HasD16)
return Op; // D16 is unsupported for this instruction
IsD16 = true;
VData = handleD16VData(VData, DAG);
}
NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32;
DMaskIdx = 3;
} else {
MVT LoadVT = Op.getSimpleValueType();
if (LoadVT.getScalarType() == MVT::f16) {
if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS ||
!BaseOpcode->HasD16)
return Op; // D16 is unsupported for this instruction
IsD16 = true;
if (LoadVT.isVector() && Subtarget->hasUnpackedD16VMem())
ResultTypes[0] = (LoadVT == MVT::v2f16) ? MVT::v2i32 : MVT::v4i32;
}
NumVDataDwords = (ResultTypes[0].getSizeInBits() + 31) / 32;
DMaskIdx = isa<MemSDNode>(Op) ? 2 : 1;
}
auto DMaskConst = dyn_cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
if (!DMaskConst)
return Op;
AddrIdx = DMaskIdx + 1;
DMask = DMaskConst->getZExtValue();
if (!DMask && !BaseOpcode->Store) {
// Eliminate no-op loads. Stores with dmask == 0 are *not* no-op: they
// store the channels' default values.
SDValue Undef = DAG.getUNDEF(Op.getValueType());
if (isa<MemSDNode>(Op))
return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);
return Undef;
}
}
unsigned NumVAddrs = BaseOpcode->NumExtraArgs +
(BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
(BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
(BaseOpcode->LodOrClampOrMip ? 1 : 0);
SmallVector<SDValue, 4> VAddrs;
for (unsigned i = 0; i < NumVAddrs; ++i)
VAddrs.push_back(Op.getOperand(AddrIdx + i));
SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
unsigned CtrlIdx; // Index of texfailctrl argument
SDValue Unorm;
if (!BaseOpcode->Sampler) {
Unorm = True;
CtrlIdx = AddrIdx + NumVAddrs + 1;
} else {
auto UnormConst =
dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx + NumVAddrs + 2));
if (!UnormConst)
return Op;
Unorm = UnormConst->getZExtValue() ? True : False;
CtrlIdx = AddrIdx + NumVAddrs + 3;
}
SDValue TexFail = Op.getOperand(CtrlIdx);
auto TexFailConst = dyn_cast<ConstantSDNode>(TexFail.getNode());
if (!TexFailConst || TexFailConst->getZExtValue() != 0)
return Op;
SDValue GLC;
SDValue SLC;
if (BaseOpcode->Atomic) {
GLC = True; // TODO no-return optimization
if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC))
return Op;
} else {
if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC))
return Op;
}
SmallVector<SDValue, 14> Ops;
if (BaseOpcode->Store || BaseOpcode->Atomic)
Ops.push_back(VData); // vdata
Ops.push_back(VAddr);
Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc
if (BaseOpcode->Sampler)
Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler
Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
Ops.push_back(Unorm);
Ops.push_back(GLC);
Ops.push_back(SLC);
Ops.push_back(False); // r128
Ops.push_back(False); // tfe
Ops.push_back(False); // lwe
Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
Ops.push_back(IsD16 ? True : False);
if (isa<MemSDNode>(Op))
Ops.push_back(Op.getOperand(0)); // chain
int NumVAddrDwords = VAddr.getValueType().getSizeInBits() / 32;
int Opcode = -1;
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8,
NumVDataDwords, NumVAddrDwords);
if (Opcode == -1)
Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx6,
NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);
MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
if (auto MemOp = dyn_cast<MemSDNode>(Op)) {
MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
*MemRefs = MemOp->getMemOperand();
NewNode->setMemRefs(MemRefs, MemRefs + 1);
}
if (BaseOpcode->AtomicX2) {
SmallVector<SDValue, 1> Elt;
DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
} else if (IsD16 && !BaseOpcode->Store) {
MVT LoadVT = Op.getSimpleValueType();
SDValue Adjusted = adjustLoadValueTypeImpl(
SDValue(NewNode, 0), LoadVT, DL, DAG, Subtarget->hasUnpackedD16VMem());
return DAG.getMergeValues({Adjusted, SDValue(NewNode, 1)}, DL);
}
return SDValue(NewNode, 0);
}
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -4853,6 +5092,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return SDValue();
}
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
return lowerImage(Op, ImageDimIntr, DAG);
return Op;
}
}
@@ -5134,15 +5377,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return SDValue();
}
default:
if (Subtarget->hasUnpackedD16VMem() &&
Op.getValueType().isVector() &&
Op.getValueType().getScalarSizeInBits() == 16) {
if (const AMDGPU::D16ImageDimIntrinsic *D16ImageDimIntr =
AMDGPU::lookupD16ImageDimIntrinsic(IntrID)) {
return adjustLoadValueType(D16ImageDimIntr->D16HelperIntr,
cast<MemSDNode>(Op), DAG, true);
}
}
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrID))
return lowerImage(Op, ImageDimIntr, DAG);
return SDValue();
}
@@ -5392,25 +5629,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return SDValue();
}
default: {
const AMDGPU::D16ImageDimIntrinsic *D16ImageDimIntr =
AMDGPU::lookupD16ImageDimIntrinsic(IntrinsicID);
if (D16ImageDimIntr) {
SDValue VData = Op.getOperand(2);
EVT StoreVT = VData.getValueType();
if (Subtarget->hasUnpackedD16VMem() &&
StoreVT.isVector() &&
StoreVT.getScalarSizeInBits() == 16) {
SmallVector<SDValue, 12> Ops(Op.getNode()->op_values());
Ops[1] = DAG.getConstant(D16ImageDimIntr->D16HelperIntr, DL, MVT::i32);
Ops[2] = handleD16VData(VData, DAG);
MemSDNode *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Op->getVTList(),
Ops, M->getMemoryVT(),
M->getMemOperand());
}
}
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
return lowerImage(Op, ImageDimIntr, DAG);
return Op;
}

@@ -42,6 +42,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SelectionDAG &DAG) const override;
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
MVT VT, unsigned Offset) const;
SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;

@@ -107,9 +107,18 @@ struct MIMGInfo {
uint8_t VAddrDwords;
};
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
unsigned VDataDwords, unsigned VAddrDwords) {
const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
VDataDwords, VAddrDwords);
return Info ? Info->Opcode : -1;
}
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
const MIMGInfo *NewInfo =

@@ -39,6 +39,7 @@ class Triple;
namespace AMDGPU {
#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#include "AMDGPUGenSearchableTables.inc"
@@ -162,6 +163,37 @@ unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
struct MIMGBaseOpcodeInfo {
MIMGBaseOpcode BaseOpcode;
bool Store;
bool Atomic;
bool AtomicX2;
bool Sampler;
uint8_t NumExtraArgs;
bool Gradients;
bool Coordinates;
bool LodOrClampOrMip;
bool HasD16;
};
LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
struct MIMGDimInfo {
MIMGDim Dim;
uint8_t NumCoords;
uint8_t NumGradients;
bool DA;
};
LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
unsigned VDataDwords, unsigned VAddrDwords);
LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

@@ -0,0 +1,34 @@
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
; GCN-LABEL: {{^}}getlod_1d:
; GCN: image_get_lod v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @getlod_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %r
}
; GCN-LABEL: {{^}}getlod_2d:
; GCN: image_get_lod v[0:1], v[0:1], s[0:7], s[8:11] dmask:0x3{{$}}
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <2 x float> @getlod_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
%r = call <2 x float> @llvm.amdgcn.image.getlod.2d.v2f32.f32(i32 3, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <2 x float> %r
}
; GCN-LABEL: {{^}}adjust_writemask_getlod_none_enabled:
; GCN-NOT: image
define amdgpu_ps <4 x float> @adjust_writemask_getlod_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.getlod.2d.v4f32.f32(i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %r
}
declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare <4 x float> @llvm.amdgcn.image.getlod.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare <2 x float> @llvm.amdgcn.image.getlod.2d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
attributes #0 = { nounwind readnone }