forked from OSchip/llvm-project
R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback
For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist. The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take a resource descriptor might be nicer. The maximum number of input SGPRs is bumped to 17. Signed-off-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 190575
This commit is contained in:
parent
7f6fa4c4c5
commit
afcf12f33a
|
@ -19,7 +19,8 @@ def CC_SI : CallingConv<[
|
|||
|
||||
CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
|
||||
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
|
||||
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
|
||||
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
|
||||
SGPR16
|
||||
]>>>,
|
||||
|
||||
CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
|
||||
|
|
|
@ -724,5 +724,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
NODE_NAME_CASE(SAMPLED)
|
||||
NODE_NAME_CASE(SAMPLEL)
|
||||
NODE_NAME_CASE(STORE_MSKOR)
|
||||
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -160,6 +160,7 @@ enum {
|
|||
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
||||
STORE_MSKOR,
|
||||
LOAD_CONSTANT,
|
||||
TBUFFER_STORE_FORMAT,
|
||||
LAST_AMDGPU_ISD_NUMBER
|
||||
};
|
||||
|
||||
|
|
|
@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
|||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
|
||||
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
|
||||
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
|
||||
|
@ -463,6 +465,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
Op.getOperand(3));
|
||||
}
|
||||
}
|
||||
|
||||
case ISD::INTRINSIC_VOID:
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
|
||||
switch (IntrinsicID) {
|
||||
case AMDGPUIntrinsic::SI_tbuffer_store: {
|
||||
SDLoc DL(Op);
|
||||
SDValue Ops [] = {
|
||||
Chain,
|
||||
ResourceDescriptorToi128(Op.getOperand(2), DAG),
|
||||
Op.getOperand(3),
|
||||
Op.getOperand(4),
|
||||
Op.getOperand(5),
|
||||
Op.getOperand(6),
|
||||
Op.getOperand(7),
|
||||
Op.getOperand(8),
|
||||
Op.getOperand(9),
|
||||
Op.getOperand(10),
|
||||
Op.getOperand(11),
|
||||
Op.getOperand(12),
|
||||
Op.getOperand(13),
|
||||
Op.getOperand(14)
|
||||
};
|
||||
EVT VT = Op.getOperand(3).getValueType();
|
||||
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
MachinePointerInfo(),
|
||||
MachineMemOperand::MOStore,
|
||||
VT.getSizeInBits() / 8, 4);
|
||||
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
|
||||
Op->getVTList(), Ops,
|
||||
sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
|
|
@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
|
|||
[SDNPMayLoad, SDNPMemOperand]
|
||||
>;
|
||||
|
||||
// Selection-DAG node for AMDGPUISD::TBUFFER_STORE_FORMAT.
// The type profile declares 0 results and 13 operands, listed one per line
// below. The node is chained, may store, and carries a memory operand.
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
|
||||
SDTypeProfile<0, 13,
|
||||
[SDTCisVT<0, i128>, // rsrc(SGPR)
|
||||
SDTCisVT<1, iAny>, // vdata(VGPR)
|
||||
SDTCisVT<2, i32>, // num_channels(imm)
|
||||
SDTCisVT<3, i32>, // vaddr(VGPR)
|
||||
SDTCisVT<4, i32>, // soffset(SGPR)
|
||||
SDTCisVT<5, i32>, // inst_offset(imm)
|
||||
SDTCisVT<6, i32>, // dfmt(imm)
|
||||
SDTCisVT<7, i32>, // nfmt(imm)
|
||||
SDTCisVT<8, i32>, // offen(imm)
|
||||
SDTCisVT<9, i32>, // idxen(imm)
|
||||
SDTCisVT<10, i32>, // glc(imm)
|
||||
SDTCisVT<11, i32>, // slc(imm)
|
||||
SDTCisVT<12, i32> // tfe(imm)
|
||||
]>,
|
||||
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]
|
||||
>;
|
||||
|
||||
def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
|
||||
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>,
|
||||
SDTCisVT<3, i32>]>
|
||||
|
@ -65,6 +84,14 @@ def IMM8bitDWORD : ImmLeaf <
|
|||
}]>
|
||||
>;
|
||||
|
||||
// Node transform: re-emits a matched immediate as an i1 target constant built
// from the immediate's zero-extended value.
def as_i1imm : SDNodeXForm<imm, [{
|
||||
return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1);
|
||||
}]>;
|
||||
|
||||
// Node transform: re-emits a matched immediate as an i8 target constant built
// from the immediate's zero-extended value.
def as_i8imm : SDNodeXForm<imm, [{
|
||||
return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8);
|
||||
}]>;
|
||||
|
||||
// Node transform: re-emits a matched immediate as an i16 target constant.
// Unlike as_i1imm/as_i8imm this one reads the sign-extended value.
def as_i16imm : SDNodeXForm<imm, [{
|
||||
return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16);
|
||||
}]>;
|
||||
|
|
|
@ -477,10 +477,10 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
|
|||
//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
|
||||
//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
|
||||
def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
|
||||
//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
|
||||
//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
|
||||
//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
|
||||
//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
|
||||
// TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW}: opcodes 0x4-0x7. The data register
// class grows with the channel count (32-, 64- and 128-bit VGPR tuples).
def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>;
|
||||
def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>;
|
||||
// _XYZ also takes VReg_128: its vdata is typed v4i32 because v3i32 doesn't exist.
def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>;
|
||||
def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>;
|
||||
|
||||
let mayLoad = 1 in {
|
||||
|
||||
|
@ -1881,6 +1881,27 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
|
|||
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
|
||||
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MTBUF Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// TBUFFER_STORE_FORMAT_*, addr64=0
|
||||
// Selection pattern: matches an SItbuffer_store node whose vdata has type
// `vt` and whose num_channels operand equals the literal `num_channels`, and
// emits the MTBUF instruction `opcode`.
// The matched immediates are converted with as_i16imm / as_i1imm / as_i8imm
// before being passed to the instruction; $vdata, $vaddr, $rsrc and $soffset
// are forwarded unchanged.
// NOTE(review): the literal 0 in the sixth operand position is presumably the
// addr64 bit, which this pattern family leaves cleared — confirm against the
// operand order of MTBUF_Store_Helper.
class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
|
||||
(SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
|
||||
i32:$soffset, imm:$inst_offset, imm:$dfmt,
|
||||
imm:$nfmt, imm:$offen, imm:$idxen,
|
||||
imm:$glc, imm:$slc, imm:$tfe),
|
||||
(opcode
|
||||
$vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
|
||||
(as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
|
||||
(as_i1imm $slc), (as_i1imm $tfe), $soffset)
|
||||
>;
|
||||
|
||||
def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
|
||||
def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
|
||||
def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
|
||||
def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
|
||||
|
||||
/********** ====================== **********/
|
||||
/********** Indirect addressing **********/
|
||||
/********** ====================== **********/
|
||||
|
|
|
@ -20,6 +20,24 @@ let TargetPrefix = "SI", isTarget = 1 in {
|
|||
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
|
||||
|
||||
// Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
|
||||
// Intrinsic with no return value and 13 arguments. The first two arguments
// are overloaded integer types; the remaining eleven are i32.
def int_SI_tbuffer_store : Intrinsic <
|
||||
[], // no results
|
||||
[llvm_anyint_ty, // rsrc(SGPR)
|
||||
llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
|
||||
llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
|
||||
llvm_i32_ty, // vaddr(VGPR)
|
||||
llvm_i32_ty, // soffset(SGPR)
|
||||
llvm_i32_ty, // inst_offset(imm)
|
||||
llvm_i32_ty, // dfmt(imm)
|
||||
llvm_i32_ty, // nfmt(imm)
|
||||
llvm_i32_ty, // offen(imm)
|
||||
llvm_i32_ty, // idxen(imm)
|
||||
llvm_i32_ty, // glc(imm)
|
||||
llvm_i32_ty, // slc(imm)
|
||||
llvm_i32_ty], // tfe(imm)
|
||||
[]>; // no intrinsic properties listed
|
||||
|
||||
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_SI_sample : Sample;
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
||||
|
||||
;CHECK-LABEL: @test1
|
||||
;CHECK: TBUFFER_STORE_FORMAT_XYZW {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 32, -1, 0, -1, 0, 14, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
|
||||
; Four-channel store: <4 x i32> vdata with num_channels = 4, inst_offset = 32,
; dfmt = 14, nfmt = 4, offen/glc/slc set and idxen/tfe clear.
define void @test1(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: @test2
|
||||
;CHECK: TBUFFER_STORE_FORMAT_XYZ {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 24, -1, 0, -1, 0, 13, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
|
||||
; Three-channel store: vdata is still <4 x i32> (same overload as test1) but
; num_channels = 3, with inst_offset = 24 and dfmt = 13.
define void @test2(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: @test3
|
||||
;CHECK: TBUFFER_STORE_FORMAT_XY {{VGPR[0-9]+_VGPR[0-9]+}}, 16, -1, 0, -1, 0, 11, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
|
||||
; Two-channel store: <2 x i32> vdata with num_channels = 2, inst_offset = 16
; and dfmt = 11.
define void @test3(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
|
||||
i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: @test4
|
||||
;CHECK: TBUFFER_STORE_FORMAT_X {{VGPR[0-9]+}}, 8, -1, 0, -1, 0, 4, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
|
||||
; Single-channel store: scalar i32 vdata with num_channels = 1, inst_offset = 8
; and dfmt = 4.
define void @test4(i32 %vdata, i32 %vaddr) {
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
|
||||
i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
Loading…
Reference in New Issue