forked from OSchip/llvm-project
AMDGPU/SI: Support data types other than V4f32 in image intrinsics
Summary: Extend image intrinsics to support the V1F32 and V2F32 data types. TODO: we should define a mapping table to change the opcode when the data type is V2F32 but only one channel is active, even though such a case should be very rare. Reviewers: tstellarAMD Differential Revision: http://reviews.llvm.org/D26472 llvm-svn: 286860
This commit is contained in:
parent
41c52889b9
commit
8236fe103f
|
@ -368,9 +368,9 @@ multiclass SampleRawPatterns<SDPatternOperator name, string opcode> {
|
|||
// 1. Handle half data type like v4f16, and add D16 bit support;
|
||||
// 2. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128).
|
||||
// 3. Add A16 support when we pass address of half type.
|
||||
multiclass AMDGCNSamplePattern<SDPatternOperator name, MIMG opcode, ValueType vt> {
|
||||
multiclass AMDGCNSamplePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
|
||||
def : Pat<
|
||||
(v4f32 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc,
|
||||
(dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc,
|
||||
i1:$slc, i1:$lwe, i1:$da)),
|
||||
(opcode $addr, $rsrc, $sampler,
|
||||
(as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
|
||||
|
@ -378,12 +378,19 @@ multiclass AMDGCNSamplePattern<SDPatternOperator name, MIMG opcode, ValueType vt
|
|||
>;
|
||||
}
|
||||
|
||||
multiclass AMDGCNSampleDataPatterns<SDPatternOperator name, string opcode, ValueType dt> {
|
||||
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V1), dt, f32>;
|
||||
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V2), dt, v2f32>;
|
||||
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4), dt, v4f32>;
|
||||
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V8), dt, v8f32>;
|
||||
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V16), dt, v16f32>;
|
||||
}
|
||||
|
||||
// TODO: support v3f32.
|
||||
multiclass AMDGCNSamplePatterns<SDPatternOperator name, string opcode> {
|
||||
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4_V1), f32>;
|
||||
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4_V2), v2f32>;
|
||||
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4_V4), v4f32>;
|
||||
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4_V8), v8f32>;
|
||||
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4_V16), v16f32>;
|
||||
defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V1), f32>;
|
||||
defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
|
||||
defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
|
||||
}
|
||||
|
||||
// Image only
|
||||
|
@ -401,9 +408,9 @@ multiclass ImagePatterns<SDPatternOperator name, string opcode> {
|
|||
def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
|
||||
}
|
||||
|
||||
multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType vt> {
|
||||
multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
|
||||
def : Pat <
|
||||
(v4f32 (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe,
|
||||
(dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe,
|
||||
i1:$da)),
|
||||
(opcode $addr, $rsrc,
|
||||
(as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
|
||||
|
@ -411,15 +418,22 @@ multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType vt> {
|
|||
>;
|
||||
}
|
||||
|
||||
multiclass ImageLoadPatterns<SDPatternOperator name, string opcode> {
|
||||
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
|
||||
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
|
||||
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
|
||||
multiclass ImageLoadDataPatterns<SDPatternOperator name, string opcode, ValueType dt> {
|
||||
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V1), dt, i32>;
|
||||
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32>;
|
||||
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32>;
|
||||
}
|
||||
|
||||
multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType vt> {
|
||||
// TODO: support v3f32.
|
||||
multiclass ImageLoadPatterns<SDPatternOperator name, string opcode> {
|
||||
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f32>;
|
||||
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
|
||||
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
|
||||
}
|
||||
|
||||
multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
|
||||
def : Pat <
|
||||
(name v4f32:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc,
|
||||
(name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc,
|
||||
i1:$lwe, i1:$da),
|
||||
(opcode $data, $addr, $rsrc,
|
||||
(as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
|
||||
|
@ -427,10 +441,17 @@ multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType vt>
|
|||
>;
|
||||
}
|
||||
|
||||
multiclass ImageStoreDataPatterns<SDPatternOperator name, string opcode, ValueType dt> {
|
||||
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V1), dt, i32>;
|
||||
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32>;
|
||||
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32>;
|
||||
}
|
||||
|
||||
// TODO: support v3f32.
|
||||
multiclass ImageStorePatterns<SDPatternOperator name, string opcode> {
|
||||
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
|
||||
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
|
||||
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
|
||||
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f32>;
|
||||
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
|
||||
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
|
||||
}
|
||||
|
||||
class ImageAtomicPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
|
||||
|
@ -558,7 +579,7 @@ def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;
|
|||
// Image load
|
||||
defm : ImageLoadPatterns<int_amdgcn_image_load, "IMAGE_LOAD">;
|
||||
defm : ImageLoadPatterns<int_amdgcn_image_load_mip, "IMAGE_LOAD_MIP">;
|
||||
defm : ImageLoadPattern<int_amdgcn_image_getresinfo, IMAGE_GET_RESINFO_V4_V1, i32>;
|
||||
defm : ImageLoadPatterns<int_amdgcn_image_getresinfo, "IMAGE_GET_RESINFO">;
|
||||
|
||||
// Image store
|
||||
defm : ImageStorePatterns<int_amdgcn_image_store, "IMAGE_STORE">;
|
||||
|
@ -613,49 +634,35 @@ defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd_o, "IMAGE_SAMPLE_C_C
|
|||
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;
|
||||
|
||||
// Gather opcodes
|
||||
// Only the variants which make sense are defined.
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4, IMAGE_GATHER4_V4_V2, v2f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4, IMAGE_GATHER4_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_cl, IMAGE_GATHER4_CL_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_l, IMAGE_GATHER4_L_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_b, IMAGE_GATHER4_B_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4, "IMAGE_GATHER4">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_cl, "IMAGE_GATHER4_CL">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_l, "IMAGE_GATHER4_L">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b, "IMAGE_GATHER4_B">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_cl, "IMAGE_GATHER4_B_CL">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_lz, "IMAGE_GATHER4_LZ">;
|
||||
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c, IMAGE_GATHER4_C_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c, "IMAGE_GATHER4_C">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_cl, "IMAGE_GATHER4_C_CL">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_l, "IMAGE_GATHER4_C_L">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b, "IMAGE_GATHER4_C_B">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_cl, "IMAGE_GATHER4_C_B_CL">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_lz, "IMAGE_GATHER4_C_LZ">;
|
||||
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_o, IMAGE_GATHER4_O_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_o, "IMAGE_GATHER4_O">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_cl_o, "IMAGE_GATHER4_CL_O">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_l_o, "IMAGE_GATHER4_L_O">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_o, "IMAGE_GATHER4_B_O">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_cl_o, "IMAGE_GATHER4_B_CL_O">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_lz_o, "IMAGE_GATHER4_LZ_O">;
|
||||
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8f32>;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_o, "IMAGE_GATHER4_C_O">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_cl_o, "IMAGE_GATHER4_C_CL_O">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_l_o, "IMAGE_GATHER4_C_L_O">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_o, "IMAGE_GATHER4_C_B_O">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_cl_o, "IMAGE_GATHER4_C_B_CL_O">;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_lz_o, "IMAGE_GATHER4_C_LZ_O">;
|
||||
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_getlod, IMAGE_GET_LOD_V4_V1, f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_getlod, IMAGE_GET_LOD_V4_V2, v2f32>;
|
||||
defm : AMDGCNSamplePattern<int_amdgcn_image_getlod, IMAGE_GET_LOD_V4_V4, v4f32>;
|
||||
defm : AMDGCNSamplePatterns<int_amdgcn_image_getlod, "IMAGE_GET_LOD">;
|
||||
|
||||
// Image atomics
|
||||
defm : ImageAtomicPatterns<int_amdgcn_image_atomic_swap, "IMAGE_ATOMIC_SWAP">;
|
||||
|
|
|
@ -4047,13 +4047,16 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
|
|||
|
||||
if (TII->isMIMG(MI)) {
|
||||
unsigned VReg = MI.getOperand(0).getReg();
|
||||
const TargetRegisterClass *RC = MRI.getRegClass(VReg);
|
||||
// TODO: Need mapping tables to handle other cases (register classes).
|
||||
if (RC != &AMDGPU::VReg_128RegClass)
|
||||
return;
|
||||
|
||||
unsigned DmaskIdx = MI.getNumOperands() == 12 ? 3 : 4;
|
||||
unsigned Writemask = MI.getOperand(DmaskIdx).getImm();
|
||||
unsigned BitsSet = 0;
|
||||
for (unsigned i = 0; i < 4; ++i)
|
||||
BitsSet += Writemask & (1 << i) ? 1 : 0;
|
||||
|
||||
const TargetRegisterClass *RC;
|
||||
switch (BitsSet) {
|
||||
default: return;
|
||||
case 1: RC = &AMDGPU::VGPR_32RegClass; break;
|
||||
|
|
|
@ -320,6 +320,23 @@ main_body:
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_f32:
|
||||
; GCN: image_gather4 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
|
||||
define void @gather4_f32(float addrspace(1)* %out) {
|
||||
main_body:
|
||||
%r = call float @llvm.amdgcn.image.gather4.f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
|
||||
store float %r, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_v2f32:
|
||||
; GCN: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da
|
||||
define void @gather4_v2f32(<2 x float> addrspace(1)* %out) {
|
||||
main_body:
|
||||
%r = call <2 x float> @llvm.amdgcn.image.gather4.v2f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 0, i1 0, i1 0, i1 0, i1 1)
|
||||
store <2 x float> %r, <2 x float> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
|
@ -360,5 +377,7 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v8f32.v8i32(<8 x f
|
|||
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
|
||||
declare float @llvm.amdgcn.image.gather4.f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
declare <2 x float> @llvm.amdgcn.image.gather4.v2f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
|
|
@ -48,6 +48,25 @@ main_body:
|
|||
ret float %elt
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_load_f32_v2i32:
|
||||
;CHECK: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {
|
||||
main_body:
|
||||
%tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 0, i1 0, i1 0, i1 0)
|
||||
ret float %tex
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_load_v2f32_v4i32:
|
||||
;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {
|
||||
main_body:
|
||||
%tex = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 0, i1 0, i1 0, i1 0)
|
||||
ret <2 x float> %tex
|
||||
}
|
||||
|
||||
|
||||
;CHECK-LABEL: {{^}}image_store_v4i32:
|
||||
;CHECK: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
|
||||
|
@ -72,6 +91,22 @@ main_body:
|
|||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_store_f32_i32:
|
||||
;CHECK: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
|
||||
define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 0, i1 0, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_store_v2f32_v4i32:
|
||||
;CHECK: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
|
||||
define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 0, i1 0, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_store_mip:
|
||||
;CHECK: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
|
||||
|
@ -93,7 +128,6 @@ main_body:
|
|||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Ideally, the register allocator would avoid the wait here
|
||||
;
|
||||
;CHECK-LABEL: {{^}}image_store_wait:
|
||||
|
@ -110,6 +144,13 @@ main_body:
|
|||
ret void
|
||||
}
|
||||
|
||||
|
||||
declare float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare void @llvm.amdgcn.image.store.f32.i32.v8i32(float, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
declare void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
|
||||
|
||||
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
|
|
|
@ -181,6 +181,23 @@ main_body:
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_f32:
|
||||
; GCN: image_sample {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1
|
||||
define void @sample_f32(float addrspace(1)* %out) {
|
||||
main_body:
|
||||
%r = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 0)
|
||||
store float %r, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_v2f32:
|
||||
; GCN: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3
|
||||
define void @sample_v2f32(<2 x float> addrspace(1)* %out) {
|
||||
main_body:
|
||||
%r = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 0, i1 0, i1 0, i1 0, i1 0)
|
||||
store <2 x float> %r, <2 x float> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
|
@ -204,5 +221,7 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float>
|
|||
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
|
||||
declare float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
declare <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
|
Loading…
Reference in New Issue