AMDGPU/SI: Implement amdgcn image intrinsics with sampler
Summary:
This patch defines and implements the amdgcn image intrinsics with sampler.
1. Define the vdata type to be llvm_anyfloat_ty, the address type to be llvm_anyfloat_ty,
and the rsrc type to be llvm_anyint_ty. As a result, we expect the intrinsic names
to have three suffixes, one to overload each of these three types;
2. D128 as well as two other flags are implied by the three types; for example,
if you use v8i32 as the resource type, then r128 is 0!
3. Don't expose the TFE flag; the other flags are exposed in instruction order:
unrm, glc, slc, lwe and da.
Differential Revision: http://reviews.llvm.org/D22838
Reviewed by:
arsenm and tstellarAMD
llvm-svn: 278291
2016-08-11 05:15:30 +08:00
|
|
|
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
|
|
|
|
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}getlod:
|
|
|
|
; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da
|
2017-12-09 04:00:57 +08:00
|
|
|
; GCN: s_waitcnt vmcnt(0)
|
|
|
|
; GCN: store_dwordx4
|
2017-03-22 05:39:51 +08:00
|
|
|
; Kernel exercising the getlod intrinsic overload whose address operand is a
; single float. The call passes i32 15 as the mask operand and sets only the
; last of the five i1 flags; the full <4 x float> result is stored to %out.
define amdgpu_kernel void @getlod(<4 x float> addrspace(1)* %out) {
|
AMDGPU/SI: Implement amdgcn image intrinsics with sampler
Summary:
This patch define and implement amdgcn image intrinsics with sampler.
1. define vdata type to be llvm_anyfloat_ty, address type to be llvm_anyfloat_ty,
and rsrc type to be llvm_anyint_ty. As a result, we expect the intrinsics name
to have three suffixes to overload each of these three types;
2. D128 as well as two other flags are implied in the three types, for example,
if you use v8i32 as resource type, then r128 is 0!
3. don't expose TFE flag, and other flags are exposed in the instruction order:
unrm, glc, slc, lwe and da.
Differential Revision: http://reviews.llvm.org/D22838
Reviewed by:
arsenm and tstellarAMD
llvm-svn: 278291
2016-08-11 05:15:30 +08:00
|
|
|
main_body:
|
|
|
|
; All data operands are undef; only the mask (15) and the final i1 flag (1) are meaningful here.
%r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.f32.v8i32(float undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1)
|
|
|
|
store <4 x float> %r, <4 x float> addrspace(1)* %out
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}getlod_v2:
|
|
|
|
; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da
|
2017-12-09 04:00:57 +08:00
|
|
|
; GCN: s_waitcnt vmcnt(0)
|
|
|
|
; GCN: store_dwordx4
|
2017-03-22 05:39:51 +08:00
|
|
|
; Kernel exercising the getlod intrinsic overload whose address operand is a
; <2 x float> vector. The call passes i32 15 as the mask operand and sets only
; the last of the five i1 flags; the full <4 x float> result is stored to %out.
define amdgpu_kernel void @getlod_v2(<4 x float> addrspace(1)* %out) {
|
AMDGPU/SI: Implement amdgcn image intrinsics with sampler
Summary:
This patch define and implement amdgcn image intrinsics with sampler.
1. define vdata type to be llvm_anyfloat_ty, address type to be llvm_anyfloat_ty,
and rsrc type to be llvm_anyint_ty. As a result, we expect the intrinsics name
to have three suffixes to overload each of these three types;
2. D128 as well as two other flags are implied in the three types, for example,
if you use v8i32 as resource type, then r128 is 0!
3. don't expose TFE flag, and other flags are exposed in the instruction order:
unrm, glc, slc, lwe and da.
Differential Revision: http://reviews.llvm.org/D22838
Reviewed by:
arsenm and tstellarAMD
llvm-svn: 278291
2016-08-11 05:15:30 +08:00
|
|
|
main_body:
|
|
|
|
; All data operands are undef; only the mask (15) and the final i1 flag (1) are meaningful here.
%r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1)
|
|
|
|
store <4 x float> %r, <4 x float> addrspace(1)* %out
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}getlod_v4:
|
|
|
|
; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da
|
2017-12-09 04:00:57 +08:00
|
|
|
; GCN: s_waitcnt vmcnt(0)
|
|
|
|
; GCN: store_dwordx4
|
2017-03-22 05:39:51 +08:00
|
|
|
; Kernel exercising the getlod intrinsic overload whose address operand is a
; <4 x float> vector. The call passes i32 15 as the mask operand and sets only
; the last of the five i1 flags; the full <4 x float> result is stored to %out.
define amdgpu_kernel void @getlod_v4(<4 x float> addrspace(1)* %out) {
|
AMDGPU/SI: Implement amdgcn image intrinsics with sampler
Summary:
This patch define and implement amdgcn image intrinsics with sampler.
1. define vdata type to be llvm_anyfloat_ty, address type to be llvm_anyfloat_ty,
and rsrc type to be llvm_anyint_ty. As a result, we expect the intrinsics name
to have three suffixes to overload each of these three types;
2. D128 as well as two other flags are implied in the three types, for example,
if you use v8i32 as resource type, then r128 is 0!
3. don't expose TFE flag, and other flags are exposed in the instruction order:
unrm, glc, slc, lwe and da.
Differential Revision: http://reviews.llvm.org/D22838
Reviewed by:
arsenm and tstellarAMD
llvm-svn: 278291
2016-08-11 05:15:30 +08:00
|
|
|
main_body:
|
|
|
|
; All data operands are undef; only the mask (15) and the final i1 flag (1) are meaningful here.
%r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1)
|
|
|
|
store <4 x float> %r, <4 x float> addrspace(1)* %out
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-03-22 00:32:17 +08:00
|
|
|
; GCN-LABEL: {{^}}adjust_writemask_getlod_none_enabled:
|
|
|
|
; GCN-NOT: image
|
|
|
|
; GCN-NOT: store
|
2017-03-22 05:39:51 +08:00
|
|
|
; Kernel that calls the <4 x float>-address getlod overload with a mask operand
; of 0 and all five i1 flags false, then extracts element 0 of the result and
; stores it to %out. The check lines preceding this function expect that no
; image instruction and no store appear in the generated code.
define amdgpu_kernel void @adjust_writemask_getlod_none_enabled(float addrspace(1)* %out) {
|
2017-03-22 00:32:17 +08:00
|
|
|
main_body:
|
|
|
|
; Mask operand is 0 here, unlike the other getlod kernels in this file which pass 15.
%r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false)
|
|
|
|
%elt0 = extractelement <4 x float> %r, i32 0
|
|
|
|
store float %elt0, float addrspace(1)* %out
|
|
|
|
ret void
|
|
|
|
}
|
AMDGPU/SI: Implement amdgcn image intrinsics with sampler
Summary:
This patch defines and implements the amdgcn image intrinsics with sampler.
1. Define the vdata type to be llvm_anyfloat_ty, the address type to be llvm_anyfloat_ty,
and the rsrc type to be llvm_anyint_ty. As a result, we expect the intrinsic names
to have three suffixes, one to overload each of these three types;
2. D128 as well as two other flags are implied by the three types; for example,
if you use v8i32 as the resource type, then r128 is 0!
3. Don't expose the TFE flag; the other flags are exposed in instruction order:
unrm, glc, slc, lwe and da.
Differential Revision: http://reviews.llvm.org/D22838
Reviewed by:
arsenm and tstellarAMD
llvm-svn: 278291
2016-08-11 05:15:30 +08:00
|
|
|
|
|
|
|
; getlod intrinsic overload taking a scalar float as its first operand and
; returning <4 x float>. Remaining operands: <8 x i32>, <4 x i32>, an i32 and
; five i1 values (presumably resource, sampler, dmask and flag bits; confirm
; against the intrinsic definition).
declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
|
|
|
; getlod intrinsic overload taking a <2 x float> as its first operand and
; returning <4 x float>. Remaining operands: <8 x i32>, <4 x i32>, an i32 and
; five i1 values (presumably resource, sampler, dmask and flag bits; confirm
; against the intrinsic definition).
declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
|
|
|
; getlod intrinsic overload taking a <4 x float> as its first operand and
; returning <4 x float>. Remaining operands: <8 x i32>, <4 x i32>, an i32 and
; five i1 values (presumably resource, sampler, dmask and flag bits; confirm
; against the intrinsic definition).
declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
|
|
|
|
|
|
|
|
|
|
|
|
; Attribute group #0, referenced by the getlod intrinsic declarations in this
; file: marks them nounwind and readnone.
attributes #0 = { nounwind readnone }
|