2019-05-02 00:32:58 +08:00
|
|
|
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
|
|
|
|
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
|
|
|
|
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_cube:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}}
|
|
|
|
; GFX10: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 1, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_2darray:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}}
|
|
|
|
; GFX10: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 1, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_c_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_cl_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 1, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_c_cl_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_b_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_c_b_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_b_cl_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_c_b_cl_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_l_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_c_l_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_lz_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_c_lz_2d:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
|
|
|
; GFX10: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_2d_dmask_2:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2{{$}}
|
|
|
|
; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_2d_dmask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 2, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_2d_dmask_4:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4{{$}}
|
|
|
|
; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_2d_dmask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 4, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_2d_dmask_8:
|
2019-05-02 00:32:58 +08:00
|
|
|
; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8{{$}}
|
|
|
|
; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_2D ;
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_2d_dmask_8(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 8, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
|
|
|
|
attributes #0 = { nounwind }
|
|
|
|
attributes #1 = { nounwind readonly }
|
|
|
|
attributes #2 = { nounwind readnone }
|