[AMDGPU] Optimize _L image intrinsic to _LZ when lod is zero
Summary:
Add _L to _LZ image intrinsic table mapping to table gen.
In ISelLowering check if image intrinsic has lod and if it's equal
to zero, if so remove lod and change opcode to equivalent mapped _LZ.
Change-Id: Ie24cd7e788e2195d846c7bd256151178cbb9ec71
Subscribers: arsenm, mehdi_amini, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, steven_wu, dexonsmith, llvm-commits
Differential Revision: https://reviews.llvm.org/D49483
llvm-svn: 338523
2018-08-01 20:12:01 +08:00
|
|
|
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
|
|
|
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
|
|
|
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}sample_l_1d:
|
|
|
|
; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
|
|
|
|
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}sample_l_2d:
|
|
|
|
; GCN: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
|
|
|
|
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}sample_c_l_1d:
|
|
|
|
; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
|
|
|
|
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}sample_c_l_2d:
|
2020-01-29 21:04:56 +08:00
|
|
|
; GCN: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
|
[AMDGPU] Optimize _L image intrinsic to _LZ when lod is zero
Summary:
Add _L to _LZ image intrinsic table mapping to table gen.
In ISelLowering check if image intrinsic has lod and if it's equal
to zero, if so remove lod and change opcode to equivalent mapped _LZ.
Change-Id: Ie24cd7e788e2195d846c7bd256151178cbb9ec71
Subscribers: arsenm, mehdi_amini, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, steven_wu, dexonsmith, llvm-commits
Differential Revision: https://reviews.llvm.org/D49483
llvm-svn: 338523
2018-08-01 20:12:01 +08:00
|
|
|
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}sample_l_o_1d:
|
|
|
|
; GCN: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
|
|
|
|
define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}sample_l_o_2d:
|
2020-01-29 21:04:56 +08:00
|
|
|
; GCN: image_sample_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
|
[AMDGPU] Optimize _L image intrinsic to _LZ when lod is zero
Summary:
Add _L to _LZ image intrinsic table mapping to table gen.
In ISelLowering check if image intrinsic has lod and if it's equal
to zero, if so remove lod and change opcode to equivalent mapped _LZ.
Change-Id: Ie24cd7e788e2195d846c7bd256151178cbb9ec71
Subscribers: arsenm, mehdi_amini, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, steven_wu, dexonsmith, llvm-commits
Differential Revision: https://reviews.llvm.org/D49483
llvm-svn: 338523
2018-08-01 20:12:01 +08:00
|
|
|
define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}sample_c_l_o_1d:
|
2020-01-29 21:04:56 +08:00
|
|
|
; GCN: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
|
[AMDGPU] Optimize _L image intrinsic to _LZ when lod is zero
Summary:
Add _L to _LZ image intrinsic table mapping to table gen.
In ISelLowering check if image intrinsic has lod and if it's equal
to zero, if so remove lod and change opcode to equivalent mapped _LZ.
Change-Id: Ie24cd7e788e2195d846c7bd256151178cbb9ec71
Subscribers: arsenm, mehdi_amini, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, steven_wu, dexonsmith, llvm-commits
Differential Revision: https://reviews.llvm.org/D49483
llvm-svn: 338523
2018-08-01 20:12:01 +08:00
|
|
|
define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}sample_c_l_o_2d:
|
|
|
|
; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
|
|
|
define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_l_2d:
|
|
|
|
; GCN: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
|
|
|
|
define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_c_l_2d:
|
2020-01-29 21:04:56 +08:00
|
|
|
; GCN: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
|
[AMDGPU] Optimize _L image intrinsic to _LZ when lod is zero
Summary:
Add _L to _LZ image intrinsic table mapping to table gen.
In ISelLowering check if image intrinsic has lod and if it's equal
to zero, if so remove lod and change opcode to equivalent mapped _LZ.
Change-Id: Ie24cd7e788e2195d846c7bd256151178cbb9ec71
Subscribers: arsenm, mehdi_amini, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, steven_wu, dexonsmith, llvm-commits
Differential Revision: https://reviews.llvm.org/D49483
llvm-svn: 338523
2018-08-01 20:12:01 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_l_o_2d:
|
2020-01-29 21:04:56 +08:00
|
|
|
; GCN: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
|
[AMDGPU] Optimize _L image intrinsic to _LZ when lod is zero
Summary:
Add _L to _LZ image intrinsic table mapping to table gen.
In ISelLowering check if image intrinsic has lod and if it's equal
to zero, if so remove lod and change opcode to equivalent mapped _LZ.
Change-Id: Ie24cd7e788e2195d846c7bd256151178cbb9ec71
Subscribers: arsenm, mehdi_amini, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, steven_wu, dexonsmith, llvm-commits
Differential Revision: https://reviews.llvm.org/D49483
llvm-svn: 338523
2018-08-01 20:12:01 +08:00
|
|
|
define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}gather4_c_l_o_2d:
|
|
|
|
; GCN: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
|
|
|
define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
|
|
|
|
main_body:
|
|
|
|
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
|
|
ret <4 x float> %v
|
|
|
|
}
|
|
|
|
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|