[AMDGPU][SDag] Add IMG init also for image_gather4 instructions

This fixes an oversight in D99747 which moved the IMG init code from
SIAddIMGInit to AdjustInstrPostInstrSelection, but did not set the
hasPostISelHook flag on gather4 instructions.

Differential Revision: https://reviews.llvm.org/D99953
This commit is contained in:
Jay Foad 2021-04-06 14:31:24 +01:00
parent 7344f3d39a
commit e9608a84d8
3 changed files with 80 additions and 1 deletions

View File

@ -851,7 +851,7 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,
Gather4 = 1, hasPostISelHook = 0 in {
Gather4 = 1 in {
let VDataDwords = 2 in
defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */
let VDataDwords = 4 in

View File

@ -49,6 +49,68 @@ main_body:
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @gather4_2d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX6-LABEL: gather4_2d_tfe:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b64 s[14:15], exec
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: s_wqm_b64 exec, exec
; GFX6-NEXT: v_mov_b32_e32 v5, v0
; GFX6-NEXT: v_mov_b32_e32 v0, 0
; GFX6-NEXT: v_mov_b32_e32 v6, v1
; GFX6-NEXT: v_mov_b32_e32 v1, v0
; GFX6-NEXT: v_mov_b32_e32 v2, v0
; GFX6-NEXT: v_mov_b32_e32 v3, v0
; GFX6-NEXT: v_mov_b32_e32 v4, v0
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT: image_gather4 v[0:4], v[5:6], s[0:7], s[8:11] dmask:0x1 tfe
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_2d_tfe:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT: v_mov_b32_e32 v5, v0
; GFX10NSA-NEXT: v_mov_b32_e32 v0, 0
; GFX10NSA-NEXT: v_mov_b32_e32 v6, v1
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: v_mov_b32_e32 v1, v0
; GFX10NSA-NEXT: v_mov_b32_e32 v2, v0
; GFX10NSA-NEXT: v_mov_b32_e32 v3, v0
; GFX10NSA-NEXT: v_mov_b32_e32 v4, v0
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
; GFX10NSA-NEXT: image_gather4 v[0:4], v[5:6], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
%r = extractvalue { <4 x float>, i32 } %v, 0
ret <4 x float> %r
}
define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; GFX6-LABEL: gather4_cube:
; GFX6: ; %bb.0: ; %main_body
@ -778,6 +840,7 @@ main_body:
}
declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0

View File

@ -11,6 +11,21 @@ main_body:
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_2d_tfe:
; GCN: v_mov_b32_e32 v0, 0
; GCN: v_mov_b32_e32 v1, v0
; GCN: v_mov_b32_e32 v2, v0
; GCN: v_mov_b32_e32 v3, v0
; GCN: v_mov_b32_e32 v4, v0
; GFX6789: image_gather4 v[0:4], v[5:6], s[0:7], s[8:11] dmask:0x1 tfe{{$}}
; GFX10: image_gather4 v[0:4], v[5:6], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ;
define amdgpu_ps <4 x float> @gather4_2d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
%r = extractvalue { <4 x float>, i32 } %v, 0
ret <4 x float> %r
}
; GCN-LABEL: {{^}}gather4_cube:
; GFX6789: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da{{$}}
; GFX10: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE ;
@ -156,6 +171,7 @@ main_body:
}
declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1