; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s

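; These tests check GlobalISel lowering of extractelement with a variable
; index under two strategies: GPRIDX (gfx900) indexes with
; s_set_gpr_idx_on/off or s_movrels through m0, while MOVREL (fiji) always
; indexes through m0 with v_movrels/s_movrels. When the index is divergent
; (held in a VGPR), a waterfall loop built from v_readfirstlane_b32,
; v_cmp_eq_u32 and exec masking makes it uniform first.
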
define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_const_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b32 s4, 1.0
; GPRIDX-NEXT: s_mov_b32 s5, 2.0
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT: s_mov_b64 s[12:13], exec
; GPRIDX-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s14, v0
; GPRIDX-NEXT: s_mov_b32 m0, s14
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0
; GPRIDX-NEXT: s_movrels_b32 s14, s4
; GPRIDX-NEXT: v_mov_b32_e32 v1, s14
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB0_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[12:13]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f32_const_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b32 s4, 1.0
; MOVREL-NEXT: s_mov_b32 s5, 2.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
; MOVREL-NEXT: s_mov_b64 s[12:13], exec
; MOVREL-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s14, v0
; MOVREL-NEXT: s_mov_b32 m0, s14
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0
; MOVREL-NEXT: s_movrels_b32 s14, s4
; MOVREL-NEXT: v_mov_b32_e32 v1, s14
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB0_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[12:13]
; MOVREL-NEXT: v_mov_b32_e32 v0, v1
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
ret float %ext
}

define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s4, 1.0
; GPRIDX-NEXT: s_mov_b32 m0, s2
; GPRIDX-NEXT: s_mov_b32 s5, 2.0
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT: s_movrels_b32 s0, s4
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s4, 1.0
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: s_mov_b32 s5, 2.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
; MOVREL-NEXT: s_movrels_b32 s0, s4
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: ; return to shader part epilog
entry:
%ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
ret float %ext
}

define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b64 s[8:9], exec
; GPRIDX-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s10, v0
; GPRIDX-NEXT: s_mov_b32 m0, s10
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0
; GPRIDX-NEXT: s_movrels_b32 s10, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s10
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB2_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[8:9]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b64 s[8:9], exec
; MOVREL-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s10, v0
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0
; MOVREL-NEXT: s_movrels_b32 s10, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s10
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB2_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[8:9]
; MOVREL-NEXT: v_mov_b32_e32 v0, v1
; MOVREL-NEXT: ; return to shader part epilog
entry:
%ext = extractelement <8 x float> %vec, i32 %sel
ret float %ext
}

define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v9, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB3_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f32_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v8
; MOVREL-NEXT: s_mov_b32 m0, s6
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; MOVREL-NEXT: v_movrels_b32_e32 v9, v0
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB3_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v9
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x float> %vec, i32 %sel
ret float %ext
}

define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
; MOVREL-NEXT: ; return to shader part epilog
entry:
%ext = extractelement <8 x float> %vec, i32 %sel
ret float %ext
}

define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 m0, s10
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_movrels_b32 s0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movrels_b32 s0, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: ; return to shader part epilog
entry:
%ext = extractelement <8 x float> %vec, i32 %sel
ret float %ext
}

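; For the 64-bit element tests below, the element is extracted as two 32-bit
; halves: the index is scaled by two (s_lshl_b32 ..., 1) and the low and high
; registers are moved back to back.
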
define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_const_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
; GPRIDX-NEXT: s_mov_b64 s[20:21], exec
; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s22, v0
; GPRIDX-NEXT: s_lshl_b32 m0, s22, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0
; GPRIDX-NEXT: s_movrels_b32 s22, s4
; GPRIDX-NEXT: s_movrels_b32 s23, s5
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB6_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[20:21]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s22
; GPRIDX-NEXT: v_mov_b32_e32 v1, s23
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8i64_const_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], 1
; MOVREL-NEXT: s_mov_b64 s[6:7], 2
; MOVREL-NEXT: s_mov_b64 s[8:9], 3
; MOVREL-NEXT: s_mov_b64 s[10:11], 4
; MOVREL-NEXT: s_mov_b64 s[12:13], 5
; MOVREL-NEXT: s_mov_b64 s[14:15], 6
; MOVREL-NEXT: s_mov_b64 s[16:17], 7
; MOVREL-NEXT: s_mov_b64 s[18:19], 8
; MOVREL-NEXT: s_mov_b64 s[20:21], exec
; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s22, v0
; MOVREL-NEXT: s_lshl_b32 m0, s22, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0
; MOVREL-NEXT: s_movrels_b32 s22, s4
; MOVREL-NEXT: s_movrels_b32 s23, s5
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB6_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[20:21]
; MOVREL-NEXT: v_mov_b32_e32 v0, s22
; MOVREL-NEXT: v_mov_b32_e32 v1, s23
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
ret i64 %ext
}

define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
; GPRIDX-NEXT: s_mov_b32 m0, s2
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b64 s[4:5], 1
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: s_mov_b64 s[6:7], 2
; MOVREL-NEXT: s_mov_b64 s[8:9], 3
; MOVREL-NEXT: s_mov_b64 s[10:11], 4
; MOVREL-NEXT: s_mov_b64 s[12:13], 5
; MOVREL-NEXT: s_mov_b64 s[14:15], 6
; MOVREL-NEXT: s_mov_b64 s[16:17], 7
; MOVREL-NEXT: s_mov_b64 s[18:19], 8
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
store i64 %ext, i64 addrspace(1)* undef
ret void
}

define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b64 s[16:17], exec
; GPRIDX-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s18, v0
; GPRIDX-NEXT: s_lshl_b32 m0, s18, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0
; GPRIDX-NEXT: s_movrels_b32 s18, s0
; GPRIDX-NEXT: s_movrels_b32 s19, s1
; GPRIDX-NEXT: v_mov_b32_e32 v1, s18
; GPRIDX-NEXT: v_mov_b32_e32 v2, s19
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB8_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[16:17]
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[1:2], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_mov_b64 s[16:17], exec
; MOVREL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s18, v0
; MOVREL-NEXT: s_lshl_b32 m0, s18, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0
; MOVREL-NEXT: s_movrels_b32 s18, s0
; MOVREL-NEXT: s_movrels_b32 s19, s1
; MOVREL-NEXT: v_mov_b32_e32 v1, s18
; MOVREL-NEXT: v_mov_b32_e32 v2, s19
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB8_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[16:17]
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[1:2]
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i64> %vec, i32 %sel
store i64 %ext, i64 addrspace(1)* undef
ret void
}

define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB9_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v17
; GPRIDX-NEXT: v_mov_b32_e32 v1, v18
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8i64_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB9_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v17
; MOVREL-NEXT: v_mov_b32_e32 v1, v18
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x i64> %vec, i32 %sel
ret i64 %ext
}

define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
; MOVREL-NEXT: v_movrels_b32_e32 v17, v1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17]
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i64> %vec, i32 %sel
store i64 %ext, i64 addrspace(1)* undef
ret void
}

define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i64> %vec, i32 %sel
store i64 %ext, i64 addrspace(1)* undef
ret void
}

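; The offset tests below check that a constant offset added to a uniform index
; is folded into the indexed source operand (e.g. s_movrels_b32 s0, s3 for an
; offset of 3) rather than emitted as a separate add; a negative offset still
; needs an s_add_i32 into m0.
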
define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 m0, s10
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_movrels_b32 s0, s3
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movrels_b32 s0, s3
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 3
%ext = extractelement <8 x float> %vec, i32 %add
ret float %ext
}

define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v9, v3
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB13_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v8
; MOVREL-NEXT: s_mov_b32 m0, s6
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; MOVREL-NEXT: v_movrels_b32_e32 v9, v3
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB13_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v9
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%add = add i32 %sel, 3
%ext = extractelement <8 x float> %vec, i32 %add
ret float %ext
}

define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[2:3]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[2:3]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 1
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}

define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset2:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset2:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 2
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}

define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[6:7]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[6:7]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 3
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}

define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset4:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[8:9]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset4:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[8:9]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 4
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}

define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset5:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[10:11]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset5:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[10:11]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 5
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}

define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset6:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[12:13]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset6:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[12:13]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 6
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}

define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 7
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}

define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offsetm1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_add_i32 m0, s18, -1
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offsetm1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_add_i32 m0, s18, -1
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, -1
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}

define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
; GPRIDX-NEXT: s_add_i32 s7, s6, 3
; GPRIDX-NEXT: s_lshl_b32 s7, s7, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB22_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v17
; GPRIDX-NEXT: v_mov_b32_e32 v1, v18
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; MOVREL-NEXT: s_add_i32 s6, s6, 3
; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB22_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v17
; MOVREL-NEXT: v_mov_b32_e32 v1, v18
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%add = add i32 %sel, 3
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}

define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) {
; GPRIDX-LABEL: dyn_extract_v8p3_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v9, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB23_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8p3_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v8
; MOVREL-NEXT: s_mov_b32 m0, s6
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; MOVREL-NEXT: v_movrels_b32_e32 v9, v0
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB23_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v9
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
ret i8 addrspace(3)* %ext
}

define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_extract_v8p3_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 m0, s10
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_movrels_b32 s0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ds_write_b32 v0, v0
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8p3_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movrels_b32 s0, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: s_mov_b32 m0, -1
; MOVREL-NEXT: ds_write_b32 v0, v0
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef
ret void
}

define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) {
; GPRIDX-LABEL: dyn_extract_v8p1_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB25_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v17
; GPRIDX-NEXT: v_mov_b32_e32 v1, v18
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8p1_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB25_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v17
; MOVREL-NEXT: v_mov_b32_e32 v1, v18
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
ret i8 addrspace(1)* %ext
}

define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_extract_v8p1_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8p1_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef
ret void
}