; llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s
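
; GPRIDX checks the gfx900 run, which can index VGPRs with s_set_gpr_idx_on/off;
; MOVREL checks the fiji run, which indexes through m0 with v_movrels/s_movrels.
; Function names encode the register banks of the operands as
; dyn_extract_<type>_<vec>_<idx>: s = uniform (SGPR/inreg), v = divergent (VGPR),
; const = a constant vector. Divergent indices are handled with a waterfall loop
; (v_readfirstlane_b32 plus exec-mask manipulation) that extracts one uniform
; index value per iteration.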
define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_const_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b32 s4, 1.0
; GPRIDX-NEXT: s_mov_b32 s5, 2.0
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT: s_mov_b64 s[12:13], exec
; GPRIDX-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s14, v0
; GPRIDX-NEXT: s_mov_b32 m0, s14
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0
; GPRIDX-NEXT: s_movrels_b32 s14, s4
; GPRIDX-NEXT: v_mov_b32_e32 v1, s14
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB0_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[12:13]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f32_const_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b32 s4, 1.0
; MOVREL-NEXT: s_mov_b32 s5, 2.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
; MOVREL-NEXT: s_mov_b64 s[12:13], exec
; MOVREL-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s14, v0
; MOVREL-NEXT: s_mov_b32 m0, s14
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0
; MOVREL-NEXT: s_movrels_b32 s14, s4
; MOVREL-NEXT: v_mov_b32_e32 v1, s14
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB0_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[12:13]
; MOVREL-NEXT: v_mov_b32_e32 v0, v1
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
ret float %ext
}
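
; Uniform (inreg) index: no waterfall loop is needed; the index is copied into m0
; and a single s_movrels_b32 selects the element.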
define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s4, 1.0
; GPRIDX-NEXT: s_mov_b32 m0, s2
; GPRIDX-NEXT: s_mov_b32 s5, 2.0
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT: s_movrels_b32 s0, s4
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s4, 1.0
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: s_mov_b32 s5, 2.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
; MOVREL-NEXT: s_movrels_b32 s0, s4
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: ; return to shader part epilog
entry:
%ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
ret float %ext
}
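
; Uniform vector, divergent index: the vector is copied down to s[0:7], but the
; divergent index still requires a waterfall loop to produce a uniform m0.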
define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b64 s[8:9], exec
; GPRIDX-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s10, v0
; GPRIDX-NEXT: s_mov_b32 m0, s10
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0
; GPRIDX-NEXT: s_movrels_b32 s10, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s10
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB2_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[8:9]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b64 s[8:9], exec
; MOVREL-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s10, v0
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0
; MOVREL-NEXT: s_movrels_b32 s10, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s10
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB2_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[8:9]
; MOVREL-NEXT: v_mov_b32_e32 v0, v1
; MOVREL-NEXT: ; return to shader part epilog
entry:
%ext = extractelement <8 x float> %vec, i32 %sel
ret float %ext
}
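
; VGPR vector, divergent index: inside the waterfall loop, gfx900 indexes the
; source VGPR with s_set_gpr_idx_on/off while fiji uses m0 with v_movrels_b32_e32.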
define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v9, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB3_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f32_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v8
; MOVREL-NEXT: s_mov_b32 m0, s6
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; MOVREL-NEXT: v_movrels_b32_e32 v9, v0
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB3_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v9
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x float> %vec, i32 %sel
ret float %ext
}
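
; VGPR vector, uniform index: no loop; a single indexed move (v_mov under
; s_set_gpr_idx on gfx900, v_movrels on fiji) is enough.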
define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
; MOVREL-NEXT: ; return to shader part epilog
entry:
%ext = extractelement <8 x float> %vec, i32 %sel
ret float %ext
}
define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 m0, s10
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_movrels_b32 s0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movrels_b32 s0, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: ; return to shader part epilog
entry:
%ext = extractelement <8 x float> %vec, i32 %sel
ret float %ext
}
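
; <8 x i64> variants: the divergent-index cases scale the index by 2
; (s_lshl_b32 ..., 1) and read the element as two 32-bit halves; the uniform
; cases use a single s_movrels_b64.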
define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_const_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
; GPRIDX-NEXT: s_mov_b64 s[20:21], exec
; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s22, v0
; GPRIDX-NEXT: s_lshl_b32 m0, s22, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0
; GPRIDX-NEXT: s_movrels_b32 s22, s4
; GPRIDX-NEXT: s_movrels_b32 s23, s5
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB6_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[20:21]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s22
; GPRIDX-NEXT: v_mov_b32_e32 v1, s23
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8i64_const_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], 1
; MOVREL-NEXT: s_mov_b64 s[6:7], 2
; MOVREL-NEXT: s_mov_b64 s[8:9], 3
; MOVREL-NEXT: s_mov_b64 s[10:11], 4
; MOVREL-NEXT: s_mov_b64 s[12:13], 5
; MOVREL-NEXT: s_mov_b64 s[14:15], 6
; MOVREL-NEXT: s_mov_b64 s[16:17], 7
; MOVREL-NEXT: s_mov_b64 s[18:19], 8
; MOVREL-NEXT: s_mov_b64 s[20:21], exec
; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s22, v0
; MOVREL-NEXT: s_lshl_b32 m0, s22, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0
; MOVREL-NEXT: s_movrels_b32 s22, s4
; MOVREL-NEXT: s_movrels_b32 s23, s5
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB6_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[20:21]
; MOVREL-NEXT: v_mov_b32_e32 v0, s22
; MOVREL-NEXT: v_mov_b32_e32 v1, s23
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
ret i64 %ext
}
define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
; GPRIDX-NEXT: s_mov_b32 m0, s2
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b64 s[4:5], 1
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: s_mov_b64 s[6:7], 2
; MOVREL-NEXT: s_mov_b64 s[8:9], 3
; MOVREL-NEXT: s_mov_b64 s[10:11], 4
; MOVREL-NEXT: s_mov_b64 s[12:13], 5
; MOVREL-NEXT: s_mov_b64 s[14:15], 6
; MOVREL-NEXT: s_mov_b64 s[16:17], 7
; MOVREL-NEXT: s_mov_b64 s[18:19], 8
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
store i64 %ext, i64 addrspace(1)* undef
ret void
}
define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b64 s[16:17], exec
; GPRIDX-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s18, v0
; GPRIDX-NEXT: s_lshl_b32 m0, s18, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0
; GPRIDX-NEXT: s_movrels_b32 s18, s0
; GPRIDX-NEXT: s_movrels_b32 s19, s1
; GPRIDX-NEXT: v_mov_b32_e32 v1, s18
; GPRIDX-NEXT: v_mov_b32_e32 v2, s19
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB8_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[16:17]
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[1:2], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_mov_b64 s[16:17], exec
; MOVREL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s18, v0
; MOVREL-NEXT: s_lshl_b32 m0, s18, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0
; MOVREL-NEXT: s_movrels_b32 s18, s0
; MOVREL-NEXT: s_movrels_b32 s19, s1
; MOVREL-NEXT: v_mov_b32_e32 v1, s18
; MOVREL-NEXT: v_mov_b32_e32 v2, s19
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB8_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[16:17]
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[1:2]
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i64> %vec, i32 %sel
store i64 %ext, i64 addrspace(1)* undef
ret void
}
define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB9_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v17
; GPRIDX-NEXT: v_mov_b32_e32 v1, v18
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8i64_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB9_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v17
; MOVREL-NEXT: v_mov_b32_e32 v1, v18
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x i64> %vec, i32 %sel
ret i64 %ext
}
define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
; MOVREL-NEXT: v_movrels_b32_e32 v17, v1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17]
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i64> %vec, i32 %sel
store i64 %ext, i64 addrspace(1)* undef
ret void
}
define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i64> %vec, i32 %sel
store i64 %ext, i64 addrspace(1)* undef
ret void
}
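
; The _offset tests add a constant to the index before extracting. For the uniform
; f32 case the offset is folded into the source operand of s_movrels (reading s3
; instead of s0); for the divergent f32 case it is folded into the source VGPR
; (v3 instead of v0).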
define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 m0, s10
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_movrels_b32 s0, s3
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movrels_b32 s0, s3
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 3
%ext = extractelement <8 x float> %vec, i32 %add
ret float %ext
}
define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v9, v3
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB13_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v8
; MOVREL-NEXT: s_mov_b32 m0, s6
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; MOVREL-NEXT: v_movrels_b32_e32 v9, v3
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB13_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v9
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%add = add i32 %sel, 3
%ext = extractelement <8 x float> %vec, i32 %add
ret float %ext
}
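
; For <8 x double>, each constant offset is folded into the source pair passed to
; s_movrels_b64 (offset1 reads s[2:3], offset2 reads s[4:5], up to offset7 reading
; s[14:15]); only the -1 offset requires an explicit s_add_i32 into m0.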
define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[2:3]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[2:3]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 1
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}
define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset2:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset2:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 2
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}
define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[6:7]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[6:7]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 3
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}
define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset4:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[8:9]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset4:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[8:9]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 4
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}
define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset5:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[10:11]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset5:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[10:11]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 5
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}
define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset6:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[12:13]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset6:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[12:13]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 6
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}
define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 7
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}
define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offsetm1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_add_i32 m0, s18, -1
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offsetm1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_add_i32 m0, s18, -1
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, -1
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}
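
; In the divergent-index f64 case the offset is instead added inside the waterfall
; loop (s_add_i32) before the index is scaled for the 64-bit element.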
define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
; GPRIDX-NEXT: s_add_i32 s7, s6, 3
; GPRIDX-NEXT: s_lshl_b32 s7, s7, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB22_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v17
; GPRIDX-NEXT: v_mov_b32_e32 v1, v18
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; MOVREL-NEXT: s_add_i32 s6, s6, 3
; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB22_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v17
; MOVREL-NEXT: v_mov_b32_e32 v1, v18
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%add = add i32 %sel, 3
%ext = extractelement <8 x double> %vec, i32 %add
ret double %ext
}
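
; Pointer vectors follow the element-size patterns above: addrspace(3) pointers
; are 32 bits and lower like the f32 cases, while addrspace(1) pointers are
; 64 bits and lower like the i64 cases.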
define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) {
; GPRIDX-LABEL: dyn_extract_v8p3_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v9, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB23_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8p3_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v8
; MOVREL-NEXT: s_mov_b32 m0, s6
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; MOVREL-NEXT: v_movrels_b32_e32 v9, v0
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB23_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v9
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
ret i8 addrspace(3)* %ext
}
define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_extract_v8p3_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 m0, s10
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_movrels_b32 s0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ds_write_b32 v0, v0
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8p3_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movrels_b32 s0, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: s_mov_b32 m0, -1
; MOVREL-NEXT: ds_write_b32 v0, v0
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef
ret void
}
define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) {
; GPRIDX-LABEL: dyn_extract_v8p1_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB25_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v17
; GPRIDX-NEXT: v_mov_b32_e32 v1, v18
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8p1_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB25_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, v17
; MOVREL-NEXT: v_mov_b32_e32 v1, v18
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
ret i8 addrspace(1)* %ext
}
define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_extract_v8p1_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8p1_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
entry:
%ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef
ret void
}