; forked from OSchip/llvm-project
; 2182 lines, 89 KiB, LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s
; RUN: not llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s

; FIXME: Need constant bus fixup pre-gfx10 for movrel
; ERR: Bad machine code: VOP* instruction violates constant bus restriction

; Dynamic insert of an i32 into an <8 x i32> where the vector, the value and
; the index are all `inreg` arguments. Both run lines expect the vector to be
; copied down to s0-s7 and a single s_movreld_b32 indexed through m0; only the
; gfx900 output has an s_nop between the m0 write and the movrel.
define amdgpu_ps <8 x i32> @dyn_insertelement_v8i32_s_s_s(<8 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8i32_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 m0, s11
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b32 s0, s10
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8i32_s_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 m0, s11
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movreld_b32 s0, s10
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x i32> %vec, i32 %val, i32 %idx
  ret <8 x i32> %insert
}

; Dynamic insert into a vector of eight addrspace(3) pointers where the
; vector, the value and the index are all `inreg` arguments. Both run lines
; expect the vector copied down to s0-s7 and one s_movreld_b32 indexed through
; m0; only the gfx900 output has an s_nop after the m0 write.
define amdgpu_ps <8 x i8 addrspace(3)*> @dyn_insertelement_v8p3i8_s_s_s(<8 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8p3i8_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 m0, s11
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b32 s0, s10
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8p3i8_s_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 m0, s11
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movreld_b32 s0, s10
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx
  ret <8 x i8 addrspace(3)*> %insert
}

; Dynamic insert into the constant vector <1.0 .. 8.0>, with the value and the
; index passed as ordinary (non-inreg) arguments of a default-calling-convention
; function. The expected output materializes the constants in s4-s11, copies
; them to v10-v17, and then loops: v_readfirstlane_b32 picks one index value,
; the indexed write is done for it (s_set_gpr_idx_on + v_mov on gfx900, m0 +
; v_movreld_b32 on gfx1010), and s_cbranch_execnz repeats while exec is
; non-zero. The result is copied to v0-v7 before s_setpc_b64.
define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_const_s_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT: s_mov_b32 s4, 1.0
; GPRIDX-NEXT: s_mov_b32 s5, 2.0
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT: v_mov_b32_e32 v17, s11
; GPRIDX-NEXT: v_mov_b32_e32 v16, s10
; GPRIDX-NEXT: v_mov_b32_e32 v15, s9
; GPRIDX-NEXT: v_mov_b32_e32 v14, s8
; GPRIDX-NEXT: v_mov_b32_e32 v13, s7
; GPRIDX-NEXT: v_mov_b32_e32 v12, s6
; GPRIDX-NEXT: v_mov_b32_e32 v11, s5
; GPRIDX-NEXT: v_mov_b32_e32 v10, s4
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v2, v10
; GPRIDX-NEXT: v_mov_b32_e32 v3, v11
; GPRIDX-NEXT: v_mov_b32_e32 v4, v12
; GPRIDX-NEXT: v_mov_b32_e32 v5, v13
; GPRIDX-NEXT: v_mov_b32_e32 v6, v14
; GPRIDX-NEXT: v_mov_b32_e32 v7, v15
; GPRIDX-NEXT: v_mov_b32_e32 v8, v16
; GPRIDX-NEXT: v_mov_b32_e32 v9, v17
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB2_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v2
; GPRIDX-NEXT: v_mov_b32_e32 v1, v3
; GPRIDX-NEXT: v_mov_b32_e32 v2, v4
; GPRIDX-NEXT: v_mov_b32_e32 v3, v5
; GPRIDX-NEXT: v_mov_b32_e32 v4, v6
; GPRIDX-NEXT: v_mov_b32_e32 v5, v7
; GPRIDX-NEXT: v_mov_b32_e32 v6, v8
; GPRIDX-NEXT: v_mov_b32_e32 v7, v9
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_insertelement_v8f32_const_s_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
; MOVREL-NEXT: s_mov_b32 s4, 1.0
; MOVREL-NEXT: s_mov_b32 s5, 2.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: v_mov_b32_e32 v17, s11
; MOVREL-NEXT: v_mov_b32_e32 v13, s7
; MOVREL-NEXT: v_mov_b32_e32 v14, s8
; MOVREL-NEXT: v_mov_b32_e32 v15, s9
; MOVREL-NEXT: v_mov_b32_e32 v16, s10
; MOVREL-NEXT: v_mov_b32_e32 v12, s6
; MOVREL-NEXT: v_mov_b32_e32 v11, s5
; MOVREL-NEXT: v_mov_b32_e32 v10, s4
; MOVREL-NEXT: s_mov_b32 s4, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s5, v1
; MOVREL-NEXT: v_mov_b32_e32 v2, v10
; MOVREL-NEXT: v_mov_b32_e32 v3, v11
; MOVREL-NEXT: v_mov_b32_e32 v4, v12
; MOVREL-NEXT: v_mov_b32_e32 v5, v13
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s5, v1
; MOVREL-NEXT: s_mov_b32 m0, s5
; MOVREL-NEXT: v_mov_b32_e32 v6, v14
; MOVREL-NEXT: v_mov_b32_e32 v7, v15
; MOVREL-NEXT: v_mov_b32_e32 v8, v16
; MOVREL-NEXT: v_mov_b32_e32 v9, v17
; MOVREL-NEXT: v_movreld_b32_e32 v2, v0
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB2_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s4
; MOVREL-NEXT: v_mov_b32_e32 v0, v2
; MOVREL-NEXT: v_mov_b32_e32 v1, v3
; MOVREL-NEXT: v_mov_b32_e32 v2, v4
; MOVREL-NEXT: v_mov_b32_e32 v3, v5
; MOVREL-NEXT: v_mov_b32_e32 v4, v6
; MOVREL-NEXT: v_mov_b32_e32 v5, v7
; MOVREL-NEXT: v_mov_b32_e32 v6, v8
; MOVREL-NEXT: v_mov_b32_e32 v7, v9
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
  %insert = insertelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, float %val, i32 %idx
  ret <8 x float> %insert
}

; Dynamic insert into an <8 x float> where the vector and the value are `inreg`
; but the index is not. The expected output copies the vector from SGPRs into
; v9-v16, then loops over index values: v_readfirstlane_b32 picks one, the
; value is written straight from s10 with the indexed move, and
; s_cbranch_execnz repeats while exec is non-zero.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_v(<8 x float> inreg %vec, float inreg %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: v_mov_b32_e32 v16, s7
; GPRIDX-NEXT: v_mov_b32_e32 v8, v0
; GPRIDX-NEXT: v_mov_b32_e32 v15, s6
; GPRIDX-NEXT: v_mov_b32_e32 v14, s5
; GPRIDX-NEXT: v_mov_b32_e32 v13, s4
; GPRIDX-NEXT: v_mov_b32_e32 v12, s3
; GPRIDX-NEXT: v_mov_b32_e32 v11, s2
; GPRIDX-NEXT: v_mov_b32_e32 v10, s1
; GPRIDX-NEXT: v_mov_b32_e32 v9, s0
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
; GPRIDX-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v8
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v8
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
; GPRIDX-NEXT: v_mov_b32_e32 v1, v10
; GPRIDX-NEXT: v_mov_b32_e32 v2, v11
; GPRIDX-NEXT: v_mov_b32_e32 v3, v12
; GPRIDX-NEXT: v_mov_b32_e32 v4, v13
; GPRIDX-NEXT: v_mov_b32_e32 v5, v14
; GPRIDX-NEXT: v_mov_b32_e32 v6, v15
; GPRIDX-NEXT: v_mov_b32_e32 v7, v16
; GPRIDX-NEXT: v_mov_b32_e32 v0, s10
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB3_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: v_mov_b32_e32 v16, s7
; MOVREL-NEXT: v_mov_b32_e32 v8, v0
; MOVREL-NEXT: v_mov_b32_e32 v14, s5
; MOVREL-NEXT: v_mov_b32_e32 v13, s4
; MOVREL-NEXT: v_mov_b32_e32 v15, s6
; MOVREL-NEXT: v_mov_b32_e32 v12, s3
; MOVREL-NEXT: v_mov_b32_e32 v11, s2
; MOVREL-NEXT: v_mov_b32_e32 v10, s1
; MOVREL-NEXT: v_mov_b32_e32 v9, s0
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s1, v8
; MOVREL-NEXT: v_mov_b32_e32 v0, v9
; MOVREL-NEXT: v_mov_b32_e32 v1, v10
; MOVREL-NEXT: v_mov_b32_e32 v2, v11
; MOVREL-NEXT: v_mov_b32_e32 v3, v12
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v8
; MOVREL-NEXT: s_mov_b32 m0, s1
; MOVREL-NEXT: v_mov_b32_e32 v4, v13
; MOVREL-NEXT: v_mov_b32_e32 v5, v14
; MOVREL-NEXT: v_mov_b32_e32 v6, v15
; MOVREL-NEXT: v_mov_b32_e32 v7, v16
; MOVREL-NEXT: v_movreld_b32_e32 v0, s10
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB3_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; Dynamic insert into an <8 x float> where the vector and the index are `inreg`
; and the value is not. No loop is expected: the vector is copied to v0-v7 and
; the value (saved in v8) is written with one indexed move using s10 as the
; index (s_set_gpr_idx_on on gfx900, m0 + v_movreld_b32 on gfx1010).
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_s(<8 x float> inreg %vec, float %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: v_mov_b32_e32 v8, v0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
; GPRIDX-NEXT: s_set_gpr_idx_on s10, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_s_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: v_mov_b32_e32 v8, v0
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: s_mov_b32 m0, s10
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
; MOVREL-NEXT: v_movreld_b32_e32 v0, v8
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; Dynamic insert into an <8 x float> where the vector is a regular argument
; and the value and the index are `inreg`. The expected output is a single
; in-place indexed write of s2 relative to v0, with s3 as the index and no
; vector copies.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_s(<8 x float> %vec, float inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s3
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: v_movreld_b32_e32 v0, s2
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; Dynamic insert into an <8 x float> where only the vector is `inreg`; the
; value and the index are regular arguments. The expected output copies the
; vector from SGPRs into v10-v17, saves the value and index in v8/v9, and then
; loops over index values: v_readfirstlane_b32 picks one, the indexed move
; writes v8, and s_cbranch_execnz repeats while exec is non-zero.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_v(<8 x float> inreg %vec, float %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: v_mov_b32_e32 v17, s7
; GPRIDX-NEXT: v_mov_b32_e32 v8, v0
; GPRIDX-NEXT: v_mov_b32_e32 v9, v1
; GPRIDX-NEXT: v_mov_b32_e32 v16, s6
; GPRIDX-NEXT: v_mov_b32_e32 v15, s5
; GPRIDX-NEXT: v_mov_b32_e32 v14, s4
; GPRIDX-NEXT: v_mov_b32_e32 v13, s3
; GPRIDX-NEXT: v_mov_b32_e32 v12, s2
; GPRIDX-NEXT: v_mov_b32_e32 v11, s1
; GPRIDX-NEXT: v_mov_b32_e32 v10, s0
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v9
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v9
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
; GPRIDX-NEXT: v_mov_b32_e32 v2, v12
; GPRIDX-NEXT: v_mov_b32_e32 v3, v13
; GPRIDX-NEXT: v_mov_b32_e32 v4, v14
; GPRIDX-NEXT: v_mov_b32_e32 v5, v15
; GPRIDX-NEXT: v_mov_b32_e32 v6, v16
; GPRIDX-NEXT: v_mov_b32_e32 v7, v17
; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB6_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_s_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: v_mov_b32_e32 v17, s7
; MOVREL-NEXT: v_mov_b32_e32 v8, v0
; MOVREL-NEXT: v_mov_b32_e32 v9, v1
; MOVREL-NEXT: v_mov_b32_e32 v15, s5
; MOVREL-NEXT: v_mov_b32_e32 v16, s6
; MOVREL-NEXT: v_mov_b32_e32 v14, s4
; MOVREL-NEXT: v_mov_b32_e32 v13, s3
; MOVREL-NEXT: v_mov_b32_e32 v12, s2
; MOVREL-NEXT: v_mov_b32_e32 v11, s1
; MOVREL-NEXT: v_mov_b32_e32 v10, s0
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s1, v9
; MOVREL-NEXT: v_mov_b32_e32 v0, v10
; MOVREL-NEXT: v_mov_b32_e32 v1, v11
; MOVREL-NEXT: v_mov_b32_e32 v2, v12
; MOVREL-NEXT: v_mov_b32_e32 v3, v13
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v9
; MOVREL-NEXT: s_mov_b32 m0, s1
; MOVREL-NEXT: v_mov_b32_e32 v4, v14
; MOVREL-NEXT: v_mov_b32_e32 v5, v15
; MOVREL-NEXT: v_mov_b32_e32 v6, v16
; MOVREL-NEXT: v_mov_b32_e32 v7, v17
; MOVREL-NEXT: v_movreld_b32_e32 v0, v8
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB6_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; Dynamic insert into an <8 x float> where the vector and the index are
; regular arguments and the value is `inreg`. The expected output loops over
; index values (v_readfirstlane_b32 on v8 ... s_cbranch_execnz); inside the
; loop the vector is copied from v0-v7 into v9-v16 and s2 is written with the
; indexed move, and after the loop v9-v16 are copied back to v0-v7.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_v(<8 x float> %vec, float inreg %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
; GPRIDX-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s3, v8
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s3, v8
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v16, v7
; GPRIDX-NEXT: v_mov_b32_e32 v15, v6
; GPRIDX-NEXT: v_mov_b32_e32 v14, v5
; GPRIDX-NEXT: v_mov_b32_e32 v13, v4
; GPRIDX-NEXT: v_mov_b32_e32 v12, v3
; GPRIDX-NEXT: v_mov_b32_e32 v11, v2
; GPRIDX-NEXT: v_mov_b32_e32 v10, v1
; GPRIDX-NEXT: v_mov_b32_e32 v9, v0
; GPRIDX-NEXT: v_mov_b32_e32 v9, s2
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB7_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
; GPRIDX-NEXT: v_mov_b32_e32 v1, v10
; GPRIDX-NEXT: v_mov_b32_e32 v2, v11
; GPRIDX-NEXT: v_mov_b32_e32 v3, v12
; GPRIDX-NEXT: v_mov_b32_e32 v4, v13
; GPRIDX-NEXT: v_mov_b32_e32 v5, v14
; GPRIDX-NEXT: v_mov_b32_e32 v6, v15
; GPRIDX-NEXT: v_mov_b32_e32 v7, v16
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s1, v8
; MOVREL-NEXT: v_mov_b32_e32 v16, v7
; MOVREL-NEXT: v_mov_b32_e32 v9, v0
; MOVREL-NEXT: v_mov_b32_e32 v15, v6
; MOVREL-NEXT: v_mov_b32_e32 v14, v5
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v8
; MOVREL-NEXT: s_mov_b32 m0, s1
; MOVREL-NEXT: v_mov_b32_e32 v13, v4
; MOVREL-NEXT: v_mov_b32_e32 v12, v3
; MOVREL-NEXT: v_mov_b32_e32 v11, v2
; MOVREL-NEXT: v_mov_b32_e32 v10, v1
; MOVREL-NEXT: v_movreld_b32_e32 v9, s2
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB7_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, v9
; MOVREL-NEXT: v_mov_b32_e32 v1, v10
; MOVREL-NEXT: v_mov_b32_e32 v2, v11
; MOVREL-NEXT: v_mov_b32_e32 v3, v12
; MOVREL-NEXT: v_mov_b32_e32 v4, v13
; MOVREL-NEXT: v_mov_b32_e32 v5, v14
; MOVREL-NEXT: v_mov_b32_e32 v6, v15
; MOVREL-NEXT: v_mov_b32_e32 v7, v16
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; Dynamic insert into an <8 x float> where the vector and the value are
; regular arguments and only the index is `inreg`. The expected output is a
; single in-place indexed write of v8 relative to v0, with s2 as the index.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_s(<8 x float> %vec, float %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: v_movreld_b32_e32 v0, v8
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; Dynamic insert into a vector of eight addrspace(3) pointers where the vector
; and the value are regular arguments and only the index is `inreg`. The
; result is converted with ptrtoint and bitcast so it can be returned as
; <8 x float>. The expected output is a single in-place indexed write of v8
; relative to v0, with s2 as the index.
define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8p3i8_v_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8p3i8_v_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: v_movreld_b32_e32 v0, v8
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx
  %cast.0 = ptrtoint <8 x i8 addrspace(3)*> %insert to <8 x i32>
  %cast.1 = bitcast <8 x i32> %cast.0 to <8 x float>
  ret <8 x float> %cast.1
}

; Dynamic insert into an <8 x float> where the vector, the value and the index
; are all regular (non-inreg) arguments. The expected output loops over index
; values (v_readfirstlane_b32 on v9 ... s_cbranch_execnz); inside the loop the
; vector is copied from v0-v7 into v10-v17 and v8 is written with the indexed
; move, and after the loop v10-v17 are copied back to v0-v7.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v(<8 x float> %vec, float %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
; GPRIDX-NEXT: BB10_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v9
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v9
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v7
; GPRIDX-NEXT: v_mov_b32_e32 v16, v6
; GPRIDX-NEXT: v_mov_b32_e32 v15, v5
; GPRIDX-NEXT: v_mov_b32_e32 v14, v4
; GPRIDX-NEXT: v_mov_b32_e32 v13, v3
; GPRIDX-NEXT: v_mov_b32_e32 v12, v2
; GPRIDX-NEXT: v_mov_b32_e32 v11, v1
; GPRIDX-NEXT: v_mov_b32_e32 v10, v0
; GPRIDX-NEXT: v_mov_b32_e32 v10, v8
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB10_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
; GPRIDX-NEXT: v_mov_b32_e32 v2, v12
; GPRIDX-NEXT: v_mov_b32_e32 v3, v13
; GPRIDX-NEXT: v_mov_b32_e32 v4, v14
; GPRIDX-NEXT: v_mov_b32_e32 v5, v15
; GPRIDX-NEXT: v_mov_b32_e32 v6, v16
; GPRIDX-NEXT: v_mov_b32_e32 v7, v17
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB10_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s1, v9
; MOVREL-NEXT: v_mov_b32_e32 v17, v7
; MOVREL-NEXT: v_mov_b32_e32 v10, v0
; MOVREL-NEXT: v_mov_b32_e32 v16, v6
; MOVREL-NEXT: v_mov_b32_e32 v15, v5
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v9
; MOVREL-NEXT: s_mov_b32 m0, s1
; MOVREL-NEXT: v_mov_b32_e32 v14, v4
; MOVREL-NEXT: v_mov_b32_e32 v13, v3
; MOVREL-NEXT: v_mov_b32_e32 v12, v2
; MOVREL-NEXT: v_mov_b32_e32 v11, v1
; MOVREL-NEXT: v_movreld_b32_e32 v10, v8
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB10_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, v10
; MOVREL-NEXT: v_mov_b32_e32 v1, v11
; MOVREL-NEXT: v_mov_b32_e32 v2, v12
; MOVREL-NEXT: v_mov_b32_e32 v3, v13
; MOVREL-NEXT: v_mov_b32_e32 v4, v14
; MOVREL-NEXT: v_mov_b32_e32 v5, v15
; MOVREL-NEXT: v_mov_b32_e32 v6, v16
; MOVREL-NEXT: v_mov_b32_e32 v7, v17
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; Dynamic insert of an i64 into an <8 x i64> where the vector, the value and
; the index are all `inreg` arguments. Both run lines expect the vector copied
; down to s0-s15 and a single s_movreld_b64 of s[18:19] indexed through m0
; (loaded from s20); only the gfx900 output has an s_nop after the m0 write.
define amdgpu_ps <8 x i64> @dyn_insertelement_v8i64_s_s_s(<8 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8i64_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 m0, s20
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[18:19]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8i64_s_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s20
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movreld_b64 s[0:1], s[18:19]
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x i64> %vec, i64 %val, i32 %idx
  ret <8 x i64> %insert
}

; Dynamic insert into a vector of eight addrspace(1) pointers where the
; vector, the value and the index are all `inreg` arguments. Both run lines
; expect the vector copied down to s0-s15 and a single s_movreld_b64 of
; s[18:19] indexed through m0 (loaded from s20); only the gfx900 output has an
; s_nop after the m0 write.
define amdgpu_ps <8 x i8 addrspace(1)*> @dyn_insertelement_v8p1i8_s_s_s(<8 x i8 addrspace(1)*> inreg %vec, i8 addrspace(1)* inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8p1i8_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 m0, s20
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[18:19]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8p1i8_s_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s20
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movreld_b64 s[0:1], s[18:19]
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x i8 addrspace(1)*> %vec, i8 addrspace(1)* %val, i32 %idx
  ret <8 x i8 addrspace(1)*> %insert
}

define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_const_s_v_v:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GPRIDX-NEXT: s_mov_b32 s8, 0
|
|
; GPRIDX-NEXT: s_mov_b32 s19, 0x40200000
|
|
; GPRIDX-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
|
; GPRIDX-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; GPRIDX-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
|
|
; GPRIDX-NEXT: s_mov_b64 s[4:5], 1.0
|
|
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0
|
|
; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000
|
|
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4.0
|
|
; GPRIDX-NEXT: s_mov_b32 s13, 0x40140000
|
|
; GPRIDX-NEXT: s_mov_b32 s12, s8
|
|
; GPRIDX-NEXT: s_mov_b32 s15, 0x40180000
|
|
; GPRIDX-NEXT: s_mov_b32 s14, s8
|
|
; GPRIDX-NEXT: s_mov_b32 s17, 0x401c0000
|
|
; GPRIDX-NEXT: s_mov_b32 s16, s8
|
|
; GPRIDX-NEXT: s_mov_b32 s18, s8
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v34, s19
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v33, s18
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v32, s17
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v31, s16
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v30, s15
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v29, s14
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v28, s13
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v27, s12
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v26, s11
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v25, s10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v24, s9
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v23, s8
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v22, s7
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v21, s6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, s5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, s4
|
|
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
|
|
; GPRIDX-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v2
|
|
; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v2
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v19
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v20
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, v21
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, v22
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, v23
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v8, v24
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v9, v25
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v10, v26
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, v27
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v12, v28
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v13, v29
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v14, v30
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v15, v31
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v16, v32
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, v33
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v18, v34
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v0
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v1
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB13_1
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
|
|
; GPRIDX-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
|
|
; GPRIDX-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; GPRIDX-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
|
; GPRIDX-NEXT: s_waitcnt vmcnt(0)
|
|
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_const_s_v_v:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; MOVREL-NEXT: s_mov_b32 s8, 0
|
|
; MOVREL-NEXT: s_mov_b32 s19, 0x40200000
|
|
; MOVREL-NEXT: s_mov_b64 s[4:5], 1.0
|
|
; MOVREL-NEXT: s_mov_b64 s[6:7], 2.0
|
|
; MOVREL-NEXT: s_mov_b32 s9, 0x40080000
|
|
; MOVREL-NEXT: s_mov_b64 s[10:11], 4.0
|
|
; MOVREL-NEXT: s_mov_b32 s13, 0x40140000
|
|
; MOVREL-NEXT: s_mov_b32 s12, s8
|
|
; MOVREL-NEXT: s_mov_b32 s15, 0x40180000
|
|
; MOVREL-NEXT: s_mov_b32 s14, s8
|
|
; MOVREL-NEXT: s_mov_b32 s17, 0x401c0000
|
|
; MOVREL-NEXT: s_mov_b32 s16, s8
|
|
; MOVREL-NEXT: s_mov_b32 s18, s8
|
|
; MOVREL-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
|
; MOVREL-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; MOVREL-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
|
|
; MOVREL-NEXT: v_mov_b32_e32 v34, s19
|
|
; MOVREL-NEXT: v_mov_b32_e32 v33, s18
|
|
; MOVREL-NEXT: v_mov_b32_e32 v32, s17
|
|
; MOVREL-NEXT: v_mov_b32_e32 v31, s16
|
|
; MOVREL-NEXT: v_mov_b32_e32 v30, s15
|
|
; MOVREL-NEXT: v_mov_b32_e32 v29, s14
|
|
; MOVREL-NEXT: v_mov_b32_e32 v28, s13
|
|
; MOVREL-NEXT: v_mov_b32_e32 v27, s12
|
|
; MOVREL-NEXT: v_mov_b32_e32 v26, s11
|
|
; MOVREL-NEXT: v_mov_b32_e32 v25, s10
|
|
; MOVREL-NEXT: v_mov_b32_e32 v24, s9
|
|
; MOVREL-NEXT: v_mov_b32_e32 v23, s8
|
|
; MOVREL-NEXT: v_mov_b32_e32 v22, s7
|
|
; MOVREL-NEXT: v_mov_b32_e32 v21, s6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v20, s5
|
|
; MOVREL-NEXT: v_mov_b32_e32 v19, s4
|
|
; MOVREL-NEXT: s_mov_b32 s4, exec_lo
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s5, v2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v19
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v20
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v21
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v22
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s5, 1
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s5, v2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v23
|
|
; MOVREL-NEXT: v_mov_b32_e32 v8, v24
|
|
; MOVREL-NEXT: v_mov_b32_e32 v9, v25
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, v26
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, v27
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, v28
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, v29
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, v30
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, v31
|
|
; MOVREL-NEXT: v_mov_b32_e32 v16, v32
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, v33
|
|
; MOVREL-NEXT: v_mov_b32_e32 v18, v34
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v3, v0
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v4, v1
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
; MOVREL-NEXT: s_cbranch_execnz BB13_1
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s4
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
|
|
; MOVREL-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
|
|
; MOVREL-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; MOVREL-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
|
; MOVREL-NEXT: s_waitcnt vmcnt(0)
|
|
; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; MOVREL-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%insert = insertelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, double %val, i32 %idx
|
|
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
; Dynamic insertelement into <8 x double> where %vec and %val are marked
; inreg and %idx is not. In both expected outputs the inreg vector is first
; copied into v17-v32, then the indexed write is wrapped in a loop (BB14_1)
; built from v_readfirstlane_b32, v_cmp_eq_u32, s_and_saveexec and
; s_cbranch_execnz. The index is shifted left by one and followed by two
; 32-bit indexed moves.
; NOTE(review) presumably one indexed move per 32-bit half of the double - confirm.
define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, double inreg %val, i32 %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_v:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
; GPRIDX-NEXT: s_mov_b32 s9, s11
|
|
; GPRIDX-NEXT: s_mov_b32 s11, s13
|
|
; GPRIDX-NEXT: s_mov_b32 s13, s15
|
|
; GPRIDX-NEXT: s_mov_b32 s15, s17
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
; GPRIDX-NEXT: s_mov_b32 s8, s10
|
|
; GPRIDX-NEXT: s_mov_b32 s10, s12
|
|
; GPRIDX-NEXT: s_mov_b32 s12, s14
|
|
; GPRIDX-NEXT: s_mov_b32 s14, s16
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v32, s15
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v31, s14
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v30, s13
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v29, s12
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v28, s11
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v27, s10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v26, s9
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v25, s8
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v24, s7
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v23, s6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v22, s5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v21, s4
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, s3
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, s2
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v18, s1
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, s0
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
; GPRIDX-NEXT: BB14_1: ; =>This Inner Loop Header: Depth=1
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v0
|
|
; GPRIDX-NEXT: s_lshl_b32 s3, s2, 1
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v0
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, v18
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v19
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v20
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, v21
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, v22
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, v23
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v8, v24
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v9, v25
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v10, v26
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, v27
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v12, v28
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v13, v29
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v14, v30
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v15, v31
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v16, v32
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, s18
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, s19
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB14_1
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[1:4], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[5:8], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[9:12], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[13:16], off
|
|
; GPRIDX-NEXT: s_endpgm
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_s_s_v:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
; MOVREL-NEXT: s_mov_b32 s9, s11
|
|
; MOVREL-NEXT: s_mov_b32 s11, s13
|
|
; MOVREL-NEXT: s_mov_b32 s13, s15
|
|
; MOVREL-NEXT: s_mov_b32 s15, s17
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
; MOVREL-NEXT: s_mov_b32 s8, s10
|
|
; MOVREL-NEXT: s_mov_b32 s10, s12
|
|
; MOVREL-NEXT: s_mov_b32 s12, s14
|
|
; MOVREL-NEXT: s_mov_b32 s14, s16
|
|
; MOVREL-NEXT: v_mov_b32_e32 v32, s15
|
|
; MOVREL-NEXT: v_mov_b32_e32 v30, s13
|
|
; MOVREL-NEXT: v_mov_b32_e32 v28, s11
|
|
; MOVREL-NEXT: v_mov_b32_e32 v29, s12
|
|
; MOVREL-NEXT: v_mov_b32_e32 v31, s14
|
|
; MOVREL-NEXT: v_mov_b32_e32 v27, s10
|
|
; MOVREL-NEXT: v_mov_b32_e32 v26, s9
|
|
; MOVREL-NEXT: v_mov_b32_e32 v25, s8
|
|
; MOVREL-NEXT: v_mov_b32_e32 v24, s7
|
|
; MOVREL-NEXT: v_mov_b32_e32 v23, s6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v22, s5
|
|
; MOVREL-NEXT: v_mov_b32_e32 v21, s4
|
|
; MOVREL-NEXT: v_mov_b32_e32 v20, s3
|
|
; MOVREL-NEXT: v_mov_b32_e32 v19, s2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v18, s1
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, s0
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: BB14_1: ; =>This Inner Loop Header: Depth=1
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
|
|
; MOVREL-NEXT: v_mov_b32_e32 v1, v17
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, v18
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v19
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v20
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s1, 1
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v0
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v21
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v22
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v23
|
|
; MOVREL-NEXT: v_mov_b32_e32 v8, v24
|
|
; MOVREL-NEXT: v_mov_b32_e32 v9, v25
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, v26
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, v27
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, v28
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, v29
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, v30
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, v31
|
|
; MOVREL-NEXT: v_mov_b32_e32 v16, v32
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v1, s18
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v2, s19
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
; MOVREL-NEXT: s_cbranch_execnz BB14_1
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[1:4], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[5:8], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[9:12], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[13:16], off
|
|
; MOVREL-NEXT: s_endpgm
|
|
entry:
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
; Split the result into four <2 x double> pieces (elements 0-1, 2-3, 4-5, 6-7)
; and write each one with a volatile store to an undef addrspace(1) pointer.
; NOTE(review) presumably this keeps every element of %insert live - confirm.
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
; Dynamic insertelement into <8 x double> where %vec and %idx are marked
; inreg and %val is not. Neither expected output contains a loop. The inreg
; vector is copied into v2-v17, the index in s18 is shifted left by one, and
; two 32-bit indexed moves write v0 and v1 into that range
; (s_set_gpr_idx_on + v_mov_b32 for the gfx900 run, v_movreld_b32 for the
; gfx1010 run).
define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, double %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_s:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
; GPRIDX-NEXT: s_mov_b32 s9, s11
|
|
; GPRIDX-NEXT: s_mov_b32 s11, s13
|
|
; GPRIDX-NEXT: s_mov_b32 s13, s15
|
|
; GPRIDX-NEXT: s_mov_b32 s15, s17
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
; GPRIDX-NEXT: s_mov_b32 s8, s10
|
|
; GPRIDX-NEXT: s_mov_b32 s10, s12
|
|
; GPRIDX-NEXT: s_mov_b32 s12, s14
|
|
; GPRIDX-NEXT: s_mov_b32 s14, s16
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, s15
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v16, s14
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v15, s13
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v14, s12
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v13, s11
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v12, s10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v9, s7
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v8, s6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, s5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, s4
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, s3
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, s2
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, s1
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
|
|
; GPRIDX-NEXT: s_lshl_b32 s0, s18, 1
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
|
|
; GPRIDX-NEXT: s_endpgm
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_s_v_s:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
; MOVREL-NEXT: s_mov_b32 s9, s11
|
|
; MOVREL-NEXT: s_mov_b32 s11, s13
|
|
; MOVREL-NEXT: s_mov_b32 s13, s15
|
|
; MOVREL-NEXT: s_mov_b32 s15, s17
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
; MOVREL-NEXT: s_mov_b32 s8, s10
|
|
; MOVREL-NEXT: s_mov_b32 s10, s12
|
|
; MOVREL-NEXT: s_mov_b32 s12, s14
|
|
; MOVREL-NEXT: s_mov_b32 s14, s16
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, s15
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, s0
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s18, 1
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, s13
|
|
; MOVREL-NEXT: v_mov_b32_e32 v16, s14
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, s12
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, s11
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, s10
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, s9
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, s8
|
|
; MOVREL-NEXT: v_mov_b32_e32 v9, s7
|
|
; MOVREL-NEXT: v_mov_b32_e32 v8, s6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, s5
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, s4
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, s3
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, s2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, s1
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v2, v0
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v3, v1
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
|
|
; MOVREL-NEXT: s_endpgm
|
|
entry:
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
; Split the result into four <2 x double> pieces (elements 0-1, 2-3, 4-5, 6-7)
; and write each one with a volatile store to an undef addrspace(1) pointer.
; NOTE(review) presumably this keeps every element of %insert live - confirm.
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
; Dynamic insertelement into <8 x double> where %val and %idx are marked
; inreg and %vec is not. The expected output has no loop and no copy of the
; vector. The index in s4 is shifted left by one and two 32-bit indexed
; moves write s2 and s3 relative to v0 and v1, after which v[0:15] is stored.
define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double inreg %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_s:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_lshl_b32 s0, s4, 1
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, s3
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
|
|
; GPRIDX-NEXT: s_endpgm
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_v_s_s:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s4, 1
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, s2
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v1, s3
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
|
|
; MOVREL-NEXT: s_endpgm
|
|
entry:
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
; Split the result into four <2 x double> pieces (elements 0-1, 2-3, 4-5, 6-7)
; and write each one with a volatile store to an undef addrspace(1) pointer.
; NOTE(review) presumably this keeps every element of %insert live - confirm.
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
; Dynamic insertelement into <8 x double> where only %vec is marked inreg;
; %val and %idx are not. In both expected outputs the inreg vector is first
; copied into v19-v34, then the indexed write is wrapped in a loop (BB17_1)
; built from v_readfirstlane_b32, v_cmp_eq_u32, s_and_saveexec and
; s_cbranch_execnz. Inside the loop the index read from v2 is shifted left
; by one and two 32-bit indexed moves write v0 and v1.
define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, double %val, i32 %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_v:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
; GPRIDX-NEXT: s_mov_b32 s9, s11
|
|
; GPRIDX-NEXT: s_mov_b32 s11, s13
|
|
; GPRIDX-NEXT: s_mov_b32 s13, s15
|
|
; GPRIDX-NEXT: s_mov_b32 s15, s17
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
; GPRIDX-NEXT: s_mov_b32 s8, s10
|
|
; GPRIDX-NEXT: s_mov_b32 s10, s12
|
|
; GPRIDX-NEXT: s_mov_b32 s12, s14
|
|
; GPRIDX-NEXT: s_mov_b32 s14, s16
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v34, s15
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v33, s14
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v32, s13
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v31, s12
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v30, s11
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v29, s10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v28, s9
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v27, s8
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v26, s7
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v25, s6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v24, s5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v23, s4
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v22, s3
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v21, s2
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, s1
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, s0
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
; GPRIDX-NEXT: BB17_1: ; =>This Inner Loop Header: Depth=1
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2
|
|
; GPRIDX-NEXT: s_lshl_b32 s3, s2, 1
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v2
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v19
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v20
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, v21
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, v22
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, v23
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v8, v24
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v9, v25
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v10, v26
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, v27
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v12, v28
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v13, v29
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v14, v30
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v15, v31
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v16, v32
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, v33
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v18, v34
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v0
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v1
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB17_1
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
|
|
; GPRIDX-NEXT: s_endpgm
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_s_v_v:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
; MOVREL-NEXT: s_mov_b32 s9, s11
|
|
; MOVREL-NEXT: s_mov_b32 s11, s13
|
|
; MOVREL-NEXT: s_mov_b32 s13, s15
|
|
; MOVREL-NEXT: s_mov_b32 s15, s17
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
; MOVREL-NEXT: s_mov_b32 s8, s10
|
|
; MOVREL-NEXT: s_mov_b32 s10, s12
|
|
; MOVREL-NEXT: s_mov_b32 s12, s14
|
|
; MOVREL-NEXT: s_mov_b32 s14, s16
|
|
; MOVREL-NEXT: v_mov_b32_e32 v34, s15
|
|
; MOVREL-NEXT: v_mov_b32_e32 v32, s13
|
|
; MOVREL-NEXT: v_mov_b32_e32 v30, s11
|
|
; MOVREL-NEXT: v_mov_b32_e32 v31, s12
|
|
; MOVREL-NEXT: v_mov_b32_e32 v33, s14
|
|
; MOVREL-NEXT: v_mov_b32_e32 v29, s10
|
|
; MOVREL-NEXT: v_mov_b32_e32 v28, s9
|
|
; MOVREL-NEXT: v_mov_b32_e32 v27, s8
|
|
; MOVREL-NEXT: v_mov_b32_e32 v26, s7
|
|
; MOVREL-NEXT: v_mov_b32_e32 v25, s6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v24, s5
|
|
; MOVREL-NEXT: v_mov_b32_e32 v23, s4
|
|
; MOVREL-NEXT: v_mov_b32_e32 v22, s3
|
|
; MOVREL-NEXT: v_mov_b32_e32 v21, s2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v20, s1
|
|
; MOVREL-NEXT: v_mov_b32_e32 v19, s0
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: BB17_1: ; =>This Inner Loop Header: Depth=1
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v19
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v20
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v21
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v22
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s1, 1
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v23
|
|
; MOVREL-NEXT: v_mov_b32_e32 v8, v24
|
|
; MOVREL-NEXT: v_mov_b32_e32 v9, v25
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, v26
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, v27
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, v28
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, v29
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, v30
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, v31
|
|
; MOVREL-NEXT: v_mov_b32_e32 v16, v32
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, v33
|
|
; MOVREL-NEXT: v_mov_b32_e32 v18, v34
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v3, v0
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v4, v1
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
; MOVREL-NEXT: s_cbranch_execnz BB17_1
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
|
|
; MOVREL-NEXT: s_endpgm
|
|
entry:
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
; Split the result into four <2 x double> pieces (elements 0-1, 2-3, 4-5, 6-7)
; and write each one with a volatile store to an undef addrspace(1) pointer.
; NOTE(review) presumably this keeps every element of %insert live - confirm.
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
; Dynamic insertelement into <8 x double> where only %val is marked inreg;
; %vec and %idx are not. Both expected outputs wrap the indexed write in a
; loop (BB18_1) built from v_readfirstlane_b32, v_cmp_eq_u32,
; s_and_saveexec and s_cbranch_execnz. Inside the loop v0-v15 are copied to
; v17-v32, the index read from v16 is shifted left by one, and two 32-bit
; indexed moves write s2 and s3 into the copy.
define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double inreg %val, i32 %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_v:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
; GPRIDX-NEXT: BB18_1: ; =>This Inner Loop Header: Depth=1
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s4, v16
|
|
; GPRIDX-NEXT: s_lshl_b32 s5, s4, 1
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s4, v16
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s5, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v32, v15
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v31, v14
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v30, v13
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v29, v12
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v28, v11
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v27, v10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v26, v9
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v25, v8
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v24, v7
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v23, v6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v22, v5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v21, v4
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, v3
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, v2
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, s2
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s5, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v18, s3
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB18_1
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[17:20], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[21:24], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[25:28], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[29:32], off
|
|
; GPRIDX-NEXT: s_endpgm
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_v_s_v:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: BB18_1: ; =>This Inner Loop Header: Depth=1
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v16
|
|
; MOVREL-NEXT: v_mov_b32_e32 v32, v15
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, v0
|
|
; MOVREL-NEXT: v_mov_b32_e32 v31, v14
|
|
; MOVREL-NEXT: v_mov_b32_e32 v30, v13
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s1, 1
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v16
|
|
; MOVREL-NEXT: v_mov_b32_e32 v29, v12
|
|
; MOVREL-NEXT: v_mov_b32_e32 v28, v11
|
|
; MOVREL-NEXT: v_mov_b32_e32 v27, v10
|
|
; MOVREL-NEXT: v_mov_b32_e32 v26, v9
|
|
; MOVREL-NEXT: v_mov_b32_e32 v25, v8
|
|
; MOVREL-NEXT: v_mov_b32_e32 v24, v7
|
|
; MOVREL-NEXT: v_mov_b32_e32 v23, v6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v22, v5
|
|
; MOVREL-NEXT: v_mov_b32_e32 v21, v4
|
|
; MOVREL-NEXT: v_mov_b32_e32 v20, v3
|
|
; MOVREL-NEXT: v_mov_b32_e32 v19, v2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v18, v1
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v17, s2
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v18, s3
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
; MOVREL-NEXT: s_cbranch_execnz BB18_1
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[17:20], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[21:24], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[25:28], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[29:32], off
|
|
; MOVREL-NEXT: s_endpgm
|
|
entry:
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
; Split the result into four <2 x double> pieces (elements 0-1, 2-3, 4-5, 6-7)
; and write each one with a volatile store to an undef addrspace(1) pointer.
; NOTE(review) presumably this keeps every element of %insert live - confirm.
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
; Dynamic insertelement into <8 x double> where only %idx is marked inreg;
; %vec and %val are not. The expected output has no loop and no copy of the
; vector. The index in s2 is shifted left by one and two 32-bit indexed
; moves write v16 and v17 relative to v0 and v1, after which v[0:15] is
; stored.
define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_s:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v16
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
|
|
; GPRIDX-NEXT: s_endpgm
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_s:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, v16
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v1, v17
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
|
|
; MOVREL-NEXT: s_endpgm
|
|
entry:
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
; Split the result into four <2 x double> pieces (elements 0-1, 2-3, 4-5, 6-7)
; and write each one with a volatile store to an undef addrspace(1) pointer.
; NOTE(review) presumably this keeps every element of %insert live - confirm.
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
; Dynamic insertelement of a double into <8 x double> where the vector, the
; value and the index are all plain (non-inreg) arguments.
; Both check sets expect a loop (BB20_1): v_readfirstlane_b32 picks one index
; value out of v18, v_cmp_eq_u32 selects the lanes holding that value, and
; s_cbranch_execnz repeats the loop while exec is non-zero.
; The index is shifted left by one and the double is written as two 32-bit
; indexed moves (v16 into v19, then v17 into v20).
; The shufflevector / volatile store pairs write the result out as four
; <2 x double> pieces.
define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %val, i32 %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
; GPRIDX-NEXT: BB20_1: ; =>This Inner Loop Header: Depth=1
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v18
|
|
; GPRIDX-NEXT: s_lshl_b32 s3, s2, 1
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v18
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v34, v15
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v33, v14
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v32, v13
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v31, v12
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v30, v11
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v29, v10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v28, v9
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v27, v8
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v26, v7
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v25, v6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v24, v5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v23, v4
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v22, v3
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v21, v2
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, v1
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, v0
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, v16
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, v17
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB20_1
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[31:34], off
|
|
; GPRIDX-NEXT: s_endpgm
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_v:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: BB20_1: ; =>This Inner Loop Header: Depth=1
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v18
|
|
; MOVREL-NEXT: v_mov_b32_e32 v34, v15
|
|
; MOVREL-NEXT: v_mov_b32_e32 v19, v0
|
|
; MOVREL-NEXT: v_mov_b32_e32 v33, v14
|
|
; MOVREL-NEXT: v_mov_b32_e32 v32, v13
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s1, 1
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v18
|
|
; MOVREL-NEXT: v_mov_b32_e32 v31, v12
|
|
; MOVREL-NEXT: v_mov_b32_e32 v30, v11
|
|
; MOVREL-NEXT: v_mov_b32_e32 v29, v10
|
|
; MOVREL-NEXT: v_mov_b32_e32 v28, v9
|
|
; MOVREL-NEXT: v_mov_b32_e32 v27, v8
|
|
; MOVREL-NEXT: v_mov_b32_e32 v26, v7
|
|
; MOVREL-NEXT: v_mov_b32_e32 v25, v6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v24, v5
|
|
; MOVREL-NEXT: v_mov_b32_e32 v23, v4
|
|
; MOVREL-NEXT: v_mov_b32_e32 v22, v3
|
|
; MOVREL-NEXT: v_mov_b32_e32 v21, v2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v20, v1
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v19, v16
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v20, v17
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
; MOVREL-NEXT: s_cbranch_execnz BB20_1
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[31:34], off
|
|
; MOVREL-NEXT: s_endpgm
|
|
entry:
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
; Dynamic insertelement into a 3-element i32 vector where the vector, the
; value and the index are all inreg arguments.
; Expected code copies s2-s4 down to s0-s2, loads m0 from s6 and performs the
; insert with s_movreld_b32 s0, s5. The gfx900 (GPRIDX) checks additionally
; expect an s_nop 0 between the m0 write and the s_movreld_b32.
define amdgpu_ps <3 x i32> @dyn_insertelement_v3i32_s_s_s(<3 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v3i32_s_s_s:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
; GPRIDX-NEXT: s_mov_b32 m0, s6
|
|
; GPRIDX-NEXT: s_nop 0
|
|
; GPRIDX-NEXT: s_movreld_b32 s0, s5
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v3i32_s_s_s:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
; MOVREL-NEXT: s_mov_b32 m0, s6
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: s_movreld_b32 s0, s5
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
entry:
|
|
%insert = insertelement <3 x i32> %vec, i32 %val, i32 %idx
|
|
ret <3 x i32> %insert
|
|
}
|
|
|
|
; Dynamic insertelement into a 3-element vector where only the index is an
; inreg argument; the vector and the value are plain arguments.
; gfx900 (GPRIDX) checks expect v_mov_b32 v0, v3 bracketed by
; s_set_gpr_idx_on s2 / s_set_gpr_idx_off; gfx1010 (MOVREL) checks expect m0
; loaded from s2 followed by v_movreld_b32 v0, v3. No loop is expected.
; NOTE(review): the function name says v3i32 but the IR types are <3 x float>
; and float. The label checks are keyed on the name, so it is left as-is.
define amdgpu_ps <3 x float> @dyn_insertelement_v3i32_v_v_s(<3 x float> %vec, float %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v3i32_v_v_s:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v3
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v3i32_v_v_s:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 m0, s2
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, v3
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
entry:
|
|
%insert = insertelement <3 x float> %vec, float %val, i32 %idx
|
|
ret <3 x float> %insert
|
|
}
|
|
|
|
; Dynamic insertelement into a 5-element i32 vector where the vector, the
; value and the index are all inreg arguments.
; Expected code copies s2-s6 down to s0-s4, loads m0 from s8 and performs the
; insert with s_movreld_b32 s0, s7. The gfx900 (GPRIDX) checks additionally
; expect an s_nop 0 between the m0 write and the s_movreld_b32.
define amdgpu_ps <5 x i32> @dyn_insertelement_v5i32_s_s_s(<5 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v5i32_s_s_s:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
; GPRIDX-NEXT: s_mov_b32 m0, s8
|
|
; GPRIDX-NEXT: s_nop 0
|
|
; GPRIDX-NEXT: s_movreld_b32 s0, s7
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v5i32_s_s_s:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
; MOVREL-NEXT: s_mov_b32 m0, s8
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
; MOVREL-NEXT: s_movreld_b32 s0, s7
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
entry:
|
|
%insert = insertelement <5 x i32> %vec, i32 %val, i32 %idx
|
|
ret <5 x i32> %insert
|
|
}
|
|
|
|
; Dynamic insertelement into a 5-element vector where only the index is an
; inreg argument; the vector and the value are plain arguments.
; gfx900 (GPRIDX) checks expect v_mov_b32 v0, v5 bracketed by
; s_set_gpr_idx_on s2 / s_set_gpr_idx_off; gfx1010 (MOVREL) checks expect m0
; loaded from s2 followed by v_movreld_b32 v0, v5. No loop is expected.
; NOTE(review): the function name says v5i32 but the IR types are <5 x float>
; and float. The label checks are keyed on the name, so it is left as-is.
define amdgpu_ps <5 x float> @dyn_insertelement_v5i32_v_v_s(<5 x float> %vec, float %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v5i32_v_v_s:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v5
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v5i32_v_v_s:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 m0, s2
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, v5
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
entry:
|
|
%insert = insertelement <5 x float> %vec, float %val, i32 %idx
|
|
ret <5 x float> %insert
|
|
}
|
|
|
|
; Dynamic insertelement into a 32-element i32 vector where the vector, the
; value and the index are all inreg arguments.
; Expected code copies s2-s33 down to s0-s31, loads m0 from s35 and performs
; the insert with s_movreld_b32 s0, s34. The gfx900 (GPRIDX) checks
; additionally expect an s_nop 0 between the m0 write and the s_movreld_b32.
define amdgpu_ps <32 x i32> @dyn_insertelement_v32i32_s_s_s(<32 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v32i32_s_s_s:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
; GPRIDX-NEXT: s_mov_b32 s8, s10
|
|
; GPRIDX-NEXT: s_mov_b32 s9, s11
|
|
; GPRIDX-NEXT: s_mov_b32 s10, s12
|
|
; GPRIDX-NEXT: s_mov_b32 s11, s13
|
|
; GPRIDX-NEXT: s_mov_b32 s12, s14
|
|
; GPRIDX-NEXT: s_mov_b32 s13, s15
|
|
; GPRIDX-NEXT: s_mov_b32 s14, s16
|
|
; GPRIDX-NEXT: s_mov_b32 s15, s17
|
|
; GPRIDX-NEXT: s_mov_b32 s16, s18
|
|
; GPRIDX-NEXT: s_mov_b32 s17, s19
|
|
; GPRIDX-NEXT: s_mov_b32 s18, s20
|
|
; GPRIDX-NEXT: s_mov_b32 s19, s21
|
|
; GPRIDX-NEXT: s_mov_b32 s20, s22
|
|
; GPRIDX-NEXT: s_mov_b32 s21, s23
|
|
; GPRIDX-NEXT: s_mov_b32 s22, s24
|
|
; GPRIDX-NEXT: s_mov_b32 s23, s25
|
|
; GPRIDX-NEXT: s_mov_b32 s24, s26
|
|
; GPRIDX-NEXT: s_mov_b32 s25, s27
|
|
; GPRIDX-NEXT: s_mov_b32 s26, s28
|
|
; GPRIDX-NEXT: s_mov_b32 s27, s29
|
|
; GPRIDX-NEXT: s_mov_b32 s28, s30
|
|
; GPRIDX-NEXT: s_mov_b32 s29, s31
|
|
; GPRIDX-NEXT: s_mov_b32 s30, s32
|
|
; GPRIDX-NEXT: s_mov_b32 s31, s33
|
|
; GPRIDX-NEXT: s_mov_b32 m0, s35
|
|
; GPRIDX-NEXT: s_nop 0
|
|
; GPRIDX-NEXT: s_movreld_b32 s0, s34
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v32i32_s_s_s:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
; MOVREL-NEXT: s_mov_b32 m0, s35
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
; MOVREL-NEXT: s_mov_b32 s8, s10
|
|
; MOVREL-NEXT: s_mov_b32 s9, s11
|
|
; MOVREL-NEXT: s_mov_b32 s10, s12
|
|
; MOVREL-NEXT: s_mov_b32 s11, s13
|
|
; MOVREL-NEXT: s_mov_b32 s12, s14
|
|
; MOVREL-NEXT: s_mov_b32 s13, s15
|
|
; MOVREL-NEXT: s_mov_b32 s14, s16
|
|
; MOVREL-NEXT: s_mov_b32 s15, s17
|
|
; MOVREL-NEXT: s_mov_b32 s16, s18
|
|
; MOVREL-NEXT: s_mov_b32 s17, s19
|
|
; MOVREL-NEXT: s_mov_b32 s18, s20
|
|
; MOVREL-NEXT: s_mov_b32 s19, s21
|
|
; MOVREL-NEXT: s_mov_b32 s20, s22
|
|
; MOVREL-NEXT: s_mov_b32 s21, s23
|
|
; MOVREL-NEXT: s_mov_b32 s22, s24
|
|
; MOVREL-NEXT: s_mov_b32 s23, s25
|
|
; MOVREL-NEXT: s_mov_b32 s24, s26
|
|
; MOVREL-NEXT: s_mov_b32 s25, s27
|
|
; MOVREL-NEXT: s_mov_b32 s26, s28
|
|
; MOVREL-NEXT: s_mov_b32 s27, s29
|
|
; MOVREL-NEXT: s_mov_b32 s28, s30
|
|
; MOVREL-NEXT: s_mov_b32 s29, s31
|
|
; MOVREL-NEXT: s_mov_b32 s30, s32
|
|
; MOVREL-NEXT: s_mov_b32 s31, s33
|
|
; MOVREL-NEXT: s_movreld_b32 s0, s34
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
entry:
|
|
%insert = insertelement <32 x i32> %vec, i32 %val, i32 %idx
|
|
ret <32 x i32> %insert
|
|
}
|
|
|
|
; Dynamic insertelement into a 32-element vector where only the index is an
; inreg argument; the vector and the value are plain arguments.
; gfx900 (GPRIDX) checks expect v_mov_b32 v0, v32 bracketed by
; s_set_gpr_idx_on s2 / s_set_gpr_idx_off; gfx1010 (MOVREL) checks expect m0
; loaded from s2 followed by v_movreld_b32 v0, v32. No loop is expected.
; NOTE(review): the function name says v32i32 but the IR types are
; <32 x float> and float. The label checks are keyed on the name, so it is
; left as-is.
define amdgpu_ps <32 x float> @dyn_insertelement_v32i32_v_v_s(<32 x float> %vec, float %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v32i32_v_v_s:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v32
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v32i32_v_v_s:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 m0, s2
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, v32
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
entry:
|
|
%insert = insertelement <32 x float> %vec, float %val, i32 %idx
|
|
ret <32 x float> %insert
|
|
}
|
|
|
|
; Dynamic insertelement into <8 x float> with all-inreg arguments, where the
; index used is %idx + 1.
; The expected code contains no add instruction: m0 is loaded with s11
; unchanged and the constant 1 shows up as the base register of the indexed
; move (s_movreld_b32 s1, s10 rather than s0).
; The result is then copied from s0-s7 into v0-v7 for the return.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_1(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
; GPRIDX-NEXT: s_mov_b32 m0, s11
|
|
; GPRIDX-NEXT: s_nop 0
|
|
; GPRIDX-NEXT: s_movreld_b32 s1, s10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
; MOVREL-NEXT: s_mov_b32 m0, s11
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
; MOVREL-NEXT: s_movreld_b32 s1, s10
|
|
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
|
|
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
entry:
|
|
%idx.add = add i32 %idx, 1
|
|
%insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
|
|
ret <8 x float> %insert
|
|
}
|
|
|
|
; Dynamic insertelement into <8 x float> with all-inreg arguments, where the
; index used is %idx + 7.
; The expected code contains no add instruction: m0 is loaded with s11
; unchanged and the constant 7 shows up as the base register of the indexed
; move (s_movreld_b32 s7, s10 rather than s0).
; The result is then copied from s0-s7 into v0-v7 for the return.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_7(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
; GPRIDX-NEXT: s_mov_b32 m0, s11
|
|
; GPRIDX-NEXT: s_nop 0
|
|
; GPRIDX-NEXT: s_movreld_b32 s7, s10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
; MOVREL-NEXT: s_mov_b32 m0, s11
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
; MOVREL-NEXT: s_movreld_b32 s7, s10
|
|
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
|
|
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
entry:
|
|
%idx.add = add i32 %idx, 7
|
|
%insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
|
|
ret <8 x float> %insert
|
|
}
|
|
|
|
; Dynamic insertelement into <8 x float> with plain (non-inreg) vector, value
; and index, where the index used is %idx + 1.
; Both check sets expect a loop (BB29_1) that takes one index value at a time
; from v9 with v_readfirstlane_b32 and repeats via s_cbranch_execnz.
; No add instruction is expected: the vector is copied to v10-v17 and the
; indexed move targets v11 (one register past the copy of element 0) while the
; index register itself is used unchanged.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_1(<8 x float> %vec, float %val, i32 %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
; GPRIDX-NEXT: BB29_1: ; =>This Inner Loop Header: Depth=1
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v9
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v9
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, v7
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v16, v6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v15, v5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v14, v4
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v13, v3
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v12, v2
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, v1
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v10, v0
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, v8
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB29_1
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, v12
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v13
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v14
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, v15
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, v16
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, v17
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: BB29_1: ; =>This Inner Loop Header: Depth=1
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v9
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, v7
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, v1
|
|
; MOVREL-NEXT: v_mov_b32_e32 v16, v6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, v5
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v9
|
|
; MOVREL-NEXT: s_mov_b32 m0, s1
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, v4
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, v3
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, v2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, v0
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v11, v8
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
; MOVREL-NEXT: s_cbranch_execnz BB29_1
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
|
; MOVREL-NEXT: v_mov_b32_e32 v0, v10
|
|
; MOVREL-NEXT: v_mov_b32_e32 v1, v11
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, v12
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v13
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v14
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v15
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v16
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v17
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
entry:
|
|
%idx.add = add i32 %idx, 1
|
|
%insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
|
|
ret <8 x float> %insert
|
|
}
|
|
|
|
; Dynamic insertelement into <8 x float> with plain (non-inreg) vector, value
; and index, where the index used is %idx + 7.
; Both check sets expect a loop (BB30_1) that takes one index value at a time
; from v9 with v_readfirstlane_b32 and repeats via s_cbranch_execnz.
; No add instruction is expected: the vector is copied to v10-v17 and the
; indexed move targets v17 (seven registers past the copy of element 0) while
; the index register itself is used unchanged.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_7(<8 x float> %vec, float %val, i32 %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
; GPRIDX-NEXT: BB30_1: ; =>This Inner Loop Header: Depth=1
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v9
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v9
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, v7
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v16, v6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v15, v5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v14, v4
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v13, v3
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v12, v2
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, v1
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v10, v0
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, v8
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB30_1
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, v12
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v13
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v14
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, v15
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, v16
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, v17
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: BB30_1: ; =>This Inner Loop Header: Depth=1
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v9
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, v7
|
|
; MOVREL-NEXT: v_mov_b32_e32 v16, v6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, v5
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, v4
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v9
|
|
; MOVREL-NEXT: s_mov_b32 m0, s1
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, v3
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, v2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, v1
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, v0
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v17, v8
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
; MOVREL-NEXT: s_cbranch_execnz BB30_1
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
|
; MOVREL-NEXT: v_mov_b32_e32 v0, v10
|
|
; MOVREL-NEXT: v_mov_b32_e32 v1, v11
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, v12
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v13
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v14
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v15
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v16
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v17
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
entry:
|
|
%idx.add = add i32 %idx, 7
|
|
%insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
|
|
ret <8 x float> %insert
|
|
}
|
|
|
|
; Dynamic insertelement of a double into <8 x double> with all-inreg
; arguments, where the index used is %idx + 1.
; The expected code contains no add instruction: m0 is loaded with s20
; unchanged and the insert is a single s_movreld_b64 whose base is s[2:3]
; (the register pair of element 1) with the value in s[18:19].
; The result is copied into VGPRs and written out by four
; global_store_dwordx4, matching the four volatile <2 x double> stores.
define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
; GPRIDX-NEXT: s_mov_b32 s8, s10
|
|
; GPRIDX-NEXT: s_mov_b32 s9, s11
|
|
; GPRIDX-NEXT: s_mov_b32 s10, s12
|
|
; GPRIDX-NEXT: s_mov_b32 s11, s13
|
|
; GPRIDX-NEXT: s_mov_b32 s12, s14
|
|
; GPRIDX-NEXT: s_mov_b32 s13, s15
|
|
; GPRIDX-NEXT: s_mov_b32 s14, s16
|
|
; GPRIDX-NEXT: s_mov_b32 s15, s17
|
|
; GPRIDX-NEXT: s_mov_b32 m0, s20
|
|
; GPRIDX-NEXT: s_nop 0
|
|
; GPRIDX-NEXT: s_movreld_b64 s[2:3], s[18:19]
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; GPRIDX-NEXT: s_nop 0
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, s4
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, s5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, s6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, s7
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; GPRIDX-NEXT: s_nop 0
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, s8
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, s9
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, s10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, s11
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; GPRIDX-NEXT: s_nop 0
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, s12
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, s13
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, s14
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, s15
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; GPRIDX-NEXT: s_endpgm
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
; MOVREL-NEXT: s_mov_b32 m0, s20
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
; MOVREL-NEXT: s_mov_b32 s8, s10
|
|
; MOVREL-NEXT: s_mov_b32 s9, s11
|
|
; MOVREL-NEXT: s_mov_b32 s10, s12
|
|
; MOVREL-NEXT: s_mov_b32 s11, s13
|
|
; MOVREL-NEXT: s_mov_b32 s12, s14
|
|
; MOVREL-NEXT: s_mov_b32 s13, s15
|
|
; MOVREL-NEXT: s_mov_b32 s14, s16
|
|
; MOVREL-NEXT: s_mov_b32 s15, s17
|
|
; MOVREL-NEXT: s_movreld_b64 s[2:3], s[18:19]
|
|
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
|
|
; MOVREL-NEXT: v_mov_b32_e32 v8, s8
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, s12
|
|
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
|
|
; MOVREL-NEXT: v_mov_b32_e32 v9, s9
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, s10
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, s11
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, s13
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, s14
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, s15
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
|
|
; MOVREL-NEXT: s_endpgm
|
|
entry:
|
|
%idx.add = add i32 %idx, 1
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
|
|
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
; Dynamic insertelement of a double into <8 x double> with plain (non-inreg)
; vector, value and index, where the index used is %idx + 1.
; Both check sets expect a loop (BB32_1) that takes one index value at a time
; from v18 with v_readfirstlane_b32 and repeats via s_cbranch_execnz.
; Unlike the 32-bit-element add tests in this file, the expected code here
; keeps the add explicit: s_add_u32 by 1 followed by s_lshl_b32 by 1 inside
; the loop, with the indexed moves still based at v19 / v20.
; The double is written as two 32-bit indexed moves (v16, then v17).
define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, double %val, i32 %idx) {
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
; GPRIDX-NEXT: BB32_1: ; =>This Inner Loop Header: Depth=1
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v18
|
|
; GPRIDX-NEXT: s_add_u32 s3, s2, 1
|
|
; GPRIDX-NEXT: s_lshl_b32 s3, s3, 1
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v18
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v34, v15
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v33, v14
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v32, v13
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v31, v12
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v30, v11
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v29, v10
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v28, v9
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v27, v8
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v26, v7
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v25, v6
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v24, v5
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v23, v4
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v22, v3
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v21, v2
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, v1
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, v0
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, v16
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, v17
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB32_1
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[31:34], off
|
|
; GPRIDX-NEXT: s_endpgm
|
|
;
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
; MOVREL-NEXT: BB32_1: ; =>This Inner Loop Header: Depth=1
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v18
|
|
; MOVREL-NEXT: v_mov_b32_e32 v34, v15
|
|
; MOVREL-NEXT: v_mov_b32_e32 v19, v0
|
|
; MOVREL-NEXT: v_mov_b32_e32 v33, v14
|
|
; MOVREL-NEXT: v_mov_b32_e32 v32, v13
|
|
; MOVREL-NEXT: s_add_u32 s2, s1, 1
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v18
|
|
; MOVREL-NEXT: v_mov_b32_e32 v31, v12
|
|
; MOVREL-NEXT: v_mov_b32_e32 v30, v11
|
|
; MOVREL-NEXT: v_mov_b32_e32 v29, v10
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
|
|
; MOVREL-NEXT: v_mov_b32_e32 v28, v9
|
|
; MOVREL-NEXT: v_mov_b32_e32 v27, v8
|
|
; MOVREL-NEXT: v_mov_b32_e32 v26, v7
|
|
; MOVREL-NEXT: v_mov_b32_e32 v25, v6
|
|
; MOVREL-NEXT: v_mov_b32_e32 v24, v5
|
|
; MOVREL-NEXT: v_mov_b32_e32 v23, v4
|
|
; MOVREL-NEXT: v_mov_b32_e32 v22, v3
|
|
; MOVREL-NEXT: v_mov_b32_e32 v21, v2
|
|
; MOVREL-NEXT: v_mov_b32_e32 v20, v1
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v19, v16
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v20, v17
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
; MOVREL-NEXT: s_cbranch_execnz BB32_1
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[31:34], off
|
|
; MOVREL-NEXT: s_endpgm
|
|
entry:
|
|
%idx.add = add i32 %idx, 1
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
|
|
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
ret void
|
|
}
|