2020-01-04 01:17:56 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
|
|
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s
|
|
|
|
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s
|
2020-02-12 15:01:35 +08:00
|
|
|
; RUN: not --crash llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
|
2020-01-04 01:17:56 +08:00
|
|
|
|
|
|
|
; FIXME: Need constant bus fixup pre-gfx10 for movrel
|
|
|
|
; ERR: Bad machine code: VOP* instruction violates constant bus restriction
|
|
|
|
|
|
|
|
; Dynamic insertelement into <8 x i32> where %vec, %val and %idx are all
; `inreg` arguments of an amdgpu_ps function. Both check prefixes expect the
; insert to be done with scalar moves only: the index is written to m0 and the
; element is stored with a single s_movreld_b32.
define amdgpu_ps <8 x i32> @dyn_insertelement_v8i32_s_s_s(<8 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8i32_s_s_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 m0, s11
|
|
|
|
; GPRIDX-NEXT: s_nop 0
|
|
|
|
; GPRIDX-NEXT: s_movreld_b32 s0, s10
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8i32_s_s_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s11
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
|
|
; MOVREL-NEXT: s_movreld_b32 s0, s10
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x i32> %vec, i32 %val, i32 %idx
|
|
|
|
ret <8 x i32> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Same shape as the all-`inreg` <8 x i32> test, but the vector elements are
; addrspace(3) pointers (i8 addrspace(3)*). The expectations for both prefixes
; again use only scalar moves, an m0 write of the index and s_movreld_b32.
define amdgpu_ps <8 x i8 addrspace(3)*> @dyn_insertelement_v8p3i8_s_s_s(<8 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8p3i8_s_s_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 m0, s11
|
|
|
|
; GPRIDX-NEXT: s_nop 0
|
|
|
|
; GPRIDX-NEXT: s_movreld_b32 s0, s10
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8p3i8_s_s_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s11
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
|
|
; MOVREL-NEXT: s_movreld_b32 s0, s10
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx
|
|
|
|
ret <8 x i8 addrspace(3)*> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic insertelement into the constant vector <1.0, ..., 8.0> with a
; non-`inreg` %val and %idx. This function has no calling-convention keyword;
; its expectations start with s_waitcnt and return via s_setpc_b64 s[30:31].
; The expected code materializes the eight constants in s4-s11, copies them to
; VGPRs, and then runs a loop (BB2_1) that reads one lane's index with
; v_readfirstlane_b32, compares it against v1, performs the indexed write, and
; masks the handled lanes out of exec until s_cbranch_execnz falls through.
; NOTE(review): in the GPRIDX expectations the per-element v_mov_b32 copies sit
; between s_set_gpr_idx_on and s_set_gpr_idx_off -- confirm that placement is
; intended rather than only the final indexed v_mov_b32.
define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f32_const_s_v_v:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
|
2020-03-05 06:12:28 +08:00
|
|
|
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
|
2020-03-06 03:57:45 +08:00
|
|
|
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s5, 2.0
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s4, 1.0
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, s11
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v16, s10
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v15, s9
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v14, s8
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v13, s7
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v12, s6
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, s5
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v10, s4
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
|
|
|
|
; GPRIDX-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v1
|
|
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, v10
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v11
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v12
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, v13
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, v14
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, v15
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v8, v16
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v9, v17
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB2_1
|
|
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v2
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, v3
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, v4
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v5
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v6
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, v7
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, v8
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, v9
|
|
|
|
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8f32_const_s_v_v:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
|
|
; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
|
2020-03-05 06:12:28 +08:00
|
|
|
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
|
2020-03-06 03:57:45 +08:00
|
|
|
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s7, 4.0
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s5, 2.0
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s4, 1.0
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, s11
|
2020-03-05 06:12:28 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v16, s10
|
2020-03-06 03:57:45 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, s9
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, s8
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, s7
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, s6
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, s5
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, s4
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s4, exec_lo
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s5, v1
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, v10
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v11
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v12
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v13
|
|
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s5, v1
|
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s5
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v14
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v15
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v8, v16
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v9, v17
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v2, v0
|
|
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_cbranch_execnz BB2_1
|
|
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s4
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v0, v2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v1, v3
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, v4
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v5
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v6
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v7
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v8
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v9
|
|
|
|
; MOVREL-NEXT: s_setpc_b64 s[30:31]
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, float %val, i32 %idx
|
|
|
|
ret <8 x float> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic insertelement into <8 x float> with `inreg` %vec and %val but a
; non-`inreg` %idx. The expected code copies the vector from SGPRs into VGPRs
; and then runs a loop (BB3_1) that reads one lane's index with
; v_readfirstlane_b32, compares it against the index VGPR, writes s10 at that
; index, and removes the handled lanes from exec until s_cbranch_execnz falls
; through.
; NOTE(review): in the GPRIDX expectations the per-element v_mov_b32 copies sit
; between s_set_gpr_idx_on and s_set_gpr_idx_off -- confirm that placement is
; intended rather than only the final indexed v_mov_b32.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_v(<8 x float> inreg %vec, float inreg %val, i32 %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_v:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v16, s7
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v8, v0
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v15, s6
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v14, s5
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v13, s4
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v12, s3
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, s2
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v10, s1
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v9, s0
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
|
|
; GPRIDX-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v8
|
|
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v8
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, v10
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, v11
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v12
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v13
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, v14
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, v15
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, v16
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, s10
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB3_1
|
|
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_v:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v16, s7
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, s5
|
[AMDGPU] Remove dubious logic in bidirectional list scheduler
Summary:
pickNodeBidirectional tried to compare the best top candidate and the
best bottom candidate by examining TopCand.Reason and BotCand.Reason.
This is unsound because, after calling pickNodeFromQueue, Cand.Reason
does not reflect the most important reason why Cand was chosen. Rather
it reflects the most recent reason why it beat some other potential
candidate, which could have been for some low priority tie breaker
reason.
I have seen this cause problems where TopCand is a good candidate, but
because TopCand.Reason is ORDER (which is very low priority) it is
repeatedly ignored in favour of a mediocre BotCand. This is not how
bidirectional scheduling is supposed to work.
To fix this I changed the code to always compare TopCand and BotCand
directly, like the generic implementation of pickNodeBidirectional does.
This removes some uncommented AMDGPU-specific logic; if this logic turns
out to be important then perhaps it could be moved into an override of
tryCandidate instead.
Graphics shader benchmarking on gfx10 shows a lot more positive than
negative effects from this change.
Reviewers: arsenm, tstellar, rampitec, kzhuravl, vpykhtin, dstuttard, tpr, atrick, MatzeB
Subscribers: jvesely, wdng, nhaehnle, yaxunl, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68338
2019-10-07 22:33:59 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, s3
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, s4
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, s6
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, s2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, s1
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v9, s0
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
|
[AMDGPU] Remove dubious logic in bidirectional list scheduler
Summary:
pickNodeBidirectional tried to compare the best top candidate and the
best bottom candidate by examining TopCand.Reason and BotCand.Reason.
This is unsound because, after calling pickNodeFromQueue, Cand.Reason
does not reflect the most important reason why Cand was chosen. Rather
it reflects the most recent reason why it beat some other potential
candidate, which could have been for some low priority tie breaker
reason.
I have seen this cause problems where TopCand is a good candidate, but
because TopCand.Reason is ORDER (which is very low priority) it is
repeatedly ignored in favour of a mediocre BotCand. This is not how
bidirectional scheduling is supposed to work.
To fix this I changed the code to always compare TopCand and BotCand
directly, like the generic implementation of pickNodeBidirectional does.
This removes some uncommented AMDGPU-specific logic; if this logic turns
out to be important then perhaps it could be moved into an override of
tryCandidate instead.
Graphics shader benchmarking on gfx10 shows a lot more positive than
negative effects from this change.
Reviewers: arsenm, tstellar, rampitec, kzhuravl, vpykhtin, dstuttard, tpr, atrick, MatzeB
Subscribers: jvesely, wdng, nhaehnle, yaxunl, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68338
2019-10-07 22:33:59 +08:00
|
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v1, v9
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, v10
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v11
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v12
|
|
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v0
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s1
|
[AMDGPU] Remove dubious logic in bidirectional list scheduler
Summary:
pickNodeBidirectional tried to compare the best top candidate and the
best bottom candidate by examining TopCand.Reason and BotCand.Reason.
This is unsound because, after calling pickNodeFromQueue, Cand.Reason
does not reflect the most important reason why Cand was chosen. Rather
it reflects the most recent reason why it beat some other potential
candidate, which could have been for some low priority tie breaker
reason.
I have seen this cause problems where TopCand is a good candidate, but
because TopCand.Reason is ORDER (which is very low priority) it is
repeatedly ignored in favour of a mediocre BotCand. This is not how
bidirectional scheduling is supposed to work.
To fix this I changed the code to always compare TopCand and BotCand
directly, like the generic implementation of pickNodeBidirectional does.
This removes some uncommented AMDGPU-specific logic; if this logic turns
out to be important then perhaps it could be moved into an override of
tryCandidate instead.
Graphics shader benchmarking on gfx10 shows a lot more positive than
negative effects from this change.
Reviewers: arsenm, tstellar, rampitec, kzhuravl, vpykhtin, dstuttard, tpr, atrick, MatzeB
Subscribers: jvesely, wdng, nhaehnle, yaxunl, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68338
2019-10-07 22:33:59 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v13
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v14
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v15
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v8, v16
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v1, s10
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_cbranch_execnz BB3_1
|
|
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
[AMDGPU] Remove dubious logic in bidirectional list scheduler
Summary:
pickNodeBidirectional tried to compare the best top candidate and the
best bottom candidate by examining TopCand.Reason and BotCand.Reason.
This is unsound because, after calling pickNodeFromQueue, Cand.Reason
does not reflect the most important reason why Cand was chosen. Rather
it reflects the most recent reason why it beat some other potential
candidate, which could have been for some low priority tie breaker
reason.
I have seen this cause problems where TopCand is a good candidate, but
because TopCand.Reason is ORDER (which is very low priority) it is
repeatedly ignored in favour of a mediocre BotCand. This is not how
bidirectional scheduling is supposed to work.
To fix this I changed the code to always compare TopCand and BotCand
directly, like the generic implementation of pickNodeBidirectional does.
This removes some uncommented AMDGPU-specific logic; if this logic turns
out to be important then perhaps it could be moved into an override of
tryCandidate instead.
Graphics shader benchmarking on gfx10 shows a lot more positive than
negative effects from this change.
Reviewers: arsenm, tstellar, rampitec, kzhuravl, vpykhtin, dstuttard, tpr, atrick, MatzeB
Subscribers: jvesely, wdng, nhaehnle, yaxunl, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68338
2019-10-07 22:33:59 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v0, v1
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v1, v2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, v3
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v4
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v5
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v6
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v7
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v8
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x float> %vec, float %val, i32 %idx
|
|
|
|
ret <8 x float> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic insertelement into <8 x float> with `inreg` %vec and %idx and a
; non-`inreg` %val. No loop is expected: the vector is copied from SGPRs into
; v0-v7 and %val (saved in v8) is written at index s10, using
; s_set_gpr_idx_on/off around a v_mov_b32 for the gfx900 run and
; m0 + v_movreld_b32 for the gfx1010 run.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_s(<8 x float> inreg %vec, float %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v8, v0
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s10, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8f32_s_v_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v8, v0
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
|
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s10
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, v8
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x float> %vec, float %val, i32 %idx
|
|
|
|
ret <8 x float> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic insertelement into <8 x float> with a non-`inreg` %vec and `inreg`
; %val and %idx. The expected code needs no copies: it writes s2 at index s3
; directly, using s_set_gpr_idx_on/off around a v_mov_b32 for the gfx900 run
; and m0 + v_movreld_b32 for the gfx1010 run.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_s(<8 x float> %vec, float inreg %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8f32_v_s_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s3
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, s2
|
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x float> %vec, float %val, i32 %idx
|
|
|
|
ret <8 x float> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic insertelement into <8 x float> with an `inreg` %vec and non-`inreg`
; %val and %idx. The expected code copies the vector from SGPRs into VGPRs and
; then runs a loop (BB6_1) that reads one lane's index with
; v_readfirstlane_b32, compares it against the index VGPR, writes the value
; VGPR at that index, and removes the handled lanes from exec until
; s_cbranch_execnz falls through.
; NOTE(review): in the GPRIDX expectations the per-element v_mov_b32 copies sit
; between s_set_gpr_idx_on and s_set_gpr_idx_off -- confirm that placement is
; intended rather than only the final indexed v_mov_b32.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_v(<8 x float> inreg %vec, float %val, i32 %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_v:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, s7
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v8, v0
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v9, v1
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v16, s6
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v15, s5
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v14, s4
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v13, s3
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v12, s2
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, s1
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v10, s0
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
|
|
; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v9
|
|
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v9
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, v12
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v13
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v14
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, v15
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, v16
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, v17
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB6_1
|
|
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8f32_s_v_v:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, s7
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, s5
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, s3
|
[AMDGPU] Remove dubious logic in bidirectional list scheduler
Summary:
pickNodeBidirectional tried to compare the best top candidate and the
best bottom candidate by examining TopCand.Reason and BotCand.Reason.
This is unsound because, after calling pickNodeFromQueue, Cand.Reason
does not reflect the most important reason why Cand was chosen. Rather
it reflects the most recent reason why it beat some other potential
candidate, which could have been for some low priority tie breaker
reason.
I have seen this cause problems where TopCand is a good candidate, but
because TopCand.Reason is ORDER (which is very low priority) it is
repeatedly ignored in favour of a mediocre BotCand. This is not how
bidirectional scheduling is supposed to work.
To fix this I changed the code to always compare TopCand and BotCand
directly, like the generic implementation of pickNodeBidirectional does.
This removes some uncommented AMDGPU-specific logic; if this logic turns
out to be important then perhaps it could be moved into an override of
tryCandidate instead.
Graphics shader benchmarking on gfx10 shows a lot more positive than
negative effects from this change.
Reviewers: arsenm, tstellar, rampitec, kzhuravl, vpykhtin, dstuttard, tpr, atrick, MatzeB
Subscribers: jvesely, wdng, nhaehnle, yaxunl, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68338
2019-10-07 22:33:59 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, s4
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v16, s6
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, s2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, s1
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, s0
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
|
[AMDGPU] Remove dubious logic in bidirectional list scheduler
Summary:
pickNodeBidirectional tried to compare the best top candidate and the
best bottom candidate by examining TopCand.Reason and BotCand.Reason.
This is unsound because, after calling pickNodeFromQueue, Cand.Reason
does not reflect the most important reason why Cand was chosen. Rather
it reflects the most recent reason why it beat some other potential
candidate, which could have been for some low priority tie breaker
reason.
I have seen this cause problems where TopCand is a good candidate, but
because TopCand.Reason is ORDER (which is very low priority) it is
repeatedly ignored in favour of a mediocre BotCand. This is not how
bidirectional scheduling is supposed to work.
To fix this I changed the code to always compare TopCand and BotCand
directly, like the generic implementation of pickNodeBidirectional does.
This removes some uncommented AMDGPU-specific logic; if this logic turns
out to be important then perhaps it could be moved into an override of
tryCandidate instead.
Graphics shader benchmarking on gfx10 shows a lot more positive than
negative effects from this change.
Reviewers: arsenm, tstellar, rampitec, kzhuravl, vpykhtin, dstuttard, tpr, atrick, MatzeB
Subscribers: jvesely, wdng, nhaehnle, yaxunl, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68338
2019-10-07 22:33:59 +08:00
|
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v1
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, v10
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v11
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v12
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v13
|
|
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s1
|
[AMDGPU] Remove dubious logic in bidirectional list scheduler
Summary:
pickNodeBidirectional tried to compare the best top candidate and the
best bottom candidate by examining TopCand.Reason and BotCand.Reason.
This is unsound because, after calling pickNodeFromQueue, Cand.Reason
does not reflect the most important reason why Cand was chosen. Rather
it reflects the most recent reason why it beat some other potential
candidate, which could have been for some low priority tie breaker
reason.
I have seen this cause problems where TopCand is a good candidate, but
because TopCand.Reason is ORDER (which is very low priority) it is
repeatedly ignored in favour of a mediocre BotCand. This is not how
bidirectional scheduling is supposed to work.
To fix this I changed the code to always compare TopCand and BotCand
directly, like the generic implementation of pickNodeBidirectional does.
This removes some uncommented AMDGPU-specific logic; if this logic turns
out to be important then perhaps it could be moved into an override of
tryCandidate instead.
Graphics shader benchmarking on gfx10 shows a lot more positive than
negative effects from this change.
Reviewers: arsenm, tstellar, rampitec, kzhuravl, vpykhtin, dstuttard, tpr, atrick, MatzeB
Subscribers: jvesely, wdng, nhaehnle, yaxunl, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68338
2019-10-07 22:33:59 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v14
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v15
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v8, v16
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v9, v17
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v2, v0
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_cbranch_execnz BB6_1
|
|
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
[AMDGPU] Remove dubious logic in bidirectional list scheduler
Summary:
pickNodeBidirectional tried to compare the best top candidate and the
best bottom candidate by examining TopCand.Reason and BotCand.Reason.
This is unsound because, after calling pickNodeFromQueue, Cand.Reason
does not reflect the most important reason why Cand was chosen. Rather
it reflects the most recent reason why it beat some other potential
candidate, which could have been for some low priority tie breaker
reason.
I have seen this cause problems where TopCand is a good candidate, but
because TopCand.Reason is ORDER (which is very low priority) it is
repeatedly ignored in favour of a mediocre BotCand. This is not how
bidirectional scheduling is supposed to work.
To fix this I changed the code to always compare TopCand and BotCand
directly, like the generic implementation of pickNodeBidirectional does.
This removes some uncommented AMDGPU-specific logic; if this logic turns
out to be important then perhaps it could be moved into an override of
tryCandidate instead.
Graphics shader benchmarking on gfx10 shows a lot more positive than
negative effects from this change.
Reviewers: arsenm, tstellar, rampitec, kzhuravl, vpykhtin, dstuttard, tpr, atrick, MatzeB
Subscribers: jvesely, wdng, nhaehnle, yaxunl, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68338
2019-10-07 22:33:59 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v0, v2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v1, v3
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, v4
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v5
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v6
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v7
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v8
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v9
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x float> %vec, float %val, i32 %idx
|
|
|
|
ret <8 x float> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insertelement into <8 x float>. %val is marked inreg; %vec and
; %idx are not. Both sets of checks below expect the indexed write to sit in a
; loop (v_readfirstlane_b32 on the index ... s_cbranch_execnz back to BB7_1).
; NOTE(review): the _v_s_v suffix presumably encodes the register bank of
; vec/val/idx (v = VGPR, s = SGPR) -- confirm against the rest of the file.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_v(<8 x float> %vec, float inreg %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
; GPRIDX-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s3, v8
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s3, v8
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v16, v7
; GPRIDX-NEXT: v_mov_b32_e32 v15, v6
; GPRIDX-NEXT: v_mov_b32_e32 v14, v5
; GPRIDX-NEXT: v_mov_b32_e32 v13, v4
; GPRIDX-NEXT: v_mov_b32_e32 v12, v3
; GPRIDX-NEXT: v_mov_b32_e32 v11, v2
; GPRIDX-NEXT: v_mov_b32_e32 v10, v1
; GPRIDX-NEXT: v_mov_b32_e32 v9, v0
; GPRIDX-NEXT: v_mov_b32_e32 v9, s2
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB7_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
; GPRIDX-NEXT: v_mov_b32_e32 v1, v10
; GPRIDX-NEXT: v_mov_b32_e32 v2, v11
; GPRIDX-NEXT: v_mov_b32_e32 v3, v12
; GPRIDX-NEXT: v_mov_b32_e32 v4, v13
; GPRIDX-NEXT: v_mov_b32_e32 v5, v14
; GPRIDX-NEXT: v_mov_b32_e32 v6, v15
; GPRIDX-NEXT: v_mov_b32_e32 v7, v16
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s1, v8
; MOVREL-NEXT: v_mov_b32_e32 v16, v7
; MOVREL-NEXT: v_mov_b32_e32 v9, v0
; MOVREL-NEXT: v_mov_b32_e32 v15, v6
; MOVREL-NEXT: v_mov_b32_e32 v14, v5
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v8
; MOVREL-NEXT: s_mov_b32 m0, s1
; MOVREL-NEXT: v_mov_b32_e32 v13, v4
; MOVREL-NEXT: v_mov_b32_e32 v12, v3
; MOVREL-NEXT: v_mov_b32_e32 v11, v2
; MOVREL-NEXT: v_mov_b32_e32 v10, v1
; MOVREL-NEXT: v_movreld_b32_e32 v9, s2
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB7_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, v9
; MOVREL-NEXT: v_mov_b32_e32 v1, v10
; MOVREL-NEXT: v_mov_b32_e32 v2, v11
; MOVREL-NEXT: v_mov_b32_e32 v3, v12
; MOVREL-NEXT: v_mov_b32_e32 v4, v13
; MOVREL-NEXT: v_mov_b32_e32 v5, v14
; MOVREL-NEXT: v_mov_b32_e32 v6, v15
; MOVREL-NEXT: v_mov_b32_e32 v7, v16
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}
|
|
|
|
|
|
|
|
; Dynamic-index insertelement into <8 x float>. Only %idx is marked inreg;
; %vec and %val are not. The checks below expect straight-line code with a
; single indexed move and no loop.
; NOTE(review): the _v_v_s suffix presumably encodes the register bank of
; vec/val/idx (v = VGPR, s = SGPR) -- confirm against the rest of the file.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_s(<8 x float> %vec, float %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: v_movreld_b32_e32 v0, v8
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}
|
|
|
|
|
|
|
|
; Dynamic-index insertelement into a vector of eight addrspace(3) pointers.
; Only %idx is marked inreg. The result is converted with ptrtoint to <8 x i32>
; and then bitcast to <8 x float> so it can be returned as <8 x float>.
; The checks below expect straight-line code with a single indexed move.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8p3i8_v_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8p3i8_v_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: v_movreld_b32_e32 v0, v8
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx
  %cast.0 = ptrtoint <8 x i8 addrspace(3)*> %insert to <8 x i32>
  %cast.1 = bitcast <8 x i32> %cast.0 to <8 x float>
  ret <8 x float> %cast.1
}
|
|
|
|
|
|
|
|
; Dynamic-index insertelement into <8 x float> where none of %vec, %val, %idx
; is marked inreg. Both sets of checks below expect the indexed write to sit in
; a loop (v_readfirstlane_b32 on the index ... s_cbranch_execnz back to BB10_1).
; NOTE(review): the _v_v_v suffix presumably encodes the register bank of
; vec/val/idx (v = VGPR, s = SGPR) -- confirm against the rest of the file.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v(<8 x float> %vec, float %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
; GPRIDX-NEXT: BB10_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v9
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v9
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v7
; GPRIDX-NEXT: v_mov_b32_e32 v16, v6
; GPRIDX-NEXT: v_mov_b32_e32 v15, v5
; GPRIDX-NEXT: v_mov_b32_e32 v14, v4
; GPRIDX-NEXT: v_mov_b32_e32 v13, v3
; GPRIDX-NEXT: v_mov_b32_e32 v12, v2
; GPRIDX-NEXT: v_mov_b32_e32 v11, v1
; GPRIDX-NEXT: v_mov_b32_e32 v10, v0
; GPRIDX-NEXT: v_mov_b32_e32 v10, v8
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB10_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
; GPRIDX-NEXT: v_mov_b32_e32 v2, v12
; GPRIDX-NEXT: v_mov_b32_e32 v3, v13
; GPRIDX-NEXT: v_mov_b32_e32 v4, v14
; GPRIDX-NEXT: v_mov_b32_e32 v5, v15
; GPRIDX-NEXT: v_mov_b32_e32 v6, v16
; GPRIDX-NEXT: v_mov_b32_e32 v7, v17
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB10_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s1, v9
; MOVREL-NEXT: v_mov_b32_e32 v17, v7
; MOVREL-NEXT: v_mov_b32_e32 v10, v0
; MOVREL-NEXT: v_mov_b32_e32 v16, v6
; MOVREL-NEXT: v_mov_b32_e32 v15, v5
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v9
; MOVREL-NEXT: s_mov_b32 m0, s1
; MOVREL-NEXT: v_mov_b32_e32 v14, v4
; MOVREL-NEXT: v_mov_b32_e32 v13, v3
; MOVREL-NEXT: v_mov_b32_e32 v12, v2
; MOVREL-NEXT: v_mov_b32_e32 v11, v1
; MOVREL-NEXT: v_movreld_b32_e32 v10, v8
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB10_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, v10
; MOVREL-NEXT: v_mov_b32_e32 v1, v11
; MOVREL-NEXT: v_mov_b32_e32 v2, v12
; MOVREL-NEXT: v_mov_b32_e32 v3, v13
; MOVREL-NEXT: v_mov_b32_e32 v4, v14
; MOVREL-NEXT: v_mov_b32_e32 v5, v15
; MOVREL-NEXT: v_mov_b32_e32 v6, v16
; MOVREL-NEXT: v_mov_b32_e32 v7, v17
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}
|
|
|
|
|
|
|
|
; Dynamic-index insertelement into <8 x i64> where %vec, %val and %idx are all
; marked inreg. The checks below expect the sixteen 32-bit halves of the vector
; to be copied with s_mov_b32, the index moved into m0, and the 64-bit element
; written with one s_movreld_b64.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_ps <8 x i64> @dyn_insertelement_v8i64_s_s_s(<8 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8i64_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 m0, s20
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[18:19]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8i64_s_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s20
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movreld_b64 s[0:1], s[18:19]
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x i64> %vec, i64 %val, i32 %idx
  ret <8 x i64> %insert
}
|
|
|
|
|
|
|
|
; Dynamic-index insertelement into a vector of eight addrspace(1) pointers
; where %vec, %val and %idx are all marked inreg. The checks below expect the
; sixteen 32-bit halves of the vector to be copied with s_mov_b32, the index
; moved into m0, and the element written with one s_movreld_b64.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_ps <8 x i8 addrspace(1)*> @dyn_insertelement_v8p1i8_s_s_s(<8 x i8 addrspace(1)*> inreg %vec, i8 addrspace(1)* inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8p1i8_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 m0, s20
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[18:19]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8p1i8_s_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s20
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movreld_b64 s[0:1], s[18:19]
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <8 x i8 addrspace(1)*> %vec, i8 addrspace(1)* %val, i32 %idx
  ret <8 x i8 addrspace(1)*> %insert
}
|
|
|
|
|
|
|
|
; Dynamic-index insertelement of %val into the constant vector
; <1.0, 2.0, ..., 8.0> of doubles. Unlike its neighbours this is a plain
; `define void` (no amdgpu_ps), and neither argument is marked inreg. The result
; is split into four <2 x double> pieces with shufflevector, and each piece is
; stored with a volatile store to an undef addrspace(1) pointer so all eight
; elements stay live. The checks below expect the constant to be materialised
; in s4..s19, copied to v19..v34 (spilling v32-v34), and the 64-bit element
; written as two 32-bit indexed moves inside a loop (BB13_1) with the index
; shifted left by one.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f64_const_s_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b32 s18, 0
; GPRIDX-NEXT: s_mov_b32 s19, 0x40200000
; GPRIDX-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GPRIDX-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GPRIDX-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; GPRIDX-NEXT: s_mov_b32 s17, 0x401c0000
; GPRIDX-NEXT: s_mov_b32 s16, s18
; GPRIDX-NEXT: s_mov_b32 s15, 0x40180000
; GPRIDX-NEXT: s_mov_b32 s14, s18
; GPRIDX-NEXT: s_mov_b32 s13, 0x40140000
; GPRIDX-NEXT: s_mov_b32 s12, s18
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4.0
; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000
; GPRIDX-NEXT: s_mov_b32 s8, s18
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0
; GPRIDX-NEXT: s_mov_b64 s[4:5], 1.0
; GPRIDX-NEXT: v_mov_b32_e32 v34, s19
; GPRIDX-NEXT: v_mov_b32_e32 v33, s18
; GPRIDX-NEXT: v_mov_b32_e32 v32, s17
; GPRIDX-NEXT: v_mov_b32_e32 v31, s16
; GPRIDX-NEXT: v_mov_b32_e32 v30, s15
; GPRIDX-NEXT: v_mov_b32_e32 v29, s14
; GPRIDX-NEXT: v_mov_b32_e32 v28, s13
; GPRIDX-NEXT: v_mov_b32_e32 v27, s12
; GPRIDX-NEXT: v_mov_b32_e32 v26, s11
; GPRIDX-NEXT: v_mov_b32_e32 v25, s10
; GPRIDX-NEXT: v_mov_b32_e32 v24, s9
; GPRIDX-NEXT: v_mov_b32_e32 v23, s8
; GPRIDX-NEXT: v_mov_b32_e32 v22, s7
; GPRIDX-NEXT: v_mov_b32_e32 v21, s6
; GPRIDX-NEXT: v_mov_b32_e32 v20, s5
; GPRIDX-NEXT: v_mov_b32_e32 v19, s4
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v2
; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v2
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v3, v19
; GPRIDX-NEXT: v_mov_b32_e32 v4, v20
; GPRIDX-NEXT: v_mov_b32_e32 v5, v21
; GPRIDX-NEXT: v_mov_b32_e32 v6, v22
; GPRIDX-NEXT: v_mov_b32_e32 v7, v23
; GPRIDX-NEXT: v_mov_b32_e32 v8, v24
; GPRIDX-NEXT: v_mov_b32_e32 v9, v25
; GPRIDX-NEXT: v_mov_b32_e32 v10, v26
; GPRIDX-NEXT: v_mov_b32_e32 v11, v27
; GPRIDX-NEXT: v_mov_b32_e32 v12, v28
; GPRIDX-NEXT: v_mov_b32_e32 v13, v29
; GPRIDX-NEXT: v_mov_b32_e32 v14, v30
; GPRIDX-NEXT: v_mov_b32_e32 v15, v31
; GPRIDX-NEXT: v_mov_b32_e32 v16, v32
; GPRIDX-NEXT: v_mov_b32_e32 v17, v33
; GPRIDX-NEXT: v_mov_b32_e32 v18, v34
; GPRIDX-NEXT: v_mov_b32_e32 v3, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v4, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB13_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
; GPRIDX-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
; GPRIDX-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GPRIDX-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GPRIDX-NEXT: s_waitcnt vmcnt(0)
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_insertelement_v8f64_const_s_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0
; MOVREL-NEXT: s_mov_b32 s18, 0
; MOVREL-NEXT: s_mov_b32 s19, 0x40200000
; MOVREL-NEXT: s_mov_b32 s17, 0x401c0000
; MOVREL-NEXT: s_mov_b32 s15, 0x40180000
; MOVREL-NEXT: s_mov_b32 s13, 0x40140000
; MOVREL-NEXT: s_mov_b32 s16, s18
; MOVREL-NEXT: s_mov_b32 s14, s18
; MOVREL-NEXT: s_mov_b32 s12, s18
; MOVREL-NEXT: s_mov_b64 s[10:11], 4.0
; MOVREL-NEXT: s_mov_b32 s9, 0x40080000
; MOVREL-NEXT: s_mov_b32 s8, s18
; MOVREL-NEXT: s_mov_b64 s[6:7], 2.0
; MOVREL-NEXT: s_mov_b64 s[4:5], 1.0
; MOVREL-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; MOVREL-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; MOVREL-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; MOVREL-NEXT: v_mov_b32_e32 v34, s19
; MOVREL-NEXT: v_mov_b32_e32 v33, s18
; MOVREL-NEXT: v_mov_b32_e32 v32, s17
; MOVREL-NEXT: v_mov_b32_e32 v31, s16
; MOVREL-NEXT: v_mov_b32_e32 v30, s15
; MOVREL-NEXT: v_mov_b32_e32 v29, s14
; MOVREL-NEXT: v_mov_b32_e32 v28, s13
; MOVREL-NEXT: v_mov_b32_e32 v27, s12
; MOVREL-NEXT: v_mov_b32_e32 v26, s11
; MOVREL-NEXT: v_mov_b32_e32 v25, s10
; MOVREL-NEXT: v_mov_b32_e32 v24, s9
; MOVREL-NEXT: v_mov_b32_e32 v23, s8
; MOVREL-NEXT: v_mov_b32_e32 v22, s7
; MOVREL-NEXT: v_mov_b32_e32 v21, s6
; MOVREL-NEXT: v_mov_b32_e32 v20, s5
; MOVREL-NEXT: v_mov_b32_e32 v19, s4
; MOVREL-NEXT: s_mov_b32 s4, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s5, v2
; MOVREL-NEXT: v_mov_b32_e32 v3, v19
; MOVREL-NEXT: v_mov_b32_e32 v4, v20
; MOVREL-NEXT: v_mov_b32_e32 v5, v21
; MOVREL-NEXT: v_mov_b32_e32 v6, v22
; MOVREL-NEXT: s_lshl_b32 m0, s5, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s5, v2
; MOVREL-NEXT: v_mov_b32_e32 v7, v23
; MOVREL-NEXT: v_mov_b32_e32 v8, v24
; MOVREL-NEXT: v_mov_b32_e32 v9, v25
; MOVREL-NEXT: v_mov_b32_e32 v10, v26
; MOVREL-NEXT: v_mov_b32_e32 v11, v27
; MOVREL-NEXT: v_mov_b32_e32 v12, v28
; MOVREL-NEXT: v_mov_b32_e32 v13, v29
; MOVREL-NEXT: v_mov_b32_e32 v14, v30
; MOVREL-NEXT: v_mov_b32_e32 v15, v31
; MOVREL-NEXT: v_mov_b32_e32 v16, v32
; MOVREL-NEXT: v_mov_b32_e32 v17, v33
; MOVREL-NEXT: v_mov_b32_e32 v18, v34
; MOVREL-NEXT: v_movreld_b32_e32 v3, v0
; MOVREL-NEXT: v_movreld_b32_e32 v4, v1
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB13_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s4
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
; MOVREL-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
; MOVREL-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; MOVREL-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; MOVREL-NEXT: s_waitcnt vmcnt(0)
; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
  %insert = insertelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, double %val, i32 %idx
  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
  ret void
}
|
|
|
|
|
|
|
|
; Dynamic-index insertelement into <8 x double>. %vec and %val are marked
; inreg; %idx is not. The result is split into four <2 x double> pieces with
; shufflevector, and each piece is stored with a volatile store to an undef
; addrspace(1) pointer so all eight elements stay live. The checks below expect
; the vector to be copied from SGPRs into v17..v32 and the 64-bit element
; written as two 32-bit indexed moves inside a loop (BB14_1) with the index
; shifted left by one.
; NOTE(review): the _s_s_v suffix presumably encodes the register bank of
; vec/val/idx (v = VGPR, s = SGPR) -- confirm against the rest of the file.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, double inreg %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: v_mov_b32_e32 v32, s15
; GPRIDX-NEXT: v_mov_b32_e32 v31, s14
; GPRIDX-NEXT: v_mov_b32_e32 v30, s13
; GPRIDX-NEXT: v_mov_b32_e32 v29, s12
; GPRIDX-NEXT: v_mov_b32_e32 v28, s11
; GPRIDX-NEXT: v_mov_b32_e32 v27, s10
; GPRIDX-NEXT: v_mov_b32_e32 v26, s9
; GPRIDX-NEXT: v_mov_b32_e32 v25, s8
; GPRIDX-NEXT: v_mov_b32_e32 v24, s7
; GPRIDX-NEXT: v_mov_b32_e32 v23, s6
; GPRIDX-NEXT: v_mov_b32_e32 v22, s5
; GPRIDX-NEXT: v_mov_b32_e32 v21, s4
; GPRIDX-NEXT: v_mov_b32_e32 v20, s3
; GPRIDX-NEXT: v_mov_b32_e32 v19, s2
; GPRIDX-NEXT: v_mov_b32_e32 v18, s1
; GPRIDX-NEXT: v_mov_b32_e32 v17, s0
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
; GPRIDX-NEXT: BB14_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v0
; GPRIDX-NEXT: s_lshl_b32 s3, s2, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v0
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
; GPRIDX-NEXT: v_mov_b32_e32 v2, v18
; GPRIDX-NEXT: v_mov_b32_e32 v3, v19
; GPRIDX-NEXT: v_mov_b32_e32 v4, v20
; GPRIDX-NEXT: v_mov_b32_e32 v5, v21
; GPRIDX-NEXT: v_mov_b32_e32 v6, v22
; GPRIDX-NEXT: v_mov_b32_e32 v7, v23
; GPRIDX-NEXT: v_mov_b32_e32 v8, v24
; GPRIDX-NEXT: v_mov_b32_e32 v9, v25
; GPRIDX-NEXT: v_mov_b32_e32 v10, v26
; GPRIDX-NEXT: v_mov_b32_e32 v11, v27
; GPRIDX-NEXT: v_mov_b32_e32 v12, v28
; GPRIDX-NEXT: v_mov_b32_e32 v13, v29
; GPRIDX-NEXT: v_mov_b32_e32 v14, v30
; GPRIDX-NEXT: v_mov_b32_e32 v15, v31
; GPRIDX-NEXT: v_mov_b32_e32 v16, v32
; GPRIDX-NEXT: v_mov_b32_e32 v1, s18
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v2, s19
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB14_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[1:4], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[5:8], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[9:12], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[13:16], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_insertelement_v8f64_s_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: v_mov_b32_e32 v32, s15
; MOVREL-NEXT: v_mov_b32_e32 v30, s13
; MOVREL-NEXT: v_mov_b32_e32 v28, s11
; MOVREL-NEXT: v_mov_b32_e32 v29, s12
; MOVREL-NEXT: v_mov_b32_e32 v31, s14
; MOVREL-NEXT: v_mov_b32_e32 v27, s10
; MOVREL-NEXT: v_mov_b32_e32 v26, s9
; MOVREL-NEXT: v_mov_b32_e32 v25, s8
; MOVREL-NEXT: v_mov_b32_e32 v24, s7
; MOVREL-NEXT: v_mov_b32_e32 v23, s6
; MOVREL-NEXT: v_mov_b32_e32 v22, s5
; MOVREL-NEXT: v_mov_b32_e32 v21, s4
; MOVREL-NEXT: v_mov_b32_e32 v20, s3
; MOVREL-NEXT: v_mov_b32_e32 v19, s2
; MOVREL-NEXT: v_mov_b32_e32 v18, s1
; MOVREL-NEXT: v_mov_b32_e32 v17, s0
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB14_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
; MOVREL-NEXT: v_mov_b32_e32 v1, v17
; MOVREL-NEXT: v_mov_b32_e32 v2, v18
; MOVREL-NEXT: v_mov_b32_e32 v3, v19
; MOVREL-NEXT: v_mov_b32_e32 v4, v20
; MOVREL-NEXT: s_lshl_b32 m0, s1, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v0
; MOVREL-NEXT: v_mov_b32_e32 v5, v21
; MOVREL-NEXT: v_mov_b32_e32 v6, v22
; MOVREL-NEXT: v_mov_b32_e32 v7, v23
; MOVREL-NEXT: v_mov_b32_e32 v8, v24
; MOVREL-NEXT: v_mov_b32_e32 v9, v25
; MOVREL-NEXT: v_mov_b32_e32 v10, v26
; MOVREL-NEXT: v_mov_b32_e32 v11, v27
; MOVREL-NEXT: v_mov_b32_e32 v12, v28
; MOVREL-NEXT: v_mov_b32_e32 v13, v29
; MOVREL-NEXT: v_mov_b32_e32 v14, v30
; MOVREL-NEXT: v_mov_b32_e32 v15, v31
; MOVREL-NEXT: v_mov_b32_e32 v16, v32
; MOVREL-NEXT: v_movreld_b32_e32 v1, s18
; MOVREL-NEXT: v_movreld_b32_e32 v2, s19
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB14_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[1:4], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[5:8], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[9:12], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[13:16], off
; MOVREL-NEXT: s_endpgm
entry:
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
  ret void
}
|
|
|
|
|
|
|
|
define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, double %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s9, s11
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s11, s13
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s13, s15
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s15, s17
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s8, s10
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s10, s12
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s12, s14
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s14, s16
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, s15
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v16, s14
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v15, s13
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v14, s12
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v13, s11
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v12, s10
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v9, s7
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v8, s6
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, s5
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, s4
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, s3
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, s2
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, s1
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
|
|
|
|
; GPRIDX-NEXT: s_lshl_b32 s0, s18, 1
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
|
|
|
|
; GPRIDX-NEXT: s_endpgm
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_s_v_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s9, s11
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s11, s13
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s13, s15
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s15, s17
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s8, s10
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s10, s12
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s12, s14
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s14, s16
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, s15
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v2, s0
|
|
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s18, 1
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, s13
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v16, s14
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, s12
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, s11
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, s10
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, s9
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, s8
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v9, s7
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v8, s6
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, s5
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, s4
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, s3
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, s2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, s1
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v2, v0
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v3, v1
|
2020-01-14 06:54:17 +08:00
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[6:9], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[14:17], off
|
|
|
|
; MOVREL-NEXT: s_endpgm
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
|
|
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insert of a double into an <8 x double>. %vec is passed
; without inreg; %val and %idx are inreg. The result is split into four
; <2 x double> pieces that are written with volatile stores to an undef
; addrspace(1) pointer. Both sets of checks shift the index left by one and
; write the value as two 32-bit moves (v_mov_b32 under s_set_gpr_idx_on on
; gfx900, v_movreld_b32 on gfx1010).
define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double inreg %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_lshl_b32 s0, s4, 1
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, s3
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
|
|
|
|
; GPRIDX-NEXT: s_endpgm
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_v_s_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s4, 1
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, s2
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v1, s3
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
|
|
|
|
; MOVREL-NEXT: s_endpgm
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
|
|
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insert of a double into an <8 x double>. Only %vec is inreg;
; %val and %idx are passed without inreg. The result is split into four
; <2 x double> pieces that are written with volatile stores to an undef
; addrspace(1) pointer. Both sets of checks first copy the sixteen vector
; dwords from s registers into v registers, then perform the insert inside a
; loop that reads the index with v_readfirstlane_b32 and branches back with
; s_cbranch_execnz; exec is saved before the loop and restored after it.
define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, double %val, i32 %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_v:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s9, s11
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s11, s13
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s13, s15
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s15, s17
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s8, s10
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s10, s12
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s12, s14
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s14, s16
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v34, s15
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v33, s14
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v32, s13
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v31, s12
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v30, s11
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v29, s10
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v28, s9
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v27, s8
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v26, s7
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v25, s6
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v24, s5
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v23, s4
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v22, s3
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v21, s2
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, s1
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, s0
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
|
|
; GPRIDX-NEXT: BB17_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: s_lshl_b32 s3, s2, 1
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v2
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v19
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v20
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v5, v21
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v6, v22
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v7, v23
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v8, v24
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v9, v25
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v10, v26
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v11, v27
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v12, v28
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v13, v29
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v14, v30
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v15, v31
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v16, v32
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, v33
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v18, v34
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v3, v0
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v4, v1
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB17_1
|
|
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
|
|
|
|
; GPRIDX-NEXT: s_endpgm
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_s_v_v:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s9, s11
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s11, s13
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s13, s15
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s15, s17
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s8, s10
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s10, s12
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s12, s14
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s14, s16
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v34, s15
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v32, s13
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v30, s11
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v31, s12
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v33, s14
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v29, s10
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v28, s9
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v27, s8
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v26, s7
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v25, s6
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v24, s5
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v23, s4
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v22, s3
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v21, s2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v20, s1
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v19, s0
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: BB17_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v3, v19
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v4, v20
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v5, v21
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v6, v22
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s1, 1
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v7, v23
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v8, v24
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v9, v25
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v10, v26
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v11, v27
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v12, v28
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v13, v29
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v14, v30
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v15, v31
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v16, v32
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, v33
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v18, v34
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v3, v0
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v4, v1
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_cbranch_execnz BB17_1
|
|
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
|
|
|
|
; MOVREL-NEXT: s_endpgm
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
|
|
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insert of a double into an <8 x double>. Only %val is inreg;
; %vec and %idx are passed without inreg. The result is split into four
; <2 x double> pieces that are written with volatile stores to an undef
; addrspace(1) pointer. Both sets of checks perform the insert inside a loop
; that reads the index with v_readfirstlane_b32, copies v0-v15 into v17-v32,
; and branches back with s_cbranch_execnz; exec is saved before the loop and
; restored after it.
define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double inreg %val, i32 %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_v:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
|
|
; GPRIDX-NEXT: BB18_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s4, v16
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: s_lshl_b32 s5, s4, 1
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s4, v16
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s5, gpr_idx(DST)
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v32, v15
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v31, v14
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v30, v13
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v29, v12
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v28, v11
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v27, v10
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v26, v9
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v25, v8
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v24, v7
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v23, v6
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v22, v5
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v21, v4
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, v3
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, v2
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v17, s2
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s5, gpr_idx(DST)
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v18, s3
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB18_1
|
|
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[17:20], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[21:24], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[25:28], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[29:32], off
|
|
|
|
; GPRIDX-NEXT: s_endpgm
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_v_s_v:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: BB18_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v16
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v32, v15
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v17, v0
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v31, v14
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v30, v13
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s1, 1
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v16
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v29, v12
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v28, v11
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v27, v10
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v26, v9
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v25, v8
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v24, v7
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v23, v6
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v22, v5
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v21, v4
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v20, v3
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v19, v2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v18, v1
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v17, s2
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v18, s3
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_cbranch_execnz BB18_1
|
|
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[17:20], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[21:24], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[25:28], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[29:32], off
|
|
|
|
; MOVREL-NEXT: s_endpgm
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
|
|
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insert of a double into an <8 x double>. Only %idx is inreg;
; %vec and %val are passed without inreg. The result is split into four
; <2 x double> pieces that are written with volatile stores to an undef
; addrspace(1) pointer. Both sets of checks shift the index left by one and
; write the value (v16/v17) as two 32-bit moves, with no loop.
define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v16
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
|
|
|
|
; GPRIDX-NEXT: s_endpgm
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, v16
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v1, v17
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
|
|
|
|
; MOVREL-NEXT: s_endpgm
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
|
|
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insert of a double into an <8 x double> where no argument is
; inreg. The result is split into four <2 x double> pieces that are written
; with volatile stores to an undef addrspace(1) pointer. Both sets of checks
; perform the insert inside a loop that reads the index with
; v_readfirstlane_b32, copies v0-v15 into v19-v34, and branches back with
; s_cbranch_execnz; exec is saved before the loop and restored after it.
define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %val, i32 %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
|
|
|
; GPRIDX-NEXT: BB20_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v18
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: s_lshl_b32 s3, s2, 1
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v18
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v34, v15
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v33, v14
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v32, v13
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v31, v12
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v30, v11
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v29, v10
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v28, v9
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v27, v8
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v26, v7
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v25, v6
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v24, v5
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v23, v4
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v22, v3
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v21, v2
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, v1
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, v0
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v19, v16
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
2020-01-04 12:13:15 +08:00
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v20, v17
|
2020-01-04 01:17:56 +08:00
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
|
|
|
|
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
|
|
|
|
; GPRIDX-NEXT: s_cbranch_execnz BB20_1
|
|
|
|
; GPRIDX-NEXT: ; %bb.2:
|
|
|
|
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
|
|
|
|
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[31:34], off
|
|
|
|
; GPRIDX-NEXT: s_endpgm
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_v:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: BB20_1: ; =>This Inner Loop Header: Depth=1
|
|
|
|
; MOVREL-NEXT: v_readfirstlane_b32 s1, v18
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v34, v15
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v19, v0
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v33, v14
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v32, v13
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: s_lshl_b32 m0, s1, 1
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v18
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v31, v12
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v30, v11
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v29, v10
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v28, v9
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v27, v8
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v26, v7
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v25, v6
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v24, v5
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v23, v4
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v22, v3
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v21, v2
|
|
|
|
; MOVREL-NEXT: v_mov_b32_e32 v20, v1
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v19, v16
|
2020-01-04 12:13:15 +08:00
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v20, v17
|
2020-01-04 01:17:56 +08:00
|
|
|
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
|
|
|
|
; MOVREL-NEXT: s_cbranch_execnz BB20_1
|
|
|
|
; MOVREL-NEXT: ; %bb.2:
|
|
|
|
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
|
|
|
|
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[31:34], off
|
|
|
|
; MOVREL-NEXT: s_endpgm
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
|
|
|
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
|
|
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
|
|
|
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
|
|
|
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
|
|
|
store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
|
|
|
|
store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insert of an i32 into a <3 x i32> with every argument inreg;
; the updated vector is returned. Both sets of checks move the vector down to
; s0-s2, copy the index into m0 and write the element with s_movreld_b32.
define amdgpu_ps <3 x i32> @dyn_insertelement_v3i32_s_s_s(<3 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v3i32_s_s_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 m0, s6
|
|
|
|
; GPRIDX-NEXT: s_nop 0
|
|
|
|
; GPRIDX-NEXT: s_movreld_b32 s0, s5
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v3i32_s_s_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s6
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: s_movreld_b32 s0, s5
|
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <3 x i32> %vec, i32 %val, i32 %idx
|
|
|
|
ret <3 x i32> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insert of a float into a <3 x float>; only %idx is inreg and
; the updated vector is returned. The checks expect a single indexed move of
; v3 into the vector starting at v0 (v_mov_b32 under s_set_gpr_idx_on on
; gfx900, v_movreld_b32 with m0 on gfx1010).
; NOTE(review): the function name says v3i32 but the IR types are float --
; confirm whether the name should read v3f32 before renaming anything.
define amdgpu_ps <3 x float> @dyn_insertelement_v3i32_v_v_s(<3 x float> %vec, float %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v3i32_v_v_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v3
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v3i32_v_v_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s2
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, v3
|
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <3 x float> %vec, float %val, i32 %idx
|
|
|
|
ret <3 x float> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insert of an i32 into a <5 x i32> with every argument inreg;
; the updated vector is returned. Both sets of checks move the vector down to
; s0-s4, copy the index into m0 and write the element with s_movreld_b32.
define amdgpu_ps <5 x i32> @dyn_insertelement_v5i32_s_s_s(<5 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v5i32_s_s_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 m0, s8
|
|
|
|
; GPRIDX-NEXT: s_nop 0
|
|
|
|
; GPRIDX-NEXT: s_movreld_b32 s0, s7
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v5i32_s_s_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s8
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
|
|
; MOVREL-NEXT: s_movreld_b32 s0, s7
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <5 x i32> %vec, i32 %val, i32 %idx
|
|
|
|
ret <5 x i32> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insert of a float into a <5 x float>; only %idx is inreg and
; the updated vector is returned. The checks expect a single indexed move of
; v5 into the vector starting at v0 (v_mov_b32 under s_set_gpr_idx_on on
; gfx900, v_movreld_b32 with m0 on gfx1010).
; NOTE(review): the function name says v5i32 but the IR types are float --
; confirm whether the name should read v5f32 before renaming anything.
define amdgpu_ps <5 x float> @dyn_insertelement_v5i32_v_v_s(<5 x float> %vec, float %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v5i32_v_v_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v5
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v5i32_v_v_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s2
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, v5
|
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <5 x float> %vec, float %val, i32 %idx
|
|
|
|
ret <5 x float> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insert of an i32 into a <32 x i32> with every argument inreg;
; the updated vector is returned. Both sets of checks move the vector down to
; s0-s31, copy the index (s35) into m0 and write the element (s34) with
; s_movreld_b32.
define amdgpu_ps <32 x i32> @dyn_insertelement_v32i32_s_s_s(<32 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v32i32_s_s_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s5, s7
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s6, s8
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s7, s9
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s8, s10
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s9, s11
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s10, s12
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s11, s13
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s12, s14
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s13, s15
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s14, s16
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s15, s17
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s16, s18
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s17, s19
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s18, s20
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s19, s21
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s20, s22
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s21, s23
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s22, s24
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s23, s25
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s24, s26
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s25, s27
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s26, s28
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s27, s29
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s28, s30
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s29, s31
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s30, s32
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 s31, s33
|
|
|
|
; GPRIDX-NEXT: s_mov_b32 m0, s35
|
|
|
|
; GPRIDX-NEXT: s_nop 0
|
|
|
|
; GPRIDX-NEXT: s_movreld_b32 s0, s34
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v32i32_s_s_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s35
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s5, s7
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s6, s8
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s7, s9
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s8, s10
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s9, s11
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s10, s12
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s11, s13
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s12, s14
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s13, s15
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s14, s16
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s15, s17
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s16, s18
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s17, s19
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s18, s20
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s19, s21
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s20, s22
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s21, s23
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s22, s24
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s23, s25
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s24, s26
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s25, s27
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s26, s28
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s27, s29
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s28, s30
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s29, s31
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s30, s32
|
|
|
|
; MOVREL-NEXT: s_mov_b32 s31, s33
|
|
|
|
; MOVREL-NEXT: s_movreld_b32 s0, s34
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <32 x i32> %vec, i32 %val, i32 %idx
|
|
|
|
ret <32 x i32> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic-index insert of a float into a <32 x float>; only %idx is inreg and
; the updated vector is returned. The checks expect a single indexed move of
; v32 into the vector starting at v0 (v_mov_b32 under s_set_gpr_idx_on on
; gfx900, v_movreld_b32 with m0 on gfx1010).
; NOTE(review): the function name says v32i32 but the IR types are float --
; confirm whether the name should read v32f32 before renaming anything.
define amdgpu_ps <32 x float> @dyn_insertelement_v32i32_v_v_s(<32 x float> %vec, float %val, i32 inreg %idx) {
|
|
|
|
; GPRIDX-LABEL: dyn_insertelement_v32i32_v_v_s:
|
|
|
|
; GPRIDX: ; %bb.0: ; %entry
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
|
|
|
|
; GPRIDX-NEXT: v_mov_b32_e32 v0, v32
|
|
|
|
; GPRIDX-NEXT: s_set_gpr_idx_off
|
|
|
|
; GPRIDX-NEXT: ; return to shader part epilog
|
|
|
|
;
|
|
|
|
; MOVREL-LABEL: dyn_insertelement_v32i32_v_v_s:
|
|
|
|
; MOVREL: ; %bb.0: ; %entry
|
|
|
|
; MOVREL-NEXT: s_mov_b32 m0, s2
|
|
|
|
; MOVREL-NEXT: ; implicit-def: $vcc_hi
|
|
|
|
; MOVREL-NEXT: v_movreld_b32_e32 v0, v32
|
|
|
|
; MOVREL-NEXT: ; return to shader part epilog
|
|
|
|
entry:
|
|
|
|
%insert = insertelement <32 x float> %vec, float %val, i32 %idx
|
|
|
|
ret <32 x float> %insert
|
|
|
|
}
|
|
|
|
|
|
|
|
; Dynamic insertelement into an inreg <8 x float> at index (%idx + 1), with an
; inreg value and index. On both targets the checks expect m0 to be loaded
; straight from the incoming index register (s11) and the constant +1 to show
; up as the s_movreld_b32 base register (s1 instead of s0), with no separate
; add instruction. The result is then copied to v0-v7 for the return.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_1(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 m0, s11
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b32 s1, s10
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s11
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_movreld_b32 s1, s10
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %idx.add = add i32 %idx, 1
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
  ret <8 x float> %insert
}
|
|
|
|
|
|
|
|
; Same shape as the add_1 variant, but the index is (%idx + 7), the last
; element of the <8 x float>. The checks expect m0 to be loaded from the
; incoming index register (s11) unchanged and the constant offset to appear as
; the s_movreld_b32 base register (s7), again without a separate add.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_7(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 m0, s11
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b32 s7, s10
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 m0, s11
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_movreld_b32 s7, s10
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %idx.add = add i32 %idx, 7
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
  ret <8 x float> %insert
}
|
|
|
|
|
|
|
|
; Dynamic insertelement into a <8 x float> at index (%idx + 1) where the
; vector, value and index are all non-inreg arguments. The checks expect a
; loop (BB29_1) that reads the index from v9 with v_readfirstlane_b32,
; compares it back against v9, performs the indexed write, and then narrows
; exec with s_and_saveexec / s_xor until s_cbranch_execnz falls through. The
; constant +1 is expected as the base register of the indexed move (v11, one
; past the v10 start of the copied vector) rather than as an add.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_1(<8 x float> %vec, float %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
; GPRIDX-NEXT: BB29_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v9
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v9
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v7
; GPRIDX-NEXT: v_mov_b32_e32 v16, v6
; GPRIDX-NEXT: v_mov_b32_e32 v15, v5
; GPRIDX-NEXT: v_mov_b32_e32 v14, v4
; GPRIDX-NEXT: v_mov_b32_e32 v13, v3
; GPRIDX-NEXT: v_mov_b32_e32 v12, v2
; GPRIDX-NEXT: v_mov_b32_e32 v11, v1
; GPRIDX-NEXT: v_mov_b32_e32 v10, v0
; GPRIDX-NEXT: v_mov_b32_e32 v11, v8
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB29_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
; GPRIDX-NEXT: v_mov_b32_e32 v2, v12
; GPRIDX-NEXT: v_mov_b32_e32 v3, v13
; GPRIDX-NEXT: v_mov_b32_e32 v4, v14
; GPRIDX-NEXT: v_mov_b32_e32 v5, v15
; GPRIDX-NEXT: v_mov_b32_e32 v6, v16
; GPRIDX-NEXT: v_mov_b32_e32 v7, v17
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB29_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s1, v9
; MOVREL-NEXT: v_mov_b32_e32 v17, v7
; MOVREL-NEXT: v_mov_b32_e32 v11, v1
; MOVREL-NEXT: v_mov_b32_e32 v16, v6
; MOVREL-NEXT: v_mov_b32_e32 v15, v5
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v9
; MOVREL-NEXT: s_mov_b32 m0, s1
; MOVREL-NEXT: v_mov_b32_e32 v14, v4
; MOVREL-NEXT: v_mov_b32_e32 v13, v3
; MOVREL-NEXT: v_mov_b32_e32 v12, v2
; MOVREL-NEXT: v_mov_b32_e32 v10, v0
; MOVREL-NEXT: v_movreld_b32_e32 v11, v8
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB29_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, v10
; MOVREL-NEXT: v_mov_b32_e32 v1, v11
; MOVREL-NEXT: v_mov_b32_e32 v2, v12
; MOVREL-NEXT: v_mov_b32_e32 v3, v13
; MOVREL-NEXT: v_mov_b32_e32 v4, v14
; MOVREL-NEXT: v_mov_b32_e32 v5, v15
; MOVREL-NEXT: v_mov_b32_e32 v6, v16
; MOVREL-NEXT: v_mov_b32_e32 v7, v17
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %idx.add = add i32 %idx, 1
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
  ret <8 x float> %insert
}
|
|
|
|
|
|
|
|
; Same shape as the v_v_v add_1 variant, but the index is (%idx + 7). The
; checks expect the same readfirstlane / compare / saveexec loop (BB30_1) over
; the index in v9, with the constant offset expressed as the base register of
; the indexed move (v17, the last register of the v10-v17 copy) rather than
; as an add.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_7(<8 x float> %vec, float %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
; GPRIDX-NEXT: BB30_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v9
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v9
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v7
; GPRIDX-NEXT: v_mov_b32_e32 v16, v6
; GPRIDX-NEXT: v_mov_b32_e32 v15, v5
; GPRIDX-NEXT: v_mov_b32_e32 v14, v4
; GPRIDX-NEXT: v_mov_b32_e32 v13, v3
; GPRIDX-NEXT: v_mov_b32_e32 v12, v2
; GPRIDX-NEXT: v_mov_b32_e32 v11, v1
; GPRIDX-NEXT: v_mov_b32_e32 v10, v0
; GPRIDX-NEXT: v_mov_b32_e32 v17, v8
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB30_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
; GPRIDX-NEXT: v_mov_b32_e32 v2, v12
; GPRIDX-NEXT: v_mov_b32_e32 v3, v13
; GPRIDX-NEXT: v_mov_b32_e32 v4, v14
; GPRIDX-NEXT: v_mov_b32_e32 v5, v15
; GPRIDX-NEXT: v_mov_b32_e32 v6, v16
; GPRIDX-NEXT: v_mov_b32_e32 v7, v17
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB30_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s1, v9
; MOVREL-NEXT: v_mov_b32_e32 v17, v7
; MOVREL-NEXT: v_mov_b32_e32 v16, v6
; MOVREL-NEXT: v_mov_b32_e32 v15, v5
; MOVREL-NEXT: v_mov_b32_e32 v14, v4
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v9
; MOVREL-NEXT: s_mov_b32 m0, s1
; MOVREL-NEXT: v_mov_b32_e32 v13, v3
; MOVREL-NEXT: v_mov_b32_e32 v12, v2
; MOVREL-NEXT: v_mov_b32_e32 v11, v1
; MOVREL-NEXT: v_mov_b32_e32 v10, v0
; MOVREL-NEXT: v_movreld_b32_e32 v17, v8
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB30_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
; MOVREL-NEXT: v_mov_b32_e32 v0, v10
; MOVREL-NEXT: v_mov_b32_e32 v1, v11
; MOVREL-NEXT: v_mov_b32_e32 v2, v12
; MOVREL-NEXT: v_mov_b32_e32 v3, v13
; MOVREL-NEXT: v_mov_b32_e32 v4, v14
; MOVREL-NEXT: v_mov_b32_e32 v5, v15
; MOVREL-NEXT: v_mov_b32_e32 v6, v16
; MOVREL-NEXT: v_mov_b32_e32 v7, v17
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %idx.add = add i32 %idx, 7
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
  ret <8 x float> %insert
}
|
|
|
|
|
|
|
|
; Dynamic insertelement into an inreg <8 x double> at index (%idx + 1), with an
; inreg value and index. The checks expect m0 to be loaded from the incoming
; index register (s20) unchanged and a single s_movreld_b64 whose base is the
; second 64-bit element (s[2:3]), i.e. the +1 appears as the base register
; instead of an add. The function returns void: the result is split into four
; <2 x double> pieces with shufflevector and written with volatile stores to
; an undef addrspace(1) pointer, which the checks match as four
; global_store_dwordx4 instructions.
define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 m0, s20
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b64 s[2:3], s[18:19]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s4
; GPRIDX-NEXT: v_mov_b32_e32 v1, s5
; GPRIDX-NEXT: v_mov_b32_e32 v2, s6
; GPRIDX-NEXT: v_mov_b32_e32 v3, s7
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s8
; GPRIDX-NEXT: v_mov_b32_e32 v1, s9
; GPRIDX-NEXT: v_mov_b32_e32 v2, s10
; GPRIDX-NEXT: v_mov_b32_e32 v3, s11
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s12
; GPRIDX-NEXT: v_mov_b32_e32 v1, s13
; GPRIDX-NEXT: v_mov_b32_e32 v2, s14
; GPRIDX-NEXT: v_mov_b32_e32 v3, s15
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 m0, s20
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movreld_b64 s[2:3], s[18:19]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
; MOVREL-NEXT: v_mov_b32_e32 v8, s8
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
; MOVREL-NEXT: v_mov_b32_e32 v12, s12
; MOVREL-NEXT: v_mov_b32_e32 v9, s9
; MOVREL-NEXT: v_mov_b32_e32 v10, s10
; MOVREL-NEXT: v_mov_b32_e32 v11, s11
; MOVREL-NEXT: v_mov_b32_e32 v13, s13
; MOVREL-NEXT: v_mov_b32_e32 v14, s14
; MOVREL-NEXT: v_mov_b32_e32 v15, s15
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
; MOVREL-NEXT: s_endpgm
entry:
  %idx.add = add i32 %idx, 1
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
  ret void
}
|
|
|
|
|
|
|
|
; Dynamic insertelement into a <8 x double> at index (%idx + 1) where the
; vector, value and index are all non-inreg arguments. The checks expect a
; loop (BB32_1) that reads the index from v18 with v_readfirstlane_b32 and
; compares it back against v18 to drive s_and_saveexec / s_xor on exec.
; Unlike the 32-bit element tests, the +1 is expected as an explicit
; s_add_i32, followed by a shift left by 1, and the write is done as two
; 32-bit indexed moves (sources v16 and v17, bases v19 and v20). The function
; returns void: the result is split into four <2 x double> pieces and written
; with volatile stores to an undef addrspace(1) pointer.
define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, double %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
; GPRIDX-NEXT: BB32_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v18
; GPRIDX-NEXT: s_add_i32 s3, s2, 1
; GPRIDX-NEXT: s_lshl_b32 s3, s3, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v18
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v34, v15
; GPRIDX-NEXT: v_mov_b32_e32 v33, v14
; GPRIDX-NEXT: v_mov_b32_e32 v32, v13
; GPRIDX-NEXT: v_mov_b32_e32 v31, v12
; GPRIDX-NEXT: v_mov_b32_e32 v30, v11
; GPRIDX-NEXT: v_mov_b32_e32 v29, v10
; GPRIDX-NEXT: v_mov_b32_e32 v28, v9
; GPRIDX-NEXT: v_mov_b32_e32 v27, v8
; GPRIDX-NEXT: v_mov_b32_e32 v26, v7
; GPRIDX-NEXT: v_mov_b32_e32 v25, v6
; GPRIDX-NEXT: v_mov_b32_e32 v24, v5
; GPRIDX-NEXT: v_mov_b32_e32 v23, v4
; GPRIDX-NEXT: v_mov_b32_e32 v22, v3
; GPRIDX-NEXT: v_mov_b32_e32 v21, v2
; GPRIDX-NEXT: v_mov_b32_e32 v20, v1
; GPRIDX-NEXT: v_mov_b32_e32 v19, v0
; GPRIDX-NEXT: v_mov_b32_e32 v19, v16
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v20, v17
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB32_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[0:1]
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[31:34], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, exec_lo
; MOVREL-NEXT: ; implicit-def: $vcc_hi
; MOVREL-NEXT: BB32_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s1, v18
; MOVREL-NEXT: v_mov_b32_e32 v34, v15
; MOVREL-NEXT: v_mov_b32_e32 v19, v0
; MOVREL-NEXT: v_mov_b32_e32 v33, v14
; MOVREL-NEXT: v_mov_b32_e32 v32, v13
; MOVREL-NEXT: s_add_i32 s2, s1, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v18
; MOVREL-NEXT: v_mov_b32_e32 v31, v12
; MOVREL-NEXT: v_mov_b32_e32 v30, v11
; MOVREL-NEXT: v_mov_b32_e32 v29, v10
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
; MOVREL-NEXT: v_mov_b32_e32 v28, v9
; MOVREL-NEXT: v_mov_b32_e32 v27, v8
; MOVREL-NEXT: v_mov_b32_e32 v26, v7
; MOVREL-NEXT: v_mov_b32_e32 v25, v6
; MOVREL-NEXT: v_mov_b32_e32 v24, v5
; MOVREL-NEXT: v_mov_b32_e32 v23, v4
; MOVREL-NEXT: v_mov_b32_e32 v22, v3
; MOVREL-NEXT: v_mov_b32_e32 v21, v2
; MOVREL-NEXT: v_mov_b32_e32 v20, v1
; MOVREL-NEXT: v_movreld_b32_e32 v19, v16
; MOVREL-NEXT: v_movreld_b32_e32 v20, v17
; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo
; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo
; MOVREL-NEXT: s_cbranch_execnz BB32_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b32 exec_lo, s0
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[31:34], off
; MOVREL-NEXT: s_endpgm
entry:
  %idx.add = add i32 %idx, 1
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
  ret void
}
|