forked from OSchip/llvm-project
AMDGPU/GlobalISel: Fix tablegen selection for scalar bin ops
Fixes selection for scalar G_SMULH/G_UMULH. Also switches to using tablegen-selected add/sub, which switches to the signed version of the opcode. This matches the current DAG behavior. We can't drop the manual selection for add/sub yet, because it's still used both for VALU add/sub and for G_PTR_ADD.
This commit is contained in:
parent
b136238bb4
commit
62129878a6
|
@ -412,8 +412,14 @@ class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
|
||||||
class UniformBinFrag<SDPatternOperator Op> : PatFrag <
|
class UniformBinFrag<SDPatternOperator Op> : PatFrag <
|
||||||
(ops node:$src0, node:$src1),
|
(ops node:$src0, node:$src1),
|
||||||
(Op $src0, $src1),
|
(Op $src0, $src1),
|
||||||
[{ return !N->isDivergent(); }]
|
[{ return !N->isDivergent(); }]> {
|
||||||
>;
|
// This check is unnecessary as it's captured by the result register
|
||||||
|
// bank constraint.
|
||||||
|
//
|
||||||
|
// FIXME: Should add a way for the emitter to recognize this is a
|
||||||
|
// trivially true predicate to eliminate the check.
|
||||||
|
let GISelPredicateCode = [{return true;}];
|
||||||
|
}
|
||||||
|
|
||||||
let Defs = [SCC] in { // Carry out goes to SCC
|
let Defs = [SCC] in { // Carry out goes to SCC
|
||||||
let isCommutable = 1 in {
|
let isCommutable = 1 in {
|
||||||
|
|
|
@ -979,7 +979,7 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec,
|
||||||
; GPRIDX: ; %bb.0: ; %entry
|
; GPRIDX: ; %bb.0: ; %entry
|
||||||
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
; GPRIDX-NEXT: s_mov_b32 s0, s2
|
||||||
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
; GPRIDX-NEXT: s_mov_b32 s1, s3
|
||||||
; GPRIDX-NEXT: s_add_u32 m0, s18, -1
|
; GPRIDX-NEXT: s_add_i32 m0, s18, -1
|
||||||
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
; GPRIDX-NEXT: s_mov_b32 s2, s4
|
||||||
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
; GPRIDX-NEXT: s_mov_b32 s3, s5
|
||||||
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
; GPRIDX-NEXT: s_mov_b32 s4, s6
|
||||||
|
@ -1001,7 +1001,7 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec,
|
||||||
; MOVREL: ; %bb.0: ; %entry
|
; MOVREL: ; %bb.0: ; %entry
|
||||||
; MOVREL-NEXT: s_mov_b32 s0, s2
|
; MOVREL-NEXT: s_mov_b32 s0, s2
|
||||||
; MOVREL-NEXT: s_mov_b32 s1, s3
|
; MOVREL-NEXT: s_mov_b32 s1, s3
|
||||||
; MOVREL-NEXT: s_add_u32 m0, s18, -1
|
; MOVREL-NEXT: s_add_i32 m0, s18, -1
|
||||||
; MOVREL-NEXT: s_mov_b32 s2, s4
|
; MOVREL-NEXT: s_mov_b32 s2, s4
|
||||||
; MOVREL-NEXT: s_mov_b32 s3, s5
|
; MOVREL-NEXT: s_mov_b32 s3, s5
|
||||||
; MOVREL-NEXT: s_mov_b32 s4, s6
|
; MOVREL-NEXT: s_mov_b32 s4, s6
|
||||||
|
@ -1031,7 +1031,7 @@ define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
|
||||||
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
|
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
|
||||||
; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
|
; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
|
||||||
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
|
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
|
||||||
; GPRIDX-NEXT: s_add_u32 s7, s6, 3
|
; GPRIDX-NEXT: s_add_i32 s7, s6, 3
|
||||||
; GPRIDX-NEXT: s_lshl_b32 s7, s7, 1
|
; GPRIDX-NEXT: s_lshl_b32 s7, s7, 1
|
||||||
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
|
||||||
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
|
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
|
||||||
|
@ -1056,7 +1056,7 @@ define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
|
||||||
; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
|
; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
|
||||||
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
|
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
|
||||||
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
|
||||||
; MOVREL-NEXT: s_add_u32 s6, s6, 3
|
; MOVREL-NEXT: s_add_i32 s6, s6, 3
|
||||||
; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
|
; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
|
||||||
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
|
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
|
||||||
; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
|
; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
|
||||||
|
|
|
@ -2093,7 +2093,7 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do
|
||||||
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
; GPRIDX-NEXT: s_mov_b64 s[0:1], exec
|
||||||
; GPRIDX-NEXT: BB32_1: ; =>This Inner Loop Header: Depth=1
|
; GPRIDX-NEXT: BB32_1: ; =>This Inner Loop Header: Depth=1
|
||||||
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v18
|
; GPRIDX-NEXT: v_readfirstlane_b32 s2, v18
|
||||||
; GPRIDX-NEXT: s_add_u32 s3, s2, 1
|
; GPRIDX-NEXT: s_add_i32 s3, s2, 1
|
||||||
; GPRIDX-NEXT: s_lshl_b32 s3, s3, 1
|
; GPRIDX-NEXT: s_lshl_b32 s3, s3, 1
|
||||||
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v18
|
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v18
|
||||||
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST)
|
||||||
|
@ -2139,7 +2139,7 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do
|
||||||
; MOVREL-NEXT: v_mov_b32_e32 v19, v0
|
; MOVREL-NEXT: v_mov_b32_e32 v19, v0
|
||||||
; MOVREL-NEXT: v_mov_b32_e32 v33, v14
|
; MOVREL-NEXT: v_mov_b32_e32 v33, v14
|
||||||
; MOVREL-NEXT: v_mov_b32_e32 v32, v13
|
; MOVREL-NEXT: v_mov_b32_e32 v32, v13
|
||||||
; MOVREL-NEXT: s_add_u32 s2, s1, 1
|
; MOVREL-NEXT: s_add_i32 s2, s1, 1
|
||||||
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v18
|
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v18
|
||||||
; MOVREL-NEXT: v_mov_b32_e32 v31, v12
|
; MOVREL-NEXT: v_mov_b32_e32 v31, v12
|
||||||
; MOVREL-NEXT: v_mov_b32_e32 v30, v11
|
; MOVREL-NEXT: v_mov_b32_e32 v30, v11
|
||||||
|
|
|
@ -17,20 +17,20 @@ body: |
|
||||||
; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
|
; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
|
||||||
; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec
|
; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec
|
||||||
; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_U32_]], %7, 0, implicit $exec
|
; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_I32_]], %7, 0, implicit $exec
|
||||||
; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_I32_e64 %8, [[COPY2]], 0, implicit $exec
|
; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_I32_e64 %8, [[COPY2]], 0, implicit $exec
|
||||||
; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit %7, implicit %8, implicit %9
|
; GFX6: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit %7, implicit %8, implicit %9
|
||||||
; GFX9-LABEL: name: add_s32
|
; GFX9-LABEL: name: add_s32
|
||||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
|
; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
|
||||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec
|
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec
|
||||||
; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_U32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
|
; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
|
||||||
; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec
|
; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec
|
||||||
; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]]
|
; GFX9: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]]
|
||||||
%0:sgpr(s32) = COPY $sgpr0
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
%1:sgpr(s32) = COPY $sgpr1
|
%1:sgpr(s32) = COPY $sgpr1
|
||||||
%2:vgpr(s32) = COPY $vgpr0
|
%2:vgpr(s32) = COPY $vgpr0
|
||||||
|
@ -123,14 +123,14 @@ body: |
|
||||||
; GFX6: liveins: $sgpr0
|
; GFX6: liveins: $sgpr0
|
||||||
; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
|
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
|
||||||
; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
|
; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]]
|
; GFX6: S_ENDPGM 0, implicit [[S_ADD_I32_]]
|
||||||
; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_s
|
; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_s
|
||||||
; GFX9: liveins: $sgpr0
|
; GFX9: liveins: $sgpr0
|
||||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
|
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
|
||||||
; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
|
; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]]
|
; GFX9: S_ENDPGM 0, implicit [[S_ADD_I32_]]
|
||||||
%0:sgpr(s32) = COPY $sgpr0
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
%1:sgpr(s32) = G_CONSTANT i32 16
|
%1:sgpr(s32) = G_CONSTANT i32 16
|
||||||
%2:sgpr(s32) = G_ADD %0, %1
|
%2:sgpr(s32) = G_ADD %0, %1
|
||||||
|
|
|
@ -123,8 +123,8 @@ body: |
|
||||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
; CHECK: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def $scc
|
; CHECK: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def $scc
|
||||||
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc
|
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc
|
||||||
; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]]
|
; CHECK: S_ENDPGM 0, implicit [[S_ADD_I32_]]
|
||||||
%0:sgpr(s32) = COPY $sgpr0
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
%1:sgpr(s32) = COPY $sgpr1
|
%1:sgpr(s32) = COPY $sgpr1
|
||||||
%2:sgpr(s32) = G_CTPOP %0
|
%2:sgpr(s32) = G_CTPOP %0
|
||||||
|
|
|
@ -316,16 +316,16 @@ body: |
|
||||||
; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
||||||
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
|
; MOVREL: $m0 = COPY [[S_ADD_I32_]]
|
||||||
; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
|
; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
|
||||||
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
|
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
|
||||||
; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1
|
; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1
|
||||||
; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
||||||
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
|
; GPRIDX: $m0 = COPY [[S_ADD_I32_]]
|
||||||
; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
|
; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
|
||||||
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
|
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
|
||||||
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||||
|
@ -378,16 +378,16 @@ body: |
|
||||||
; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
||||||
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
|
; MOVREL: $m0 = COPY [[S_ADD_I32_]]
|
||||||
; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
|
; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
|
||||||
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
|
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
|
||||||
; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8
|
; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8
|
||||||
; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
||||||
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
|
; GPRIDX: $m0 = COPY [[S_ADD_I32_]]
|
||||||
; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
|
; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
|
||||||
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
|
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
|
||||||
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||||
|
@ -469,16 +469,16 @@ body: |
|
||||||
; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
||||||
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
|
; MOVREL: $m0 = COPY [[S_ADD_I32_]]
|
||||||
; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
|
; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
|
||||||
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
|
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
|
||||||
; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1
|
; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1
|
||||||
; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
||||||
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
|
; GPRIDX: $m0 = COPY [[S_ADD_I32_]]
|
||||||
; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
|
; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
|
||||||
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
|
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
|
||||||
%0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
%0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||||
|
@ -700,16 +700,16 @@ body: |
|
||||||
; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
||||||
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
|
; MOVREL: $m0 = COPY [[S_ADD_I32_]]
|
||||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1
|
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1
|
||||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
|
||||||
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
|
; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit $m0
|
||||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||||
; GPRIDX: S_SET_GPR_IDX_OFF
|
; GPRIDX: S_SET_GPR_IDX_OFF
|
||||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||||
|
@ -764,16 +764,16 @@ body: |
|
||||||
; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
||||||
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
|
; MOVREL: $m0 = COPY [[S_ADD_I32_]]
|
||||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8
|
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8
|
||||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
||||||
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
|
; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit $m0
|
||||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||||
; GPRIDX: S_SET_GPR_IDX_OFF
|
; GPRIDX: S_SET_GPR_IDX_OFF
|
||||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||||
|
|
|
@ -534,8 +534,8 @@ body: |
|
||||||
; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
|
; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
|
||||||
; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9
|
; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9
|
||||||
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
||||||
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
|
; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
|
; MOVREL: $m0 = COPY [[S_ADD_I32_]]
|
||||||
; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec
|
; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec
|
||||||
; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]]
|
; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]]
|
||||||
; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_8
|
; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_8
|
||||||
|
@ -543,8 +543,8 @@ body: |
|
||||||
; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
|
; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
|
||||||
; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9
|
; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9
|
||||||
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
||||||
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
|
; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 8, implicit-def $m0, implicit $m0
|
; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 8, implicit-def $m0, implicit $m0
|
||||||
; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec
|
; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec
|
||||||
; GPRIDX: S_SET_GPR_IDX_OFF
|
; GPRIDX: S_SET_GPR_IDX_OFF
|
||||||
; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]]
|
; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]]
|
||||||
|
@ -603,8 +603,8 @@ body: |
|
||||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9
|
; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9
|
||||||
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
||||||
; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
|
; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; MOVREL: $m0 = COPY [[S_ADD_U32_]]
|
; MOVREL: $m0 = COPY [[S_ADD_I32_]]
|
||||||
; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0
|
; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0
|
||||||
; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]]
|
; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]]
|
||||||
; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_8
|
; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_8
|
||||||
|
@ -612,8 +612,8 @@ body: |
|
||||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||||
; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9
|
; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9
|
||||||
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
||||||
; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
|
; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
|
; GPRIDX: $m0 = COPY [[S_ADD_I32_]]
|
||||||
; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0
|
; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0
|
||||||
; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]]
|
; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]]
|
||||||
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||||
|
|
|
@ -18,26 +18,26 @@ body: |
|
||||||
; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||||
; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
|
; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
|
||||||
; GFX8: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc
|
; GFX8: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc
|
||||||
; GFX8: S_ENDPGM 0, implicit [[S_ADD_U32_1]]
|
; GFX8: S_ENDPGM 0, implicit [[S_ADD_I32_1]]
|
||||||
; GFX9-LABEL: name: add_s32_sgpr_sgpr_sgpr
|
; GFX9-LABEL: name: add_s32_sgpr_sgpr_sgpr
|
||||||
; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2
|
; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2
|
||||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||||
; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
|
; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
|
||||||
; GFX9: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc
|
; GFX9: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc
|
||||||
; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_1]]
|
; GFX9: S_ENDPGM 0, implicit [[S_ADD_I32_1]]
|
||||||
; GFX10-LABEL: name: add_s32_sgpr_sgpr_sgpr
|
; GFX10-LABEL: name: add_s32_sgpr_sgpr_sgpr
|
||||||
; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2
|
; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2
|
||||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||||
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||||
; GFX10: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
|
; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
|
||||||
; GFX10: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc
|
; GFX10: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc
|
||||||
; GFX10: S_ENDPGM 0, implicit [[S_ADD_U32_1]]
|
; GFX10: S_ENDPGM 0, implicit [[S_ADD_I32_1]]
|
||||||
%0:sgpr(s32) = COPY $sgpr0
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
%1:sgpr(s32) = COPY $sgpr1
|
%1:sgpr(s32) = COPY $sgpr1
|
||||||
%2:sgpr(s32) = COPY $sgpr2
|
%2:sgpr(s32) = COPY $sgpr2
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s
|
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=SI %s
|
||||||
# RUN: FileCheck -check-prefix=ERR %s < %t
|
|
||||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s
|
|
||||||
# RUN: FileCheck -check-prefix=ERR %s < %t
|
# RUN: FileCheck -check-prefix=ERR %s < %t
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||||
|
|
||||||
# ERR-NOT: remark:
|
# ERR-NOT: remark:
|
||||||
# ERR: remark: <unknown>:0:0: cannot select: %2:sgpr(s32) = G_SMULH %0:sgpr, %1:sgpr (in function: smulh_s32_ss)
|
# ERR: remark: <unknown>:0:0: cannot select: %2:sgpr(s32) = G_SMULH %0:sgpr, %1:sgpr (in function: smulh_s32_ss)
|
||||||
|
@ -16,11 +15,17 @@ regBankSelected: true
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
liveins: $sgpr0, $sgpr1
|
liveins: $sgpr0, $sgpr1
|
||||||
; GCN-LABEL: name: smulh_s32_ss
|
|
||||||
; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
; SI-LABEL: name: smulh_s32_ss
|
||||||
; GCN: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
; SI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
; GCN: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
; SI: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
; GCN: S_ENDPGM 0, implicit [[SMULH]](s32)
|
; SI: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||||
|
; SI: S_ENDPGM 0, implicit [[SMULH]](s32)
|
||||||
|
; GFX9-LABEL: name: smulh_s32_ss
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
|
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
|
; GFX9: [[S_MUL_HI_I32_:%[0-9]+]]:sreg_32 = S_MUL_HI_I32 [[COPY]], [[COPY1]]
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_MUL_HI_I32_]]
|
||||||
%0:sgpr(s32) = COPY $sgpr0
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
%1:sgpr(s32) = COPY $sgpr1
|
%1:sgpr(s32) = COPY $sgpr1
|
||||||
%2:sgpr(s32) = G_SMULH %0, %1
|
%2:sgpr(s32) = G_SMULH %0, %1
|
||||||
|
@ -35,11 +40,17 @@ regBankSelected: true
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
liveins: $sgpr0, $vgpr0
|
liveins: $sgpr0, $vgpr0
|
||||||
; GCN-LABEL: name: smulh_s32_sv
|
|
||||||
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; SI-LABEL: name: smulh_s32_sv
|
||||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
; SI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GCN: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec
|
; SI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]]
|
; SI: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; SI: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]]
|
||||||
|
; GFX9-LABEL: name: smulh_s32_sv
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
|
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
|
; GFX9: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]]
|
||||||
%0:sgpr(s32) = COPY $sgpr0
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
%1:vgpr(s32) = COPY $vgpr0
|
%1:vgpr(s32) = COPY $vgpr0
|
||||||
%2:vgpr(s32) = G_SMULH %0, %1
|
%2:vgpr(s32) = G_SMULH %0, %1
|
||||||
|
@ -54,11 +65,17 @@ regBankSelected: true
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
liveins: $sgpr0, $vgpr0
|
liveins: $sgpr0, $vgpr0
|
||||||
; GCN-LABEL: name: smulh_s32_vs
|
|
||||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
; SI-LABEL: name: smulh_s32_vs
|
||||||
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; SI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
; GCN: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec
|
; SI: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]]
|
; SI: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; SI: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]]
|
||||||
|
; GFX9-LABEL: name: smulh_s32_vs
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
|
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
|
; GFX9: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]]
|
||||||
%0:vgpr(s32) = COPY $vgpr0
|
%0:vgpr(s32) = COPY $vgpr0
|
||||||
%1:sgpr(s32) = COPY $sgpr0
|
%1:sgpr(s32) = COPY $sgpr0
|
||||||
%2:vgpr(s32) = G_SMULH %0, %1
|
%2:vgpr(s32) = G_SMULH %0, %1
|
||||||
|
@ -73,11 +90,17 @@ regBankSelected: true
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
liveins: $vgpr0, $vgpr1
|
liveins: $vgpr0, $vgpr1
|
||||||
; GCN-LABEL: name: smulh_s32_vv
|
|
||||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
; SI-LABEL: name: smulh_s32_vv
|
||||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
; SI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
; GCN: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec
|
; SI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||||
; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]]
|
; SI: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; SI: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]]
|
||||||
|
; GFX9-LABEL: name: smulh_s32_vv
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
|
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||||
|
; GFX9: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]]
|
||||||
%0:vgpr(s32) = COPY $vgpr0
|
%0:vgpr(s32) = COPY $vgpr0
|
||||||
%1:vgpr(s32) = COPY $vgpr1
|
%1:vgpr(s32) = COPY $vgpr1
|
||||||
%2:vgpr(s32) = G_SMULH %0, %1
|
%2:vgpr(s32) = G_SMULH %0, %1
|
||||||
|
|
|
@ -20,9 +20,9 @@ body: |
|
||||||
; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
; GFX6: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc
|
; GFX6: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc
|
||||||
; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_I32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec
|
; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_I32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec
|
||||||
; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_I32_e64 [[S_SUB_U32_]], %7, 0, implicit $exec
|
; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_I32_e64 [[S_SUB_I32_]], %7, 0, implicit $exec
|
||||||
; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_SUB_I32_e64 %8, [[COPY2]], 0, implicit $exec
|
; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_SUB_I32_e64 %8, [[COPY2]], 0, implicit $exec
|
||||||
; GFX6: S_ENDPGM 0, implicit %9
|
; GFX6: S_ENDPGM 0, implicit %9
|
||||||
; GFX9-LABEL: name: sub_s32
|
; GFX9-LABEL: name: sub_s32
|
||||||
|
@ -30,9 +30,9 @@ body: |
|
||||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
; GFX9: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc
|
; GFX9: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc
|
||||||
; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec
|
; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec
|
||||||
; GFX9: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_U32_]], [[V_SUB_U32_e64_]], 0, implicit $exec
|
; GFX9: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_I32_]], [[V_SUB_U32_e64_]], 0, implicit $exec
|
||||||
; GFX9: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec
|
; GFX9: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec
|
||||||
; GFX9: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]]
|
; GFX9: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]]
|
||||||
; GFX10-LABEL: name: sub_s32
|
; GFX10-LABEL: name: sub_s32
|
||||||
|
@ -41,9 +41,9 @@ body: |
|
||||||
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
; GFX10: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc
|
; GFX10: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc
|
||||||
; GFX10: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec
|
; GFX10: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec
|
||||||
; GFX10: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_U32_]], [[V_SUB_U32_e64_]], 0, implicit $exec
|
; GFX10: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_I32_]], [[V_SUB_U32_e64_]], 0, implicit $exec
|
||||||
; GFX10: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec
|
; GFX10: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec
|
||||||
; GFX10: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]]
|
; GFX10: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]]
|
||||||
%0:sgpr(s32) = COPY $sgpr0
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s
|
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=SI %s
|
||||||
# RUN: FileCheck -check-prefix=ERR %s < %t
|
|
||||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s
|
|
||||||
# RUN: FileCheck -check-prefix=ERR %s < %t
|
# RUN: FileCheck -check-prefix=ERR %s < %t
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||||
|
|
||||||
# ERR-NOT: remark:
|
# ERR-NOT: remark:
|
||||||
# ERR: remark: <unknown>:0:0: cannot select: %2:sgpr(s32) = G_UMULH %0:sgpr, %1:sgpr (in function: umulh_s32_ss)
|
# ERR: remark: <unknown>:0:0: cannot select: %2:sgpr(s32) = G_UMULH %0:sgpr, %1:sgpr (in function: umulh_s32_ss)
|
||||||
|
@ -16,11 +15,17 @@ regBankSelected: true
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
liveins: $sgpr0, $sgpr1
|
liveins: $sgpr0, $sgpr1
|
||||||
; GCN-LABEL: name: umulh_s32_ss
|
|
||||||
; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
; SI-LABEL: name: umulh_s32_ss
|
||||||
; GCN: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
; SI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
; GCN: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
; SI: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
; GCN: S_ENDPGM 0, implicit [[UMULH]](s32)
|
; SI: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||||
|
; SI: S_ENDPGM 0, implicit [[UMULH]](s32)
|
||||||
|
; GFX9-LABEL: name: umulh_s32_ss
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
|
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
|
; GFX9: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY]], [[COPY1]]
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[S_MUL_HI_U32_]]
|
||||||
%0:sgpr(s32) = COPY $sgpr0
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
%1:sgpr(s32) = COPY $sgpr1
|
%1:sgpr(s32) = COPY $sgpr1
|
||||||
%2:sgpr(s32) = G_UMULH %0, %1
|
%2:sgpr(s32) = G_UMULH %0, %1
|
||||||
|
@ -35,11 +40,17 @@ regBankSelected: true
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
liveins: $sgpr0, $vgpr0
|
liveins: $sgpr0, $vgpr0
|
||||||
; GCN-LABEL: name: umulh_s32_sv
|
|
||||||
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; SI-LABEL: name: umulh_s32_sv
|
||||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
; SI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GCN: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec
|
; SI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]]
|
; SI: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; SI: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]]
|
||||||
|
; GFX9-LABEL: name: umulh_s32_sv
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
|
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
|
; GFX9: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]]
|
||||||
%0:sgpr(s32) = COPY $sgpr0
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
%1:vgpr(s32) = COPY $vgpr0
|
%1:vgpr(s32) = COPY $vgpr0
|
||||||
%2:vgpr(s32) = G_UMULH %0, %1
|
%2:vgpr(s32) = G_UMULH %0, %1
|
||||||
|
@ -54,11 +65,17 @@ regBankSelected: true
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
liveins: $sgpr0, $vgpr0
|
liveins: $sgpr0, $vgpr0
|
||||||
; GCN-LABEL: name: umulh_s32_vs
|
|
||||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
; SI-LABEL: name: umulh_s32_vs
|
||||||
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
; SI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
; GCN: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec
|
; SI: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]]
|
; SI: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; SI: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]]
|
||||||
|
; GFX9-LABEL: name: umulh_s32_vs
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
|
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
|
; GFX9: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]]
|
||||||
%0:vgpr(s32) = COPY $vgpr0
|
%0:vgpr(s32) = COPY $vgpr0
|
||||||
%1:sgpr(s32) = COPY $sgpr0
|
%1:sgpr(s32) = COPY $sgpr0
|
||||||
%2:vgpr(s32) = G_UMULH %0, %1
|
%2:vgpr(s32) = G_UMULH %0, %1
|
||||||
|
@ -73,11 +90,17 @@ regBankSelected: true
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
liveins: $vgpr0, $vgpr1
|
liveins: $vgpr0, $vgpr1
|
||||||
; GCN-LABEL: name: umulh_s32_vv
|
|
||||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
; SI-LABEL: name: umulh_s32_vv
|
||||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
; SI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
; GCN: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec
|
; SI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||||
; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]]
|
; SI: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; SI: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]]
|
||||||
|
; GFX9-LABEL: name: umulh_s32_vv
|
||||||
|
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
|
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||||
|
; GFX9: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec
|
||||||
|
; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]]
|
||||||
%0:vgpr(s32) = COPY $vgpr0
|
%0:vgpr(s32) = COPY $vgpr0
|
||||||
%1:vgpr(s32) = COPY $vgpr1
|
%1:vgpr(s32) = COPY $vgpr1
|
||||||
%2:vgpr(s32) = G_UMULH %0, %1
|
%2:vgpr(s32) = G_UMULH %0, %1
|
||||||
|
|
|
@ -673,8 +673,8 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
|
||||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
|
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
|
||||||
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
|
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_U32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
|
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
|
||||||
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||||
%soffset = add i32 %soffset.base, 16
|
%soffset = add i32 %soffset.base, 16
|
||||||
|
@ -694,8 +694,8 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
|
||||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
|
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
|
||||||
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
|
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_U32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
|
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
|
||||||
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||||
%soffset = add i32 %soffset.base, 4095
|
%soffset = add i32 %soffset.base, 4095
|
||||||
|
@ -715,8 +715,8 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
|
||||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
|
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
|
||||||
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
|
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_U32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
|
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
|
||||||
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||||
%soffset = add i32 %soffset.base, 4096
|
%soffset = add i32 %soffset.base, 4096
|
||||||
|
@ -738,7 +738,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
|
||||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
|
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
|
||||||
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
|
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
|
||||||
; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||||
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||||
|
@ -754,7 +754,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
|
||||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
|
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
|
||||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||||
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_U32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
|
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
|
||||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||||
|
|
|
@ -258,9 +258,9 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg
|
||||||
; GFX6-NEXT: s_mul_i32 s3, s4, 4
|
; GFX6-NEXT: s_mul_i32 s3, s4, 4
|
||||||
; GFX6-NEXT: s_mul_i32 s6, s4, 0
|
; GFX6-NEXT: s_mul_i32 s6, s4, 0
|
||||||
; GFX6-NEXT: s_mul_i32 s4, s5, 4
|
; GFX6-NEXT: s_mul_i32 s4, s5, 4
|
||||||
; GFX6-NEXT: s_add_u32 s4, s6, s4
|
; GFX6-NEXT: s_add_i32 s6, s6, s4
|
||||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||||
; GFX6-NEXT: v_add_i32_e32 v1, vcc, s4, v0
|
; GFX6-NEXT: v_add_i32_e32 v1, vcc, s6, v0
|
||||||
; GFX6-NEXT: v_mov_b32_e32 v0, s3
|
; GFX6-NEXT: v_mov_b32_e32 v0, s3
|
||||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||||
; GFX6-NEXT: v_mov_b32_e32 v2, 0
|
; GFX6-NEXT: v_mov_b32_e32 v2, 0
|
||||||
|
@ -276,9 +276,9 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg
|
||||||
; GFX7-NEXT: s_mul_i32 s3, s4, 4
|
; GFX7-NEXT: s_mul_i32 s3, s4, 4
|
||||||
; GFX7-NEXT: s_mul_i32 s6, s4, 0
|
; GFX7-NEXT: s_mul_i32 s6, s4, 0
|
||||||
; GFX7-NEXT: s_mul_i32 s4, s5, 4
|
; GFX7-NEXT: s_mul_i32 s4, s5, 4
|
||||||
; GFX7-NEXT: s_add_u32 s4, s6, s4
|
; GFX7-NEXT: s_add_i32 s6, s6, s4
|
||||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||||
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s4, v0
|
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s6, v0
|
||||||
; GFX7-NEXT: v_mov_b32_e32 v0, s3
|
; GFX7-NEXT: v_mov_b32_e32 v0, s3
|
||||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||||
; GFX7-NEXT: v_mov_b32_e32 v2, 0
|
; GFX7-NEXT: v_mov_b32_e32 v2, 0
|
||||||
|
@ -298,8 +298,8 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr,
|
||||||
; GFX6-NEXT: s_mul_i32 s2, s0, 4
|
; GFX6-NEXT: s_mul_i32 s2, s0, 4
|
||||||
; GFX6-NEXT: s_mul_i32 s3, s0, 0
|
; GFX6-NEXT: s_mul_i32 s3, s0, 0
|
||||||
; GFX6-NEXT: s_mul_i32 s0, s1, 4
|
; GFX6-NEXT: s_mul_i32 s0, s1, 4
|
||||||
; GFX6-NEXT: s_add_u32 s0, s3, s0
|
; GFX6-NEXT: s_add_i32 s3, s3, s0
|
||||||
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s0, v2
|
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s3, v2
|
||||||
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v0
|
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v0
|
||||||
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX6-NEXT: s_mov_b32 s6, 0
|
; GFX6-NEXT: s_mov_b32 s6, 0
|
||||||
|
@ -316,8 +316,8 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr,
|
||||||
; GFX7-NEXT: s_mul_i32 s2, s0, 4
|
; GFX7-NEXT: s_mul_i32 s2, s0, 4
|
||||||
; GFX7-NEXT: s_mul_i32 s3, s0, 0
|
; GFX7-NEXT: s_mul_i32 s3, s0, 0
|
||||||
; GFX7-NEXT: s_mul_i32 s0, s1, 4
|
; GFX7-NEXT: s_mul_i32 s0, s1, 4
|
||||||
; GFX7-NEXT: s_add_u32 s0, s3, s0
|
; GFX7-NEXT: s_add_i32 s3, s3, s0
|
||||||
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s0, v2
|
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s3, v2
|
||||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s2, v0
|
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s2, v0
|
||||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX7-NEXT: s_mov_b32 s6, 0
|
; GFX7-NEXT: s_mov_b32 s6, 0
|
||||||
|
@ -339,8 +339,8 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(
|
||||||
; GFX6-NEXT: s_mul_i32 s2, s0, 4
|
; GFX6-NEXT: s_mul_i32 s2, s0, 4
|
||||||
; GFX6-NEXT: s_mul_i32 s3, s0, 0
|
; GFX6-NEXT: s_mul_i32 s3, s0, 0
|
||||||
; GFX6-NEXT: s_mul_i32 s0, s1, 4
|
; GFX6-NEXT: s_mul_i32 s0, s1, 4
|
||||||
; GFX6-NEXT: s_add_u32 s0, s3, s0
|
; GFX6-NEXT: s_add_i32 s3, s3, s0
|
||||||
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s0, v2
|
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s3, v2
|
||||||
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v0
|
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v0
|
||||||
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX6-NEXT: s_mov_b32 s6, 0
|
; GFX6-NEXT: s_mov_b32 s6, 0
|
||||||
|
@ -357,8 +357,8 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(
|
||||||
; GFX7-NEXT: s_mul_i32 s2, s0, 4
|
; GFX7-NEXT: s_mul_i32 s2, s0, 4
|
||||||
; GFX7-NEXT: s_mul_i32 s3, s0, 0
|
; GFX7-NEXT: s_mul_i32 s3, s0, 0
|
||||||
; GFX7-NEXT: s_mul_i32 s0, s1, 4
|
; GFX7-NEXT: s_mul_i32 s0, s1, 4
|
||||||
; GFX7-NEXT: s_add_u32 s0, s3, s0
|
; GFX7-NEXT: s_add_i32 s3, s3, s0
|
||||||
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s0, v2
|
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s3, v2
|
||||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s2, v0
|
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s2, v0
|
||||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX7-NEXT: s_mov_b32 s6, 0
|
; GFX7-NEXT: s_mov_b32 s6, 0
|
||||||
|
@ -386,9 +386,9 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(
|
||||||
; GFX6-NEXT: s_mul_i32 s0, s2, 4
|
; GFX6-NEXT: s_mul_i32 s0, s2, 4
|
||||||
; GFX6-NEXT: s_mul_i32 s4, s2, 0
|
; GFX6-NEXT: s_mul_i32 s4, s2, 0
|
||||||
; GFX6-NEXT: s_mul_i32 s2, s3, 4
|
; GFX6-NEXT: s_mul_i32 s2, s3, 4
|
||||||
; GFX6-NEXT: s_add_u32 s2, s4, s2
|
; GFX6-NEXT: s_add_i32 s4, s4, s2
|
||||||
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||||
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s2, v2
|
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s4, v2
|
||||||
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
||||||
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX6-NEXT: s_mov_b32 s2, s1
|
; GFX6-NEXT: s_mov_b32 s2, s1
|
||||||
|
@ -410,9 +410,9 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(
|
||||||
; GFX7-NEXT: s_mul_i32 s0, s2, 4
|
; GFX7-NEXT: s_mul_i32 s0, s2, 4
|
||||||
; GFX7-NEXT: s_mul_i32 s4, s2, 0
|
; GFX7-NEXT: s_mul_i32 s4, s2, 0
|
||||||
; GFX7-NEXT: s_mul_i32 s2, s3, 4
|
; GFX7-NEXT: s_mul_i32 s2, s3, 4
|
||||||
; GFX7-NEXT: s_add_u32 s2, s4, s2
|
; GFX7-NEXT: s_add_i32 s4, s4, s2
|
||||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||||
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s2, v2
|
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s4, v2
|
||||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
||||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX7-NEXT: s_mov_b32 s2, s1
|
; GFX7-NEXT: s_mov_b32 s2, s1
|
||||||
|
@ -800,9 +800,9 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inre
|
||||||
; GFX6-NEXT: v_mul_hi_u32 v0, 4, s2
|
; GFX6-NEXT: v_mul_hi_u32 v0, 4, s2
|
||||||
; GFX6-NEXT: s_mul_i32 s4, s2, 0
|
; GFX6-NEXT: s_mul_i32 s4, s2, 0
|
||||||
; GFX6-NEXT: s_mul_i32 s3, s3, 4
|
; GFX6-NEXT: s_mul_i32 s3, s3, 4
|
||||||
; GFX6-NEXT: s_add_u32 s3, s4, s3
|
; GFX6-NEXT: s_add_i32 s4, s4, s3
|
||||||
; GFX6-NEXT: s_mul_i32 s2, s2, 4
|
; GFX6-NEXT: s_mul_i32 s2, s2, 4
|
||||||
; GFX6-NEXT: v_add_i32_e32 v1, vcc, s3, v0
|
; GFX6-NEXT: v_add_i32_e32 v1, vcc, s4, v0
|
||||||
; GFX6-NEXT: v_mov_b32_e32 v0, s2
|
; GFX6-NEXT: v_mov_b32_e32 v0, s2
|
||||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||||
|
@ -818,9 +818,9 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inre
|
||||||
; GFX7-NEXT: v_mul_hi_u32 v0, 4, s2
|
; GFX7-NEXT: v_mul_hi_u32 v0, 4, s2
|
||||||
; GFX7-NEXT: s_mul_i32 s4, s2, 0
|
; GFX7-NEXT: s_mul_i32 s4, s2, 0
|
||||||
; GFX7-NEXT: s_mul_i32 s3, s3, 4
|
; GFX7-NEXT: s_mul_i32 s3, s3, 4
|
||||||
; GFX7-NEXT: s_add_u32 s3, s4, s3
|
; GFX7-NEXT: s_add_i32 s4, s4, s3
|
||||||
; GFX7-NEXT: s_mul_i32 s2, s2, 4
|
; GFX7-NEXT: s_mul_i32 s2, s2, 4
|
||||||
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s3, v0
|
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s4, v0
|
||||||
; GFX7-NEXT: v_mov_b32_e32 v0, s2
|
; GFX7-NEXT: v_mov_b32_e32 v0, s2
|
||||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||||
|
@ -839,9 +839,9 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr
|
||||||
; GFX6-NEXT: v_mul_hi_u32 v2, 4, s0
|
; GFX6-NEXT: v_mul_hi_u32 v2, 4, s0
|
||||||
; GFX6-NEXT: s_mul_i32 s2, s0, 0
|
; GFX6-NEXT: s_mul_i32 s2, s0, 0
|
||||||
; GFX6-NEXT: s_mul_i32 s1, s1, 4
|
; GFX6-NEXT: s_mul_i32 s1, s1, 4
|
||||||
; GFX6-NEXT: s_add_u32 s1, s2, s1
|
; GFX6-NEXT: s_add_i32 s2, s2, s1
|
||||||
|
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s2, v2
|
||||||
; GFX6-NEXT: s_mul_i32 s0, s0, 4
|
; GFX6-NEXT: s_mul_i32 s0, s0, 4
|
||||||
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s1, v2
|
|
||||||
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
||||||
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||||
|
@ -857,9 +857,9 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr
|
||||||
; GFX7-NEXT: v_mul_hi_u32 v2, 4, s0
|
; GFX7-NEXT: v_mul_hi_u32 v2, 4, s0
|
||||||
; GFX7-NEXT: s_mul_i32 s2, s0, 0
|
; GFX7-NEXT: s_mul_i32 s2, s0, 0
|
||||||
; GFX7-NEXT: s_mul_i32 s1, s1, 4
|
; GFX7-NEXT: s_mul_i32 s1, s1, 4
|
||||||
; GFX7-NEXT: s_add_u32 s1, s2, s1
|
; GFX7-NEXT: s_add_i32 s2, s2, s1
|
||||||
|
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s2, v2
|
||||||
; GFX7-NEXT: s_mul_i32 s0, s0, 4
|
; GFX7-NEXT: s_mul_i32 s0, s0, 4
|
||||||
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s1, v2
|
|
||||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
||||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||||
|
@ -880,9 +880,9 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspac
|
||||||
; GFX6-NEXT: v_mul_hi_u32 v2, 4, s0
|
; GFX6-NEXT: v_mul_hi_u32 v2, 4, s0
|
||||||
; GFX6-NEXT: s_mul_i32 s2, s0, 0
|
; GFX6-NEXT: s_mul_i32 s2, s0, 0
|
||||||
; GFX6-NEXT: s_mul_i32 s1, s1, 4
|
; GFX6-NEXT: s_mul_i32 s1, s1, 4
|
||||||
; GFX6-NEXT: s_add_u32 s1, s2, s1
|
; GFX6-NEXT: s_add_i32 s2, s2, s1
|
||||||
|
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s2, v2
|
||||||
; GFX6-NEXT: s_mul_i32 s0, s0, 4
|
; GFX6-NEXT: s_mul_i32 s0, s0, 4
|
||||||
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s1, v2
|
|
||||||
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
||||||
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||||
|
@ -898,9 +898,9 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspac
|
||||||
; GFX7-NEXT: v_mul_hi_u32 v2, 4, s0
|
; GFX7-NEXT: v_mul_hi_u32 v2, 4, s0
|
||||||
; GFX7-NEXT: s_mul_i32 s2, s0, 0
|
; GFX7-NEXT: s_mul_i32 s2, s0, 0
|
||||||
; GFX7-NEXT: s_mul_i32 s1, s1, 4
|
; GFX7-NEXT: s_mul_i32 s1, s1, 4
|
||||||
; GFX7-NEXT: s_add_u32 s1, s2, s1
|
; GFX7-NEXT: s_add_i32 s2, s2, s1
|
||||||
|
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s2, v2
|
||||||
; GFX7-NEXT: s_mul_i32 s0, s0, 4
|
; GFX7-NEXT: s_mul_i32 s0, s0, 4
|
||||||
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s1, v2
|
|
||||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
||||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||||
|
@ -925,12 +925,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspac
|
||||||
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
||||||
; GFX6-NEXT: s_mul_i32 s2, s0, 0
|
; GFX6-NEXT: s_mul_i32 s2, s0, 0
|
||||||
; GFX6-NEXT: s_mul_i32 s1, s1, 4
|
; GFX6-NEXT: s_mul_i32 s1, s1, 4
|
||||||
; GFX6-NEXT: s_add_u32 s1, s2, s1
|
|
||||||
; GFX6-NEXT: v_mov_b32_e32 v3, s5
|
; GFX6-NEXT: v_mov_b32_e32 v3, s5
|
||||||
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||||
|
; GFX6-NEXT: s_add_i32 s2, s2, s1
|
||||||
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||||
|
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s2, v4
|
||||||
; GFX6-NEXT: s_mul_i32 s0, s0, 4
|
; GFX6-NEXT: s_mul_i32 s0, s0, 4
|
||||||
; GFX6-NEXT: v_add_i32_e32 v2, vcc, s1, v4
|
|
||||||
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
||||||
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||||
|
@ -949,12 +949,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspac
|
||||||
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
||||||
; GFX7-NEXT: s_mul_i32 s2, s0, 0
|
; GFX7-NEXT: s_mul_i32 s2, s0, 0
|
||||||
; GFX7-NEXT: s_mul_i32 s1, s1, 4
|
; GFX7-NEXT: s_mul_i32 s1, s1, 4
|
||||||
; GFX7-NEXT: s_add_u32 s1, s2, s1
|
|
||||||
; GFX7-NEXT: v_mov_b32_e32 v3, s5
|
; GFX7-NEXT: v_mov_b32_e32 v3, s5
|
||||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||||
|
; GFX7-NEXT: s_add_i32 s2, s2, s1
|
||||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||||
|
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s2, v4
|
||||||
; GFX7-NEXT: s_mul_i32 s0, s0, 4
|
; GFX7-NEXT: s_mul_i32 s0, s0, 4
|
||||||
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s1, v4
|
|
||||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
|
||||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
|
||||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||||
|
|
Loading…
Reference in New Issue