forked from OSchip/llvm-project
[GlobalISel] Fix invalid combine of unmerge(merge) with intermediate cast
Summary: The combine for unmerge(cast(merge)) is only valid for vectors, but the corresponding check was missing. Add a check that the operands are vectors to avoid an invalid combine. Without this check, the combiner would emit incorrect code for scalars and pointers, because the artifact cast (trunc/ext) only affects bits at the end of the type, while this combine assumes that the cast bits appear between meaningful bits. This also uncovered a segmentation fault in the AMDGPU InstructionSelector. The tests triggering this bug have been moved to their own files, and a check for the segmentation fault has been added. Reviewers: arsenm, dsanders, aemerson, paquette, aditya_nandakumar Reviewed By: arsenm Subscribers: tpr, jvesely, wdng, nhaehnle, rovka, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78191
This commit is contained in:
parent
7c13550967
commit
bddac41b9f
|
@ -364,7 +364,7 @@ public:
|
|||
// That is not done yet.
|
||||
if (ConvertOp == 0)
|
||||
return true;
|
||||
return !DestTy.isVector();
|
||||
return !DestTy.isVector() && OpTy.isVector();
|
||||
case TargetOpcode::G_CONCAT_VECTORS: {
|
||||
if (ConvertOp == 0)
|
||||
return true;
|
||||
|
|
|
@ -910,10 +910,10 @@ body: |
|
|||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
|
||||
; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s32), implicit [[TRUNC1]](s32), implicit [[TRUNC2]](s32)
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV]](s192)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96)
|
||||
; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(s64) = COPY $vgpr4_vgpr5
|
||||
|
@ -933,16 +933,20 @@ body: |
|
|||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
|
||||
; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
|
||||
; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
|
||||
; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
|
||||
; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16)
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV]](s192)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
|
||||
; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
|
||||
; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
|
||||
; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
|
||||
; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
|
||||
; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16), implicit [[TRUNC6]](s16)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(s64) = COPY $vgpr4_vgpr5
|
||||
|
@ -968,11 +972,15 @@ body: |
|
|||
; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
|
||||
; CHECK: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
|
||||
; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
|
||||
; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32)
|
||||
; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
|
||||
; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
|
||||
; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
|
||||
; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
|
||||
; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[MV]](s192), implicit [[MV1]](s96), implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
|
@ -986,3 +994,46 @@ body: |
|
|||
S_ENDPGM 0, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13
|
||||
|
||||
...
|
||||
|
||||
---
# Scalar case: a G_ANYEXT (s64 -> s128) sits between the G_MERGE_VALUES and the
# G_UNMERGE_VALUES. The CHECK lines expect the unmerge to stay in place, reading
# an s128 merge of the original s64 merge and a G_IMPLICIT_DEF high half, rather
# than being rewritten in terms of the s32 merge sources.
|
||||
name: test_unmerge_values_s64_anyext_s128_of_merge_values_s64
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: test_unmerge_values_s64_anyext_s128_of_merge_values_s64
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
|
||||
; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV1]](s128)
|
||||
; CHECK: $vgpr0_vgpr1 = COPY [[UV]](s64)
|
||||
; CHECK: $vgpr2_vgpr3 = COPY [[UV1]](s64)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s64) = G_MERGE_VALUES %0, %1
|
||||
%3:_(s128) = G_ANYEXT %2
|
||||
%4:_(s64), %5:_(s64) = G_UNMERGE_VALUES %3
|
||||
$vgpr0_vgpr1 = COPY %4
|
||||
$vgpr2_vgpr3 = COPY %5
|
||||
|
||||
...
|
||||
|
||||
---
# Scalar case: a G_TRUNC (s128 -> s64) sits between the G_MERGE_VALUES and the
# G_UNMERGE_VALUES. The CHECK lines expect both s32 results to be unmerged from
# the first merge source only (the COPY of $vgpr0_vgpr1); the second source
# (the COPY of $vgpr2_vgpr3) does not feed either result.
|
||||
name: test_unmerge_values_s32_trunc_s64_of_merge_values_s128
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: test_unmerge_values_s32_trunc_s64_of_merge_values_s128
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: $vgpr0 = COPY [[UV]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[UV1]](s32)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(s128) = G_MERGE_VALUES %0, %1
|
||||
%3:_(s64) = G_TRUNC %2
|
||||
%4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %3
|
||||
$vgpr0 = COPY %4
|
||||
$vgpr1 = COPY %5
|
||||
|
||||
...
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
; Known-crash reproducer: both RUN lines use `not --crash`, so this test passes
; only while llc crashes on the input below. The accompanying change description
; attributes the crash to a segmentation fault in the AMDGPU InstructionSelector.
; RUN: not --crash llc -global-isel -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s
|
||||
; RUN: not --crash llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s
|
||||
|
||||
; Truncates the i32 argument to i24, bitcasts it to <3 x i8>, and converts each
; element to float with an unsigned conversion (uitofp).
define <3 x float> @v_uitofp_v3i8_to_v3f32(i32 %arg0) nounwind {
|
||||
%trunc = trunc i32 %arg0 to i24
|
||||
%val = bitcast i24 %trunc to <3 x i8>
|
||||
%cvt = uitofp <3 x i8> %val to <3 x float>
|
||||
ret <3 x float> %cvt
|
||||
}
|
|
@ -191,40 +191,6 @@ define <2 x float> @v_uitofp_v2i8_to_v2f32(i16 %arg0) nounwind {
|
|||
ret <2 x float> %cvt
|
||||
}
|
||||
|
||||
define <3 x float> @v_uitofp_v3i8_to_v3f32(i32 %arg0) nounwind {
|
||||
; SI-LABEL: v_uitofp_v3i8_to_v3f32:
|
||||
; SI: ; %bb.0:
|
||||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v0
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0
|
||||
; SI-NEXT: s_movk_i32 s4, 0xff
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||
; SI-NEXT: v_and_b32_e32 v0, s4, v0
|
||||
; SI-NEXT: v_and_b32_e32 v1, s4, v1
|
||||
; SI-NEXT: v_and_b32_e32 v2, s4, v2
|
||||
; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
|
||||
; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
|
||||
; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2
|
||||
; SI-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; VI-LABEL: v_uitofp_v3i8_to_v3f32:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; VI-NEXT: s_movk_i32 s4, 0xff
|
||||
; VI-NEXT: v_mov_b32_e32 v2, s4
|
||||
; VI-NEXT: v_and_b32_sdwa v1, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
|
||||
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
|
||||
; VI-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0
|
||||
; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
|
||||
; VI-NEXT: v_mov_b32_e32 v0, v3
|
||||
; VI-NEXT: s_setpc_b64 s[30:31]
|
||||
%trunc = trunc i32 %arg0 to i24
|
||||
%val = bitcast i24 %trunc to <3 x i8>
|
||||
%cvt = uitofp <3 x i8> %val to <3 x float>
|
||||
ret <3 x float> %cvt
|
||||
}
|
||||
|
||||
define <4 x float> @v_uitofp_v4i8_to_v4f32(i32 %arg0) nounwind {
|
||||
; SI-LABEL: v_uitofp_v4i8_to_v4f32:
|
||||
; SI: ; %bb.0:
|
||||
|
|
|
@ -292,23 +292,25 @@ body: |
|
|||
; CHECK-LABEL: name: test_bitcast_s24_to_v3s8
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
|
||||
; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF1]](s64)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
|
||||
; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
|
||||
; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s24) = G_TRUNC %0
|
||||
|
@ -326,21 +328,24 @@ body: |
|
|||
; CHECK-LABEL: name: test_bitcast_s48_to_v3s16
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
|
||||
; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C]](s32)
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
|
||||
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
|
||||
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
|
||||
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]]
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
|
||||
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
|
||||
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
|
||||
|
@ -349,10 +354,10 @@ body: |
|
|||
; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
|
||||
; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
|
||||
; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
|
||||
; CHECK: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
|
||||
; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
|
||||
; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
|
||||
; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
|
||||
; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
|
||||
; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -233,29 +233,32 @@ body: |
|
|||
; CHECK-LABEL: name: test_unmerge_s8_s48
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
|
||||
; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C]](s32)
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
|
||||
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
|
||||
; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
|
||||
; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
|
||||
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
|
||||
; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C1]](s32)
|
||||
; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
|
||||
; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
|
||||
; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
|
||||
; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
|
||||
; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
|
||||
; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
|
||||
; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
|
||||
; CHECK: $vgpr0 = COPY [[COPY6]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[COPY7]](s32)
|
||||
|
@ -288,14 +291,17 @@ body: |
|
|||
; CHECK-LABEL: name: test_unmerge_s16_s48
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
|
||||
; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
|
||||
; CHECK: $vgpr0 = COPY [[COPY1]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[COPY2]](s32)
|
||||
; CHECK: $vgpr2 = COPY [[COPY3]](s32)
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
; Known-crash reproducer: every RUN line uses `not --crash`, so this test passes
; only while llc crashes on the input below. The accompanying change description
; attributes the crash to a segmentation fault in the AMDGPU InstructionSelector.
; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s
|
||||
; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s
|
||||
; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s
|
||||
|
||||
; Loads an i32 through the addrspace(1) pointer and zero-extends it to i96.
define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) {
|
||||
%load = load i32, i32 addrspace(1)* %ptr
|
||||
|
||||
%ext = zext i32 %load to i96
|
||||
|
||||
ret i96 %ext
|
||||
}
|
|
@ -134,44 +134,6 @@ define i64 @zextload_global_i32_to_i64(i32 addrspace(1)* %ptr) {
|
|||
ret i64 %ext
|
||||
}
|
||||
|
||||
define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) {
|
||||
; GFX9-LABEL: zextload_global_i32_to_i96:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: global_load_dword v0, v[0:1], off
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: zextload_global_i32_to_i96:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: flat_load_dword v0, v[0:1]
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: zextload_global_i32_to_i96:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b32 s6, 0
|
||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
||||
; GFX6-NEXT: s_mov_b32 s8, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s8
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, s8
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
%load = load i32, i32 addrspace(1)* %ptr
|
||||
%ext = zext i32 %load to i96
|
||||
ret i96 %ext
|
||||
}
|
||||
|
||||
define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) {
|
||||
; GFX9-LABEL: zextload_global_i32_to_i128:
|
||||
; GFX9: ; %bb.0:
|
||||
|
|
Loading…
Reference in New Issue