llvm-project/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll

; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s

; Compile-only reproducer: the label line below is the only FileCheck directive
; attached to this kernel, so it passes as long as llc emits the function and
; -verify-machineinstrs reports no errors.
; NOTE(review): judging by the file name this guards against a crash in the
; register coalescer's sub-register handling -- confirm against the original
; bug report before reducing or restructuring the IR.
;
; The body threads a <4 x i32> value through several phi nodes while individual
; lanes are overwritten with undef via insertelement. All branch conditions are
; undef, and the function ends in 'unreachable' rather than a return.
; GCN-LABEL:{{^}}row_filter_C1_D0:
define amdgpu_kernel void @row_filter_C1_D0() #0 {
entry:
  br i1 undef, label %for.inc.1, label %do.body.preheader

do.body.preheader:                                ; preds = %entry
  ; Start from an all-zero vector and overwrite lane 1 with undef.
  %tmp = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
  br i1 undef, label %do.body56.1, label %do.body90

do.body90:                                        ; preds = %do.body56.2, %do.body56.1, %do.body.preheader
  ; Merge the three incoming versions of the vector, then overwrite lanes 2
  ; and 3 with undef.
  %tmp1 = phi <4 x i32> [ %tmp6, %do.body56.2 ], [ %tmp5, %do.body56.1 ], [ %tmp, %do.body.preheader ]
  %tmp2 = insertelement <4 x i32> %tmp1, i32 undef, i32 2
  %tmp3 = insertelement <4 x i32> %tmp2, i32 undef, i32 3
  br i1 undef, label %do.body124.1, label %do.body.1562.preheader

do.body.1562.preheader:                           ; preds = %do.body124.1, %do.body90
  %storemerge = phi <4 x i32> [ %tmp3, %do.body90 ], [ %tmp7, %do.body124.1 ]
  ; %tmp4 has no users anywhere in this function.
  %tmp4 = insertelement <4 x i32> undef, i32 undef, i32 1
  br label %for.inc.1

do.body56.1:                                      ; preds = %do.body.preheader
  ; Lane 1 is written again on top of %tmp.
  %tmp5 = insertelement <4 x i32> %tmp, i32 undef, i32 1
  %or.cond472.1 = or i1 undef, undef
  br i1 %or.cond472.1, label %do.body56.2, label %do.body90

do.body56.2:                                      ; preds = %do.body56.1
  ; Lane 1 is written a third time, now on top of %tmp5.
  %tmp6 = insertelement <4 x i32> %tmp5, i32 undef, i32 1
  br label %do.body90

do.body124.1:                                     ; preds = %do.body90
  ; Lane 3 is written again on top of %tmp3.
  %tmp7 = insertelement <4 x i32> %tmp3, i32 undef, i32 3
  br label %do.body.1562.preheader

for.inc.1:                                        ; preds = %do.body.1562.preheader, %entry
  ; Final merge: zero vector straight from %entry, or the partially rewritten
  ; vector from the preheader. The add result has no users and the block
  ; terminates with 'unreachable'.
  %storemerge591 = phi <4 x i32> [ zeroinitializer, %entry ], [ %storemerge, %do.body.1562.preheader ]
  %add.i495 = add <4 x i32> %storemerge591, undef
  unreachable
}

; Second reproducer in this file, using the amdgpu_ps calling convention. The
; directives below only require that the function is emitted and that an
; s_endpgm instruction appears after its label; no other output is pinned.
;
; The body is a web of float phi nodes over a control-flow graph whose branch
; conditions are all undef. One path (bb9) obtains its values by extracting
; elements 1 and 3 from a <4 x float> image-sample result; the other paths feed
; constants, undef, or earlier phi values. Everything converges in bb14, which
; scales the two merged values and exports them.
; NOTE(review): the block layout and phi structure are presumably what triggers
; the original failure -- avoid simplifying without re-checking the bug.
; GCN-LABEL: {{^}}foo:
; GCN: s_endpgm
define amdgpu_ps void @foo() #0 {
bb:
  br i1 undef, label %bb2, label %bb1

bb1:                                              ; preds = %bb
  br i1 undef, label %bb4, label %bb6

bb2:                                              ; preds = %bb4, %bb
  ; 0.0 when entered directly from %bb, otherwise whatever %bb4 merged.
  %tmp = phi float [ %tmp5, %bb4 ], [ 0.000000e+00, %bb ]
  br i1 undef, label %bb9, label %bb13

bb4:                                              ; preds = %bb7, %bb6, %bb1
  ; Only the %bb7 edge carries a defined value; the other two edges are undef.
  %tmp5 = phi float [ undef, %bb1 ], [ undef, %bb6 ], [ %tmp8, %bb7 ]
  br label %bb2

bb6:                                              ; preds = %bb1
  br i1 undef, label %bb7, label %bb4

bb7:                                              ; preds = %bb6
  %tmp8 = fmul float undef, undef
  br label %bb4

bb9:                                              ; preds = %bb2
  ; Sample returns a 4-element vector; only elements 1 and 3 are used.
  %tmp10 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp11 = extractelement <4 x float> %tmp10, i32 1
  %tmp12 = extractelement <4 x float> %tmp10, i32 3
  br label %bb14

bb13:                                             ; preds = %bb2
  br i1 undef, label %bb23, label %bb24

bb14:                                             ; preds = %bb27, %bb24, %bb9
  ; Join point: values come from the sampled vector (%bb9), are undef (%bb27),
  ; or are 0.0 / %tmp25 (%bb24).
  %tmp15 = phi float [ %tmp12, %bb9 ], [ undef, %bb27 ], [ 0.000000e+00, %bb24 ]
  %tmp16 = phi float [ %tmp11, %bb9 ], [ undef, %bb27 ], [ %tmp25, %bb24 ]
  ; Scale by 10.5 and 11.5 respectively.
  %tmp17 = fmul float 1.050000e+01, %tmp16
  %tmp18 = fmul float 1.150000e+01, %tmp15
  ; %tmp18 is the first float operand; %tmp17 is reused for the other three.
  ; NOTE(review): the leading i32 0 / i32 15 and trailing i1 flags are
  ; presumably export target, channel-enable mask, and done/vm bits -- confirm
  ; against the llvm.amdgcn.exp intrinsic documentation.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp18, float %tmp17, float %tmp17, float %tmp17, i1 true, i1 true) #0
  ret void

bb23:                                             ; preds = %bb13
  br i1 undef, label %bb24, label %bb26

bb24:                                             ; preds = %bb26, %bb23, %bb13
  ; Forwards %tmp from %bb13 and %bb26, but 0.0 when arriving from %bb23.
  %tmp25 = phi float [ %tmp, %bb13 ], [ %tmp, %bb26 ], [ 0.000000e+00, %bb23 ]
  br i1 undef, label %bb27, label %bb14

bb26:                                             ; preds = %bb23
  br label %bb24

bb27:                                             ; preds = %bb24
  br label %bb14
}


declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s \| FileCheck -check-prefix=GCN %s`
			`; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s \| FileCheck -check-prefix=GCN %s`
RegisterCoalescer: Fix valuesIdentical() in some subrange merge cases. I got confused and assumed SrcIdx/DstIdx of the CoalescerPair is a subregister index in SrcReg/DstReg, but they are actually subregister indices of the coalesced register that get you back to SrcReg/DstReg when applied. Fixed the bug, improved comments and simplified code accordingly. Testcase by Tom Stellard! llvm-svn: 225415 2015-01-08 07:58:38 +08:00
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`; GCN-LABEL:{{^}}row_filter_C1_D0:`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @row_filter_C1_D0() #0 {`
RegisterCoalescer: Fix valuesIdentical() in some subrange merge cases. I got confused and assumed SrcIdx/DstIdx of the CoalescerPair is a subregister index in SrcReg/DstReg, but they are actually subregister indices of the coalesced register that get you back to SrcReg/DstReg when applied. Fixed the bug, improved comments and simplified code accordingly. Testcase by Tom Stellard! llvm-svn: 225415 2015-01-08 07:58:38 +08:00			`entry:`
			`br i1 undef, label %for.inc.1, label %do.body.preheader`

			`do.body.preheader: ; preds = %entry`
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`%tmp = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1`
RegisterCoalescer: Fix valuesIdentical() in some subrange merge cases. I got confused and assumed SrcIdx/DstIdx of the CoalescerPair is a subregister index in SrcReg/DstReg, but they are actually subregister indices of the coalesced register that get you back to SrcReg/DstReg when applied. Fixed the bug, improved comments and simplified code accordingly. Testcase by Tom Stellard! llvm-svn: 225415 2015-01-08 07:58:38 +08:00			`br i1 undef, label %do.body56.1, label %do.body90`

			`do.body90: ; preds = %do.body56.2, %do.body56.1, %do.body.preheader`
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`%tmp1 = phi <4 x i32> [ %tmp6, %do.body56.2 ], [ %tmp5, %do.body56.1 ], [ %tmp, %do.body.preheader ]`
			`%tmp2 = insertelement <4 x i32> %tmp1, i32 undef, i32 2`
			`%tmp3 = insertelement <4 x i32> %tmp2, i32 undef, i32 3`
RegisterCoalescer: Fix valuesIdentical() in some subrange merge cases. I got confused and assumed SrcIdx/DstIdx of the CoalescerPair is a subregister index in SrcReg/DstReg, but they are actually subregister indices of the coalesced register that get you back to SrcReg/DstReg when applied. Fixed the bug, improved comments and simplified code accordingly. Testcase by Tom Stellard! llvm-svn: 225415 2015-01-08 07:58:38 +08:00			`br i1 undef, label %do.body124.1, label %do.body.1562.preheader`

			`do.body.1562.preheader: ; preds = %do.body124.1, %do.body90`
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`%storemerge = phi <4 x i32> [ %tmp3, %do.body90 ], [ %tmp7, %do.body124.1 ]`
			`%tmp4 = insertelement <4 x i32> undef, i32 undef, i32 1`
RegisterCoalescer: Fix valuesIdentical() in some subrange merge cases. I got confused and assumed SrcIdx/DstIdx of the CoalescerPair is a subregister index in SrcReg/DstReg, but they are actually subregister indices of the coalesced register that get you back to SrcReg/DstReg when applied. Fixed the bug, improved comments and simplified code accordingly. Testcase by Tom Stellard! llvm-svn: 225415 2015-01-08 07:58:38 +08:00			`br label %for.inc.1`

			`do.body56.1: ; preds = %do.body.preheader`
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`%tmp5 = insertelement <4 x i32> %tmp, i32 undef, i32 1`
RegisterCoalescer: Fix valuesIdentical() in some subrange merge cases. I got confused and assumed SrcIdx/DstIdx of the CoalescerPair is a subregister index in SrcReg/DstReg, but they are actually subregister indices of the coalesced register that get you back to SrcReg/DstReg when applied. Fixed the bug, improved comments and simplified code accordingly. Testcase by Tom Stellard! llvm-svn: 225415 2015-01-08 07:58:38 +08:00			`%or.cond472.1 = or i1 undef, undef`
			`br i1 %or.cond472.1, label %do.body56.2, label %do.body90`

			`do.body56.2: ; preds = %do.body56.1`
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`%tmp6 = insertelement <4 x i32> %tmp5, i32 undef, i32 1`
RegisterCoalescer: Fix valuesIdentical() in some subrange merge cases. I got confused and assumed SrcIdx/DstIdx of the CoalescerPair is a subregister index in SrcReg/DstReg, but they are actually subregister indices of the coalesced register that get you back to SrcReg/DstReg when applied. Fixed the bug, improved comments and simplified code accordingly. Testcase by Tom Stellard! llvm-svn: 225415 2015-01-08 07:58:38 +08:00			`br label %do.body90`

			`do.body124.1: ; preds = %do.body90`
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`%tmp7 = insertelement <4 x i32> %tmp3, i32 undef, i32 3`
RegisterCoalescer: Fix valuesIdentical() in some subrange merge cases. I got confused and assumed SrcIdx/DstIdx of the CoalescerPair is a subregister index in SrcReg/DstReg, but they are actually subregister indices of the coalesced register that get you back to SrcReg/DstReg when applied. Fixed the bug, improved comments and simplified code accordingly. Testcase by Tom Stellard! llvm-svn: 225415 2015-01-08 07:58:38 +08:00			`br label %do.body.1562.preheader`

			`for.inc.1: ; preds = %do.body.1562.preheader, %entry`
			`%storemerge591 = phi <4 x i32> [ zeroinitializer, %entry ], [ %storemerge, %do.body.1562.preheader ]`
			`%add.i495 = add <4 x i32> %storemerge591, undef`
			`unreachable`
			`}`

AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`; GCN-LABEL: {{^}}foo:`
			`; GCN: s_endpgm`
AMDGPU: Add a shader calling convention This makes it possible to distinguish between mesa shaders and other kernels even in the presence of compute shaders. Patch By: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Differential Revision: http://reviews.llvm.org/D18559 llvm-svn: 265589 2016-04-07 03:40:20 +08:00			`define amdgpu_ps void @foo() #0 {`
R600/SI: Disable subreg liveness This is temporary while we try to fix a crash in the register coalescer. llvm-svn: 228861 2015-02-12 02:24:53 +08:00			`bb:`
			`br i1 undef, label %bb2, label %bb1`

			`bb1: ; preds = %bb`
			`br i1 undef, label %bb4, label %bb6`

			`bb2: ; preds = %bb4, %bb`
			`%tmp = phi float [ %tmp5, %bb4 ], [ 0.000000e+00, %bb ]`
			`br i1 undef, label %bb9, label %bb13`

			`bb4: ; preds = %bb7, %bb6, %bb1`
			`%tmp5 = phi float [ undef, %bb1 ], [ undef, %bb6 ], [ %tmp8, %bb7 ]`
			`br label %bb2`

			`bb6: ; preds = %bb1`
			`br i1 undef, label %bb7, label %bb4`

			`bb7: ; preds = %bb6`
			`%tmp8 = fmul float undef, undef`
			`br label %bb4`

			`bb9: ; preds = %bb2`
AMDGPU: Convert test cases to the dimension-aware intrinsics Summary: Also explicitly port over some tests in llvm.amdgcn.image.* that were missing. Some tests are removed because they no longer apply (i.e. explicitly testing building an address vector via insertelement). This is in preparation for the eventual removal of the old-style intrinsics. Some additional notes: - constant-address-space-32bit.ll: change some GCN-NEXT to GCN because the instruction schedule was subtly altered - insert_vector_elt.ll: the old test didn't actually test anything, because %tmp1 was not used; remove the load, because it doesn't work (Because of the amdgpu_ps calling convention? In any case, it's orthogonal to what the test claims to be testing.) Change-Id: Idfa99b6512ad139e755e82b8b89548ab08f0afcf Reviewers: arsenm, rampitec Subscribers: MatzeB, qcolombet, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D48018 llvm-svn: 335229 2018-06-21 21:37:19 +08:00			`%tmp10 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)`
R600/SI: Disable subreg liveness This is temporary while we try to fix a crash in the register coalescer. llvm-svn: 228861 2015-02-12 02:24:53 +08:00			`%tmp11 = extractelement <4 x float> %tmp10, i32 1`
			`%tmp12 = extractelement <4 x float> %tmp10, i32 3`
			`br label %bb14`

			`bb13: ; preds = %bb2`
			`br i1 undef, label %bb23, label %bb24`

			`bb14: ; preds = %bb27, %bb24, %bb9`
			`%tmp15 = phi float [ %tmp12, %bb9 ], [ undef, %bb27 ], [ 0.000000e+00, %bb24 ]`
			`%tmp16 = phi float [ %tmp11, %bb9 ], [ undef, %bb27 ], [ %tmp25, %bb24 ]`
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`%tmp17 = fmul float 1.050000e+01, %tmp16`
			`%tmp18 = fmul float 1.150000e+01, %tmp15`
			`call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp18, float %tmp17, float %tmp17, float %tmp17, i1 true, i1 true) #0`
R600/SI: Disable subreg liveness This is temporary while we try to fix a crash in the register coalescer. llvm-svn: 228861 2015-02-12 02:24:53 +08:00			`ret void`

			`bb23: ; preds = %bb13`
			`br i1 undef, label %bb24, label %bb26`

			`bb24: ; preds = %bb26, %bb23, %bb13`
			`%tmp25 = phi float [ %tmp, %bb13 ], [ %tmp, %bb26 ], [ 0.000000e+00, %bb23 ]`
			`br i1 undef, label %bb27, label %bb14`

			`bb26: ; preds = %bb23`
			`br label %bb24`

			`bb27: ; preds = %bb24`
			`br label %bb14`
			`}`

AMDGPU: Convert image intrinsic uses in tests llvm-svn: 298386 2017-03-22 00:24:12 +08:00
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0`
AMDGPU: Convert test cases to the dimension-aware intrinsics Summary: Also explicitly port over some tests in llvm.amdgcn.image.* that were missing. Some tests are removed because they no longer apply (i.e. explicitly testing building an address vector via insertelement). This is in preparation for the eventual removal of the old-style intrinsics. Some additional notes: - constant-address-space-32bit.ll: change some GCN-NEXT to GCN because the instruction schedule was subtly altered - insert_vector_elt.ll: the old test didn't actually test anything, because %tmp1 was not used; remove the load, because it doesn't work (Because of the amdgpu_ps calling convention? In any case, it's orthogonal to what the test claims to be testing.) Change-Id: Idfa99b6512ad139e755e82b8b89548ab08f0afcf Reviewers: arsenm, rampitec Subscribers: MatzeB, qcolombet, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D48018 llvm-svn: 335229 2018-06-21 21:37:19 +08:00			`declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1`
R600/SI: Disable subreg liveness This is temporary while we try to fix a crash in the register coalescer. llvm-svn: 228861 2015-02-12 02:24:53 +08:00
AMDGPU: Remove superfluous string attributes from tests Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129 2016-07-12 07:35:48 +08:00			`attributes #0 = { nounwind }`
AMDGPU: Convert image intrinsic uses in tests llvm-svn: 298386 2017-03-22 00:24:12 +08:00			`attributes #1 = { nounwind readonly }`