llvm-project/llvm/test/CodeGen/Hexagon/early-if-merge-loop.ll

; RUN: llc -march=hexagon < %s | FileCheck %s
; Make sure that the loop in the end has only one basic block.

; CHECK-LABEL: fred
; Rely on the comments, make sure the one for the loop header is present.
; CHECK: %loop
; CHECK-NOT: %should_merge

target triple = "hexagon"

define i32 @fred(i32 %a0, i64* nocapture readonly %a1) #0 {
b2:
  %v3 = bitcast i64* %a1 to i32*
  %v4 = getelementptr inbounds i32, i32* %v3, i32 1
  %v5 = zext i32 %a0 to i64
  br label %loop

loop:                                             ; preds = %should_merge, %b2
  %v7 = phi i32 [ 0, %b2 ], [ %v49, %should_merge ]
  %v8 = phi i32 [ 0, %b2 ], [ %v42, %should_merge ]
  %v9 = phi i32* [ %v4, %b2 ], [ %v53, %should_merge ]
  %v10 = phi i32 [ 0, %b2 ], [ %v30, %should_merge ]
  %v11 = phi i32* [ %v3, %b2 ], [ %v51, %should_merge ]
  %v12 = phi i32 [ 0, %b2 ], [ %v23, %should_merge ]
  %v13 = phi i32 [ 2, %b2 ], [ %v54, %should_merge ]
  %v14 = load i32, i32* %v11, align 4, !tbaa !0
  %v15 = load i32, i32* %v9, align 4, !tbaa !0
  %v16 = icmp ult i32 %v13, 30
  %v17 = zext i32 %v12 to i64
  %v18 = shl nuw i64 %v17, 32
  %v19 = zext i32 %v14 to i64
  %v20 = or i64 %v18, %v19
  %v21 = tail call i64 @llvm.hexagon.A2.addp(i64 %v20, i64 %v5)
  %v22 = lshr i64 %v21, 32
  %v23 = trunc i64 %v22 to i32
  %v24 = zext i32 %v10 to i64
  %v25 = shl nuw i64 %v24, 32
  %v26 = zext i32 %v15 to i64
  %v27 = or i64 %v25, %v26
  %v28 = tail call i64 @llvm.hexagon.A2.addp(i64 %v27, i64 %v5)
  %v29 = lshr i64 %v28, 32
  %v30 = trunc i64 %v29 to i32
  %v31 = getelementptr inbounds i32, i32* %v3, i32 %v13
  %v32 = load i32, i32* %v31, align 4, !tbaa !0
  %v33 = or i32 %v13, 1
  %v34 = getelementptr inbounds i32, i32* %v3, i32 %v33
  %v35 = load i32, i32* %v34, align 4, !tbaa !0
  %v36 = zext i32 %v8 to i64
  %v37 = shl nuw i64 %v36, 32
  %v38 = zext i32 %v32 to i64
  %v39 = or i64 %v37, %v38
  %v40 = tail call i64 @llvm.hexagon.A2.subp(i64 %v39, i64 %v5)
  %v41 = lshr i64 %v40, 32
  %v42 = trunc i64 %v41 to i32
  %v43 = zext i32 %v7 to i64
  %v44 = shl nuw i64 %v43, 32
  %v45 = zext i32 %v35 to i64
  %v46 = or i64 %v44, %v45
  %v47 = tail call i64 @llvm.hexagon.A2.subp(i64 %v46, i64 %v5)
  %v48 = lshr i64 %v47, 32
  %v49 = trunc i64 %v48 to i32
  br i1 %v16, label %should_merge, label %exit

should_merge:                                     ; preds = %loop
  %v50 = add nuw nsw i32 %v13, 2
  %v51 = getelementptr inbounds i32, i32* %v3, i32 %v50
  %v52 = add nuw nsw i32 %v13, 3
  %v53 = getelementptr inbounds i32, i32* %v3, i32 %v52
  %v54 = add nuw nsw i32 %v13, 4
  br label %loop

exit:                                             ; preds = %loop
  %v57 = tail call i64 @llvm.hexagon.A2.combinew(i32 %v42, i32 %v23)
  %v58 = tail call i64 @llvm.hexagon.A2.combinew(i32 %v49, i32 %v30)
  %v59 = tail call i64 @llvm.hexagon.A2.addp(i64 %v57, i64 %v58)
  %v60 = lshr i64 %v59, 32
  %v61 = trunc i64 %v60 to i32
  ret i32 %v61
}

declare i64 @llvm.hexagon.A2.addp(i64, i64) #1
declare i64 @llvm.hexagon.A2.subp(i64, i64) #1
declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1

attributes #0 = { nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" }
attributes #1 = { nounwind readnone }

!0 = !{!1, !1, i64 0}
!1 = !{!"long", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}
[Hexagon] Early-if-convert branches that may exit the loop Merge the tail block into the loop in cases where the main loop body exits early, subject to profitability constraints. This will coalesce the loop body into fewer blocks. For example: loop: loop: // loop body // loop body if (...) jump exit --> // more body more: if (...) jump exit // more body jump loop jump loop llvm-svn: 297033 2017-03-07 01:24:04 +08:00			`; RUN: llc -march=hexagon < %s \| FileCheck %s`
			`; Make sure that the loop in the end has only one basic block.`

			`; CHECK-LABEL: fred`
			`; Rely on the comments, make sure the one for the loop header is present.`
			`; CHECK: %loop`
			`; CHECK-NOT: %should_merge`

			`target triple = "hexagon"`

			`define i32 @fred(i32 %a0, i64* nocapture readonly %a1) #0 {`
			`b2:`
			`%v3 = bitcast i64* %a1 to i32*`
			`%v4 = getelementptr inbounds i32, i32* %v3, i32 1`
			`%v5 = zext i32 %a0 to i64`
			`br label %loop`

			`loop: ; preds = %should_merge, %b2`
			`%v7 = phi i32 [ 0, %b2 ], [ %v49, %should_merge ]`
			`%v8 = phi i32 [ 0, %b2 ], [ %v42, %should_merge ]`
			`%v9 = phi i32* [ %v4, %b2 ], [ %v53, %should_merge ]`
			`%v10 = phi i32 [ 0, %b2 ], [ %v30, %should_merge ]`
			`%v11 = phi i32* [ %v3, %b2 ], [ %v51, %should_merge ]`
			`%v12 = phi i32 [ 0, %b2 ], [ %v23, %should_merge ]`
			`%v13 = phi i32 [ 2, %b2 ], [ %v54, %should_merge ]`
			`%v14 = load i32, i32* %v11, align 4, !tbaa !0`
			`%v15 = load i32, i32* %v9, align 4, !tbaa !0`
			`%v16 = icmp ult i32 %v13, 30`
			`%v17 = zext i32 %v12 to i64`
			`%v18 = shl nuw i64 %v17, 32`
			`%v19 = zext i32 %v14 to i64`
			`%v20 = or i64 %v18, %v19`
			`%v21 = tail call i64 @llvm.hexagon.A2.addp(i64 %v20, i64 %v5)`
			`%v22 = lshr i64 %v21, 32`
			`%v23 = trunc i64 %v22 to i32`
			`%v24 = zext i32 %v10 to i64`
			`%v25 = shl nuw i64 %v24, 32`
			`%v26 = zext i32 %v15 to i64`
			`%v27 = or i64 %v25, %v26`
			`%v28 = tail call i64 @llvm.hexagon.A2.addp(i64 %v27, i64 %v5)`
			`%v29 = lshr i64 %v28, 32`
			`%v30 = trunc i64 %v29 to i32`
			`%v31 = getelementptr inbounds i32, i32* %v3, i32 %v13`
			`%v32 = load i32, i32* %v31, align 4, !tbaa !0`
			`%v33 = or i32 %v13, 1`
			`%v34 = getelementptr inbounds i32, i32* %v3, i32 %v33`
			`%v35 = load i32, i32* %v34, align 4, !tbaa !0`
			`%v36 = zext i32 %v8 to i64`
			`%v37 = shl nuw i64 %v36, 32`
			`%v38 = zext i32 %v32 to i64`
			`%v39 = or i64 %v37, %v38`
			`%v40 = tail call i64 @llvm.hexagon.A2.subp(i64 %v39, i64 %v5)`
			`%v41 = lshr i64 %v40, 32`
			`%v42 = trunc i64 %v41 to i32`
			`%v43 = zext i32 %v7 to i64`
			`%v44 = shl nuw i64 %v43, 32`
			`%v45 = zext i32 %v35 to i64`
			`%v46 = or i64 %v44, %v45`
			`%v47 = tail call i64 @llvm.hexagon.A2.subp(i64 %v46, i64 %v5)`
			`%v48 = lshr i64 %v47, 32`
			`%v49 = trunc i64 %v48 to i32`
			`br i1 %v16, label %should_merge, label %exit`

			`should_merge: ; preds = %loop`
			`%v50 = add nuw nsw i32 %v13, 2`
			`%v51 = getelementptr inbounds i32, i32* %v3, i32 %v50`
			`%v52 = add nuw nsw i32 %v13, 3`
			`%v53 = getelementptr inbounds i32, i32* %v3, i32 %v52`
			`%v54 = add nuw nsw i32 %v13, 4`
			`br label %loop`

			`exit: ; preds = %loop`
			`%v57 = tail call i64 @llvm.hexagon.A2.combinew(i32 %v42, i32 %v23)`
			`%v58 = tail call i64 @llvm.hexagon.A2.combinew(i32 %v49, i32 %v30)`
			`%v59 = tail call i64 @llvm.hexagon.A2.addp(i64 %v57, i64 %v58)`
			`%v60 = lshr i64 %v59, 32`
			`%v61 = trunc i64 %v60 to i32`
			`ret i32 %v61`
			`}`

			`declare i64 @llvm.hexagon.A2.addp(i64, i64) #1`
			`declare i64 @llvm.hexagon.A2.subp(i64, i64) #1`
			`declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1`

[Hexagon] New HVX target features. This patch lets the llvm tools handle the new HVX target features that are added by frontend (clang). The target-features are of the form "hvx-length64b" for 64 Byte HVX mode, "hvx-length128b" for 128 Byte mode HVX. "hvx-double" is an alias to "hvx-length128b" and is soon will be deprecated. The hvx version target feature is upgated form "+hvx" to "+hvxv{version_number}. Eg: "+hvxv62" For the correct HVX code generation, the user must use the following target features. For 64B mode: "+hvxv62" "+hvx-length64b" For 128B mode: "+hvxv62" "+hvx-length128b" Clang picks a default length if none is specified. If for some reason, no hvx-length is specified to llvm, the compilation will bail out. There is a corresponding clang patch. Differential Revision: https://reviews.llvm.org/D38851 llvm-svn: 316101 2017-10-19 02:07:07 +08:00			`attributes #0 = { nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" }`
[Hexagon] Early-if-convert branches that may exit the loop Merge the tail block into the loop in cases where the main loop body exits early, subject to profitability constraints. This will coalesce the loop body into fewer blocks. For example: loop: loop: // loop body // loop body if (...) jump exit --> // more body more: if (...) jump exit // more body jump loop jump loop llvm-svn: 297033 2017-03-07 01:24:04 +08:00			`attributes #1 = { nounwind readnone }`

			`!0 = !{!1, !1, i64 0}`
			`!1 = !{!"long", !2, i64 0}`
			`!2 = !{!"omnipotent char", !3, i64 0}`
			`!3 = !{!"Simple C/C++ TBAA"}`