llvm-project/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll

; Each pass-manager spelling below is run twice: once with
; -pass-remarks-with-hotness (verified with the HOTNESS check prefix) and once
; without it (verified with the NO_HOTNESS check prefix). stderr is merged into
; stdout (2>&1) so FileCheck sees the remarks alongside the -S output.
;
; First pair - the pass is requested with the -loop-distribute flag.
; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
; RUN:     -pass-remarks-analysis=loop-distribute \
; RUN:     -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
; RUN:     -pass-remarks-analysis=loop-distribute \
; RUN:                                < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS

; Second pair - the same checks through a -passes= pipeline.
; NOTE(review): 'require<aa>' is listed ahead of loop-distribute, presumably so
; that alias-analysis results are already cached when the pass runs - confirm.
; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
; RUN:     -pass-remarks-analysis=loop-distribute \
; RUN:     -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
; RUN:     -pass-remarks-analysis=loop-distribute \
; RUN:                                < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS

; This is the input program:
;
;     1	void forced (char *A, char *B, char *C, int N) {
;     2	#pragma clang loop distribute(enable)
;     3	  for(int i = 0; i < N; i++) {
;     4	    A[i] = B[i] * C[i];
;     5	  }
;     6	}

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"

; Expected diagnostics: one "missed" remark followed by one "analysis" remark,
; both reported at /tmp/t.c:3:3 (debug location !10, the one attached to the
; loop's back-edge branch). When hotness is requested each remark carries a
; trailing hotness annotation; otherwise the {{$}} anchor requires the line to
; end directly after the message text.
; NOTE(review): the value 300 is presumably function_entry_count (3, see !22)
; multiplied by a loop-body frequency of roughly 100 implied by the 1:99
; exit/back-edge weights in !24 - confirm against the hotness computation.
; HOTNESS: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info (hotness: 300)
; HOTNESS: remark: /tmp/t.c:3:3: loop not distributed: memory operations are safe for vectorization (hotness: 300)
; NO_HOTNESS: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info{{$}}
; NO_HOTNESS: remark: /tmp/t.c:3:3: loop not distributed: memory operations are safe for vectorization{{$}}

; IR for the C function `forced` from the header comment: for each i in [0, N)
; it stores B[i] * C[i] into A[i]. The function carries an entry count (!22),
; both conditional branches carry branch weights (!23, !24), and the back-edge
; branch carries the loop ID (!20) that enables distribution.
define void @forced(i8* %A, i8* %B, i8* %C, i32 %N) !dbg !7 !prof !22 {
entry:
  ; Guard: only enter the loop when N is strictly positive.
  %has.iters = icmp sgt i32 %N, 0, !dbg !9
  br i1 %has.iters, label %ph, label %for.cond.cleanup, !dbg !10, !prof !23

ph:
  ; Dedicated preheader so the loop has a single entry edge.
  br label %for.body

for.body:
  ; 64-bit induction variable, starting at 0 and advanced by 1 per iteration.
  %i = phi i64 [ %i.next, %for.body ], [ 0, %ph ]

  ; Load the two byte operands B[i] and C[i].
  %b.addr = getelementptr inbounds i8, i8* %B, i64 %i, !dbg !12
  %b.val = load i8, i8* %b.addr, align 1, !dbg !12, !tbaa !13
  %c.addr = getelementptr inbounds i8, i8* %C, i64 %i, !dbg !16
  %c.val = load i8, i8* %c.addr, align 1, !dbg !16, !tbaa !13

  ; A[i] = C[i] * B[i] (8-bit multiply).
  %prod = mul i8 %c.val, %b.val, !dbg !17
  %a.addr = getelementptr inbounds i8, i8* %A, i64 %i, !dbg !18
  store i8 %prod, i8* %a.addr, align 1, !dbg !19, !tbaa !13

  ; Advance the counter and leave once its low 32 bits equal N.
  %i.next = add nuw nsw i64 %i, 1, !dbg !10
  %i.next.32 = trunc i64 %i.next to i32, !dbg !10
  %done = icmp eq i32 %i.next.32, %N, !dbg !10
  br i1 %done, label %for.cond.cleanup, label %for.body, !dbg !10, !llvm.loop !20, !prof !24

for.cond.cleanup:
  ret void, !dbg !11
}

; Named metadata: the compile unit list and the module flags.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}

; Debug info skeleton: compile unit, source file (/tmp/t.c), module flags, and
; the subprogram for @forced. Line tables only, so no variable descriptions.
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 267633) (llvm/trunk 267675)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
!1 = !DIFile(filename: "/tmp/t.c", directory: "/tmp")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 2}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!7 = distinct !DISubprogram(name: "forced", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
!8 = !DISubroutineType(types: !2)
; Source locations. !10 (line 3, column 3) is the one on the loop's back-edge
; branch and matches the /tmp/t.c:3:3 position in the expected remarks.
!9 = !DILocation(line: 3, column: 20, scope: !7)
!10 = !DILocation(line: 3, column: 3, scope: !7)
!11 = !DILocation(line: 6, column: 1, scope: !7)
!12 = !DILocation(line: 4, column: 12, scope: !7)
; TBAA access tag (!13) used by both loads and the store in the loop body.
!13 = !{!14, !14, i64 0}
!14 = !{!"omnipotent char", !15, i64 0}
!15 = !{!"Simple C/C++ TBAA"}
!16 = !DILocation(line: 4, column: 19, scope: !7)
!17 = !DILocation(line: 4, column: 17, scope: !7)
!18 = !DILocation(line: 4, column: 5, scope: !7)
!19 = !DILocation(line: 4, column: 10, scope: !7)
; Loop ID for the for.body loop; !21 is the IR form of the
; "#pragma clang loop distribute(enable)" shown in the header comment.
!20 = distinct !{!20, !21}
!21 = !{!"llvm.loop.distribute.enable", i1 true}
; Profile data. !22 is the entry count attached to @forced. !23 weights the
; entry guard (99 towards %ph, 1 towards %for.cond.cleanup). !24 weights the
; loop latch (1 towards the exit, 99 towards the back edge).
!22 = !{!"function_entry_count", i64 3}
!23 = !{!"branch_weights", i32 99, i32 1}
!24 = !{!"branch_weights", i32 1, i32 99}
[OptRemark,LDist] RFC: Add hotness attribute Summary: This is the first set of changes implementing the RFC from http://thread.gmane.org/gmane.comp.compilers.llvm.devel/98334 This is a cross-sectional patch; rather than implementing the hotness attribute for all optimization remarks and all passes in a patch set, it implements it for the 'missed-optimization' remark for Loop Distribution. My goal is to shake out the design issues before scaling it up to other types and passes. Hotness is computed as an integer as the multiplication of the block frequency with the function entry count. It's only printed in opt currently since clang prints the diagnostic fields directly. E.g.: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info (hotness: 300) A new API added is similar to emitOptimizationRemarkMissed. The difference is that it additionally takes a code region that the diagnostic corresponds to. From this, hotness is computed using BFI. The new API is exposed via an analysis pass so that it can be made dependent on LazyBFI. (Thanks to Hal for the analysis pass idea.) This feature can all be enabled by setDiagnosticHotnessRequested in the LLVM context. If this is off, LazyBFI is not calculated (D22141) so there should be no overhead. A new command-line option is added to turn this on in opt. My plan is to switch all user of emitOptimizationRemark* to use this module instead. Reviewers: hfinkel Subscribers: rcox2, mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D21771 llvm-svn: 275583 2016-07-16 01:23:20 +08:00			`; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \`
[OptDiag,LDist] Convert remaining opt remarks to use the new API llvm-svn: 276340 2016-07-22 05:21:34 +08:00			`; RUN: -pass-remarks-analysis=loop-distribute \`
[OptRemark,LDist] RFC: Add hotness attribute Summary: This is the first set of changes implementing the RFC from http://thread.gmane.org/gmane.comp.compilers.llvm.devel/98334 This is a cross-sectional patch; rather than implementing the hotness attribute for all optimization remarks and all passes in a patch set, it implements it for the 'missed-optimization' remark for Loop Distribution. My goal is to shake out the design issues before scaling it up to other types and passes. Hotness is computed as an integer as the multiplication of the block frequency with the function entry count. It's only printed in opt currently since clang prints the diagnostic fields directly. E.g.: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info (hotness: 300) A new API added is similar to emitOptimizationRemarkMissed. The difference is that it additionally takes a code region that the diagnostic corresponds to. From this, hotness is computed using BFI. The new API is exposed via an analysis pass so that it can be made dependent on LazyBFI. (Thanks to Hal for the analysis pass idea.) This feature can all be enabled by setDiagnosticHotnessRequested in the LLVM context. If this is off, LazyBFI is not calculated (D22141) so there should be no overhead. A new command-line option is added to turn this on in opt. My plan is to switch all user of emitOptimizationRemark* to use this module instead. Reviewers: hfinkel Subscribers: rcox2, mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D21771 llvm-svn: 275583 2016-07-16 01:23:20 +08:00			`; RUN: -pass-remarks-with-hotness < %s 2>&1 \| FileCheck %s --check-prefix=HOTNESS`
			`; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \`
[OptDiag,LDist] Convert remaining opt remarks to use the new API llvm-svn: 276340 2016-07-22 05:21:34 +08:00			`; RUN: -pass-remarks-analysis=loop-distribute \`
[LoopDist] Port to new PM Summary: The direct motivation for the port is to ensure that the OptRemarkEmitter tests work with the new PM. This remains a function pass because we not only create multiple loops but could also version the original loop. In the test I need to invoke opt with -passes='require<aa>,loop-distribute'. LoopDistribute does not directly depend on AA however LAA does. LAA uses getCachedResult so I think we need manually pull in 'aa'. Reviewers: davidxl, silvas Subscribers: sanjoy, llvm-commits, mzolotukhin Differential Revision: https://reviews.llvm.org/D22437 llvm-svn: 275811 2016-07-19 00:29:27 +08:00			`; RUN: < %s 2>&1 \| FileCheck %s --check-prefix=NO_HOTNESS`

			`; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \`
[OptDiag,LDist] Convert remaining opt remarks to use the new API llvm-svn: 276340 2016-07-22 05:21:34 +08:00			`; RUN: -pass-remarks-analysis=loop-distribute \`
[LoopDist] Port to new PM Summary: The direct motivation for the port is to ensure that the OptRemarkEmitter tests work with the new PM. This remains a function pass because we not only create multiple loops but could also version the original loop. In the test I need to invoke opt with -passes='require<aa>,loop-distribute'. LoopDistribute does not directly depend on AA however LAA does. LAA uses getCachedResult so I think we need manually pull in 'aa'. Reviewers: davidxl, silvas Subscribers: sanjoy, llvm-commits, mzolotukhin Differential Revision: https://reviews.llvm.org/D22437 llvm-svn: 275811 2016-07-19 00:29:27 +08:00			`; RUN: -pass-remarks-with-hotness < %s 2>&1 \| FileCheck %s --check-prefix=HOTNESS`
			`; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \`
[OptDiag,LDist] Convert remaining opt remarks to use the new API llvm-svn: 276340 2016-07-22 05:21:34 +08:00			`; RUN: -pass-remarks-analysis=loop-distribute \`
[OptRemark,LDist] RFC: Add hotness attribute Summary: This is the first set of changes implementing the RFC from http://thread.gmane.org/gmane.comp.compilers.llvm.devel/98334 This is a cross-sectional patch; rather than implementing the hotness attribute for all optimization remarks and all passes in a patch set, it implements it for the 'missed-optimization' remark for Loop Distribution. My goal is to shake out the design issues before scaling it up to other types and passes. Hotness is computed as an integer as the multiplication of the block frequency with the function entry count. It's only printed in opt currently since clang prints the diagnostic fields directly. E.g.: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info (hotness: 300) A new API added is similar to emitOptimizationRemarkMissed. The difference is that it additionally takes a code region that the diagnostic corresponds to. From this, hotness is computed using BFI. The new API is exposed via an analysis pass so that it can be made dependent on LazyBFI. (Thanks to Hal for the analysis pass idea.) This feature can all be enabled by setDiagnosticHotnessRequested in the LLVM context. If this is off, LazyBFI is not calculated (D22141) so there should be no overhead. A new command-line option is added to turn this on in opt. My plan is to switch all user of emitOptimizationRemark* to use this module instead. Reviewers: hfinkel Subscribers: rcox2, mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D21771 llvm-svn: 275583 2016-07-16 01:23:20 +08:00			`; RUN: < %s 2>&1 \| FileCheck %s --check-prefix=NO_HOTNESS`

			`; This is the input program:`
			`;`
			`; 1 void forced (char *A, char *B, char *C, int N) {`
			`; 2 #pragma clang loop distribute(enable)`
			`; 3 for(int i = 0; i < N; i++) {`
			`; 4 A[i] = B[i] * C[i];`
			`; 5 }`
			`; 6 }`

			`target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"`
			`target triple = "x86_64-apple-macosx10.11.0"`

			`; HOTNESS: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info (hotness: 300)`
[OptDiag,LDist] Convert remaining opt remarks to use the new API llvm-svn: 276340 2016-07-22 05:21:34 +08:00			`; HOTNESS: remark: /tmp/t.c:3:3: loop not distributed: memory operations are safe for vectorization (hotness: 300)`
[OptRemark,LDist] RFC: Add hotness attribute Summary: This is the first set of changes implementing the RFC from http://thread.gmane.org/gmane.comp.compilers.llvm.devel/98334 This is a cross-sectional patch; rather than implementing the hotness attribute for all optimization remarks and all passes in a patch set, it implements it for the 'missed-optimization' remark for Loop Distribution. My goal is to shake out the design issues before scaling it up to other types and passes. Hotness is computed as an integer as the multiplication of the block frequency with the function entry count. It's only printed in opt currently since clang prints the diagnostic fields directly. E.g.: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info (hotness: 300) A new API added is similar to emitOptimizationRemarkMissed. The difference is that it additionally takes a code region that the diagnostic corresponds to. From this, hotness is computed using BFI. The new API is exposed via an analysis pass so that it can be made dependent on LazyBFI. (Thanks to Hal for the analysis pass idea.) This feature can all be enabled by setDiagnosticHotnessRequested in the LLVM context. If this is off, LazyBFI is not calculated (D22141) so there should be no overhead. A new command-line option is added to turn this on in opt. My plan is to switch all user of emitOptimizationRemark* to use this module instead. Reviewers: hfinkel Subscribers: rcox2, mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D21771 llvm-svn: 275583 2016-07-16 01:23:20 +08:00			`; NO_HOTNESS: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info{{$}}`
[OptDiag,LDist] Convert remaining opt remarks to use the new API llvm-svn: 276340 2016-07-22 05:21:34 +08:00			`; NO_HOTNESS: remark: /tmp/t.c:3:3: loop not distributed: memory operations are safe for vectorization{{$}}`
[OptRemark,LDist] RFC: Add hotness attribute Summary: This is the first set of changes implementing the RFC from http://thread.gmane.org/gmane.comp.compilers.llvm.devel/98334 This is a cross-sectional patch; rather than implementing the hotness attribute for all optimization remarks and all passes in a patch set, it implements it for the 'missed-optimization' remark for Loop Distribution. My goal is to shake out the design issues before scaling it up to other types and passes. Hotness is computed as an integer as the multiplication of the block frequency with the function entry count. It's only printed in opt currently since clang prints the diagnostic fields directly. E.g.: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info (hotness: 300) A new API added is similar to emitOptimizationRemarkMissed. The difference is that it additionally takes a code region that the diagnostic corresponds to. From this, hotness is computed using BFI. The new API is exposed via an analysis pass so that it can be made dependent on LazyBFI. (Thanks to Hal for the analysis pass idea.) This feature can all be enabled by setDiagnosticHotnessRequested in the LLVM context. If this is off, LazyBFI is not calculated (D22141) so there should be no overhead. A new command-line option is added to turn this on in opt. My plan is to switch all user of emitOptimizationRemark* to use this module instead. Reviewers: hfinkel Subscribers: rcox2, mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D21771 llvm-svn: 275583 2016-07-16 01:23:20 +08:00
			`define void @forced(i8* %A, i8* %B, i8* %C, i32 %N) !dbg !7 !prof !22 {`
			`entry:`
			`%cmp12 = icmp sgt i32 %N, 0, !dbg !9`
			`br i1 %cmp12, label %ph, label %for.cond.cleanup, !dbg !10, !prof !23`

			`ph:`
			`br label %for.body`

			`for.body:`
			`%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]`
			`%arrayidx = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !12`
			`%0 = load i8, i8* %arrayidx, align 1, !dbg !12, !tbaa !13`
			`%arrayidx2 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !16`
			`%1 = load i8, i8* %arrayidx2, align 1, !dbg !16, !tbaa !13`
			`%mul = mul i8 %1, %0, !dbg !17`
			`%arrayidx6 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !18`
			`store i8 %mul, i8* %arrayidx6, align 1, !dbg !19, !tbaa !13`
			`%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10`
			`%lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10`
			`%exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !10`
			`br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !10, !llvm.loop !20, !prof !24`

			`for.cond.cleanup:`
			`ret void, !dbg !11`
			`}`

			`!llvm.dbg.cu = !{!0}`
			`!llvm.module.flags = !{!3, !4}`

			`!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 267633) (llvm/trunk 267675)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)`
			`!1 = !DIFile(filename: "/tmp/t.c", directory: "/tmp")`
			`!2 = !{}`
			`!3 = !{i32 2, !"Dwarf Version", i32 2}`
			`!4 = !{i32 2, !"Debug Info Version", i32 3}`
			`!7 = distinct !DISubprogram(name: "forced", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)`
			`!8 = !DISubroutineType(types: !2)`
			`!9 = !DILocation(line: 3, column: 20, scope: !7)`
			`!10 = !DILocation(line: 3, column: 3, scope: !7)`
			`!11 = !DILocation(line: 6, column: 1, scope: !7)`
			`!12 = !DILocation(line: 4, column: 12, scope: !7)`
			`!13 = !{!14, !14, i64 0}`
			`!14 = !{!"omnipotent char", !15, i64 0}`
			`!15 = !{!"Simple C/C++ TBAA"}`
			`!16 = !DILocation(line: 4, column: 19, scope: !7)`
			`!17 = !DILocation(line: 4, column: 17, scope: !7)`
			`!18 = !DILocation(line: 4, column: 5, scope: !7)`
			`!19 = !DILocation(line: 4, column: 10, scope: !7)`
			`!20 = distinct !{!20, !21}`
			`!21 = !{!"llvm.loop.distribute.enable", i1 true}`
			`!22 = !{!"function_entry_count", i64 3}`
			`!23 = !{!"branch_weights", i32 99, i32 1}`
			`!24 = !{!"branch_weights", i32 1, i32 99}`