; llvm-project/polly/test/ForwardOpTree/forward_load_differentarray.ll

; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-optree -analyze < %s | FileCheck %s -match-full-lines
;
; To forward %val, B[j] cannot be reused in bodyC because it is overwritten
; in between (by bodyB). Verify that the alternative C[j] is used instead.
;
; for (int j = 0; j < n; j += 1) {
; bodyA:
;   double val = B[j];
;
; bodyB:
;   B[j] = 0;
;   C[j] = val;
;
; bodyC:
;   A[j] = val;
; }
;
; @func: one counted loop whose body is split over three basic blocks
; (bodyA, bodyB, bodyC).
;
;   %n          loop bound; the loop runs while %j < %n (signed compare)
;   %A, %B, %C  pointers to double, each declared noalias and nonnull
;
; Per iteration j: bodyA loads B[j] into %val, bodyB stores 0.0 to B[j] and
; %val to C[j], and bodyC stores %val to A[j]. %val is defined in bodyA and
; used in both later blocks.
define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B, double* noalias nonnull %C) {
entry:
  br label %for

; Loop header: %j is 0 when coming from %entry and %j.inc when coming from %inc.
for:
  %j = phi i32 [0, %entry], [%j.inc, %inc]
  ; Signed comparison; leave the loop through %exit once %j >= %n.
  %j.cmp = icmp slt i32 %j, %n
  br i1 %j.cmp, label %bodyA, label %exit

    ; bodyA: val = B[j]
    bodyA:
      %B_idx = getelementptr inbounds double, double* %B, i32 %j
      %val = load double, double* %B_idx
      br label %bodyB

    ; bodyB: B[j] = 0; C[j] = val
    bodyB:
      ; Overwrites the element %val was loaded from, reusing %B_idx from bodyA.
      store double 0.0, double* %B_idx
      %C_idx = getelementptr inbounds double, double* %C, i32 %j
      ; After this store C[j] holds the same value as %val.
      store double %val, double* %C_idx
      br label %bodyC

    ; bodyC: A[j] = val
    bodyC:
      %A_idx = getelementptr inbounds double, double* %A, i32 %j
      ; Uses %val after B[j] has been zeroed in bodyB; per the description at
      ; the top of the file, C[j] is the expected source when forwarding here.
      store double %val, double* %A_idx
      br label %inc

; Loop latch: advance the induction variable and branch back to the header.
inc:
  %j.inc = add nuw nsw i32 %j, 1
  br label %for

; Loop exit: falls through to the single return block.
exit:
  br label %return

return:
  ret void
}


; CHECK: Statistics {
; CHECK:     Known loads forwarded: 2
; CHECK:     Operand trees forwarded: 2
; CHECK:     Statements with forwarded operand trees: 2
; CHECK: }

; CHECK-NEXT: After statements {
; CHECK-NEXT:     Stmt_bodyA
; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_B[i0] };
; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_val[] };
; CHECK-NEXT:             Instructions {
; CHECK-NEXT:                   %val = load double, double* %B_idx, align 8
; CHECK-NEXT:             }
; CHECK-NEXT:     Stmt_bodyB
; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT:                 ;
; CHECK-NEXT:            new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_C[i0] };
; CHECK-NEXT:             Instructions {
; CHECK-NEXT:                   %val = load double, double* %B_idx, align 8
; CHECK-NEXT:                   store double 0.000000e+00, double* %B_idx, align 8
; CHECK-NEXT:                   store double %val, double* %C_idx, align 8
; CHECK-NEXT:             }
; CHECK-NEXT:     Stmt_bodyC
; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT:                 ;
; CHECK-NEXT:            new: [n] -> { Stmt_bodyC[i0] -> MemRef_C[i0] };
; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT:                 [n] -> { Stmt_bodyC[i0] -> MemRef_A[i0] };
; CHECK-NEXT:             Instructions {
; CHECK-NEXT:                   %val = load double, double* %B_idx, align 8
; CHECK-NEXT:                   store double %val, double* %A_idx, align 8
; CHECK-NEXT:             }
; CHECK-NEXT: }
[ScopBuilder] Make -polly-stmt-granularity=scalar-indep the default. Splitting basic blocks into multiple statements if there are now additional scalar dependencies gives more freedom to the scheduler, but more statements also means higher compile-time complexity. Switch to finer statement granularity, the additional compile time should be limited by the number of operations quota. The regression tests are written for the -polly-stmt-granularity=bb setting, therefore we add that flag to those tests that break with the new default. Some of the tests only fail because the statements are named differently due to a basic block resulting in multiple statements, but which are removed during simplification of statements without side-effects. Previous commits tried to reduce this effect, but it is not completely avoidable. Differential Revision: https://reviews.llvm.org/D42151 llvm-svn: 324169 2018-02-03 14:59:47 +08:00			`; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-optree -analyze < %s \| FileCheck %s -match-full-lines`
[ForwardOpTree] Use known array content analysis to forward load instructions. This is an addition to the -polly-optree pass that reuses the array content analysis from DeLICM to find array elements that contain the same value as the value loaded when the target statement instance is executed. The analysis is now enabled by default. The known content analysis could also be used to rematerialize any llvm::Value that was written to some array element, but currently only loads are forwarded. Differential Revision: https://reviews.llvm.org/D36380 llvm-svn: 310279 2017-08-08 02:40:29 +08:00			`;`
[test] Add descriptions and pseudocode to tests. NFC. llvm-svn: 310385 2017-08-09 01:26:19 +08:00			`; To forward %val, B[j] cannot be reused in bodyC because it is overwritten`
			`; between. Verify that instead the alternative C[j] is used.`
			`;`
			`; for (int j = 0; j < n; j += 1) {`
			`; bodyA:`
			`; double val = B[j];`
			`;`
			`; bodyB:`
			`; B[j] = 0;`
			`; C[j] = val;`
			`;`
			`; bodyC:`
			`; A[j] = val;`
			`; }`
			`;`
[ForwardOpTree] Use known array content analysis to forward load instructions. This is an addition to the -polly-optree pass that reuses the array content analysis from DeLICM to find array elements that contain the same value as the value loaded when the target statement instance is executed. The analysis is now enabled by default. The known content analysis could also be used to rematerialize any llvm::Value that was written to some array element, but currently only loads are forwarded. Differential Revision: https://reviews.llvm.org/D36380 llvm-svn: 310279 2017-08-08 02:40:29 +08:00			`define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B, double* noalias nonnull %C) {`
			`entry:`
			`br label %for`

			`for:`
			`%j = phi i32 [0, %entry], [%j.inc, %inc]`
			`%j.cmp = icmp slt i32 %j, %n`
			`br i1 %j.cmp, label %bodyA, label %exit`

			`bodyA:`
			`%B_idx = getelementptr inbounds double, double* %B, i32 %j`
			`%val = load double, double* %B_idx`
			`br label %bodyB`

			`bodyB:`
			`store double 0.0, double* %B_idx`
			`%C_idx = getelementptr inbounds double, double* %C, i32 %j`
			`store double %val, double* %C_idx`
			`br label %bodyC`

			`bodyC:`
			`%A_idx = getelementptr inbounds double, double* %A, i32 %j`
			`store double %val, double* %A_idx`
			`br label %inc`

			`inc:`
			`%j.inc = add nuw nsw i32 %j, 1`
			`br label %for`

			`exit:`
			`br label %return`

			`return:`
			`ret void`
			`}`


			`; CHECK: Statistics {`
			`; CHECK: Known loads forwarded: 2`
			`; CHECK: Operand trees forwarded: 2`
			`; CHECK: Statements with forwarded operand trees: 2`
			`; CHECK: }`

			`; CHECK-NEXT: After statements {`
			`; CHECK-NEXT: Stmt_bodyA`
			`; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]`
			`; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_B[i0] };`
			`; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]`
			`; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_val[] };`
			`; CHECK-NEXT: Instructions {`
Infer alignment of unmarked loads in IR/bitcode parsing. For IR generated by a compiler, this is really simple: you just take the datalayout from the beginning of the file, and apply it to all the IR later in the file. For optimization testcases that don't care about the datalayout, this is also really simple: we just use the default datalayout. The complexity here comes from the fact that some LLVM tools allow overriding the datalayout: some tools have an explicit flag for this, some tools will infer a datalayout based on the code generation target. Supporting this properly required plumbing through a bunch of new machinery: we want to allow overriding the datalayout after the datalayout is parsed from the file, but before we use any information from it. Therefore, IR/bitcode parsing now has a callback to allow tools to compute the datalayout at the appropriate time. Not sure if I covered all the LLVM tools that want to use the callback. (clang? lli? Misc IR manipulation tools like llvm-link?). But this is at least enough for all the LLVM regression tests, and IR without a datalayout is not something frontends should generate. This change had some sort of weird effects for certain CodeGen regression tests: if the datalayout is overridden with a datalayout with a different program or stack address space, we now parse IR based on the overridden datalayout, instead of the one written in the file (or the default one, if none is specified). This broke a few AVR tests, and one AMDGPU test. Outside the CodeGen tests I mentioned, the test changes are all just fixing CHECK lines and moving around datalayout lines in weird places. Differential Revision: https://reviews.llvm.org/D78403 2020-05-15 03:59:45 +08:00			`; CHECK-NEXT: %val = load double, double* %B_idx, align 8`
[ForwardOpTree] Use known array content analysis to forward load instructions. This is an addition to the -polly-optree pass that reuses the array content analysis from DeLICM to find array elements that contain the same value as the value loaded when the target statement instance is executed. The analysis is now enabled by default. The known content analysis could also be used to rematerialize any llvm::Value that was written to some array element, but currently only loads are forwarded. Differential Revision: https://reviews.llvm.org/D36380 llvm-svn: 310279 2017-08-08 02:40:29 +08:00			`; CHECK-NEXT: }`
			`; CHECK-NEXT: Stmt_bodyB`
			`; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]`
Port ScopInfo to the isl cpp bindings Summary: Most changes are mechanical, but in one place I changed the program semantics by fixing a likely bug: In `Scop::hasFeasibleRuntimeContext()`, I'm now explicitely handling the error-case. Before, when the call to `addNonEmptyDomainConstraints()` returned a null set, this (probably) accidentally worked because isl_bool_error converts to true. I'm checking for nullptr now. Reviewers: grosser, Meinersbur, bollu Reviewed By: Meinersbur Subscribers: nemanjai, kbarton, pollydev, llvm-commits Differential Revision: https://reviews.llvm.org/D39971 llvm-svn: 318632 2017-11-20 06:13:34 +08:00			`; CHECK-NEXT: ;`
[ForwardOpTree] Use known array content analysis to forward load instructions. This is an addition to the -polly-optree pass that reuses the array content analysis from DeLICM to find array elements that contain the same value as the value loaded when the target statement instance is executed. The analysis is now enabled by default. The known content analysis could also be used to rematerialize any llvm::Value that was written to some array element, but currently only loads are forwarded. Differential Revision: https://reviews.llvm.org/D36380 llvm-svn: 310279 2017-08-08 02:40:29 +08:00			`; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };`
			`; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]`
			`; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };`
			`; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]`
			`; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_C[i0] };`
			`; CHECK-NEXT: Instructions {`
Infer alignment of unmarked loads in IR/bitcode parsing. For IR generated by a compiler, this is really simple: you just take the datalayout from the beginning of the file, and apply it to all the IR later in the file. For optimization testcases that don't care about the datalayout, this is also really simple: we just use the default datalayout. The complexity here comes from the fact that some LLVM tools allow overriding the datalayout: some tools have an explicit flag for this, some tools will infer a datalayout based on the code generation target. Supporting this properly required plumbing through a bunch of new machinery: we want to allow overriding the datalayout after the datalayout is parsed from the file, but before we use any information from it. Therefore, IR/bitcode parsing now has a callback to allow tools to compute the datalayout at the appropriate time. Not sure if I covered all the LLVM tools that want to use the callback. (clang? lli? Misc IR manipulation tools like llvm-link?). But this is at least enough for all the LLVM regression tests, and IR without a datalayout is not something frontends should generate. This change had some sort of weird effects for certain CodeGen regression tests: if the datalayout is overridden with a datalayout with a different program or stack address space, we now parse IR based on the overridden datalayout, instead of the one written in the file (or the default one, if none is specified). This broke a few AVR tests, and one AMDGPU test. Outside the CodeGen tests I mentioned, the test changes are all just fixing CHECK lines and moving around datalayout lines in weird places. Differential Revision: https://reviews.llvm.org/D78403 2020-05-15 03:59:45 +08:00			`; CHECK-NEXT: %val = load double, double* %B_idx, align 8`
Fix polly tests after D79968. 2020-05-16 06:15:09 +08:00			`; CHECK-NEXT: store double 0.000000e+00, double* %B_idx, align 8`
			`; CHECK-NEXT: store double %val, double* %C_idx, align 8`
[ForwardOpTree] Use known array content analysis to forward load instructions. This is an addition to the -polly-optree pass that reuses the array content analysis from DeLICM to find array elements that contain the same value as the value loaded when the target statement instance is executed. The analysis is now enabled by default. The known content analysis could also be used to rematerialize any llvm::Value that was written to some array element, but currently only loads are forwarded. Differential Revision: https://reviews.llvm.org/D36380 llvm-svn: 310279 2017-08-08 02:40:29 +08:00			`; CHECK-NEXT: }`
			`; CHECK-NEXT: Stmt_bodyC`
			`; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]`
Port ScopInfo to the isl cpp bindings Summary: Most changes are mechanical, but in one place I changed the program semantics by fixing a likely bug: In `Scop::hasFeasibleRuntimeContext()`, I'm now explicitely handling the error-case. Before, when the call to `addNonEmptyDomainConstraints()` returned a null set, this (probably) accidentally worked because isl_bool_error converts to true. I'm checking for nullptr now. Reviewers: grosser, Meinersbur, bollu Reviewed By: Meinersbur Subscribers: nemanjai, kbarton, pollydev, llvm-commits Differential Revision: https://reviews.llvm.org/D39971 llvm-svn: 318632 2017-11-20 06:13:34 +08:00			`; CHECK-NEXT: ;`
[ForwardOpTree] Use known array content analysis to forward load instructions. This is an addition to the -polly-optree pass that reuses the array content analysis from DeLICM to find array elements that contain the same value as the value loaded when the target statement instance is executed. The analysis is now enabled by default. The known content analysis could also be used to rematerialize any llvm::Value that was written to some array element, but currently only loads are forwarded. Differential Revision: https://reviews.llvm.org/D36380 llvm-svn: 310279 2017-08-08 02:40:29 +08:00			`; CHECK-NEXT: new: [n] -> { Stmt_bodyC[i0] -> MemRef_C[i0] };`
			`; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]`
			`; CHECK-NEXT: [n] -> { Stmt_bodyC[i0] -> MemRef_A[i0] };`
			`; CHECK-NEXT: Instructions {`
Infer alignment of unmarked loads in IR/bitcode parsing. For IR generated by a compiler, this is really simple: you just take the datalayout from the beginning of the file, and apply it to all the IR later in the file. For optimization testcases that don't care about the datalayout, this is also really simple: we just use the default datalayout. The complexity here comes from the fact that some LLVM tools allow overriding the datalayout: some tools have an explicit flag for this, some tools will infer a datalayout based on the code generation target. Supporting this properly required plumbing through a bunch of new machinery: we want to allow overriding the datalayout after the datalayout is parsed from the file, but before we use any information from it. Therefore, IR/bitcode parsing now has a callback to allow tools to compute the datalayout at the appropriate time. Not sure if I covered all the LLVM tools that want to use the callback. (clang? lli? Misc IR manipulation tools like llvm-link?). But this is at least enough for all the LLVM regression tests, and IR without a datalayout is not something frontends should generate. This change had some sort of weird effects for certain CodeGen regression tests: if the datalayout is overridden with a datalayout with a different program or stack address space, we now parse IR based on the overridden datalayout, instead of the one written in the file (or the default one, if none is specified). This broke a few AVR tests, and one AMDGPU test. Outside the CodeGen tests I mentioned, the test changes are all just fixing CHECK lines and moving around datalayout lines in weird places. Differential Revision: https://reviews.llvm.org/D78403 2020-05-15 03:59:45 +08:00			`; CHECK-NEXT: %val = load double, double* %B_idx, align 8`
Fix polly tests after D79968. 2020-05-16 06:15:09 +08:00			`; CHECK-NEXT: store double %val, double* %A_idx, align 8`
[ForwardOpTree] Use known array content analysis to forward load instructions. This is an addition to the -polly-optree pass that reuses the array content analysis from DeLICM to find array elements that contain the same value as the value loaded when the target statement instance is executed. The analysis is now enabled by default. The known content analysis could also be used to rematerialize any llvm::Value that was written to some array element, but currently only loads are forwarded. Differential Revision: https://reviews.llvm.org/D36380 llvm-svn: 310279 2017-08-08 02:40:29 +08:00			`; CHECK-NEXT: }`
			`; CHECK-NEXT: }`