llvm-project/polly/test/ScopInfo/invariant_load_zext_paramet...

; RUN: opt %loadPolly -polly-scops -polly-invariant-load-hoisting=true -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-codegen -polly-invariant-load-hoisting=true -S < %s | FileCheck %s --check-prefix=CODEGEN
;
;    void f(int *I0, int *I1, int *V) {
;      for (int i = 0; i < 1000; i++) {
;        V[i] += *I0;
;        if ((unsigned long)(unsigned)(*I0) == 0)
;          V[i] += *I1;
;      }
;    }
;
; CHECK:         Assumed Context:
; CHECK-NEXT:      [loadI0] -> {  :  }
; CHECK-NEXT:    Invalid Context:
; CHECK-NEXT:      [loadI0] -> {  : loadI0 < 0 }
;
; CHECK:   p0: %loadI0
;
; CHECK:       Stmt_if_then
; CHECK-NEXT:    Domain :=
; CHECK-NEXT:      [loadI0] -> { Stmt_if_then[i0] : loadI0 = 0 and 0 <= i0 <= 999 };
;
; CODEGEN:      polly.preload.begin:
; CODEGEN-NEXT:   %polly.access.I0 = getelementptr i32, i32* %I0, i64 0
; CODEGEN-NEXT:   %polly.access.I0.load = load i32, i32* %polly.access.I0
; CODEGEN-NEXT:   store i32 %polly.access.I0.load, i32* %loadI1a.preload.s2a
; CODEGEN-NEXT:   %0 = sext i32 %polly.access.I0.load to i64
; CODEGEN-NEXT:   %1 = icmp eq i64 %0, 0
; CODEGEN-NEXT:   br label %polly.preload.cond
;
; CODEGEN:      polly.preload.cond:
; CODEGEN-NEXT:   br i1 %1, label %polly.preload.exec, label %polly.preload.merge
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Test kernel: a loop over i = 0 .. 999 that adds the value loaded from %I0 to
; V[i] and then, when the zero-extended value of a second load from %I0
; equals 0, also adds the value loaded from %I1 to V[i].
;
; NOTE: value and block names in this function (%loadI0, %loadI1a, if.then,
; ...) are referenced by the expectations at the top of this file; do not
; rename them without updating those lines.
define void @f(i32* %I0, i32* %I1, i32* %V) {
entry:
  br label %for.cond

; Loop header: %indvars.iv starts at 0 and the loop is left once it reaches
; 1000.
for.cond:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
  %exitcond = icmp ne i64 %indvars.iv, 1000
  br i1 %exitcond, label %for.body, label %for.end

; Unconditional part of the loop body.
for.body:                                         ; preds = %for.cond
  ; First load from %I0. Despite its name, %loadI1a reads %I0, not %I1.
  %loadI1a = load i32, i32* %I0, align 4
  ; V[i] += %loadI1a
  %arrayidx = getelementptr inbounds i32, i32* %V, i64 %indvars.iv
  %loadI1a1 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %loadI1a1, %loadI1a
  store i32 %add, i32* %arrayidx, align 4
  ; Second load from the same address %I0; this is the value the expectations
  ; at the top of the file list as parameter p0.
  %loadI0 = load i32, i32* %I0, align 4
  ; The i32 value is zero-extended (not sign-extended) to i64 before the
  ; comparison with 0. The expectations at the top of the file record an
  ; invalid context of "loadI0 < 0" for this scop.
  %loadI0ext = zext i32 %loadI0 to i64
  %cmp1 = icmp eq i64 %loadI0ext, 0
  br i1 %cmp1, label %if.then, label %if.end

; Conditional part, executed only when the zero-extended %loadI0 is 0:
; V[i] += *%I1.
if.then:                                          ; preds = %for.body
  %loadI1b = load i32, i32* %I1, align 4
  ; Same address computation as %arrayidx in for.body.
  %arrayidx4 = getelementptr inbounds i32, i32* %V, i64 %indvars.iv
  %loadI1a4 = load i32, i32* %arrayidx4, align 4
  %add5 = add nsw i32 %loadI1a4, %loadI1b
  store i32 %add5, i32* %arrayidx4, align 4
  br label %if.end

; Join point of the conditional.
if.end:                                           ; preds = %if.then, %for.body
  br label %for.inc

; Loop latch: advance the induction variable by one.
for.inc:                                          ; preds = %if.end
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  br label %for.cond

; Loop exit.
for.end:                                          ; preds = %for.cond
  ret void
}
[tests] Force invariant load hoisting for test cases that need it This will make it easier to switch the default of Polly's invariant load hoisting strategy and also makes it very clear that these test cases indeed require invariant code hoisting to work. llvm-svn: 278667 2016-08-15 21:27:49 +08:00			`; RUN: opt %loadPolly -polly-scops -polly-invariant-load-hoisting=true -analyze < %s \| FileCheck %s`
			`; RUN: opt %loadPolly -polly-codegen -polly-invariant-load-hoisting=true -S < %s \| FileCheck %s --check-prefix=CODEGEN`
[FIX] Restructure invariant load equivalence classes Sorting is replaced by a demand driven code generation that will pre-load a value when it is needed or, if it was not needed before, at some point determined by the order of invariant accesses in the program. Only in very little cases this demand driven pre-loading will kick in, though it will prevent us from generating faulty code. An example where it is needed is shown in: test/ScopInfo/invariant_loads_complicated_dependences.ll Invariant loads that appear in parameters but are not on the top-level (e.g., the parameter is not a SCEVUnknown) will now be treated correctly. Differential Revision: http://reviews.llvm.org/D13831 llvm-svn: 250655 2015-10-18 20:39:19 +08:00			`;`
			`; void f(int *I0, int *I1, int *V) {`
			`; for (int i = 0; i < 1000; i++) {`
			`; if ((long)(*I0) == 0)`
			`; V[i] += *I1;`
			`; }`
			`; }`
			`;`
Model zext-extend instructions A zero-extended value can be interpreted as a piecewise defined signed value. If the value was non-negative it stays the same, otherwise it is the sum of the original value and 2^n where n is the bit-width of the original (or operand) type. Examples: zext i8 127 to i32 -> { [127] } zext i8 -1 to i32 -> { [256 + (-1)] } = { [255] } zext i8 %v to i32 -> [v] -> { [v] \| v >= 0; [256 + v] \| v < 0 } However, LLVM/Scalar Evolution uses zero-extend (potentially led by a truncate) to represent some forms of modulo computation. The left-hand side of the condition in the code below would result in the SCEV "zext i1 <false, +, true>for.body" which is just another description of the C expression "i & 1 != 0" or, equivalently, "i % 2 != 0". for (i = 0; i < N; i++) if (i & 1 != 0 /* == i % 2 */) /* do something */ If we do not make the modulo explicit but only use the mechanism described above we will get the very restrictive assumption "N < 3", because for all values of N >= 3 the SCEVAddRecExpr operand of the zero-extend would wrap. Alternatively, we can make the modulo in the operand explicit in the resulting piecewise function and thereby avoid the assumption on N. For the example this would result in the following piecewise affine function: { [i0] -> [(1)] : 2*floor((-1 + i0)/2) = -1 + i0; [i0] -> [(0)] : 2*floor((i0)/2) = i0 } To this end we can first determine if the (immediate) operand of the zero-extend can wrap and, in case it might, we will use explicit modulo semantics to compute the result instead of emitting non-wrapping assumptions. Note that operands with large bit-widths are less likely to be negative because it would result in a very large access offset or loop bound after the zero-extend. To this end one can optimistically assume the operand to be positive and avoid the piecewise definition if the bit-width is bigger than some threshold (here MaxZextSmallBitWidth). 
We choose to go with a hybrid solution of all modeling techniques described above. For small bit-widths (up to MaxZextSmallBitWidth) we will model the wrapping explicitly and use a piecewise defined function. However, if the bit-width is bigger than MaxZextSmallBitWidth we will employ overflow assumptions and assume the "former negative" piece will not exist. llvm-svn: 267408 2016-04-25 22:01:36 +08:00			`; CHECK: Assumed Context:`
			`; CHECK-NEXT: [loadI0] -> { : }`
			`; CHECK-NEXT: Invalid Context:`
			`; CHECK-NEXT: [loadI0] -> { : loadI0 < 0 }`
[FIX] Restructure invariant load equivalence classes Sorting is replaced by a demand driven code generation that will pre-load a value when it is needed or, if it was not needed before, at some point determined by the order of invariant accesses in the program. Only in very little cases this demand driven pre-loading will kick in, though it will prevent us from generating faulty code. An example where it is needed is shown in: test/ScopInfo/invariant_loads_complicated_dependences.ll Invariant loads that appear in parameters but are not on the top-level (e.g., the parameter is not a SCEVUnknown) will now be treated correctly. Differential Revision: http://reviews.llvm.org/D13831 llvm-svn: 250655 2015-10-18 20:39:19 +08:00			`;`
Model zext-extend instructions A zero-extended value can be interpreted as a piecewise defined signed value. If the value was non-negative it stays the same, otherwise it is the sum of the original value and 2^n where n is the bit-width of the original (or operand) type. Examples: zext i8 127 to i32 -> { [127] } zext i8 -1 to i32 -> { [256 + (-1)] } = { [255] } zext i8 %v to i32 -> [v] -> { [v] \| v >= 0; [256 + v] \| v < 0 } However, LLVM/Scalar Evolution uses zero-extend (potentially led by a truncate) to represent some forms of modulo computation. The left-hand side of the condition in the code below would result in the SCEV "zext i1 <false, +, true>for.body" which is just another description of the C expression "i & 1 != 0" or, equivalently, "i % 2 != 0". for (i = 0; i < N; i++) if (i & 1 != 0 /* == i % 2 */) /* do something */ If we do not make the modulo explicit but only use the mechanism described above we will get the very restrictive assumption "N < 3", because for all values of N >= 3 the SCEVAddRecExpr operand of the zero-extend would wrap. Alternatively, we can make the modulo in the operand explicit in the resulting piecewise function and thereby avoid the assumption on N. For the example this would result in the following piecewise affine function: { [i0] -> [(1)] : 2*floor((-1 + i0)/2) = -1 + i0; [i0] -> [(0)] : 2*floor((i0)/2) = i0 } To this end we can first determine if the (immediate) operand of the zero-extend can wrap and, in case it might, we will use explicit modulo semantics to compute the result instead of emitting non-wrapping assumptions. Note that operands with large bit-widths are less likely to be negative because it would result in a very large access offset or loop bound after the zero-extend. To this end one can optimistically assume the operand to be positive and avoid the piecewise definition if the bit-width is bigger than some threshold (here MaxZextSmallBitWidth). 
We choose to go with a hybrid solution of all modeling techniques described above. For small bit-widths (up to MaxZextSmallBitWidth) we will model the wrapping explicitly and use a piecewise defined function. However, if the bit-width is bigger than MaxZextSmallBitWidth we will employ overflow assumptions and assume the "former negative" piece will not exist. llvm-svn: 267408 2016-04-25 22:01:36 +08:00			`; CHECK: p0: %loadI0`
			`;`
			`; CHECK: Stmt_if_then`
			`; CHECK-NEXT: Domain :=`
			`; CHECK-NEXT: [loadI0] -> { Stmt_if_then[i0] : loadI0 = 0 and 0 <= i0 <= 999 };`
[FIX] Restructure invariant load equivalence classes Sorting is replaced by a demand driven code generation that will pre-load a value when it is needed or, if it was not needed before, at some point determined by the order of invariant accesses in the program. Only in very little cases this demand driven pre-loading will kick in, though it will prevent us from generating faulty code. An example where it is needed is shown in: test/ScopInfo/invariant_loads_complicated_dependences.ll Invariant loads that appear in parameters but are not on the top-level (e.g., the parameter is not a SCEVUnknown) will now be treated correctly. Differential Revision: http://reviews.llvm.org/D13831 llvm-svn: 250655 2015-10-18 20:39:19 +08:00			`;`
			`; CODEGEN: polly.preload.begin:`
This reverts recent expression type changes The recent expression type changes still need more discussion, which will happen on phabricator or on the mailing list. The precise list of commits reverted are: - "Refactor division generation code" - "[NFC] Generate runtime checks after the SCoP" - "[FIX] Determine insertion point during SCEV expansion" - "Look through IntToPtr & PtrToInt instructions" - "Use minimal types for generated expressions" - "Temporarily promote values to i64 again" - "[NFC] Avoid unnecessary comparison for min/max expressions" - "[Polly] Fix -Wunused-variable warnings (NFC)" - "[NFC] Simplify min/max expression generation" - "Simplify the type adjustment in the IslExprBuilder" Some of them are just reverted as we would otherwise get conflicts. I will try to re-commit them if possible. llvm-svn: 272483 2016-06-12 03:17:15 +08:00			`; CODEGEN-NEXT: %polly.access.I0 = getelementptr i32, i32* %I0, i64 0`
[FIX] Restructure invariant load equivalence classes Sorting is replaced by a demand driven code generation that will pre-load a value when it is needed or, if it was not needed before, at some point determined by the order of invariant accesses in the program. Only in very little cases this demand driven pre-loading will kick in, though it will prevent us from generating faulty code. An example where it is needed is shown in: test/ScopInfo/invariant_loads_complicated_dependences.ll Invariant loads that appear in parameters but are not on the top-level (e.g., the parameter is not a SCEVUnknown) will now be treated correctly. Differential Revision: http://reviews.llvm.org/D13831 llvm-svn: 250655 2015-10-18 20:39:19 +08:00			`; CODEGEN-NEXT: %polly.access.I0.load = load i32, i32* %polly.access.I0`
[ScopBuilder] Build invariant loads separately. Create the MemoryAccesses of invariant loads separately and before all other MemoryAccesses. Invariant loads are classified as synthesizable and therefore are not contained in any statement. When iterating over all instructions of all statements, the invariant loads are consequently not processed and iterating over them separately becomes necessary. This patch can change the order in which MemoryAccesses are created, but otherwise has no functional change. Some temporary code is introduced to ensure correctness, but will be removed in the next commit. llvm-svn: 314664 2017-10-02 19:41:27 +08:00			`; CODEGEN-NEXT: store i32 %polly.access.I0.load, i32* %loadI1a.preload.s2a`
This reverts recent expression type changes The recent expression type changes still need more discussion, which will happen on phabricator or on the mailing list. The precise list of commits reverted are: - "Refactor division generation code" - "[NFC] Generate runtime checks after the SCoP" - "[FIX] Determine insertion point during SCEV expansion" - "Look through IntToPtr & PtrToInt instructions" - "Use minimal types for generated expressions" - "Temporarily promote values to i64 again" - "[NFC] Avoid unnecessary comparison for min/max expressions" - "[Polly] Fix -Wunused-variable warnings (NFC)" - "[NFC] Simplify min/max expression generation" - "Simplify the type adjustment in the IslExprBuilder" Some of them are just reverted as we would otherwise get conflicts. I will try to re-commit them if possible. llvm-svn: 272483 2016-06-12 03:17:15 +08:00			`; CODEGEN-NEXT: %0 = sext i32 %polly.access.I0.load to i64`
			`; CODEGEN-NEXT: %1 = icmp eq i64 %0, 0`
[FIX] Restructure invariant load equivalence classes Sorting is replaced by a demand driven code generation that will pre-load a value when it is needed or, if it was not needed before, at some point determined by the order of invariant accesses in the program. Only in very little cases this demand driven pre-loading will kick in, though it will prevent us from generating faulty code. An example where it is needed is shown in: test/ScopInfo/invariant_loads_complicated_dependences.ll Invariant loads that appear in parameters but are not on the top-level (e.g., the parameter is not a SCEVUnknown) will now be treated correctly. Differential Revision: http://reviews.llvm.org/D13831 llvm-svn: 250655 2015-10-18 20:39:19 +08:00			`; CODEGEN-NEXT: br label %polly.preload.cond`
			`;`
			`; CODEGEN: polly.preload.cond:`
This reverts recent expression type changes The recent expression type changes still need more discussion, which will happen on phabricator or on the mailing list. The precise list of commits reverted are: - "Refactor division generation code" - "[NFC] Generate runtime checks after the SCoP" - "[FIX] Determine insertion point during SCEV expansion" - "Look through IntToPtr & PtrToInt instructions" - "Use minimal types for generated expressions" - "Temporarily promote values to i64 again" - "[NFC] Avoid unnecessary comparison for min/max expressions" - "[Polly] Fix -Wunused-variable warnings (NFC)" - "[NFC] Simplify min/max expression generation" - "Simplify the type adjustment in the IslExprBuilder" Some of them are just reverted as we would otherwise get conflicts. I will try to re-commit them if possible. llvm-svn: 272483 2016-06-12 03:17:15 +08:00			`; CODEGEN-NEXT: br i1 %1, label %polly.preload.exec, label %polly.preload.merge`
[FIX] Restructure invariant load equivalence classes Sorting is replaced by a demand driven code generation that will pre-load a value when it is needed or, if it was not needed before, at some point determined by the order of invariant accesses in the program. Only in very little cases this demand driven pre-loading will kick in, though it will prevent us from generating faulty code. An example where it is needed is shown in: test/ScopInfo/invariant_loads_complicated_dependences.ll Invariant loads that appear in parameters but are not on the top-level (e.g., the parameter is not a SCEVUnknown) will now be treated correctly. Differential Revision: http://reviews.llvm.org/D13831 llvm-svn: 250655 2015-10-18 20:39:19 +08:00			`;`
			`target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"`

			`define void @f(i32* %I0, i32* %I1, i32* %V) {`
			`entry:`
			`br label %for.cond`

			`for.cond: ; preds = %for.inc, %entry`
			`%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]`
			`%exitcond = icmp ne i64 %indvars.iv, 1000`
			`br i1 %exitcond, label %for.body, label %for.end`

			`for.body: ; preds = %for.cond`
			`%loadI1a = load i32, i32* %I0, align 4`
			`%arrayidx = getelementptr inbounds i32, i32* %V, i64 %indvars.iv`
			`%loadI1a1 = load i32, i32* %arrayidx, align 4`
			`%add = add nsw i32 %loadI1a1, %loadI1a`
			`store i32 %add, i32* %arrayidx, align 4`
			`%loadI0 = load i32, i32* %I0, align 4`
			`%loadI0ext = zext i32 %loadI0 to i64`
			`%cmp1 = icmp eq i64 %loadI0ext, 0`
			`br i1 %cmp1, label %if.then, label %if.end`

			`if.then: ; preds = %for.body`
			`%loadI1b = load i32, i32* %I1, align 4`
			`%arrayidx4 = getelementptr inbounds i32, i32* %V, i64 %indvars.iv`
			`%loadI1a4 = load i32, i32* %arrayidx4, align 4`
			`%add5 = add nsw i32 %loadI1a4, %loadI1b`
			`store i32 %add5, i32* %arrayidx4, align 4`
			`br label %if.end`

			`if.end: ; preds = %if.then, %for.body`
			`br label %for.inc`

			`for.inc: ; preds = %if.end`
			`%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1`
			`br label %for.cond`

			`for.end: ; preds = %for.cond`
			`ret void`
			`}`