; llvm-project/llvm/test/Transforms/GVN/pre-load.ll

; Load-PRE tests for GVN. The command below runs opt with -basicaa, -gvn and
; -enable-load-pre, prints textual IR (-S) and pipes it to FileCheck, which
; matches the check directives embedded in this file.
; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
; Data layout: little-endian ("e"), 64-bit pointers ("p:64:64:64"), native
; integer widths 8/16/32/64 ("n8:16:32:64").
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; test1: simplest diamond. block3 stores 0 to %p, block2 does not touch memory,
; and block4 loads %p. The checks expect a load of %p to show up in block2 and
; block4 to consist of just a phi followed by the return.
define i32 @test1(i32* %p, i1 %C) {
; CHECK: @test1
block1:
	br i1 %C, label %block2, label %block3

block2:
 br label %block4
; The load of %p is expected to be inserted into this block.
; CHECK: block2:
; CHECK-NEXT: load i32* %p

block3:
  ; Value of %p is known on this path (0).
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32* %p
  ret i32 %PRE
; The load above is expected to become a phi.
; CHECK: block4:
; CHECK-NEXT: phi i32
; CHECK-NEXT: ret i32
}

; This is a simple phi translation case.
; The loaded pointer %P2 is itself a phi: %p when coming from block3 (which
; stores 0 to %p) and %q when coming from block2. The checks expect a load of
; %q in block2 and no load left in block4.
define i32 @test2(i32* %p, i32* %q, i1 %C) {
; CHECK: @test2
block1:
	br i1 %C, label %block2, label %block3

block2:
 br label %block4
; %P2 translates to %q along this edge, so the inserted load reads %q.
; CHECK: block2:
; CHECK-NEXT: load i32* %q

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %PRE = load i32* %P2
  ret i32 %PRE
; block4 must start with an i32 phi and contain no load before the return.
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

; This is a PRE case that requires phi translation through a GEP.
; block4 loads from (phi(%p, %q) + 1 element). Along the block2 edge that
; address is %q + 1, which already exists as %B in block1; the checks expect
; block2 to load directly from %B.
define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK: @test3
block1:
  %B = getelementptr i32* %q, i32 1
  ; Gives %B a use in block1 (presumably so it is not dead -- TODO confirm).
  store i32* %B, i32** %Hack
	br i1 %C, label %block2, label %block3

block2:
 br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32* %B

block3:
  ; Stores to %p + 1, the address block4 loads from on this path.
  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; block4 must start with an i32 phi and contain no load before the return.
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;; Here the loaded address is available, but the computation is in 'block3'
;; which does not dominate 'block2'.
;; Same shape as test3, except that %B (%q + 1) is computed in block3 instead
;; of block1. The checks only require that some i32 load appears in block2
;; before its branch; they do not name the address.
define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK: @test4
block1:
	br i1 %C, label %block2, label %block3

block2:
 br label %block4
; CHECK: block2:
; CHECK:   load i32*
; CHECK:   br label %block4

block3:
  ; %q + 1 exists only on this path.
  %B = getelementptr i32* %q, i32 1
  store i32* %B, i32** %Hack

  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; block4 must start with an i32 phi and contain no load before the return.
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;void test5(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j] = G[j] + G[j+1];
;}

; Each iteration loads G[indvar] (%scevgep7) and G[indvar+1] (%scevgep) and
; stores the sum to G[indvar]. The checks expect one load in bb.nph (the block
; that enters the loop) and exactly one load remaining in the loop body.
define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK: @test5
entry:
  %0 = add i32 %N, -1           
  %1 = icmp sgt i32 %0, 0       
  br i1 %1, label %bb.nph, label %return

bb.nph:                         
  %tmp = zext i32 %0 to i64     
  br label %bb

; CHECK: bb.nph:
; CHECK: load double*
; CHECK: br label %bb

bb:             
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1                    
  %scevgep = getelementptr double* %G, i64 %tmp6
  %scevgep7 = getelementptr double* %G, i64 %indvar
  %2 = load double* %scevgep7, align 8
  %3 = load double* %scevgep, align 8 
  %4 = fadd double %2, %3             
  store double %4, double* %scevgep7, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp 
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:                               
  ret void
}

;void test6(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; Same loop as test5 except the sum is stored to G[indvar+1] (%scevgep), which
; is the element the next iteration reads through %scevgep7. The checks expect
; one load in bb.nph and exactly one load remaining in the loop body.
define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK: @test6
entry:
  %0 = add i32 %N, -1           
  %1 = icmp sgt i32 %0, 0       
  br i1 %1, label %bb.nph, label %return

bb.nph:                         
  %tmp = zext i32 %0 to i64     
  br label %bb

; CHECK: bb.nph:
; CHECK: load double*
; CHECK: br label %bb

bb:             
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1                    
  %scevgep = getelementptr double* %G, i64 %tmp6
  %scevgep7 = getelementptr double* %G, i64 %indvar
  %2 = load double* %scevgep7, align 8
  %3 = load double* %scevgep, align 8 
  %4 = fadd double %2, %3             
  store double %4, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp 
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:                               
  ret void
}

;void test7(int N, double* G) {
;  long j;
;  G[1] = 1;
;  for (j = 1; j < N - 1; j++)
;      G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
; The loop addresses are %G + (%indvar + 1) and %G + (%indvar + 2). The entry
; block stores to %G + 1, which is the address the first iteration reads
; through %scevgep10 (%indvar is 0 there).
define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
; Anchor to this function, as the other tests in this file do, so the checks
; below start matching at this function's output rather than wherever the
; previous test's checks stopped.
; CHECK: @test7
entry:
  %0 = getelementptr inbounds double* %G, i64 1
  store double 1.000000e+00, double* %0, align 8
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %3 = load double* %scevgep10, align 8
  %4 = load double* %scevgep, align 8
  %5 = fadd double %3, %4
  ; The value stored here is what the next iteration reads via %scevgep10.
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;; Here the loaded address isn't available in 'block2' at all, requiring a new
;; GEP to be inserted into it.
;; Unlike test3/test4, no instruction in this function computes %q + 1. The
;; checks require an i32 load in block2 before its branch, and no load left in
;; block4.
define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK: @test8
block1:
	br i1 %C, label %block2, label %block3

block2:
 br label %block4
; CHECK: block2:
; CHECK:   load i32*
; CHECK:   br label %block4

block3:
  ; Stores to %p + 1, the address block4 loads from on this path.
  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; block4 must start with an i32 phi and contain no load before the return.
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;void test9(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;      G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
; Same loop as test7, but without the store to G[1] in the entry block, so the
; checks additionally expect a load in bb.nph.
define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
; Anchor to this function, as the other tests in this file do, so the checks
; below start matching at this function's output rather than wherever the
; previous test's checks stopped.
; CHECK: @test9
entry:
  ; Unnamed placeholder: it takes the implicit name %0, which keeps the
  ; explicit %1/%2 numbering below valid.
  add i32 0, 0
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

; CHECK: bb.nph:
; CHECK:   load double*
; CHECK:   br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %3 = load double* %scevgep10, align 8
  %4 = load double* %scevgep, align 8
  %5 = fadd double %3, %4
  ; The value stored here is what the next iteration reads via %scevgep10.
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test10(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;      G[j] = G[j] + G[j+1] + G[j-1];
;}

; PR5501
; Each iteration loads three neighbouring elements: %G + %indvar (%scevgep),
; %G + (%indvar + 1) (%scevgep12) and %G + (%indvar + 2) (%scevgep10), and
; stores the sum to %scevgep12. The checks expect two loads in bb.nph and a
; single load remaining in the loop body.
define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
; Anchor to this function, as the other tests in this file do, so the checks
; below start matching at this function's output rather than wherever the
; previous test's checks stopped.
; CHECK: @test10
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 1
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %0 to i64
  %tmp8 = add i64 %tmp, -1
  br label %bb
; CHECK: bb.nph:
; CHECK:   load double*
; CHECK:   load double*
; CHECK:   br label %bb


bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]
  %scevgep = getelementptr double* %G, i64 %indvar
  %tmp9 = add i64 %indvar, 2
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %tmp11 = add i64 %indvar, 1
  %scevgep12 = getelementptr double* %G, i64 %tmp11
  %2 = load double* %scevgep12, align 8
  %3 = load double* %scevgep10, align 8
  %4 = fadd double %2, %3
  %5 = load double* %scevgep, align 8
  %6 = fadd double %4, %5
  store double %6, double* %scevgep12, align 8
  %exitcond = icmp eq i64 %tmp11, %tmp8
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; Test critical edge splitting.
; block2 ends in a conditional branch (to block4 or block5) and block4 also has
; block3 as a predecessor, so the block2 -> block4 edge is critical. The checks
; expect a load of %p immediately followed by an unconditional branch to
; block4, and block4 to begin with an i32 phi.
define i32 @test11(i32* %p, i1 %C, i32 %N) {
; CHECK: @test11
block1:
        br i1 %C, label %block2, label %block3

block2:
 %cond = icmp sgt i32 %N, 1
 br i1 %cond, label %block4, label %block5
; The load must be directly followed by "br label %block4"; block2 itself ends
; in a conditional branch, so these two lines cannot both match inside it.
; CHECK: load i32* %p
; CHECK-NEXT: br label %block4

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32* %p
  br label %block5

block5:
  ; 0 when block4 was skipped, otherwise the value loaded in block4.
  %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]
  ret i32 %ret
; CHECK: block4:
; CHECK-NEXT: phi i32
}
Make BasicAliasAnalysis a normal AliasAnalysis implementation which does normal initialization and normal chaining. Change the default AliasAnalysis implementation to NoAlias. Update StandardCompileOpts.h and friends to explicitly request BasicAliasAnalysis. Update tests to explicitly request -basicaa. llvm-svn: 116720 2010-10-19 02:04:47 +08:00			`; RUN: opt < %s -basicaa -gvn -enable-load-pre -S \| FileCheck %s`
add two simple test cases we now optimize (to one load in the loop each) and one we don't (corresponding to the fixme I added yesterday). llvm-svn: 90012 2009-11-28 02:08:30 +08:00			`target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"`
Implement PRE of loads in the GVN pass with a pretty cheap and straight-forward implementation. This does not require any extra alias analysis queries beyond what we already do for non-local loads. Some programs really really like load PRE. For example, SPASS triggers this ~1000 times, ~300 times in 255.vortex, and ~1500 times on 403.gcc. The biggest limitation to the implementation is that it does not split critical edges. This is a huge killer on many programs and should be addressed after the initial patch is enabled by default. The implementation of this should incidentally speed up rejection of non-local loads because it avoids creating the repl densemap in cases when it won't be used for fully redundant loads. This is currently disabled by default. Before I turn this on, I need to fix a couple of miscompilations in the testsuite, look at compile time performance numbers, and look at perf impact. This is pretty close to ready though. llvm-svn: 60408 2008-12-02 16:16:11 +08:00
filecheckize llvm-svn: 89994 2009-11-27 14:33:09 +08:00			`define i32 @test1(i32* %p, i1 %C) {`
			`; CHECK: @test1`
Implement PRE of loads in the GVN pass with a pretty cheap and straight-forward implementation. This does not require any extra alias analysis queries beyond what we already do for non-local loads. Some programs really really like load PRE. For example, SPASS triggers this ~1000 times, ~300 times in 255.vortex, and ~1500 times on 403.gcc. The biggest limitation to the implementation is that it does not split critical edges. This is a huge killer on many programs and should be addressed after the initial patch is enabled by default. The implementation of this should incidentally speed up rejection of non-local loads because it avoids creating the repl densemap in cases when it won't be used for fully redundant loads. This is currently disabled by default. Before I turn this on, I need to fix a couple of miscompilations in the testsuite, look at compile time performance numbers, and look at perf impact. This is pretty close to ready though. llvm-svn: 60408 2008-12-02 16:16:11 +08:00			`block1:`
			`br i1 %C, label %block2, label %block3`

			`block2:`
			`br label %block4`
filecheckize llvm-svn: 89994 2009-11-27 14:33:09 +08:00			`; CHECK: block2:`
			`; CHECK-NEXT: load i32* %p`
Implement PRE of loads in the GVN pass with a pretty cheap and straight-forward implementation. This does not require any extra alias analysis queries beyond what we already do for non-local loads. Some programs really really like load PRE. For example, SPASS triggers this ~1000 times, ~300 times in 255.vortex, and ~1500 times on 403.gcc. The biggest limitation to the implementation is that it does not split critical edges. This is a huge killer on many programs and should be addressed after the initial patch is enabled by default. The implementation of this should incidentally speed up rejection of non-local loads because it avoids creating the repl densemap in cases when it won't be used for fully redundant loads. This is currently disabled by default. Before I turn this on, I need to fix a couple of miscompilations in the testsuite, look at compile time performance numbers, and look at perf impact. This is pretty close to ready though. llvm-svn: 60408 2008-12-02 16:16:11 +08:00
			`block3:`
add some tests for memdep phi translation + PRE. llvm-svn: 89996 2009-11-27 14:42:42 +08:00			`store i32 0, i32* %p`
Implement PRE of loads in the GVN pass with a pretty cheap and straight-forward implementation. This does not require any extra alias analysis queries beyond what we already do for non-local loads. Some programs really really like load PRE. For example, SPASS triggers this ~1000 times, ~300 times in 255.vortex, and ~1500 times on 403.gcc. The biggest limitation to the implementation is that it does not split critical edges. This is a huge killer on many programs and should be addressed after the initial patch is enabled by default. The implementation of this should incidentally speed up rejection of non-local loads because it avoids creating the repl densemap in cases when it won't be used for fully redundant loads. This is currently disabled by default. Before I turn this on, I need to fix a couple of miscompilations in the testsuite, look at compile time performance numbers, and look at perf impact. This is pretty close to ready though. llvm-svn: 60408 2008-12-02 16:16:11 +08:00			`br label %block4`

			`block4:`
			`%PRE = load i32* %p`
			`ret i32 %PRE`
filecheckize llvm-svn: 89994 2009-11-27 14:33:09 +08:00			`; CHECK: block4:`
			`; CHECK-NEXT: phi i32`
			`; CHECK-NEXT: ret i32`
Implement PRE of loads in the GVN pass with a pretty cheap and straight-forward implementation. This does not require any extra alias analysis queries beyond what we already do for non-local loads. Some programs really really like load PRE. For example, SPASS triggers this ~1000 times, ~300 times in 255.vortex, and ~1500 times on 403.gcc. The biggest limitation to the implementation is that it does not split critical edges. This is a huge killer on many programs and should be addressed after the initial patch is enabled by default. The implementation of this should incidentally speed up rejection of non-local loads because it avoids creating the repl densemap in cases when it won't be used for fully redundant loads. This is currently disabled by default. Before I turn this on, I need to fix a couple of miscompilations in the testsuite, look at compile time performance numbers, and look at perf impact. This is pretty close to ready though. llvm-svn: 60408 2008-12-02 16:16:11 +08:00			`}`
add some tests for memdep phi translation + PRE. llvm-svn: 89996 2009-11-27 14:42:42 +08:00
teach GVN's load PRE to insert computations of the address in predecessors where it is not available. It's unclear how to get this inserted computation into GVN's scalar availability sets, Owen, help? :) llvm-svn: 89997 2009-11-27 16:25:10 +08:00			`; This is a simple phi translation case.`
add some tests for memdep phi translation + PRE. llvm-svn: 89996 2009-11-27 14:42:42 +08:00			`define i32 @test2(i32* %p, i32* %q, i1 %C) {`
			`; CHECK: @test2`
			`block1:`
			`br i1 %C, label %block2, label %block3`

			`block2:`
			`br label %block4`
			`; CHECK: block2:`
			`; CHECK-NEXT: load i32* %q`

			`block3:`
			`store i32 0, i32* %p`
			`br label %block4`

			`block4:`
			`%P2 = phi i32* [%p, %block3], [%q, %block2]`
			`%PRE = load i32* %P2`
			`ret i32 %PRE`
			`; CHECK: block4:`
			`; CHECK-NEXT: phi i32 [`
			`; CHECK-NOT: load`
			`; CHECK: ret i32`
			`}`

teach GVN's load PRE to insert computations of the address in predecessors where it is not available. It's unclear how to get this inserted computation into GVN's scalar availability sets, Owen, help? :) llvm-svn: 89997 2009-11-27 16:25:10 +08:00			`; This is a PRE case that requires phi translation through a GEP.`
add some tests for memdep phi translation + PRE. llvm-svn: 89996 2009-11-27 14:42:42 +08:00			`define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {`
			`; CHECK: @test3`
			`block1:`
			`%B = getelementptr i32* %q, i32 1`
			`store i32* %B, i32** %Hack`
			`br i1 %C, label %block2, label %block3`

			`block2:`
			`br label %block4`
			`; CHECK: block2:`
			`; CHECK-NEXT: load i32* %B`

			`block3:`
			`%A = getelementptr i32* %p, i32 1`
			`store i32 0, i32* %A`
			`br label %block4`

			`block4:`
			`%P2 = phi i32* [%p, %block3], [%q, %block2]`
			`%P3 = getelementptr i32* %P2, i32 1`
			`%PRE = load i32* %P3`
			`ret i32 %PRE`
			`; CHECK: block4:`
			`; CHECK-NEXT: phi i32 [`
			`; CHECK-NOT: load`
			`; CHECK: ret i32`
			`}`
teach GVN's load PRE to insert computations of the address in predecessors where it is not available. It's unclear how to get this inserted computation into GVN's scalar availability sets, Owen, help? :) llvm-svn: 89997 2009-11-27 16:25:10 +08:00
			`;; Here the loaded address is available, but the computation is in 'block3'`
			`;; which does not dominate 'block2'.`
			`define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {`
			`; CHECK: @test4`
			`block1:`
			`br i1 %C, label %block2, label %block3`

			`block2:`
			`br label %block4`
reenable load address insertion in load pre. This allows us to handle cases like this: void test(int N, double* G) { long j; for (j = 1; j < N - 1; j++) G[j+1] = G[j] + G[j+1]; } where G[1] isn't live into the loop. llvm-svn: 90041 2009-11-29 00:08:18 +08:00			`; CHECK: block2:`
			`; CHECK: load i32*`
			`; CHECK: br label %block4`
teach GVN's load PRE to insert computations of the address in predecessors where it is not available. It's unclear how to get this inserted computation into GVN's scalar availability sets, Owen, help? :) llvm-svn: 89997 2009-11-27 16:25:10 +08:00
			`block3:`
			`%B = getelementptr i32* %q, i32 1`
			`store i32* %B, i32** %Hack`

			`%A = getelementptr i32* %p, i32 1`
			`store i32 0, i32* %A`
			`br label %block4`

			`block4:`
			`%P2 = phi i32* [%p, %block3], [%q, %block2]`
			`%P3 = getelementptr i32* %P2, i32 1`
			`%PRE = load i32* %P3`
			`ret i32 %PRE`
reenable load address insertion in load pre. This allows us to handle cases like this: void test(int N, double* G) { long j; for (j = 1; j < N - 1; j++) G[j+1] = G[j] + G[j+1]; } where G[1] isn't live into the loop. llvm-svn: 90041 2009-11-29 00:08:18 +08:00			`; CHECK: block4:`
			`; CHECK-NEXT: phi i32 [`
			`; CHECK-NOT: load`
			`; CHECK: ret i32`
teach GVN's load PRE to insert computations of the address in predecessors where it is not available. It's unclear how to get this inserted computation into GVN's scalar availability sets, Owen, help? :) llvm-svn: 89997 2009-11-27 16:25:10 +08:00			`}`
add two simple test cases we now optimize (to one load in the loop each) and one we don't (corresponding to the fixme I added yesterday). llvm-svn: 90012 2009-11-28 02:08:30 +08:00
			`;void test5(int N, double *G) {`
			`; int j;`
			`; for (j = 0; j < N - 1; j++)`
			`; G[j] = G[j] + G[j+1];`
			`;}`

			`define void @test5(i32 %N, double* nocapture %G) nounwind ssp {`
			`; CHECK: @test5`
			`entry:`
			`%0 = add i32 %N, -1`
			`%1 = icmp sgt i32 %0, 0`
			`br i1 %1, label %bb.nph, label %return`

			`bb.nph:`
			`%tmp = zext i32 %0 to i64`
			`br label %bb`

			`; CHECK: bb.nph:`
			`; CHECK: load double*`
			`; CHECK: br label %bb`

			`bb:`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]`
			`%tmp6 = add i64 %indvar, 1`
			`%scevgep = getelementptr double* %G, i64 %tmp6`
			`%scevgep7 = getelementptr double* %G, i64 %indvar`
			`%2 = load double* %scevgep7, align 8`
			`%3 = load double* %scevgep, align 8`
			`%4 = fadd double %2, %3`
			`store double %4, double* %scevgep7, align 8`
			`%exitcond = icmp eq i64 %tmp6, %tmp`
			`br i1 %exitcond, label %return, label %bb`

			`; Should only be one load in the loop.`
			`; CHECK: bb:`
			`; CHECK: load double*`
			`; CHECK-NOT: load double*`
			`; CHECK: br i1 %exitcond`

			`return:`
			`ret void`
			`}`

			`;void test6(int N, double *G) {`
			`; int j;`
			`; for (j = 0; j < N - 1; j++)`
			`; G[j+1] = G[j] + G[j+1];`
			`;}`

			`define void @test6(i32 %N, double* nocapture %G) nounwind ssp {`
			`; CHECK: @test6`
			`entry:`
			`%0 = add i32 %N, -1`
			`%1 = icmp sgt i32 %0, 0`
			`br i1 %1, label %bb.nph, label %return`

			`bb.nph:`
			`%tmp = zext i32 %0 to i64`
			`br label %bb`

			`; CHECK: bb.nph:`
			`; CHECK: load double*`
			`; CHECK: br label %bb`

			`bb:`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]`
			`%tmp6 = add i64 %indvar, 1`
			`%scevgep = getelementptr double* %G, i64 %tmp6`
			`%scevgep7 = getelementptr double* %G, i64 %indvar`
			`%2 = load double* %scevgep7, align 8`
			`%3 = load double* %scevgep, align 8`
			`%4 = fadd double %2, %3`
			`store double %4, double* %scevgep, align 8`
			`%exitcond = icmp eq i64 %tmp6, %tmp`
			`br i1 %exitcond, label %return, label %bb`

			`; Should only be one load in the loop.`
			`; CHECK: bb:`
			`; CHECK: load double*`
			`; CHECK-NOT: load double*`
			`; CHECK: br i1 %exitcond`

			`return:`
			`ret void`
			`}`

add support for recursive phi translation and phi translation of add with immediate. This allows us to optimize this function: void test(int N, double* G) { long j; G[1] = 1; for (j = 1; j < N - 1; j++) G[j+1] = G[j] + G[j+1]; } to only do one load every iteration of the loop. llvm-svn: 90013 2009-11-28 03:11:31 +08:00			`;void test7(int N, double* G) {`
			`; long j;`
			`; G[1] = 1;`
			`; for (j = 1; j < N - 1; j++)`
			`; G[j+1] = G[j] + G[j+1];`
			`;}`

			`; This requires phi translation of the adds.`
			`define void @test7(i32 %N, double* nocapture %G) nounwind ssp {`
			`entry:`
			`%0 = getelementptr inbounds double* %G, i64 1`
			`store double 1.000000e+00, double* %0, align 8`
			`%1 = add i32 %N, -1`
			`%2 = icmp sgt i32 %1, 1`
			`br i1 %2, label %bb.nph, label %return`

			`bb.nph:`
			`%tmp = sext i32 %1 to i64`
			`%tmp7 = add i64 %tmp, -1`
			`br label %bb`

			`bb:`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]`
			`%tmp8 = add i64 %indvar, 2`
			`%scevgep = getelementptr double* %G, i64 %tmp8`
			`%tmp9 = add i64 %indvar, 1`
			`%scevgep10 = getelementptr double* %G, i64 %tmp9`
			`%3 = load double* %scevgep10, align 8`
			`%4 = load double* %scevgep, align 8`
			`%5 = fadd double %3, %4`
			`store double %5, double* %scevgep, align 8`
			`%exitcond = icmp eq i64 %tmp9, %tmp7`
			`br i1 %exitcond, label %return, label %bb`

			`; Should only be one load in the loop.`
			`; CHECK: bb:`
			`; CHECK: load double*`
			`; CHECK-NOT: load double*`
			`; CHECK: br i1 %exitcond`

			`return:`
			`ret void`
			`}`
add two simple test cases we now optimize (to one load in the loop each) and one we don't (corresponding to the fixme I added yesterday). llvm-svn: 90012 2009-11-28 02:08:30 +08:00
disable value insertion for now, I need to figure out how to inform GVN about the newly inserted values. This fixes PR5631. llvm-svn: 90022 2009-11-28 06:50:07 +08:00			`;; Here the loaded address isn't available in 'block2' at all, requiring a new`
			`;; GEP to be inserted into it.`
			`define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {`
			`; CHECK: @test8`
add two simple test cases we now optimize (to one load in the loop each) and one we don't (corresponding to the fixme I added yesterday). llvm-svn: 90012 2009-11-28 02:08:30 +08:00			`block1:`
			`br i1 %C, label %block2, label %block3`

			`block2:`
			`br label %block4`
reenable load address insertion in load pre. This allows us to handle cases like this: void test(int N, double* G) { long j; for (j = 1; j < N - 1; j++) G[j+1] = G[j] + G[j+1]; } where G[1] isn't live into the loop. llvm-svn: 90041 2009-11-29 00:08:18 +08:00			`; CHECK: block2:`
			`; CHECK: load i32*`
			`; CHECK: br label %block4`
add two simple test cases we now optimize (to one load in the loop each) and one we don't (corresponding to the fixme I added yesterday). llvm-svn: 90012 2009-11-28 02:08:30 +08:00
			`block3:`
			`%A = getelementptr i32* %p, i32 1`
			`store i32 0, i32* %A`
			`br label %block4`

			`block4:`
			`%P2 = phi i32* [%p, %block3], [%q, %block2]`
			`%P3 = getelementptr i32* %P2, i32 1`
			`%PRE = load i32* %P3`
			`ret i32 %PRE`
reenable load address insertion in load pre. This allows us to handle cases like this: void test(int N, double* G) { long j; for (j = 1; j < N - 1; j++) G[j+1] = G[j] + G[j+1]; } where G[1] isn't live into the loop. llvm-svn: 90041 2009-11-29 00:08:18 +08:00			`; CHECK: block4:`
			`; CHECK-NEXT: phi i32 [`
			`; CHECK-NOT: load`
			`; CHECK: ret i32`
add two simple test cases we now optimize (to one load in the loop each) and one we don't (corresponding to the fixme I added yesterday). llvm-svn: 90012 2009-11-28 02:08:30 +08:00			`}`

add a testcase for void test9(int N, double* G) { long j; for (j = 1; j < N - 1; j++) G[j+1] = G[j] + G[j+1]; } llvm-svn: 90047 2009-11-29 09:04:40 +08:00			`;void test9(int N, double* G) {`
			`; long j;`
			`; for (j = 1; j < N - 1; j++)`
			`; G[j+1] = G[j] + G[j+1];`
			`;}`

			`; This requires phi translation of the adds.`
			`define void @test9(i32 %N, double* nocapture %G) nounwind ssp {`
			`entry:`
			`add i32 0, 0`
			`%1 = add i32 %N, -1`
			`%2 = icmp sgt i32 %1, 1`
			`br i1 %2, label %bb.nph, label %return`

			`bb.nph:`
			`%tmp = sext i32 %1 to i64`
			`%tmp7 = add i64 %tmp, -1`
			`br label %bb`

			`; CHECK: bb.nph:`
			`; CHECK: load double*`
			`; CHECK: br label %bb`

			`bb:`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]`
			`%tmp8 = add i64 %indvar, 2`
			`%scevgep = getelementptr double* %G, i64 %tmp8`
			`%tmp9 = add i64 %indvar, 1`
			`%scevgep10 = getelementptr double* %G, i64 %tmp9`
			`%3 = load double* %scevgep10, align 8`
			`%4 = load double* %scevgep, align 8`
			`%5 = fadd double %3, %4`
			`store double %5, double* %scevgep, align 8`
			`%exitcond = icmp eq i64 %tmp9, %tmp7`
			`br i1 %exitcond, label %return, label %bb`

			`; Should only be one load in the loop.`
			`; CHECK: bb:`
			`; CHECK: load double*`
			`; CHECK-NOT: load double*`
			`; CHECK: br i1 %exitcond`

			`return:`
			`ret void`
			`}`
add two simple test cases we now optimize (to one load in the loop each) and one we don't (corresponding to the fixme I added yesterday). llvm-svn: 90012 2009-11-28 02:08:30 +08:00
Add a testcase for: void test(int N, double* G) { long j; for (j = 1; j < N - 1; j++) G[j] = G[j] + G[j+1] + G[j-1]; } which we now compile to one load in the loop: LBB1_2: ## %bb movsd 16(%rsi,%rax,8), %xmm2 incq %rdx addsd %xmm2, %xmm1 addsd %xmm1, %xmm0 movapd %xmm2, %xmm1 movsd %xmm0, 8(%rsi,%rax,8) incq %rax cmpq %rcx, %rax jne LBB1_2 instead of: LBB1_2: ## %bb movsd 8(%rsi,%rax,8), %xmm0 addsd 16(%rsi,%rax,8), %xmm0 addsd (%rsi,%rax,8), %xmm0 movsd %xmm0, 8(%rsi,%rax,8) incq %rax cmpq %rcx, %rax jne LBB1_2 llvm-svn: 90048 2009-11-29 09:15:43 +08:00			`;void test10(int N, double* G) {`
			`; long j;`
			`; for (j = 1; j < N - 1; j++)`
			`; G[j] = G[j] + G[j+1] + G[j-1];`
			`;}`

add PR# llvm-svn: 90049 2009-11-29 09:28:58 +08:00			`; PR5501`
Add a testcase for: void test(int N, double* G) { long j; for (j = 1; j < N - 1; j++) G[j] = G[j] + G[j+1] + G[j-1]; } which we now compile to one load in the loop: LBB1_2: ## %bb movsd 16(%rsi,%rax,8), %xmm2 incq %rdx addsd %xmm2, %xmm1 addsd %xmm1, %xmm0 movapd %xmm2, %xmm1 movsd %xmm0, 8(%rsi,%rax,8) incq %rax cmpq %rcx, %rax jne LBB1_2 instead of: LBB1_2: ## %bb movsd 8(%rsi,%rax,8), %xmm0 addsd 16(%rsi,%rax,8), %xmm0 addsd (%rsi,%rax,8), %xmm0 movsd %xmm0, 8(%rsi,%rax,8) incq %rax cmpq %rcx, %rax jne LBB1_2 llvm-svn: 90048 2009-11-29 09:15:43 +08:00			`define void @test10(i32 %N, double* nocapture %G) nounwind ssp {`
			`entry:`
			`%0 = add i32 %N, -1`
			`%1 = icmp sgt i32 %0, 1`
			`br i1 %1, label %bb.nph, label %return`

			`bb.nph:`
			`%tmp = sext i32 %0 to i64`
			`%tmp8 = add i64 %tmp, -1`
			`br label %bb`
			`; CHECK: bb.nph:`
			`; CHECK: load double*`
			`; CHECK: load double*`
			`; CHECK: br label %bb`


			`bb:`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]`
			`%scevgep = getelementptr double* %G, i64 %indvar`
			`%tmp9 = add i64 %indvar, 2`
			`%scevgep10 = getelementptr double* %G, i64 %tmp9`
			`%tmp11 = add i64 %indvar, 1`
			`%scevgep12 = getelementptr double* %G, i64 %tmp11`
			`%2 = load double* %scevgep12, align 8`
			`%3 = load double* %scevgep10, align 8`
			`%4 = fadd double %2, %3`
			`%5 = load double* %scevgep, align 8`
			`%6 = fadd double %4, %5`
			`store double %6, double* %scevgep12, align 8`
			`%exitcond = icmp eq i64 %tmp11, %tmp8`
			`br i1 %exitcond, label %return, label %bb`

			`; Should only be one load in the loop.`
			`; CHECK: bb:`
			`; CHECK: load double*`
			`; CHECK-NOT: load double*`
			`; CHECK: br i1 %exitcond`

			`return:`
			`ret void`
			`}`
Testcase for critical edge splitting with load PRE. llvm-svn: 96385 2010-02-17 04:48:55 +08:00
			`; Test critical edge splitting.`
			`define i32 @test11(i32* %p, i1 %C, i32 %N) {`
			`; CHECK: @test11`
			`block1:`
			`br i1 %C, label %block2, label %block3`

			`block2:`
			`%cond = icmp sgt i32 %N, 1`
			`br i1 %cond, label %block4, label %block5`
			`; CHECK: load i32* %p`
			`; CHECK-NEXT: br label %block4`

			`block3:`
			`store i32 0, i32* %p`
			`br label %block4`

			`block4:`
			`%PRE = load i32* %p`
			`br label %block5`

			`block5:`
			`%ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]`
			`ret i32 %ret`
			`; CHECK: block4:`
			`; CHECK-NEXT: phi i32`
			`}`