; llvm-project/llvm/test/Transforms/LoopIdiom/basic.ll

; Regression tests for the -loop-idiom pass. Per the RUN line below, opt is
; invoked with -basicaa -loop-idiom on this file and its textual output (-S) is
; piped to FileCheck, which verifies it against the CHECK directives embedded
; in each function.
; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"

; Basic memset idiom: a single-block loop stores the constant byte 0 to
; %Base[%indvar] while %indvar counts up from 0 in steps of 1, exiting once
; %indvar.next equals %Size. The CHECK directives expect a call to llvm.memset
; on %Base with value 0 and length %Size, with no store text following it.
define void @test1(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block, no predecessors
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test1
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
}

; This is a loop that was rotated but where the blocks weren't merged.  This
; shouldn't perturb us.
; Same zero-fill loop as @test1, but split across two blocks: the store and the
; induction-variable increment are in the header %for.body, while the exit test
; and back edge are in %for.body.cont. The same memset call is expected.
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block, no predecessors
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  br label %for.body.cont
for.body.cont:                                    ; preds = %for.body
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK: @test1a
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
}


; memset of a multi-byte element: the loop stores the i32 constant 16843009
; (0x01010101, i.e. every byte is 0x01) to %Base[i] for i = 0 .. %Size-1.
; The entry block skips the loop entirely when %Size is 0. The CHECK directives
; expect that guard branch to remain, a byte count %tmp = %Size * 4, and a
; memset of byte value 1 with alignment 4. %Base1 is a name that only appears
; in the expected output (presumably the i8* view of %Base created by the pass).
define void @test2(i32* %Base, i64 %Size) nounwind ssp {
entry:
  %cmp10 = icmp eq i64 %Size, 0
  br i1 %cmp10, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
; CHECK: @test2
; CHECK: br i1 %cmp10,
; CHECK: %tmp = mul i64 %Size, 4
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %tmp, i32 4, i1 false)
; CHECK-NOT: store
}

; This is a case where there is an extra may-aliased store in the loop, we can't
; promote the memset.
; The loop body is the same 0x01010101 fill as @test2, plus a store of 42
; through the unrelated pointer argument %MayAlias on every iteration. The
; CHECK directives require that no memset appears before the ret.
define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4
  
  ; Nothing in this function relates %MayAlias to %Base, so this store may
  ; overlap the region being filled.
  store i8 42, i8* %MayAlias
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test3
; CHECK-NOT: memset
; CHECK: ret void
}


;; TODO: We should be able to promote this memset.  Not yet though.
;; The loop zeroes %Base[0 .. 99] (it exits when %indvar.next reaches 100) and
;; also stores 42 to %Base100 on every iteration. Despite its name, %Base100 is
;; %Base + 1000, which lies outside the 100 bytes being zeroed.
;; The expectations below use the CHECK-TODO prefix; the RUN line does not
;; select that prefix, so they document the desired output without being
;; enforced.
define void @test4(i8* %Base) nounwind ssp {
bb.nph:                                           ; entry block, no predecessors
  %Base100 = getelementptr i8* %Base, i64 1000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  
  ;; Store beyond the range memset, should be safe to promote.
  store i8 42, i8* %Base100
  
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 100
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-TODO: @test4
; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
; CHECK-TODO-NOT: store
}

; This can't be promoted: the memset is a store of a loop variant value.
; The byte written on each iteration is %V, the low 8 bits of the induction
; variable, so it differs from one iteration to the next. The CHECK directives
; require that no memset appears before the ret.
define void @test5(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block, no predecessors
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  
  ; Stored value depends on %indvar, i.e. it is not loop invariant.
  %V = trunc i64 %indvar to i8
  store i8 %V, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test5
; CHECK-NOT: memset
; CHECK: ret void
}


;; memcpy formation
;; %Base and %Dest are two separate 10000-byte allocas. The loop loads one byte
;; from %Base[%indvar] and stores it to %Dest[%indvar] for %indvar = 0 ..
;; %Size-1. The CHECK directives expect a single llvm.memcpy from %Base to
;; %Dest of %Size bytes with alignment 1, and no store text between that call
;; and the ret.
define void @test6(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  %DestI = getelementptr i8* %Dest, i64 %indvar
  %V = load i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test6
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}


; This is a loop that was rotated but where the blocks weren't merged.  This
; shouldn't perturb us.
; Unlike @test1a, the header %for.body here contains only the phi and an
; unconditional branch; the store, the increment and the exit test all live in
; the second block %for.body.cont. The same memset call as in @test1 is
; expected.
define void @test7(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block, no predecessors
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  br label %for.body.cont
for.body.cont:                                    ; preds = %for.body
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK: @test7
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
; CHECK-NOT: store
}

; This loop should not be transformed: it only executes one iteration.
; The exit condition compares %indvar.next against the constant 1, so the
; branch leaves the loop at the end of the first pass through the body (%Size
; is not used). The CHECK directives require the original i64 store to %PI to
; still be present in the output.
define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
bb.nph:                                           ; entry block, no predecessors
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %PI = getelementptr i64* %Ptr, i64 %indvar
  store i64 0, i64 *%PI
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 1
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test8
; CHECK: store i64 0, i64* %PI
}

; Externally defined function with no body in this file. @test9 passes it a
; pointer and stores through the pointer it returns.
declare i8* @external(i8*)

;; This cannot be transformed into a memcpy, because the read-from location is
;; mutated by the loop.
;; The copy loop is the same as in @test6, but each iteration also stores 4
;; through %BaseAlias, a pointer obtained by passing %Base to the opaque
;; function @external. The CHECK directives require that no llvm.memcpy
;; appears before the ret.
define void @test9(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  
  ; %Base is handed to an external call, and the returned pointer is kept for
  ; use inside the loop.
  %BaseAlias = call i8* @external(i8* %Base)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8* %Base, i64 %indvar
  %DestI = getelementptr i8* %Dest, i64 %indvar
  %V = load i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1

  ;; This store can clobber the input.
  store i8 4, i8* %BaseAlias
 
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK: @test9
; CHECK-NOT: llvm.memcpy
; CHECK: ret void
}
improve validity check to handle constant-trip-count loops more aggressively. In practice, this doesn't help anything though, see the todo. llvm-svn: 122660 2011-01-02 03:54:22 +08:00			`; RUN: opt -basicaa -loop-idiom < %s -S \| FileCheck %s`
implement enough of the memset inference algorithm to recognize and insert memsets. This is still missing one important validity check, but this is enough to compile stuff like this: void test0(std::vector<char> &X) { for (std::vector<char>::iterator I = X.begin(), E = X.end(); I != E; ++I) *I = 0; } void test1(std::vector<int> &X) { for (long i = 0, e = X.size(); i != e; ++i) X[i] = 0x01010101; } With: $ clang t.cpp -S -o - -O2 -emit-llvm \| opt -loop-idiom \| opt -O3 \| llc to: __Z5test0RSt6vectorIcSaIcEE: ## @_Z5test0RSt6vectorIcSaIcEE ## BB#0: ## %entry subq $8, %rsp movq (%rdi), %rax movq 8(%rdi), %rsi cmpq %rsi, %rax je LBB0_2 ## BB#1: ## %bb.nph subq %rax, %rsi movq %rax, %rdi callq ___bzero LBB0_2: ## %for.end addq $8, %rsp ret ... __Z5test1RSt6vectorIiSaIiEE: ## @_Z5test1RSt6vectorIiSaIiEE ## BB#0: ## %entry subq $8, %rsp movq (%rdi), %rax movq 8(%rdi), %rdx subq %rax, %rdx cmpq $4, %rdx jb LBB1_2 ## BB#1: ## %for.body.preheader andq $-4, %rdx movl $1, %esi movq %rax, %rdi callq _memset LBB1_2: ## %for.end addq $8, %rsp ret llvm-svn: 122573 2010-12-27 07:42:51 +08:00			`target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"`
			`target triple = "x86_64-apple-darwin10.0.0"`

			`define void @test1(i8* %Base, i64 %Size) nounwind ssp {`
			`bb.nph: ; preds = %entry`
			`br label %for.body`

			`for.body: ; preds = %bb.nph, %for.body`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]`
			`%I.0.014 = getelementptr i8* %Base, i64 %indvar`
			`store i8 0, i8* %I.0.014, align 1`
			`%indvar.next = add i64 %indvar, 1`
			`%exitcond = icmp eq i64 %indvar.next, %Size`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body, %entry`
			`ret void`
			`; CHECK: @test1`
			`; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)`
			`; CHECK-NOT: store`
			`}`

Allow loop-idiom to run on multiple BB loops, but still only scan the loop header for now for memset/memcpy opportunities. It turns out that loop-rotate is successfully rotating loops, but DOESN'T MERGE THE BLOCKS, turning "for loops" into 2 basic block loops that loop-idiom was ignoring. With this fix, we form many many more memcpy and memsets than before, including on the "history" loops in the viterbi benchmark, which look like this: for (j=0; j<MAX_history; ++j) { history_new[i][j+1] = history[2*i][j]; } Transforming these loops into memcpy's speeds up the viterbi benchmark from 11.98s to 3.55s on my machine. Woo. llvm-svn: 122685 2011-01-02 15:58:36 +08:00			`; This is a loop that was rotated but where the blocks weren't merged. This`
			`; shouldn't perturb us.`
			`define void @test1a(i8* %Base, i64 %Size) nounwind ssp {`
			`bb.nph: ; preds = %entry`
			`br label %for.body`

			`for.body: ; preds = %bb.nph, %for.body`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]`
			`%I.0.014 = getelementptr i8* %Base, i64 %indvar`
			`store i8 0, i8* %I.0.014, align 1`
			`%indvar.next = add i64 %indvar, 1`
			`br label %for.body.cont`
			`for.body.cont:`
			`%exitcond = icmp eq i64 %indvar.next, %Size`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body, %entry`
			`ret void`
			`; CHECK: @test1a`
			`; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)`
			`; CHECK-NOT: store`
			`}`


implement enough of the memset inference algorithm to recognize and insert memsets. This is still missing one important validity check, but this is enough to compile stuff like this: void test0(std::vector<char> &X) { for (std::vector<char>::iterator I = X.begin(), E = X.end(); I != E; ++I) *I = 0; } void test1(std::vector<int> &X) { for (long i = 0, e = X.size(); i != e; ++i) X[i] = 0x01010101; } With: $ clang t.cpp -S -o - -O2 -emit-llvm \| opt -loop-idiom \| opt -O3 \| llc to: __Z5test0RSt6vectorIcSaIcEE: ## @_Z5test0RSt6vectorIcSaIcEE ## BB#0: ## %entry subq $8, %rsp movq (%rdi), %rax movq 8(%rdi), %rsi cmpq %rsi, %rax je LBB0_2 ## BB#1: ## %bb.nph subq %rax, %rsi movq %rax, %rdi callq ___bzero LBB0_2: ## %for.end addq $8, %rsp ret ... __Z5test1RSt6vectorIiSaIiEE: ## @_Z5test1RSt6vectorIiSaIiEE ## BB#0: ## %entry subq $8, %rsp movq (%rdi), %rax movq 8(%rdi), %rdx subq %rax, %rdx cmpq $4, %rdx jb LBB1_2 ## BB#1: ## %for.body.preheader andq $-4, %rdx movl $1, %esi movq %rax, %rdi callq _memset LBB1_2: ## %for.end addq $8, %rsp ret llvm-svn: 122573 2010-12-27 07:42:51 +08:00			`define void @test2(i32* %Base, i64 %Size) nounwind ssp {`
			`entry:`
			`%cmp10 = icmp eq i64 %Size, 0`
			`br i1 %cmp10, label %for.end, label %for.body`

			`for.body: ; preds = %entry, %for.body`
			`%i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]`
			`%add.ptr.i = getelementptr i32* %Base, i64 %i.011`
			`store i32 16843009, i32* %add.ptr.i, align 4`
			`%inc = add nsw i64 %i.011, 1`
			`%exitcond = icmp eq i64 %inc, %Size`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body, %entry`
			`ret void`
			`; CHECK: @test2`
			`; CHECK: br i1 %cmp10,`
			`; CHECK: %tmp = mul i64 %Size, 4`
			`; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %tmp, i32 4, i1 false)`
			`; CHECK-NOT: store`
			`}`
implement the "no aliasing accesses in loop" safety check. This pass should be correct now. llvm-svn: 122659 2011-01-02 03:39:01 +08:00
			`; This is a case where there is an extra may-aliased store in the loop, we can't`
			`; promote the memset.`
			`define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {`
			`entry:`
			`br label %for.body`

			`for.body: ; preds = %entry, %for.body`
			`%i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]`
			`%add.ptr.i = getelementptr i32* %Base, i64 %i.011`
			`store i32 16843009, i32* %add.ptr.i, align 4`

			`store i8 42, i8* %MayAlias`
			`%inc = add nsw i64 %i.011, 1`
			`%exitcond = icmp eq i64 %inc, %Size`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %entry`
			`ret void`
			`; CHECK: @test3`
			`; CHECK-NOT: memset`
			`; CHECK: ret void`
			`}`
improve validity check to handle constant-trip-count loops more aggressively. In practice, this doesn't help anything though, see the todo. llvm-svn: 122660 2011-01-02 03:54:22 +08:00

			`;; TODO: We should be able to promote this memset. Not yet though.`
			`define void @test4(i8* %Base) nounwind ssp {`
			`bb.nph: ; preds = %entry`
			`%Base100 = getelementptr i8* %Base, i64 1000`
			`br label %for.body`

			`for.body: ; preds = %bb.nph, %for.body`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]`
			`%I.0.014 = getelementptr i8* %Base, i64 %indvar`
			`store i8 0, i8* %I.0.014, align 1`

			`;; Store beyond the range memset, should be safe to promote.`
			`store i8 42, i8* %Base100`

			`%indvar.next = add i64 %indvar, 1`
			`%exitcond = icmp eq i64 %indvar.next, 100`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body, %entry`
			`ret void`
			`; CHECK-TODO: @test4`
			`; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)`
			`; CHECK-TODO-NOT: store`
			`}`
add a validity check that was missed, fixing a crash on the new testcase. llvm-svn: 122662 2011-01-02 04:12:04 +08:00
			`; This can't be promoted: the memset is a store of a loop variant value.`
			`define void @test5(i8* %Base, i64 %Size) nounwind ssp {`
			`bb.nph: ; preds = %entry`
			`br label %for.body`

			`for.body: ; preds = %bb.nph, %for.body`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]`
			`%I.0.014 = getelementptr i8* %Base, i64 %indvar`

			`%V = trunc i64 %indvar to i8`
			`store i8 %V, i8* %I.0.014, align 1`
			`%indvar.next = add i64 %indvar, 1`
			`%exitcond = icmp eq i64 %indvar.next, %Size`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body, %entry`
			`ret void`
			`; CHECK: @test5`
			`; CHECK-NOT: memset`
			`; CHECK: ret void`
			`}`

teach loop idiom recognition to form memcpy's from simple loops. llvm-svn: 122678 2011-01-02 11:37:56 +08:00
			`;; memcpy formation`
			`define void @test6(i64 %Size) nounwind ssp {`
			`bb.nph:`
			`%Base = alloca i8, i32 10000`
			`%Dest = alloca i8, i32 10000`
			`br label %for.body`

			`for.body: ; preds = %bb.nph, %for.body`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]`
			`%I.0.014 = getelementptr i8* %Base, i64 %indvar`
			`%DestI = getelementptr i8* %Dest, i64 %indvar`
			`%V = load i8* %I.0.014, align 1`
			`store i8 %V, i8* %DestI, align 1`
			`%indvar.next = add i64 %indvar, 1`
			`%exitcond = icmp eq i64 %indvar.next, %Size`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body, %entry`
			`ret void`
			`; CHECK: @test6`
			`; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)`
			`; CHECK-NOT: store`
			`; CHECK: ret void`
			`}`


enhance loop idiom recognition to scan all unconditionally executed blocks in a loop, instead of just the header block. This makes it more aggressive, able to handle Duncan's Ada examples. llvm-svn: 122704 2011-01-03 03:01:03 +08:00			`; This is a loop that was rotated but where the blocks weren't merged. This`
			`; shouldn't perturb us.`
			`define void @test7(i8* %Base, i64 %Size) nounwind ssp {`
			`bb.nph: ; preds = %entry`
			`br label %for.body`

			`for.body: ; preds = %bb.nph, %for.body`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]`
			`br label %for.body.cont`
			`for.body.cont:`
			`%I.0.014 = getelementptr i8* %Base, i64 %indvar`
			`store i8 0, i8* %I.0.014, align 1`
			`%indvar.next = add i64 %indvar, 1`
			`%exitcond = icmp eq i64 %indvar.next, %Size`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body, %entry`
			`ret void`
			`; CHECK: @test7`
			`; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)`
			`; CHECK-NOT: store`
			`}`

If a loop iterates exactly once (has backedge count = 0) then don't mess with it. We'd rather peel/unroll it than convert all of its stores into memsets. llvm-svn: 122711 2011-01-03 04:24:21 +08:00			`; This is a loop should not be transformed, it only executes one iteration.`
			`define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {`
			`bb.nph: ; preds = %entry`
			`br label %for.body`

			`for.body: ; preds = %bb.nph, %for.body`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]`
			`%PI = getelementptr i64* %Ptr, i64 %indvar`
			`store i64 0, i64 *%PI`
			`%indvar.next = add i64 %indvar, 1`
			`%exitcond = icmp eq i64 %indvar.next, 1`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body, %entry`
			`ret void`
			`; CHECK: @test8`
			`; CHECK: store i64 0, i64* %PI`
			`}`
teach loop idiom recognition to form memcpy's from simple loops. llvm-svn: 122678 2011-01-02 11:37:56 +08:00
fix a miscompilation of tramp3d-v4: when forming a memcpy, we have to make sure that the loop we're promoting into a memcpy doesn't mutate the input of the memcpy. Before we were just checking that the dest of the memcpy wasn't mod/ref'd by the loop. llvm-svn: 122712 2011-01-03 05:14:18 +08:00			`declare i8* @external(i8*)`

			`;; This cannot be transformed into a memcpy, because the read-from location is`
			`;; mutated by the loop.`
			`define void @test9(i64 %Size) nounwind ssp {`
			`bb.nph:`
			`%Base = alloca i8, i32 10000`
			`%Dest = alloca i8, i32 10000`

			`%BaseAlias = call i8* @external(i8* %Base)`
			`br label %for.body`

			`for.body: ; preds = %bb.nph, %for.body`
			`%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]`
			`%I.0.014 = getelementptr i8* %Base, i64 %indvar`
			`%DestI = getelementptr i8* %Dest, i64 %indvar`
			`%V = load i8* %I.0.014, align 1`
			`store i8 %V, i8* %DestI, align 1`

			`;; This store can clobber the input.`
			`store i8 4, i8* %BaseAlias`

			`%indvar.next = add i64 %indvar, 1`
			`%exitcond = icmp eq i64 %indvar.next, %Size`
			`br i1 %exitcond, label %for.end, label %for.body`

			`for.end: ; preds = %for.body, %entry`
			`ret void`
			`; CHECK: @test9`
			`; CHECK-NOT: llvm.memcpy`
			`; CHECK: ret void`
			`}`