llvm-project/llvm/test/CodeGen/X86/hoist-spill.ll

; Test driver: compile this module with llc and pipe the generated assembly
; into FileCheck, which reads its directives from this same file.
; RUN: llc < %s | FileCheck %s

; Check no spills to the same stack slot after hoisting.
; The two positive directives each match a mov whose source is a register and
; whose destination is an %rsp-relative address, and capture the displacements
; as SPOFFSET1 and SPOFFSET2. The two negative directives then reject any
; later mov from a register to either captured displacement.
; NOTE(review): the displacement pattern also accepts an empty string, and
; nothing forces SPOFFSET1 and SPOFFSET2 to differ - confirm this is intended.
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)

; Target description; llc is given no explicit target on its command line
; above.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; External globals referenced by @fn1: @a and @d each hold an i32 pointer,
; @b holds a plain i32.
@a = external global i32*, align 8
@b = external global i32, align 4
@d = external global i32*, align 8

; fn1: input function for the spill checks at the top of this file.
;
; Shape of the control flow: an outer loop (for.cond ... for.inc14) that
; contains several inner loops (for.body, vector.body.prol, vector.body,
; for.body6). No block ends in a return; for.inc14 always branches back to
; for.cond. Many branch conditions and some operands are undef.
; NOTE(review): this looks like an automatically reduced reproducer; the
; instruction order presumably matters for the register-allocation behaviour
; under test, so avoid reordering or simplifying it - confirm before editing.
;
; Function Attrs: norecurse noreturn nounwind uwtable
; NOTE(review): the attribute list above is only a comment; the define below
; carries no attributes or attribute group.
define void @fn1(i32 %p1) {
entry:
  ; Read the two pointer globals once and widen the argument; %tmp, %tmp1 and
  ; %tmp2 are all used again inside the loops below.
  %tmp = load i32*, i32** @d, align 8
  %tmp1 = load i32*, i32** @a, align 8
  %tmp2 = sext i32 %p1 to i64
  br label %for.cond

; Outer loop header. Besides the four loop-carried phis it computes %tmp8,
; %scevgep41, %tmp12 and %sub., which are only consumed in later blocks
; (vector.body.preheader.split, vector.memcheck, vector.body / for.body6 and
; for.cond4.preheader respectively).
; NOTE(review): keeping these values live across the inner loops is presumably
; what produces the spills the test inspects - confirm.
for.cond:                                         ; preds = %for.inc14, %entry
  %indvar = phi i32 [ %indvar.next, %for.inc14 ], [ 0, %entry ]
  %indvars.iv30.in = phi i32 [ %indvars.iv30, %for.inc14 ], [ %p1, %entry ]
  %c.0 = phi i32 [ %inc15, %for.inc14 ], [ 1, %entry ]
  %k.0 = phi i32 [ %k.1.lcssa, %for.inc14 ], [ undef, %entry ]
  ; Compare/select pairs; the comparisons feeding %smax52 and %smax53 have an
  ; undef operand.
  %tmp3 = icmp sgt i32 undef, 0
  %smax52 = select i1 %tmp3, i32 %c.0, i32 0
  %tmp4 = zext i32 %smax52 to i64
  %tmp5 = icmp sgt i64 undef, %tmp4
  %smax53 = select i1 %tmp5, i64 %tmp2, i64 %tmp4
  %tmp6 = add nsw i64 %smax53, 1
  %tmp7 = sub nsw i64 %tmp6, %tmp4
  %tmp8 = add nsw i64 %tmp7, -8
  ; %tmp9 has no users anywhere in this function.
  %tmp9 = sub i32 undef, %indvar
  ; %smax40 = max(%tmp2, 0) (signed); %scevgep41 = %tmp1 + %smax40 elements.
  %tmp10 = icmp sgt i64 %tmp2, 0
  %smax40 = select i1 %tmp10, i64 %tmp2, i64 0
  %scevgep41 = getelementptr i32, i32* %tmp1, i64 %smax40
  ; %tmp12 = zext(max(%indvars.iv30, 0)); start index for both copy loops.
  %indvars.iv30 = add i32 %indvars.iv30.in, -1
  %tmp11 = icmp sgt i32 %indvars.iv30, 0
  %smax = select i1 %tmp11, i32 %indvars.iv30, i32 0
  %tmp12 = zext i32 %smax to i64
  ; %sub. = max(%p1 - %c.0, 0) (signed).
  %sub = sub nsw i32 %p1, %c.0
  %cmp = icmp sgt i32 %sub, 0
  %sub. = select i1 %cmp, i32 %sub, i32 0
  ; Skip the first inner loop (for.body) when %k.0 > %p1.
  %cmp326 = icmp sgt i32 %k.0, %p1
  br i1 %cmp326, label %for.cond4.preheader, label %for.body.preheader

; Join point after the first inner loop; %k.1.lcssa feeds %k.0 on the next
; outer iteration. Goes straight to the latch when %sub. > %p1.
for.cond4.preheader:                              ; preds = %for.body, %for.cond
  %k.1.lcssa = phi i32 [ %k.0, %for.cond ], [ %add, %for.body ]
  %cmp528 = icmp sgt i32 %sub., %p1
  br i1 %cmp528, label %for.inc14, label %for.body6.preheader

; The next two blocks branch on undef: either enter the scalar copy loop
; (for.body6) directly or continue towards the vector path.
for.body6.preheader:                              ; preds = %for.cond4.preheader
  br i1 undef, label %for.body6, label %min.iters.checked

min.iters.checked:                                ; preds = %for.body6.preheader
  br i1 undef, label %for.body6, label %vector.memcheck

; Uses %scevgep41 (computed in for.cond) in a pointer comparison against undef
; and picks the scalar loop when the combined condition is true.
vector.memcheck:                                  ; preds = %min.iters.checked
  %bound1 = icmp ule i32* undef, %scevgep41
  %memcheck.conflict = and i1 undef, %bound1
  br i1 %memcheck.conflict, label %for.body6, label %vector.body.preheader

vector.body.preheader:                            ; preds = %vector.memcheck
  %lcmp.mod = icmp eq i64 undef, 0
  br i1 %lcmp.mod, label %vector.body.preheader.split, label %vector.body.prol

; Self-looping block with no work other than its exit test.
vector.body.prol:                                 ; preds = %vector.body.prol, %vector.body.preheader
  %prol.iter.cmp = icmp eq i64 undef, 0
  br i1 %prol.iter.cmp, label %vector.body.preheader.split, label %vector.body.prol

; Bypass vector.body when %tmp8 (computed in for.cond) is below 24, unsigned.
vector.body.preheader.split:                      ; preds = %vector.body.prol, %vector.body.preheader
  %tmp13 = icmp ult i64 %tmp8, 24
  br i1 %tmp13, label %middle.block, label %vector.body

; Vector copy loop: loads <4 x i32> from %tmp and stores it to %tmp1, both at
; element offset %tmp12 + %index + 8; %index advances by 32 per iteration.
vector.body:                                      ; preds = %vector.body, %vector.body.preheader.split
  %index = phi i64 [ %index.next.3, %vector.body ], [ 0, %vector.body.preheader.split ]
  %index.next = add i64 %index, 8
  %offset.idx.1 = add i64 %tmp12, %index.next
  %tmp14 = getelementptr inbounds i32, i32* %tmp, i64 %offset.idx.1
  %tmp15 = bitcast i32* %tmp14 to <4 x i32>*
  %wide.load.1 = load <4 x i32>, <4 x i32>* %tmp15, align 4
  %tmp16 = getelementptr inbounds i32, i32* %tmp1, i64 %offset.idx.1
  %tmp17 = bitcast i32* %tmp16 to <4 x i32>*
  store <4 x i32> %wide.load.1, <4 x i32>* %tmp17, align 4
  %index.next.3 = add i64 %index, 32
  br i1 undef, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body, %vector.body.preheader.split
  br i1 undef, label %for.inc14, label %for.body6

for.body.preheader:                               ; preds = %for.cond
  br label %for.body

; First inner loop: counts %k.127 upward and copies an i32 loaded through an
; undef pointer into @b; the back-edge condition is undef.
for.body:                                         ; preds = %for.body, %for.body.preheader
  %k.127 = phi i32 [ %k.0, %for.body.preheader ], [ %add, %for.body ]
  %add = add nsw i32 %k.127, 1
  %tmp18 = load i32, i32* undef, align 4
  store i32 %tmp18, i32* @b, align 4
  br i1 undef, label %for.body, label %for.cond4.preheader

; Scalar copy loop: %tmp1[i] = %tmp[i]. The index is %tmp12 when entered from
; the preheader chain and undef when entered from middle.block or from its own
; back edge.
for.body6:                                        ; preds = %for.body6, %middle.block, %vector.memcheck, %min.iters.checked, %for.body6.preheader
  %indvars.iv32 = phi i64 [ undef, %for.body6 ], [ %tmp12, %vector.memcheck ], [ %tmp12, %min.iters.checked ], [ %tmp12, %for.body6.preheader ], [ undef, %middle.block ]
  %arrayidx8 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv32
  %tmp19 = load i32, i32* %arrayidx8, align 4
  %arrayidx10 = getelementptr inbounds i32, i32* %tmp1, i64 %indvars.iv32
  store i32 %tmp19, i32* %arrayidx10, align 4
  %cmp5 = icmp slt i64 %indvars.iv32, undef
  br i1 %cmp5, label %for.body6, label %for.inc14

; Outer loop latch: bump %c.0 and %indvar, then unconditionally restart the
; outer loop.
for.inc14:                                        ; preds = %for.body6, %middle.block, %for.cond4.preheader
  %inc15 = add nuw nsw i32 %c.0, 1
  %indvar.next = add i32 %indvar, 1
  br label %for.cond
}
Recommit r265547, and r265610,r265639,r265657 on top of it, plus two fixes with one about error verify-regalloc reported, and another about live range update of phi after rematerialization. r265547: Replace analyzeSiblingValues with new algorithm to fix its compile time issue. The patch is to solve PR17409 and its duplicates. analyzeSiblingValues is a N x N complexity algorithm where N is the number of siblings generated by reg splitting. Although it causes siginificant compile time issue when N is large, it is also important for performance since it removes redundent spills and enables rematerialization. To solve the compile time issue, the patch removes analyzeSiblingValues and replaces it with lower cost alternatives containing two parts. The first part creates a new spill hoisting method in postOptimization of register allocation. It does spill hoisting at once after all the spills are generated instead of inside every instance of selectOrSplit. The second part queries the define expr of the original register for rematerializaiton and keep it always available during register allocation even if it is already dead. It deletes those dead instructions only in postOptimization. With the two parts in the patch, it can remove analyzeSiblingValues without sacrificing performance. Patches on top of r265547: r265610 "Fix the compare-clang diff error introduced by r265547." r265639 "Fix the sanitizer bootstrap error in r265547." r265657 "InlineSpiller.cpp: Escap \@ in r265547. [-Wdocumentation]" Differential Revision: http://reviews.llvm.org/D15302 Differential Revision: http://reviews.llvm.org/D18934 Differential Revision: http://reviews.llvm.org/D18935 Differential Revision: http://reviews.llvm.org/D18936 llvm-svn: 266162 2016-04-13 11:08:27 +08:00			`; RUN: llc < %s \| FileCheck %s`

			`; Check no spills to the same stack slot after hoisting.`
			`; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)`
Revert r345165 "[X86] Bring back the MOV64r0 pseudo instruction" Google is reporting regressions on some benchmarks. llvm-svn: 345785 2018-11-01 05:53:24 +08:00			`; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)`
Recommit r265547, and r265610,r265639,r265657 on top of it, plus two fixes with one about error verify-regalloc reported, and another about live range update of phi after rematerialization. r265547: Replace analyzeSiblingValues with new algorithm to fix its compile time issue. The patch is to solve PR17409 and its duplicates. analyzeSiblingValues is a N x N complexity algorithm where N is the number of siblings generated by reg splitting. Although it causes siginificant compile time issue when N is large, it is also important for performance since it removes redundent spills and enables rematerialization. To solve the compile time issue, the patch removes analyzeSiblingValues and replaces it with lower cost alternatives containing two parts. The first part creates a new spill hoisting method in postOptimization of register allocation. It does spill hoisting at once after all the spills are generated instead of inside every instance of selectOrSplit. The second part queries the define expr of the original register for rematerializaiton and keep it always available during register allocation even if it is already dead. It deletes those dead instructions only in postOptimization. With the two parts in the patch, it can remove analyzeSiblingValues without sacrificing performance. Patches on top of r265547: r265610 "Fix the compare-clang diff error introduced by r265547." r265639 "Fix the sanitizer bootstrap error in r265547." r265657 "InlineSpiller.cpp: Escap \@ in r265547. [-Wdocumentation]" Differential Revision: http://reviews.llvm.org/D15302 Differential Revision: http://reviews.llvm.org/D18934 Differential Revision: http://reviews.llvm.org/D18935 Differential Revision: http://reviews.llvm.org/D18936 llvm-svn: 266162 2016-04-13 11:08:27 +08:00			`; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)`
Revert r345165 "[X86] Bring back the MOV64r0 pseudo instruction" Google is reporting regressions on some benchmarks. llvm-svn: 345785 2018-11-01 05:53:24 +08:00			`; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)`
Recommit r265547, and r265610,r265639,r265657 on top of it, plus two fixes with one about error verify-regalloc reported, and another about live range update of phi after rematerialization. r265547: Replace analyzeSiblingValues with new algorithm to fix its compile time issue. The patch is to solve PR17409 and its duplicates. analyzeSiblingValues is a N x N complexity algorithm where N is the number of siblings generated by reg splitting. Although it causes siginificant compile time issue when N is large, it is also important for performance since it removes redundent spills and enables rematerialization. To solve the compile time issue, the patch removes analyzeSiblingValues and replaces it with lower cost alternatives containing two parts. The first part creates a new spill hoisting method in postOptimization of register allocation. It does spill hoisting at once after all the spills are generated instead of inside every instance of selectOrSplit. The second part queries the define expr of the original register for rematerializaiton and keep it always available during register allocation even if it is already dead. It deletes those dead instructions only in postOptimization. With the two parts in the patch, it can remove analyzeSiblingValues without sacrificing performance. Patches on top of r265547: r265610 "Fix the compare-clang diff error introduced by r265547." r265639 "Fix the sanitizer bootstrap error in r265547." r265657 "InlineSpiller.cpp: Escap \@ in r265547. [-Wdocumentation]" Differential Revision: http://reviews.llvm.org/D15302 Differential Revision: http://reviews.llvm.org/D18934 Differential Revision: http://reviews.llvm.org/D18935 Differential Revision: http://reviews.llvm.org/D18936 llvm-svn: 266162 2016-04-13 11:08:27 +08:00
			`target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"`
			`target triple = "x86_64-unknown-linux-gnu"`

			`@a = external global i32*, align 8`
			`@b = external global i32, align 4`
			`@d = external global i32*, align 8`

			`; Function Attrs: norecurse noreturn nounwind uwtable`
			`define void @fn1(i32 %p1) {`
			`entry:`
			`%tmp = load i32*, i32** @d, align 8`
			`%tmp1 = load i32*, i32** @a, align 8`
			`%tmp2 = sext i32 %p1 to i64`
			`br label %for.cond`

			`for.cond: ; preds = %for.inc14, %entry`
			`%indvar = phi i32 [ %indvar.next, %for.inc14 ], [ 0, %entry ]`
			`%indvars.iv30.in = phi i32 [ %indvars.iv30, %for.inc14 ], [ %p1, %entry ]`
			`%c.0 = phi i32 [ %inc15, %for.inc14 ], [ 1, %entry ]`
			`%k.0 = phi i32 [ %k.1.lcssa, %for.inc14 ], [ undef, %entry ]`
			`%tmp3 = icmp sgt i32 undef, 0`
DAG combiner: fold (select, C, X, undef) -> X Differential Revision: https://reviews.llvm.org/D54646 llvm-svn: 347110 2018-11-17 07:13:38 +08:00			`%smax52 = select i1 %tmp3, i32 %c.0, i32 0`
Recommit r265547, and r265610,r265639,r265657 on top of it, plus two fixes with one about error verify-regalloc reported, and another about live range update of phi after rematerialization. r265547: Replace analyzeSiblingValues with new algorithm to fix its compile time issue. The patch is to solve PR17409 and its duplicates. analyzeSiblingValues is a N x N complexity algorithm where N is the number of siblings generated by reg splitting. Although it causes siginificant compile time issue when N is large, it is also important for performance since it removes redundent spills and enables rematerialization. To solve the compile time issue, the patch removes analyzeSiblingValues and replaces it with lower cost alternatives containing two parts. The first part creates a new spill hoisting method in postOptimization of register allocation. It does spill hoisting at once after all the spills are generated instead of inside every instance of selectOrSplit. The second part queries the define expr of the original register for rematerializaiton and keep it always available during register allocation even if it is already dead. It deletes those dead instructions only in postOptimization. With the two parts in the patch, it can remove analyzeSiblingValues without sacrificing performance. Patches on top of r265547: r265610 "Fix the compare-clang diff error introduced by r265547." r265639 "Fix the sanitizer bootstrap error in r265547." r265657 "InlineSpiller.cpp: Escap \@ in r265547. [-Wdocumentation]" Differential Revision: http://reviews.llvm.org/D15302 Differential Revision: http://reviews.llvm.org/D18934 Differential Revision: http://reviews.llvm.org/D18935 Differential Revision: http://reviews.llvm.org/D18936 llvm-svn: 266162 2016-04-13 11:08:27 +08:00			`%tmp4 = zext i32 %smax52 to i64`
			`%tmp5 = icmp sgt i64 undef, %tmp4`
DAG combiner: fold (select, C, X, undef) -> X Differential Revision: https://reviews.llvm.org/D54646 llvm-svn: 347110 2018-11-17 07:13:38 +08:00			`%smax53 = select i1 %tmp5, i64 %tmp2, i64 %tmp4`
Recommit r265547, and r265610,r265639,r265657 on top of it, plus two fixes with one about error verify-regalloc reported, and another about live range update of phi after rematerialization. r265547: Replace analyzeSiblingValues with new algorithm to fix its compile time issue. The patch is to solve PR17409 and its duplicates. analyzeSiblingValues is a N x N complexity algorithm where N is the number of siblings generated by reg splitting. Although it causes siginificant compile time issue when N is large, it is also important for performance since it removes redundent spills and enables rematerialization. To solve the compile time issue, the patch removes analyzeSiblingValues and replaces it with lower cost alternatives containing two parts. The first part creates a new spill hoisting method in postOptimization of register allocation. It does spill hoisting at once after all the spills are generated instead of inside every instance of selectOrSplit. The second part queries the define expr of the original register for rematerializaiton and keep it always available during register allocation even if it is already dead. It deletes those dead instructions only in postOptimization. With the two parts in the patch, it can remove analyzeSiblingValues without sacrificing performance. Patches on top of r265547: r265610 "Fix the compare-clang diff error introduced by r265547." r265639 "Fix the sanitizer bootstrap error in r265547." r265657 "InlineSpiller.cpp: Escap \@ in r265547. [-Wdocumentation]" Differential Revision: http://reviews.llvm.org/D15302 Differential Revision: http://reviews.llvm.org/D18934 Differential Revision: http://reviews.llvm.org/D18935 Differential Revision: http://reviews.llvm.org/D18936 llvm-svn: 266162 2016-04-13 11:08:27 +08:00			`%tmp6 = add nsw i64 %smax53, 1`
			`%tmp7 = sub nsw i64 %tmp6, %tmp4`
			`%tmp8 = add nsw i64 %tmp7, -8`
			`%tmp9 = sub i32 undef, %indvar`
			`%tmp10 = icmp sgt i64 %tmp2, 0`
			`%smax40 = select i1 %tmp10, i64 %tmp2, i64 0`
			`%scevgep41 = getelementptr i32, i32* %tmp1, i64 %smax40`
			`%indvars.iv30 = add i32 %indvars.iv30.in, -1`
			`%tmp11 = icmp sgt i32 %indvars.iv30, 0`
			`%smax = select i1 %tmp11, i32 %indvars.iv30, i32 0`
			`%tmp12 = zext i32 %smax to i64`
			`%sub = sub nsw i32 %p1, %c.0`
			`%cmp = icmp sgt i32 %sub, 0`
			`%sub. = select i1 %cmp, i32 %sub, i32 0`
			`%cmp326 = icmp sgt i32 %k.0, %p1`
			`br i1 %cmp326, label %for.cond4.preheader, label %for.body.preheader`

			`for.cond4.preheader: ; preds = %for.body, %for.cond`
			`%k.1.lcssa = phi i32 [ %k.0, %for.cond ], [ %add, %for.body ]`
			`%cmp528 = icmp sgt i32 %sub., %p1`
			`br i1 %cmp528, label %for.inc14, label %for.body6.preheader`

			`for.body6.preheader: ; preds = %for.cond4.preheader`
			`br i1 undef, label %for.body6, label %min.iters.checked`

			`min.iters.checked: ; preds = %for.body6.preheader`
			`br i1 undef, label %for.body6, label %vector.memcheck`

			`vector.memcheck: ; preds = %min.iters.checked`
			`%bound1 = icmp ule i32* undef, %scevgep41`
			`%memcheck.conflict = and i1 undef, %bound1`
			`br i1 %memcheck.conflict, label %for.body6, label %vector.body.preheader`

			`vector.body.preheader: ; preds = %vector.memcheck`
			`%lcmp.mod = icmp eq i64 undef, 0`
			`br i1 %lcmp.mod, label %vector.body.preheader.split, label %vector.body.prol`

			`vector.body.prol: ; preds = %vector.body.prol, %vector.body.preheader`
			`%prol.iter.cmp = icmp eq i64 undef, 0`
			`br i1 %prol.iter.cmp, label %vector.body.preheader.split, label %vector.body.prol`

			`vector.body.preheader.split: ; preds = %vector.body.prol, %vector.body.preheader`
			`%tmp13 = icmp ult i64 %tmp8, 24`
			`br i1 %tmp13, label %middle.block, label %vector.body`

			`vector.body: ; preds = %vector.body, %vector.body.preheader.split`
			`%index = phi i64 [ %index.next.3, %vector.body ], [ 0, %vector.body.preheader.split ]`
			`%index.next = add i64 %index, 8`
			`%offset.idx.1 = add i64 %tmp12, %index.next`
			`%tmp14 = getelementptr inbounds i32, i32* %tmp, i64 %offset.idx.1`
			`%tmp15 = bitcast i32* %tmp14 to <4 x i32>*`
			`%wide.load.1 = load <4 x i32>, <4 x i32>* %tmp15, align 4`
			`%tmp16 = getelementptr inbounds i32, i32* %tmp1, i64 %offset.idx.1`
			`%tmp17 = bitcast i32* %tmp16 to <4 x i32>*`
			`store <4 x i32> %wide.load.1, <4 x i32>* %tmp17, align 4`
			`%index.next.3 = add i64 %index, 32`
			`br i1 undef, label %middle.block, label %vector.body`

			`middle.block: ; preds = %vector.body, %vector.body.preheader.split`
			`br i1 undef, label %for.inc14, label %for.body6`

Generalize MergeBlockIntoPredecessor. Replace uses of MergeBasicBlockIntoOnlyPred. Summary: Two utils methods have essentially the same functionality. This is an attempt to merge them into one. 1. lib/Transforms/Utils/Local.cpp : MergeBasicBlockIntoOnlyPred 2. lib/Transforms/Utils/BasicBlockUtils.cpp : MergeBlockIntoPredecessor Prior to the patch: 1. MergeBasicBlockIntoOnlyPred Updates either DomTree or DeferredDominance Moves all instructions from Pred to BB, deletes Pred Asserts BB has single predecessor If address was taken, replace the block address with constant 1 (?) 2. MergeBlockIntoPredecessor Updates DomTree, LoopInfo and MemoryDependenceResults Moves all instruction from BB to Pred, deletes BB Returns if doesn't have a single predecessor Returns if BB's address was taken After the patch: Method 2. MergeBlockIntoPredecessor is attempting to become the new default: Updates DomTree or DeferredDominance, and LoopInfo and MemoryDependenceResults Moves all instruction from BB to Pred, deletes BB Returns if doesn't have a single predecessor Returns if BB's address was taken Uses of MergeBasicBlockIntoOnlyPred that need to be replaced: 1. lib/Transforms/Scalar/LoopSimplifyCFG.cpp Updated in this patch. No challenges. 2. lib/CodeGen/CodeGenPrepare.cpp Updated in this patch. i. eliminateFallThrough is straightforward, but I added using a temporary array to avoid the iterator invalidation. ii. eliminateMostlyEmptyBlock(s) methods also now use a temporary array for blocks Some interesting aspects: - Since Pred is not deleted (BB is), the entry block does not need updating. - The entry block was being updated with the deleted block in eliminateMostlyEmptyBlock. Added assert to make obvious that BB=SinglePred. - isMergingEmptyBlockProfitable assumes BB is the one to be deleted. - eliminateMostlyEmptyBlock(BB) does not delete BB on one path, it deletes its unique predecessor instead. 
- adding some test owner as subscribers for the interesting tests modified: test/CodeGen/X86/avx-cmp.ll test/CodeGen/AMDGPU/nested-loop-conditions.ll test/CodeGen/AMDGPU/si-annotate-cf.ll test/CodeGen/X86/hoist-spill.ll test/CodeGen/X86/2006-11-17-IllegalMove.ll 3. lib/Transforms/Scalar/JumpThreading.cpp Not covered in this patch. It is the only use case using the DeferredDominance. I would defer to Brian Rzycki to make this replacement. Reviewers: chandlerc, spatel, davide, brzycki, bkramer, javed.absar Subscribers: qcolombet, sanjoy, nemanjai, nhaehnle, jlebar, tpr, kbarton, RKSimon, wmi, arsenm, llvm-commits Differential Revision: https://reviews.llvm.org/D48202 llvm-svn: 335183 2018-06-21 06:01:04 +08:00			`for.body.preheader: ; preds = %for.cond`
			`br label %for.body`

Recommit r265547, and r265610,r265639,r265657 on top of it, plus two fixes with one about error verify-regalloc reported, and another about live range update of phi after rematerialization. r265547: Replace analyzeSiblingValues with new algorithm to fix its compile time issue. The patch is to solve PR17409 and its duplicates. analyzeSiblingValues is a N x N complexity algorithm where N is the number of siblings generated by reg splitting. Although it causes siginificant compile time issue when N is large, it is also important for performance since it removes redundent spills and enables rematerialization. To solve the compile time issue, the patch removes analyzeSiblingValues and replaces it with lower cost alternatives containing two parts. The first part creates a new spill hoisting method in postOptimization of register allocation. It does spill hoisting at once after all the spills are generated instead of inside every instance of selectOrSplit. The second part queries the define expr of the original register for rematerializaiton and keep it always available during register allocation even if it is already dead. It deletes those dead instructions only in postOptimization. With the two parts in the patch, it can remove analyzeSiblingValues without sacrificing performance. Patches on top of r265547: r265610 "Fix the compare-clang diff error introduced by r265547." r265639 "Fix the sanitizer bootstrap error in r265547." r265657 "InlineSpiller.cpp: Escap \@ in r265547. [-Wdocumentation]" Differential Revision: http://reviews.llvm.org/D15302 Differential Revision: http://reviews.llvm.org/D18934 Differential Revision: http://reviews.llvm.org/D18935 Differential Revision: http://reviews.llvm.org/D18936 llvm-svn: 266162 2016-04-13 11:08:27 +08:00			`for.body: ; preds = %for.body, %for.body.preheader`
			`%k.127 = phi i32 [ %k.0, %for.body.preheader ], [ %add, %for.body ]`
			`%add = add nsw i32 %k.127, 1`
			`%tmp18 = load i32, i32* undef, align 4`
			`store i32 %tmp18, i32* @b, align 4`
			`br i1 undef, label %for.body, label %for.cond4.preheader`

			`for.body6: ; preds = %for.body6, %middle.block, %vector.memcheck, %min.iters.checked, %for.body6.preheader`
			`%indvars.iv32 = phi i64 [ undef, %for.body6 ], [ %tmp12, %vector.memcheck ], [ %tmp12, %min.iters.checked ], [ %tmp12, %for.body6.preheader ], [ undef, %middle.block ]`
			`%arrayidx8 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv32`
			`%tmp19 = load i32, i32* %arrayidx8, align 4`
			`%arrayidx10 = getelementptr inbounds i32, i32* %tmp1, i64 %indvars.iv32`
			`store i32 %tmp19, i32* %arrayidx10, align 4`
			`%cmp5 = icmp slt i64 %indvars.iv32, undef`
			`br i1 %cmp5, label %for.body6, label %for.inc14`

			`for.inc14: ; preds = %for.body6, %middle.block, %for.cond4.preheader`
			`%inc15 = add nuw nsw i32 %c.0, 1`
			`%indvar.next = add i32 %indvar, 1`
			`br label %for.cond`
			`}`