; Regression tests for which instructions LICM hoists out of loops.
; The same expectations are verified under the legacy pass manager and the
; new pass manager pipeline.
; RUN: opt < %s -licm -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s

; Global loaded by test1, test2 and test3 to obtain a value unknown at compile time.
@X = global i32 0

; Opaque external call with no attributes.
declare void @foo()

declare i32 @llvm.bitreverse.i32(i32)

; LICM must not hoist a potentially trapping instruction out of a block that
; is not guaranteed to execute. The sdiv below divides by %A, which is loaded
; from @X, and it only runs when the branch in %Loop goes to %IfUnEqual, so
; it has to stay in that block.
define i32 @test1(i1 %c) {
; CHECK-LABEL: @test1(
  %A = load i32, i32* @X
  br label %Loop

Loop:                                             ; preds = %LoopTail, %0
  call void @foo( )
  br i1 %c, label %LoopTail, label %IfUnEqual

IfUnEqual:                                        ; preds = %Loop
; CHECK: IfUnEqual:
; CHECK-NEXT: sdiv i32 4, %A
  %B1 = sdiv i32 4, %A
  br label %LoopTail

LoopTail:                                         ; preds = %IfUnEqual, %Loop
  %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ]
  br i1 %c, label %Loop, label %Out

Out:                                              ; preds = %LoopTail
  %C = sub i32 %A, %B
  ret i32 %C
}

; External sink for loop values; called by test2 and test3.
declare void @foo2(i32) nounwind

;; It is ok and desirable to hoist this potentially trapping instruction.
;; The sdiv is the first instruction of the loop header and comes before the
;; call to @foo2, so no call that might fail to return can prevent it from
;; executing once the loop is entered.
define i32 @test2(i1 %c) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: load i32, i32* @X
; CHECK-NEXT: %B = sdiv i32 4, %A
  %A = load i32, i32* @X
  br label %Loop

Loop:
  ;; Should have hoisted this div!
  %B = sdiv i32 4, %A
  br label %loop2

loop2:
  call void @foo2( i32 %B )
  br i1 %c, label %Loop, label %Out

Out:
  %C = sub i32 %A, %B
  ret i32 %C
}

; This loop invariant instruction should be constant folded, not hoisted.
; The add of two constants must show up as the literal 6 at the call site.
define i32 @test3(i1 %c) {
; CHECK-LABEL: define i32 @test3(
; CHECK: call void @foo2(i32 6)
  %A = load i32, i32* @X
  br label %Loop

Loop:
  %B = add i32 4, 2
  call void @foo2( i32 %B )
  br i1 %c, label %Loop, label %Out

Out:                                              ; preds = %Loop
  %C = sub i32 %A, %B
  ret i32 %C
}

; The sdiv of two loop-invariant arguments must stay after the call inside the
; loop. @foo_may_call_exit is declared without attributes, so it may never
; return, and the division (which can trap) is then not guaranteed to execute.
; CHECK-LABEL: @test4(
; CHECK: call
; CHECK: sdiv
; CHECK: ret
define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  call void @foo_may_call_exit(i32 0)
  %div = sdiv i32 %x, %y
  %add = add nsw i32 %n.01, %div
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, 10000
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  %n.0.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %n.0.lcssa
}

; Opaque callee used by test4; it carries no attributes.
declare void @foo_may_call_exit(i32)

; PR14854
; Aggregate operations on the loop-invariant argument %e. The expectations
; require the extractvalue to appear before the branch into %tailrecurse and
; the insertvalue to appear after the %ifend label, i.e. outside the loop on
; both sides.
; CHECK-LABEL: @test5(
; CHECK: extractvalue
; CHECK: br label %tailrecurse
; CHECK: tailrecurse:
; CHECK: ifend:
; CHECK: insertvalue
define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
entry:
  br label %tailrecurse

tailrecurse:                                      ; preds = %then, %entry
  %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
  %out = extractvalue { i32*, i32 } %e, 1
  %d = insertvalue { i32*, i32 } %e, i32* null, 0
  %cmp1 = icmp sgt i32 %out, %i.tr
  br i1 %cmp1, label %then, label %ifend

then:                                             ; preds = %tailrecurse
  call void @foo()
  %cmp2 = add i32 %i.tr, 1
  br label %tailrecurse

ifend:                                            ; preds = %tailrecurse
  ret { i32*, i32 } %d
}

; The bitreverse intrinsic call lives in %body, which only runs while the
; header comparison holds. Its operand is the function argument, and the
; expectations require the call to appear before the branch into %header.
; The label pattern stops at the opening parenthesis so that it does not
; depend on how the unnamed argument is printed.
; CHECK-LABEL: @hoist_bitreverse(
; CHECK: bitreverse
; CHECK: br label %header
define i32 @hoist_bitreverse(i32)  {
  br label %header

header:
  %sum = phi i32 [ 0, %1 ], [ %5, %latch ]
  %2 = phi i32 [ 0, %1 ], [ %6, %latch ]
  %3 = icmp slt i32 %2, 1024
  br i1 %3, label %body, label %return

body:
  %4 = call i32 @llvm.bitreverse.i32(i32 %0)
  %5 = add i32 %sum, %4
  br label %latch

latch:
  %6 = add nsw i32 %2, 1
  br label %header

return:
  ret i32 %sum
}

; Declarations shared by the test_fence* functions below.
declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
; External function that receives the token returned by invariant.start.
declare void @escaping.invariant.start({}*) nounwind

; invariant.start dominates the load, and in this scope, the
; load is invariant. So, we can hoist the `addrld` load out of the loop.
; The label pattern ends with the opening parenthesis so that it cannot also
; match @test_fence1 .. @test_fence4.
define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) {
; CHECK-LABEL: @test_fence(
; CHECK-LABEL: entry
; CHECK: invariant.start
; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8
; CHECK: br label %loop
entry:
  %gep = getelementptr inbounds i8, i8* %addr, i64 8
  %addr.i = bitcast i8* %gep to i32 *
  store atomic i32 5, i32 * %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; Same as @test_fence, but the load is no longer invariant (presence of
; invariant.end). We cannot hoist the addrld out of loop: the expectations
; require the branch into the loop to follow invariant.end immediately.
define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) {
; CHECK-LABEL: @test_fence1(
; CHECK-LABEL: entry
; CHECK: invariant.start
; CHECK-NEXT: invariant.end
; CHECK-NEXT: br label %loop
entry:
  %gep = getelementptr inbounds i8, i8* %addr, i64 8
  %addr.i = bitcast i8* %gep to i32 *
  store atomic i32 5, i32 * %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; Same as @test_fence1, but instead of invariant.end, we have the result of
; invariant.start escaping through a call. We cannot hoist the load: no load
; may appear in the entry block before the branch into the loop.
define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) {
; CHECK-LABEL: @test_fence2(
; CHECK-LABEL: entry
; CHECK-NOT: load
; CHECK: br label %loop
entry:
  %gep = getelementptr inbounds i8, i8* %addr, i64 8
  %addr.i = bitcast i8* %gep to i32 *
  store atomic i32 5, i32 * %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  call void @escaping.invariant.start({}* %invst)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; FIXME: invariant.start dominates the load, and in this scope, the
; load is invariant. So, we can hoist the `addrld` load out of the loop.
; Here the load operand %addr.i is bitcast to i8* before being passed to
; invariant.start. The expectations below record the current behaviour:
; the load is not hoisted into the entry block.
define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) {
; CHECK-LABEL: @test_fence3(
; CHECK-LABEL: entry
; CHECK: invariant.start
; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
; CHECK: br label %loop
entry:
  %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
  %gep = bitcast i32* %addr.i to i8 *
  store atomic i32 5, i32 * %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; We should not hoist the addrld out of the loop.
; The store to %addr.i and the invariant.start call are inside the loop body,
; so the loaded location is written on every iteration before it is read.
define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) {
; CHECK-LABEL: @test_fence4(
; CHECK-LABEL: entry
; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
; CHECK: br label %loop
entry:
  %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
  %gep = bitcast i32* %addr.i to i8 *
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  store atomic i32 5, i32 * %addr.i unordered, align 8
  fence release
  %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
  %volload = load atomic i8, i8* %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, i32* %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}