[EarlyCSE] Teach about CSE'ing over invariant.start intrinsics

Summary:
Teach EarlyCSE about invariant.start intrinsic. Specifically, we can perform
store-load, load-load forwarding over this call.

Reviewers: majnemer, reames, dberlin, sanjoy

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D23268

llvm-svn: 278153
This commit is contained in:
Anna Thomas 2016-08-09 20:00:47 +00:00
parent 6b1f5f891f
commit b2d12b81c3
2 changed files with 84 additions and 0 deletions

View File

@ -562,6 +562,19 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
// Skip invariant.start intrinsics since they only read memory, and we can
// forward values across it. Also, we dont need to consume the last store
// since the semantics of invariant.start allow us to perform DSE of the
// last store, if there was a store following invariant.start. Consider:
//
// store 30, i8* p
// invariant.start(p)
// store 40, i8* p
// We can DSE the store to 30, since the store 40 to invariant location p
// causes undefined behaviour.
if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>()))
continue;
if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
if (auto *CondI =
dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {

View File

@ -0,0 +1,71 @@
; RUN: opt < %s -S -early-cse | FileCheck %s
; RUN: opt < %s -S -passes=early-cse | FileCheck %s
declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
; Check that we do load-load forwarding over invariant.start, since it does not
; clobber memory
define i8 @test1(i8 *%P) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: %V1 = load i8, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: ret i8 0
%V1 = load i8, i8* %P
%i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
%V2 = load i8, i8* %P
%Diff = sub i8 %V1, %V2
ret i8 %Diff
}
; Trivial Store->load forwarding over invariant.start
define i8 @test2(i8 *%P) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: store i8 42, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: ret i8 42
store i8 42, i8* %P
%i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
%V1 = load i8, i8* %P
ret i8 %V1
}
; We can DSE over invariant.start calls, since the first store to
; %P is valid, and the second store is actually unreachable based on semantics
; of invariant.start.
define void @test3(i8* %P) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: store i8 60, i8* %P
store i8 50, i8* %P
%i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
store i8 60, i8* %P
ret void
}
; FIXME: Now the first store can actually be eliminated, since there is no read within
; the invariant region, between start and end.
define void @test4(i8* %P) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: store i8 50, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %P)
; CHECK-NEXT: store i8 60, i8* %P
store i8 50, i8* %P
%i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %P)
store i8 60, i8* %P
ret void
}