[CGP] Allow cycles during Phi traversal in OptimizaMemoryInst

Allowing cycles in Phi traversal increases the scope of optimize memory instruction in case we are in loop. The added test shows an example of enabling optimization inside a loop. Reviewers: loladiro, spatel, efriedma Reviewed By: efriedma Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D35294 llvm-svn: 308419
2017-07-19 04:49:17 +00:00 · 2017-07-19 04:49:17 +00:00 · 4ea855ebe5
parent bb83558f00
commit 4ea855ebe5
2 changed files with 45 additions and 6 deletions
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@ -4278,11 +4278,17 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
    Value *V = worklist.back();
    worklist.pop_back();

-    // Break use-def graph loops.
-    if (!Visited.insert(V).second) {
-      AddrModeFound = false;
-      break;
-    }
+    // We allow traversing cyclic Phi nodes.
+    // In case of success after this loop we ensure that traversing through
+    // Phi nodes ends up with all cases to compute address of the form
+    //    BaseGV + Base + Scale * Index + Offset
+    // where Scale and Offset are constans and BaseGV, Base and Index
+    // are exactly the same Values in all cases.
+    // It means that BaseGV, Scale and Offset dominate our memory instruction
+    // and have the same value as they had in address computation represented
+    // as Phi. So we can safely sink address computation to memory instruction.
+    if (!Visited.insert(V).second)
+      continue;

    // For a PHI node, push all of its incoming values.
    if (PHINode *P = dyn_cast<PHINode>(V)) {
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
@ -194,7 +194,6 @@ rare.2:
  br label %fallthrough
 }

-
 declare void @slowpath(i32, i32*)

 ; Make sure we don't end up in an infinite loop after we fail to sink.
@ -218,3 +217,37 @@ load.i145:
 pl_loop.i.i122:
  br label %pl_loop.i.i122
 }
+
+; Make sure we can sink address computation even
+; if there is a cycle in phi nodes.
+define void @test9(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test9
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br label %header
+
+header:
+  %iv = phi i32 [0, %entry], [%iv.inc, %backedge]
+  %casted.loop = phi i32* [%casted, %entry], [%casted.merged, %backedge]
+  br i1 %cond, label %if.then, label %backedge
+
+if.then:
+  call void @foo(i32 %iv)
+  %addr.1 = getelementptr inbounds i64, i64* %base, i64 5
+  %casted.1 = bitcast i64* %addr.1 to i32*
+  br label %backedge
+
+backedge:
+; CHECK-LABEL: backedge:
+; CHECK: getelementptr i8, {{.+}} 40
+  %casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
+  %v = load i32, i32* %casted.merged, align 4
+  call void @foo(i32 %v)
+  %iv.inc = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.inc, 1000
+  br i1 %cmp, label %header, label %exit
+
+exit:
+  ret void
+}