LoopIdiom: Fix a serious missed optimization: we only turned top-level loops into memmove.

Thanks to Preston Briggs for catching this! llvm-svn: 167045
2012-10-30 19:49:39 +00:00 · 2012-10-30 19:49:39 +00:00 · 48a6478242
parent 2eaadd1a2d
commit 48a6478242
2 changed files with 47 additions and 4 deletions
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@ -555,10 +555,11 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
  // can safely emit a memcpy.
  OwningPtr<Dependence> Dep(DA.depends(SI, LI, true));
  if (Dep) {
-    // If there is a dependence but the direction is positive we can still
-    // safely turn this into memmove.
-    if (Dep->getLevels() != 1 ||
-        Dep->getDirection(1) != Dependence::DVEntry::GT)
+    // If there is a dependence but the direction is positive (or none) we can
+    // still safely turn this into memmove.
+    unsigned Direction = Dep->getDirection(Dep->getLevels());
+    if (Direction != Dependence::DVEntry::NONE &&
+        Direction != Dependence::DVEntry::GT)
      return false;
    isMemcpySafe = false;
  }
--- a/llvm/test/Transforms/LoopIdiom/memmove.ll
+++ b/llvm/test/Transforms/LoopIdiom/memmove.ll
@ -0,0 +1,42 @@
+; RUN: opt -S -basicaa -loop-idiom < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+declare i64 @foo() nounwind
+
+; Nested loops
+define void @test1(i8* nocapture %A, i64 %n) nounwind {
+entry:
+  %call8 = tail call i64 @foo() nounwind
+  %tobool9 = icmp eq i64 %call8, 0
+  br i1 %tobool9, label %while.end, label %for.cond.preheader.lr.ph
+
+for.cond.preheader.lr.ph:                         ; preds = %entry
+  %cmp6 = icmp eq i64 %n, 0
+  br label %for.cond.preheader
+
+while.cond.loopexit:                              ; preds = %for.body, %for.cond.preheader
+  %call = tail call i64 @foo() nounwind
+  %tobool = icmp eq i64 %call, 0
+  br i1 %tobool, label %while.end, label %for.cond.preheader
+
+for.cond.preheader:                               ; preds = %for.cond.preheader.lr.ph, %while.cond.loopexit
+  br i1 %cmp6, label %while.cond.loopexit, label %for.body
+
+for.body:                                         ; preds = %for.cond.preheader, %for.body
+  %i.07 = phi i64 [ %inc, %for.body ], [ 0, %for.cond.preheader ]
+  %add = add i64 %i.07, 10
+  %arrayidx = getelementptr inbounds i8* %A, i64 %add
+  %0 = load i8* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8* %A, i64 %i.07
+  store i8 %0, i8* %arrayidx1, align 1
+  %inc = add i64 %i.07, 1
+  %exitcond = icmp eq i64 %inc, %n
+  br i1 %exitcond, label %while.cond.loopexit, label %for.body
+
+while.end:                                        ; preds = %while.cond.loopexit, %entry
+  ret void
+
+; CHECK: @test1
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i64(
+}