diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f5c9bb31e053..5152ec11e5b8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -715,6 +715,7 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { ValueVector Reads; ValueVector Writes; + SmallPtrSet<Value *, 16> AnalyzedPtrs; unsigned NumPhis = 0; for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { Instruction *I = it; @@ -766,7 +767,10 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { DEBUG(dbgs() << "LV: Found a non-simple load.\n"); return false; } - GetUnderlyingObjects(Ld->getPointerOperand(), Reads, DL); + + Value *Ptr = Ld->getPointerOperand(); + if (AnalyzedPtrs.insert(Ptr)) + GetUnderlyingObjects(Ptr, Reads, DL); } // Record store pointers. Abort on all other instructions that write to @@ -778,7 +782,10 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { DEBUG(dbgs() << "LV: Found a non-simple store.\n"); return false; } - GetUnderlyingObjects(St->getPointerOperand(), Writes, DL); + + Value *Ptr = St->getPointerOperand(); + if (AnalyzedPtrs.insert(Ptr)) + GetUnderlyingObjects(Ptr, Writes, DL); } // We still don't handle functions. diff --git a/llvm/test/Transforms/LoopVectorize/increment.ll b/llvm/test/Transforms/LoopVectorize/increment.ll new file mode 100644 index 000000000000..e944a9af92da --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/increment.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@a = common global [2048 x i32] zeroinitializer, align 16 + +; This is the loop. 
+;  for (i=0; i<n; i++){
+;    a[i] += i;
+;   }
+;CHECK: @inc
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @inc(i32 %n) nounwind uwtable noinline ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = trunc i64 %indvars.iv to i32
+  %5 = add nsw i32 %3, %4
+  store i32 %5, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}