diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1b242c93ba13..62542737de00 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -518,8 +518,9 @@ class LoopVectorizationCostModel { public: LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, LoopVectorizationLegality *Legal, - const TargetTransformInfo &TTI) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI) {} + const TargetTransformInfo &TTI, + DataLayout *DL) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL) {} /// Information about vectorization costs struct VectorizationFactor { @@ -575,6 +576,10 @@ private: /// the scalar type. static Type* ToVectorTy(Type *Scalar, unsigned VF); + /// Returns whether the instruction is a load or store and will be emitted + /// as a vector operation. + bool isConsecutiveLoadOrStore(Instruction *I); + /// The loop that we evaluate. Loop *TheLoop; /// Scev analysis. @@ -585,6 +590,8 @@ private: LoopVectorizationLegality *Legal; /// Vector target information. const TargetTransformInfo &TTI; + /// Target data layout information. + DataLayout *DL; }; /// The LoopVectorize Pass. @@ -624,7 +631,7 @@ struct LoopVectorize : public LoopPass { } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI); + LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL); // Check the function attribues to find out if this function should be // optimized for size. @@ -2786,14 +2793,17 @@ unsigned LoopVectorizationCostModel::getWidestType() { continue; // Examine the stored values. - if (StoreInst *ST = dyn_cast<StoreInst>(it)) + StoreInst *ST = 0; + if ((ST = dyn_cast<StoreInst>(it))) T = ST->getValueOperand()->getType(); - // Ignore stored/loaded pointer types. 
- if (T->isPointerTy()) - continue; - - MaxWidth = std::max(MaxWidth, T->getScalarSizeInBits()); + // Ignore loaded pointer types and stored pointer types that are not + // consecutive. However, we do want to take consecutive stores/loads of + // pointer vectors into account. + if (T->isPointerTy() && isConsecutiveLoadOrStore(it)) + MaxWidth = std::max(MaxWidth, DL->getPointerSizeInBits()); + else + MaxWidth = std::max(MaxWidth, T->getScalarSizeInBits()); } } @@ -3241,4 +3251,16 @@ namespace llvm { } } +bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { + // Check for a store. + StoreInst *ST = dyn_cast<StoreInst>(Inst); + if (ST) + return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0; + // Check for a load. + LoadInst *LI = dyn_cast<LoadInst>(Inst); + if (LI) + return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0; + + return false; +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll new file mode 100644 index 000000000000..c6777184d24f --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll @@ -0,0 +1,149 @@ +;RUN: opt -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%0 = type { %0*, %1 } +%1 = type { i8*, i32 } + +@p = global [2048 x [8 x i32*]] zeroinitializer, align 16 +@q = global [2048 x i16] zeroinitializer, align 16 +@r = global [2048 x i16] zeroinitializer, align 16 + +; Tests for widest type +; Ensure that we count the pointer store in the first test case. We have a +; consecutive vector of pointers store, therefore we should count it towards the +; widest vector count. 
+; +; CHECK: test_consecutive_store +; CHECK: The Widest type: 64 bits +define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 { + %4 = load %0** %2, align 8 + %5 = icmp eq %0** %0, %1 + br i1 %5, label %12, label %6 + +;