diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index bead39225b82..6f6685bde224 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -18,10 +18,13 @@ // // This pass has three parts: // 1. The main loop pass that drives the different parts. -// 2. LoopVectorizationLegality - A helper class that checks for the legality +// 2. LoopVectorizationLegality - A unit that checks for the legality // of the vectorization. -// 3. SingleBlockLoopVectorizer - A helper class that performs the actual +// 3. SingleBlockLoopVectorizer - A unit that performs the actual // widening of instructions. +// 4. LoopVectorizationCostModel - A unit that checks for the profitability +// of vectorization. It decides on the optimal vector width, which +// can be one, if vectorization is not profitable. //===----------------------------------------------------------------------===// // // The reduction-variable vectorization is based on the paper: @@ -51,13 +54,14 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/TargetTransformInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -67,13 +71,14 @@ using namespace llvm; static cl::opt -DefaultVectorizationFactor("default-loop-vectorize-width", - cl::init(4), cl::Hidden, - cl::desc("Set the default loop vectorization width")); +VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, + cl::desc("Set the default vectorization width. Zero is autoselect.")); + namespace { -// Forward declaration. +// Forward declarations. class LoopVectorizationLegality; +class LoopVectorizationCostModel; /// SingleBlockLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). @@ -229,11 +234,10 @@ public: /// of the reductions that were found in the loop. typedef DenseMap ReductionList; - /// Returns the maximum vectorization factor that we *can* use to vectorize - /// this loop. This does not mean that it is profitable to vectorize this - /// loop, only that it is legal to do so. This may be a large number. We - /// can vectorize to any SIMD width below this number. - unsigned getLoopMaxVF(); + /// Returns true if it is legal to vectorize this loop. + /// This does not mean that it is profitable to vectorize this + /// loop, only that it is legal to do so. + bool canVectorize(); /// Returns the Induction variable. PHINode *getInduction() {return Induction;} @@ -286,6 +290,49 @@ private: SmallPtrSet AllowedExit; }; +/// LoopVectorizationCostModel - estimates the expected speedups due to +/// vectorization. +/// In many cases vectorization is not profitable. This can happen because +/// of a number of reasons. In this class we mainly attempt to predict +/// the expected speedup/slowdowns due to the supported instruction set. +/// We use the VectorTargetTransformInfo to query the different backends +/// for the cost of different operations. +class LoopVectorizationCostModel { +public: + /// C'tor. + LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl, + LoopVectorizationLegality *Leg, + const VectorTargetTransformInfo *Vtti): + TheLoop(Lp), SE(Se), DL(Dl), Legal(Leg), VTTI(Vtti) { } + + /// Returns the most profitable vectorization factor for the loop that is + /// smaller or equal to the VF argument. This method checks every power + /// of two up to VF. + unsigned findBestVectorizationFactor(unsigned VF = 4); + +private: + /// Returns the expected execution cost. The unit of the cost does + /// not matter because we use the 'cost' units to compare different + /// vector widths. The cost that is returned is *not* normalized by + /// the factor width. + unsigned expectedCost(unsigned VF); + + /// Returns the execution time cost of an instruction for a given vector + /// width. Vector width of one means scalar. + unsigned getInstructionCost(Instruction *I, unsigned VF); + + /// The loop that we evaluate. + Loop *TheLoop; + /// Scev analysis. + ScalarEvolution *SE; + /// DataLayout analysis. + DataLayout *DL; + /// Vectorization legality. + LoopVectorizationLegality *Legal; + /// Vector target information. + const VectorTargetTransformInfo *VTTI; +}; + struct LoopVectorize : public LoopPass { static char ID; // Pass identification, replacement for typeid @@ -296,6 +343,7 @@ struct LoopVectorize : public LoopPass { ScalarEvolution *SE; DataLayout *DL; LoopInfo *LI; + TargetTransformInfo *TTI; virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { // We only vectorize innermost loops. @@ -305,25 +353,42 @@ struct LoopVectorize : public LoopPass { SE = &getAnalysis(); DL = getAnalysisIfAvailable(); LI = &getAnalysis(); + TTI = getAnalysisIfAvailable(); DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); // Check if it is legal to vectorize the loop. LoopVectorizationLegality LVL(L, SE, DL); - unsigned MaxVF = LVL.getLoopMaxVF(); - - // Check that we can vectorize this loop using the chosen vectorization - // width. - if (MaxVF < DefaultVectorizationFactor) { - DEBUG(dbgs() << "LV: non-vectorizable MaxVF ("<< MaxVF << ").\n"); + if (!LVL.canVectorize()) { + DEBUG(dbgs() << "LV: Not vectorizing.\n"); return false; } - DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< MaxVF << ").\n"); + // Select the preffered vectorization factor. + unsigned VF = 1; + if (VectorizationFactor == 0) { + const VectorTargetTransformInfo *VTTI = 0; + if (TTI) + VTTI = TTI->getVectorTargetTransformInfo(); + // Use the cost model. + LoopVectorizationCostModel CM(L, SE, DL, &LVL, VTTI); + VF = CM.findBestVectorizationFactor(); + + if (VF == 1) { + DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); + return false; + } + + } else { + // Use the user command flag. + VF = VectorizationFactor; + } + + DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ").\n"); // If we decided that it is *legal* to vectorizer the loop then do it. - SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, DefaultVectorizationFactor); + SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, VF); LB.vectorize(&LVL); DEBUG(verifyFunction(*L->getHeader()->getParent())); @@ -656,6 +721,13 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal void SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { + //===------------------------------------------------===// + // + // Notice: any optimization or new instruction that go + // into the code below should be also be implemented in + // the cost-model. + // + //===------------------------------------------------===// typedef SmallVector PhiVector; BasicBlock &BB = *OrigLoop->getHeader(); Constant *Zero = ConstantInt::get( @@ -957,18 +1029,18 @@ void SingleBlockLoopVectorizer::cleanup() { SE->forgetLoop(OrigLoop); } -unsigned LoopVectorizationLegality::getLoopMaxVF() { +bool LoopVectorizationLegality::canVectorize() { if (!TheLoop->getLoopPreheader()) { assert(false && "No preheader!!"); DEBUG(dbgs() << "LV: Loop not normalized." << "\n"); - return 1; + return false; } // We can only vectorize single basic block loops. unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1) { DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n"); - return 1; + return false; } // We need to have a loop header. @@ -978,22 +1050,22 @@ unsigned LoopVectorizationLegality::getLoopMaxVF() { // Go over each instruction and look at memory deps. if (!canVectorizeBlock(*BB)) { DEBUG(dbgs() << "LV: Can't vectorize this loop header\n"); - return 1; + return false; } // ScalarEvolution needs to be able to find the exit count. const SCEV *ExitCount = SE->getExitCount(TheLoop, BB); if (ExitCount == SE->getCouldNotCompute()) { DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); - return 1; + return false; } DEBUG(dbgs() << "LV: We can vectorize this loop!\n"); // Okay! We can vectorize. At this point we don't have any other mem analysis - // which may limit our maximum vectorization factor, so just return the - // maximum SIMD size. - return DefaultVectorizationFactor; + // which may limit our maximum vectorization factor, so just return true with + // no restrictions. + return true; } bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { @@ -1323,6 +1395,177 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { return true; } +unsigned +LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) { + if (!VTTI) { + DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n"); + return 1; + } + + float Cost = expectedCost(1); + unsigned Width = 1; + DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n"); + for (unsigned i=2; i <= VF; i*=2) { + // Notice that the vector loop needs to be executed less times, so + // we need to divide the cost of the vector loops by the width of + // the vector elements. + float VectorCost = expectedCost(i) / (float)i; + DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " << + (int)VectorCost << ".\n"); + if (VectorCost < Cost) { + Cost = VectorCost; + Width = i; + } + } + + DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n"); + return Width; +} + +unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { + // We can only estimate the cost of single basic block loops. + assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop"); + + BasicBlock *BB = TheLoop->getHeader(); + unsigned Cost = 0; + + // For each instruction in the old loop. + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + Instruction *Inst = it; + Cost += getInstructionCost(Inst, VF); + } + + // Return the cost divided by VF, because we will be executing + // less iterations of the vector form. + return Cost; +} + +unsigned +LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { + assert(VTTI && "Invalid vector target transformation info"); + switch (I->getOpcode()) { + case Instruction::Br: { + return VTTI->getInstrCost(I->getOpcode()); + } + case Instruction::PHI: + // PHIs are handled the same as the binary instructions below. + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + Type *VTy = VectorType::get(I->getType(), VF); + return VTTI->getInstrCost(I->getOpcode(), VTy); + } + case Instruction::Select: { + SelectInst *SI = cast(I); + Type *VTy = VectorType::get(I->getType(), VF); + const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); + bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); + Type *CondTy = SI->getCondition()->getType(); + if (ScalarCond) + CondTy = VectorType::get(CondTy, VF); + + return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF); + return VTTI->getInstrCost(I->getOpcode(), VTy); + } + case Instruction::Store: { + StoreInst *SI = cast(I); + Type *VTy = VectorType::get(SI->getValueOperand()->getType(), VF); + + // Scalarized stores. + if (!Legal->isConsecutiveGep(SI->getPointerOperand())) { + unsigned Cost = 0; + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy); + // The cost of extracting from the vector value. + Cost += VF * ExtCost; + // The cost of the scalar stores. + Cost += VF * VTTI->getInstrCost(I->getOpcode(), VTy->getScalarType()); + return Cost; + } + + // Wide stores. + return VTTI->getMemoryOpCost(I->getOpcode(), VTy, SI->getAlignment(), + SI->getPointerAddressSpace()); + } + case Instruction::Load: { + LoadInst *LI = cast(I); + Type *VTy = VectorType::get(I->getType(), VF); + + // Scalarized loads. + if (!Legal->isConsecutiveGep(LI->getPointerOperand())) { + unsigned Cost = 0; + unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy); + // The cost of inserting the loaded value into the result vector. + Cost += VF * InCost; + // The cost of the scalar stores. + Cost += VF * VTTI->getInstrCost(I->getOpcode(), VTy->getScalarType()); + return Cost; + } + + // Wide loads. + return VTTI->getMemoryOpCost(I->getOpcode(), VTy, LI->getAlignment(), + LI->getPointerAddressSpace()); + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + Type *SrcTy = VectorType::get(I->getOperand(0)->getType(), VF); + Type *DstTy = VectorType::get(I->getType(), VF); + return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy); + } + default: { + // We are scalarizing the instruction. Return the cost of the scalar + // instruction, plus the cost of insert and extract into vector + // elements, times the vector width. + unsigned Cost = 0; + Type *Ty = I->getType(); + + if (!Ty->isVoidTy()) { + Type *VTy = VectorType::get(Ty, VF); + unsigned InsCost = VTTI->getInstrCost(Instruction::InsertElement, VTy); + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy); + Cost += VF * (InsCost + ExtCost); + } + + /// We don't have any information on the scalar instruction, but maybe + /// the target has. + /// TODO: This may be a target-specific intrinsic. + /// Need to add API for that. + Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty); + + return Cost; + } + }// end of switch. +} + + } // namespace char LoopVectorize::ID = 0; diff --git a/llvm/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/llvm/test/Transforms/LoopVectorize/2012-10-20-infloop.ll index 5caaffc8ddea..0176c9a18966 100644 --- a/llvm/test/Transforms/LoopVectorize/2012-10-20-infloop.ll +++ b/llvm/test/Transforms/LoopVectorize/2012-10-20-infloop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce ; Check that we don't fall into an infinite loop. define void @test() nounwind { diff --git a/llvm/test/Transforms/LoopVectorize/cost-model.ll b/llvm/test/Transforms/LoopVectorize/cost-model.ll new file mode 100644 index 000000000000..20c710db4662 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/cost-model.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@c = common global [2048 x i32] zeroinitializer, align 16 +@b = common global [2048 x i32] zeroinitializer, align 16 +@d = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 + +; At this point the cost model is pretty bad and we are vectorizing the code below. +; TODO: This code should not be vectorized on x86. +;CHECK: cost_model_1 +;CHECK: <4 x i32> +;CHECK: ret void +define void @cost_model_1() nounwind uwtable noinline ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = shl nsw i64 %indvars.iv, 1 + %arrayidx = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %0 + %1 = load i32* %arrayidx, align 8 + %idxprom1 = sext i32 %1 to i64 + %arrayidx2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %idxprom1 + %2 = load i32* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv + %3 = load i32* %arrayidx4, align 4 + %idxprom5 = sext i32 %3 to i64 + %arrayidx6 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %idxprom5 + store i32 %2, i32* %arrayidx6, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll b/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll index 12a91b079147..26902eba9e29 100644 --- a/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll +++ b/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll index 6fb1792b2c8d..d8942ac861f5 100644 --- a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll +++ b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Transforms/LoopVectorize/increment.ll b/llvm/test/Transforms/LoopVectorize/increment.ll index e818d685626d..069b7ea031c4 100644 --- a/llvm/test/Transforms/LoopVectorize/increment.ll +++ b/llvm/test/Transforms/LoopVectorize/increment.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Transforms/LoopVectorize/induction_plus.ll b/llvm/test/Transforms/LoopVectorize/induction_plus.ll index bd90113e5230..b31bceb50df6 100644 --- a/llvm/test/Transforms/LoopVectorize/induction_plus.ll +++ b/llvm/test/Transforms/LoopVectorize/induction_plus.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Transforms/LoopVectorize/non-const-n.ll b/llvm/test/Transforms/LoopVectorize/non-const-n.ll index 04c5c84a4f44..7727b0a2dce2 100644 --- a/llvm/test/Transforms/LoopVectorize/non-const-n.ll +++ b/llvm/test/Transforms/LoopVectorize/non-const-n.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Transforms/LoopVectorize/read-only.ll b/llvm/test/Transforms/LoopVectorize/read-only.ll index 4095ea68ef7a..b4d1bac132f0 100644 --- a/llvm/test/Transforms/LoopVectorize/read-only.ll +++ b/llvm/test/Transforms/LoopVectorize/read-only.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index 3e871b229b5c..ca51c7b37d5d 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Transforms/LoopVectorize/scalar-select.ll b/llvm/test/Transforms/LoopVectorize/scalar-select.ll index 8d5b6fd8affa..e537bde31bb0 100644 --- a/llvm/test/Transforms/LoopVectorize/scalar-select.ll +++ b/llvm/test/Transforms/LoopVectorize/scalar-select.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0"