forked from OSchip/llvm-project
LoopVectorizer: When -Os is used, vectorize only loops that don't require a tail loop. There is no testcase because I don't know of a way to initialize the loop vectorizer pass without adding an additional hidden flag.
llvm-svn: 169950
This commit is contained in:
parent
12023e1ec3
commit
aeb17df802
|
@ -156,7 +156,7 @@ namespace {
|
||||||
(void) llvm::createCorrelatedValuePropagationPass();
|
(void) llvm::createCorrelatedValuePropagationPass();
|
||||||
(void) llvm::createMemDepPrinter();
|
(void) llvm::createMemDepPrinter();
|
||||||
(void) llvm::createInstructionSimplifierPass();
|
(void) llvm::createInstructionSimplifierPass();
|
||||||
(void) llvm::createLoopVectorizePass();
|
(void) llvm::createLoopVectorizePass(0);
|
||||||
(void) llvm::createBBVectorizePass();
|
(void) llvm::createBBVectorizePass();
|
||||||
|
|
||||||
(void)new llvm::IntervalPartition();
|
(void)new llvm::IntervalPartition();
|
||||||
|
|
|
@ -111,7 +111,7 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
|
||||||
//
|
//
|
||||||
// LoopVectorize - Create a loop vectorization pass.
|
// LoopVectorize - Create a loop vectorization pass.
|
||||||
//
|
//
|
||||||
Pass *createLoopVectorizePass();
|
Pass *createLoopVectorizePass(bool OptForSize);
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
/// @brief Vectorize the BasicBlock.
|
/// @brief Vectorize the BasicBlock.
|
||||||
|
|
|
@ -189,7 +189,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
|
||||||
MPM.add(createLoopDeletionPass()); // Delete dead loops
|
MPM.add(createLoopDeletionPass()); // Delete dead loops
|
||||||
|
|
||||||
if (LoopVectorize && OptLevel > 1)
|
if (LoopVectorize && OptLevel > 1)
|
||||||
MPM.add(createLoopVectorizePass());
|
MPM.add(createLoopVectorizePass(SizeLevel));
|
||||||
|
|
||||||
if (!DisableUnrollLoops)
|
if (!DisableUnrollLoops)
|
||||||
MPM.add(createLoopUnrollPass()); // Unroll small loops
|
MPM.add(createLoopUnrollPass()); // Unroll small loops
|
||||||
|
|
|
@ -51,9 +51,12 @@ namespace {
|
||||||
|
|
||||||
/// The LoopVectorize Pass.
|
/// The LoopVectorize Pass.
|
||||||
struct LoopVectorize : public LoopPass {
|
struct LoopVectorize : public LoopPass {
|
||||||
static char ID; // Pass identification, replacement for typeid
|
/// Pass identification, replacement for typeid
|
||||||
|
static char ID;
|
||||||
|
/// Optimize for size. Do not generate tail loops.
|
||||||
|
bool OptForSize;
|
||||||
|
|
||||||
LoopVectorize() : LoopPass(ID) {
|
explicit LoopVectorize(bool OptSz = false) : LoopPass(ID), OptForSize(OptSz) {
|
||||||
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
|
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,23 +88,17 @@ struct LoopVectorize : public LoopPass {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Select the preferred vectorization factor.
|
// Select the preferred vectorization factor.
|
||||||
unsigned VF = 1;
|
const VectorTargetTransformInfo *VTTI = 0;
|
||||||
if (VectorizationFactor == 0) {
|
if (TTI)
|
||||||
const VectorTargetTransformInfo *VTTI = 0;
|
VTTI = TTI->getVectorTargetTransformInfo();
|
||||||
if (TTI)
|
// Use the cost model.
|
||||||
VTTI = TTI->getVectorTargetTransformInfo();
|
LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
|
||||||
// Use the cost model.
|
unsigned VF = CM.selectVectorizationFactor(OptForSize,
|
||||||
LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
|
VectorizationFactor);
|
||||||
VF = CM.findBestVectorizationFactor();
|
|
||||||
|
|
||||||
if (VF == 1) {
|
if (VF == 1) {
|
||||||
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
|
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
// Use the user command flag.
|
|
||||||
VF = VectorizationFactor;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
|
DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
|
||||||
|
@ -1886,7 +1883,48 @@ bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned
|
unsigned
|
||||||
LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
|
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
|
||||||
|
unsigned UserVF) {
|
||||||
|
if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
|
||||||
|
DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the trip count.
|
||||||
|
unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
|
||||||
|
DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n");
|
||||||
|
|
||||||
|
unsigned VF = MaxVectorSize;
|
||||||
|
|
||||||
|
// If we optimize the program for size, avoid creating the tail loop.
|
||||||
|
if (OptForSize) {
|
||||||
|
// If we are unable to calculate the trip count then don't try to vectorize.
|
||||||
|
if (TC < 2) {
|
||||||
|
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the maximum SIMD width that can fit within the trip count.
|
||||||
|
VF = TC % MaxVectorSize;
|
||||||
|
|
||||||
|
if (VF == 0)
|
||||||
|
VF = MaxVectorSize;
|
||||||
|
|
||||||
|
// If the trip count that we found modulo the vectorization factor is not
|
||||||
|
// zero then we require a tail.
|
||||||
|
if (VF < 2) {
|
||||||
|
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (UserVF != 0) {
|
||||||
|
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
|
||||||
|
DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n");
|
||||||
|
|
||||||
|
return UserVF;
|
||||||
|
}
|
||||||
|
|
||||||
if (!VTTI) {
|
if (!VTTI) {
|
||||||
DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
|
DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -2121,8 +2159,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
|
||||||
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
|
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
Pass *createLoopVectorizePass() {
|
Pass *createLoopVectorizePass(bool OptForSize = false) {
|
||||||
return new LoopVectorize();
|
return new LoopVectorize(OptForSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -420,10 +420,11 @@ public:
|
||||||
const VectorTargetTransformInfo *Vtti):
|
const VectorTargetTransformInfo *Vtti):
|
||||||
TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
|
TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
|
||||||
|
|
||||||
/// Returns the most profitable vectorization factor for the loop that is
|
/// Returns the most profitable vectorization factor in powers of two.
|
||||||
/// smaller or equal to the VF argument. This method checks every power
|
/// This method checks every power of two up to VF. If UserVF is not ZERO
|
||||||
/// of two up to VF.
|
/// then this vectorization factor will be selected if vectorization is
|
||||||
unsigned findBestVectorizationFactor(unsigned VF = MaxVectorSize);
|
/// possible.
|
||||||
|
unsigned selectVectorizationFactor(bool OptForSize, unsigned UserVF);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Returns the expected execution cost. The unit of the cost does
|
/// Returns the expected execution cost. The unit of the cost does
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
//===-- Vectorize.cpp -----------------------------------------------------===//
|
//===-- Vectorize.cpp -----------------------------------------------------===//
|
||||||
//
|
//
|
||||||
// The LLVM Compiler Infrastructure
|
// The LLVM Compiler Infrastructure
|
||||||
//
|
//
|
||||||
|
@ -39,5 +39,5 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
|
void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
|
||||||
unwrap(PM)->add(createLoopVectorizePass());
|
unwrap(PM)->add(createLoopVectorizePass(0));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue