Reformat.

llvm-svn: 229651
This commit is contained in:
NAKAMURA Takumi 2015-02-18 08:36:14 +00:00
parent fa520c5f49
commit a250484c4c
4 changed files with 91 additions and 103 deletions

View File

@ -56,8 +56,7 @@ public:
/// \brief Emit an analysis note with the debug location from the instruction
/// in \p Message if available. Otherwise use the location of \p TheLoop.
static void emitAnalysis(VectorizationReport &Message,
const Function *TheFunction,
const Loop *TheLoop);
const Function *TheFunction, const Loop *TheLoop);
};
/// \brief Drive the analysis of memory accesses in the loop
@ -90,14 +89,13 @@ public:
/// make more than this number of comparisons.
unsigned RuntimeMemoryCheckThreshold;
VectorizerParams(unsigned MaxVectorWidth,
unsigned VectorizationFactor,
VectorizerParams(unsigned MaxVectorWidth, unsigned VectorizationFactor,
unsigned VectorizationInterleave,
unsigned RuntimeMemoryCheckThreshold) :
MaxVectorWidth(MaxVectorWidth),
VectorizationFactor(VectorizationFactor),
VectorizationInterleave(VectorizationInterleave),
RuntimeMemoryCheckThreshold(RuntimeMemoryCheckThreshold) {}
unsigned RuntimeMemoryCheckThreshold)
: MaxVectorWidth(MaxVectorWidth),
VectorizationFactor(VectorizationFactor),
VectorizationInterleave(VectorizationInterleave),
RuntimeMemoryCheckThreshold(RuntimeMemoryCheckThreshold) {}
};
/// This struct holds information about the memory runtime legality check that
@ -144,10 +142,10 @@ public:
LoopAccessInfo(Function *F, Loop *L, ScalarEvolution *SE,
const DataLayout *DL, const TargetLibraryInfo *TLI,
AliasAnalysis *AA, DominatorTree *DT,
const VectorizerParams &VectParams) :
TheFunction(F), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT),
NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U),
VectParams(VectParams) {}
const VectorizerParams &VectParams)
: TheFunction(F), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT),
NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U),
VectParams(VectParams) {}
/// Return true if we can analyze the memory accesses in the loop and there are
/// no memory dependence cycles. Replaces symbolic strides using Strides.

View File

@ -269,23 +269,23 @@ void initializeDataLayoutPassPass(PassRegistry &);
void initializeTargetTransformInfoWrapperPassPass(PassRegistry &);
void initializeTargetLibraryInfoWrapperPassPass(PassRegistry &);
void initializeAssumptionCacheTrackerPass(PassRegistry &);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
void initializeTypeBasedAliasAnalysisPass(PassRegistry&);
void initializeScopedNoAliasAAPass(PassRegistry&);
void initializeUnifyFunctionExitNodesPass(PassRegistry&);
void initializeUnreachableBlockElimPass(PassRegistry&);
void initializeUnreachableMachineBlockElimPass(PassRegistry&);
void initializeVerifierLegacyPassPass(PassRegistry&);
void initializeVirtRegMapPass(PassRegistry&);
void initializeVirtRegRewriterPass(PassRegistry&);
void initializeInstSimplifierPass(PassRegistry&);
void initializeUnpackMachineBundlesPass(PassRegistry&);
void initializeFinalizeMachineBundlesPass(PassRegistry&);
void initializeLoopVectorizePass(PassRegistry&);
void initializeSLPVectorizerPass(PassRegistry&);
void initializeBBVectorizePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
void initializeStackMapLivenessPass(PassRegistry&);
void initializeTwoAddressInstructionPassPass(PassRegistry &);
void initializeTypeBasedAliasAnalysisPass(PassRegistry &);
void initializeScopedNoAliasAAPass(PassRegistry &);
void initializeUnifyFunctionExitNodesPass(PassRegistry &);
void initializeUnreachableBlockElimPass(PassRegistry &);
void initializeUnreachableMachineBlockElimPass(PassRegistry &);
void initializeVerifierLegacyPassPass(PassRegistry &);
void initializeVirtRegMapPass(PassRegistry &);
void initializeVirtRegRewriterPass(PassRegistry &);
void initializeInstSimplifierPass(PassRegistry &);
void initializeUnpackMachineBundlesPass(PassRegistry &);
void initializeFinalizeMachineBundlesPass(PassRegistry &);
void initializeLoopVectorizePass(PassRegistry &);
void initializeSLPVectorizerPass(PassRegistry &);
void initializeBBVectorizePass(PassRegistry &);
void initializeMachineFunctionPrinterPassPass(PassRegistry &);
void initializeStackMapLivenessPass(PassRegistry &);
void initializeMachineCombinerPass(PassRegistry &);
void initializeLoadCombinePass(PassRegistry&);
void initializeRewriteSymbolsPass(PassRegistry&);

View File

@ -302,7 +302,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
if (ASi != ASj) {
DEBUG(dbgs() << "LV: Runtime check would require comparison between"
" different address spaces\n");
" different address spaces\n");
return false;
}
}
@ -553,8 +553,8 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
// Make sure that the pointer does not point to aggregate types.
const PointerType *PtrTy = cast<PointerType>(Ty);
if (PtrTy->getElementType()->isAggregateType()) {
DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr <<
"\n");
DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr
<< "\n");
return 0;
}
@ -562,15 +562,15 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR) {
DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "
<< *Ptr << " SCEV: " << *PtrScev << "\n");
DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer " << *Ptr
<< " SCEV: " << *PtrScev << "\n");
return 0;
}
// The access function must stride over the innermost loop.
if (Lp != AR->getLoop()) {
DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " <<
*Ptr << " SCEV: " << *PtrScev << "\n");
DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " << *Ptr
<< " SCEV: " << *PtrScev << "\n");
}
// The address calculation must not wrap. Otherwise, a dependence could be
@ -585,7 +585,7 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
<< *Ptr << " SCEV: " << *PtrScev << "\n");
<< *Ptr << " SCEV: " << *PtrScev << "\n");
return 0;
}
@ -595,8 +595,8 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
// Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C) {
DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr <<
" SCEV: " << *PtrScev << "\n");
DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr
<< " SCEV: " << *PtrScev << "\n");
return 0;
}
@ -638,8 +638,9 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
// Store-load forwarding distance.
const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
// Maximum vector factor.
unsigned MaxVFWithoutSLForwardIssues = VectParams.MaxVectorWidth*TypeByteSize;
if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
unsigned MaxVFWithoutSLForwardIssues =
VectParams.MaxVectorWidth * TypeByteSize;
if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
@ -650,14 +651,14 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
}
}
if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
DEBUG(dbgs() << "LV: Distance " << Distance <<
" that could cause a store-load forwarding conflict\n");
if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
DEBUG(dbgs() << "LV: Distance " << Distance
<< " that could cause a store-load forwarding conflict\n");
return true;
}
if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
MaxVFWithoutSLForwardIssues != VectParams.MaxVectorWidth*TypeByteSize)
MaxVFWithoutSLForwardIssues != VectParams.MaxVectorWidth * TypeByteSize)
MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
return false;
}
@ -705,9 +706,9 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink
<< "(Induction step: " << StrideAPtr << ")\n");
<< "(Induction step: " << StrideAPtr << ")\n");
DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to "
<< *InstMap[BIdx] << ": " << *Dist << "\n");
<< *InstMap[BIdx] << ": " << *Dist << "\n");
// Need consecutive accesses. We don't want to vectorize
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
@ -754,18 +755,19 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Positive distance bigger than max vectorization factor.
if (ATy != BTy) {
DEBUG(dbgs() <<
"LV: ReadWrite-Write positive dependency with different types\n");
DEBUG(dbgs()
<< "LV: ReadWrite-Write positive dependency with different types\n");
return false;
}
unsigned Distance = (unsigned) Val.getZExtValue();
// Bail out early if passed-in parameters make vectorization not feasible.
unsigned ForcedFactor = (VectParams.VectorizationFactor ?
VectParams.VectorizationFactor : 1);
unsigned ForcedUnroll = (VectParams.VectorizationInterleave ?
VectParams.VectorizationInterleave : 1);
unsigned ForcedFactor =
(VectParams.VectorizationFactor ? VectParams.VectorizationFactor : 1);
unsigned ForcedUnroll =
(VectParams.VectorizationInterleave ? VectParams.VectorizationInterleave
: 1);
// The distance must be bigger than the size needed for a vectorized version
// of the operation and the size of the vectorized operation must not be
@ -774,7 +776,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2*TypeByteSize > MaxSafeDepDistBytes ||
Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
DEBUG(dbgs() << "LV: Failure because of Positive distance "
<< Val.getSExtValue() << '\n');
<< Val.getSExtValue() << '\n');
return true;
}
@ -786,8 +788,9 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
couldPreventStoreLoadForward(Distance, TypeByteSize))
return true;
DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
" with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue()
<< " with max VF = " << MaxSafeDepDistBytes / TypeByteSize
<< '\n');
return false;
}
@ -886,8 +889,8 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
if (!St) {
emitAnalysis(VectorizationReport(it) <<
"instruction cannot be vectorized");
emitAnalysis(VectorizationReport(it)
<< "instruction cannot be vectorized");
return false;
}
if (!St->isSimple() && !IsAnnotatedParallel) {
@ -953,9 +956,8 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
}
if (IsAnnotatedParallel) {
DEBUG(dbgs()
<< "LV: A loop annotated parallel, ignore memory dependency "
<< "checks.\n");
DEBUG(dbgs() << "LV: A loop annotated parallel, ignore memory dependency "
<< "checks.\n");
return true;
}
@ -1007,8 +1009,8 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
Strides);
DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
" pointer comparisons.\n");
DEBUG(dbgs() << "LV: We need to do " << NumComparisons
<< " pointer comparisons.\n");
// If we only have one set of dependences to check pointers among we don't
// need a runtime check.
@ -1028,8 +1030,8 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
if (NeedRTCheck && !CanDoRT) {
emitAnalysis(VectorizationReport() << "cannot identify array bounds");
DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
"the array bounds.\n");
DEBUG(dbgs() << "LV: We can't vectorize because we can't find "
<< "the array bounds.\n");
PtrRtCheck.reset();
return false;
}
@ -1076,11 +1078,11 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
}
if (!CanVecMem)
emitAnalysis(VectorizationReport() <<
"unsafe dependent memory operations in loop");
emitAnalysis(VectorizationReport()
<< "unsafe dependent memory operations in loop");
DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
" need a runtime memory check.\n");
DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't")
<< " need a runtime memory check.\n");
return CanVecMem;
}
@ -1132,8 +1134,8 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) {
const SCEV *Sc = SE->getSCEV(Ptr);
if (SE->isLoopInvariant(Sc, TheLoop)) {
DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
*Ptr <<"\n");
DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" << *Ptr
<< "\n");
Starts.push_back(Ptr);
Ends.push_back(Ptr);
} else {

View File

@ -107,13 +107,13 @@ STATISTIC(LoopsVectorized, "Number of loops vectorized");
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
static cl::opt<unsigned>
VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
cl::desc("Sets the SIMD width. Zero is autoselect."));
VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
cl::desc("Sets the SIMD width. Zero is autoselect."));
static cl::opt<unsigned>
VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden,
cl::desc("Sets the vectorization interleave count. "
"Zero is autoselect."));
VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden,
cl::desc("Sets the vectorization interleave count. "
"Zero is autoselect."));
static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
@ -548,9 +548,8 @@ public:
DominatorTree *DT, TargetLibraryInfo *TLI,
AliasAnalysis *AA, Function *F,
const TargetTransformInfo *TTI)
: NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), Induction(nullptr),
WidestIndTy(nullptr),
: NumPredStores(0), TheLoop(L), SE(SE), DL(DL), TLI(TLI), TheFunction(F),
TTI(TTI), DT(DT), Induction(nullptr), WidestIndTy(nullptr),
LAI(F, L, SE, DL, TLI, AA, DT,
LoopAccessInfo::VectorizerParams(
MaxVectorWidth, VectorizationFactor, VectorizationInterleave,
@ -744,9 +743,7 @@ public:
return LAI.getRuntimePointerCheck();
}
LoopAccessInfo *getLAI() {
return &LAI;
}
LoopAccessInfo *getLAI() { return &LAI; }
/// This function returns the identity element (or neutral element) for
/// the operation K.
@ -773,18 +770,11 @@ public:
}
/// Returns true if the vector representation of the instruction \p I
/// requires a mask.
bool isMaskRequired(const Instruction* I) {
return (MaskedOp.count(I) != 0);
}
unsigned getNumStores() const {
return LAI.getNumStores();
}
unsigned getNumLoads() const {
return LAI.getNumLoads();
}
unsigned getNumPredStores() const {
return NumPredStores;
}
bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
unsigned getNumStores() const { return LAI.getNumStores(); }
unsigned getNumLoads() const { return LAI.getNumLoads(); }
unsigned getNumPredStores() const { return NumPredStores; }
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@ -875,7 +865,7 @@ private:
SmallPtrSet<Value*, 4> AllowedExit;
/// This set holds the variables which are known to be uniform after
/// vectorization.
SmallPtrSet<Instruction*, 4> Uniforms;
SmallPtrSet<Instruction *, 4> Uniforms;
LoopAccessInfo LAI;
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
@ -1659,9 +1649,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return 0;
}
bool LoopVectorizationLegality::isUniform(Value *V) {
return LAI.isUniform(V);
}
bool LoopVectorizationLegality::isUniform(Value *V) { return LAI.isUniform(V); }
InnerLoopVectorizer::VectorParts&
InnerLoopVectorizer::getVectorValue(Value *V) {
@ -3399,10 +3387,10 @@ bool LoopVectorizationLegality::canVectorize() {
// Collect all of the variables that remain uniform after vectorization.
collectLoopUniforms();
DEBUG(dbgs() << "LV: We can vectorize this loop" <<
(LAI.getRuntimePointerCheck()->Need ? " (with a runtime bound check)" :
"")
<<"!\n");
DEBUG(dbgs() << "LV: We can vectorize this loop"
<< (LAI.getRuntimePointerCheck()->Need
? " (with a runtime bound check)"
: "") << "!\n");
// Okay! We can vectorize. At this point we don't have any other mem analysis
// which may limit our maximum vectorization factor, so just return true with