diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index bd0f880340f5..941b50a2b256 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -16,13 +16,11 @@ #define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H #include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -36,52 +34,30 @@ class SCEV; /// Optimization analysis message produced during vectorization. Messages inform /// the user why vectorization did not occur. -class LoopAccessReport { +class VectorizationReport { std::string Message; - const Instruction *Instr; - -protected: - LoopAccessReport(const Twine &Message, const Instruction *I) - : Message(Message.str()), Instr(I) {} + Instruction *Instr; public: - LoopAccessReport(const Instruction *I = nullptr) : Instr(I) {} + VectorizationReport(Instruction *I = nullptr) + : Message("loop not vectorized: "), Instr(I) {} - template LoopAccessReport &operator<<(const A &Value) { + template VectorizationReport &operator<<(const A &Value) { raw_string_ostream Out(Message); Out << Value; return *this; } - const Instruction *getInstr() const { return Instr; } + Instruction *getInstr() { return Instr; } std::string &str() { return Message; } - const std::string &str() const { return Message; } operator Twine() { return Message; } - /// \brief Emit an analysis note for \p PassName with the debug location from - /// the instruction in \p Message if available. Otherwise use the location of - /// \p TheLoop. - static void emitAnalysis(const LoopAccessReport &Message, + /// \brief Emit an analysis note with the debug location from the instruction + /// in \p Message if available. Otherwise use the location of \p TheLoop. + static void emitAnalysis(VectorizationReport &Message, const Function *TheFunction, - const Loop *TheLoop, - const char *PassName); -}; - -/// \brief Collection of parameters shared beetween the Loop Vectorizer and the -/// Loop Access Analysis. -struct VectorizerParams { - /// \brief Maximum SIMD width. - static const unsigned MaxVectorWidth; - - /// \brief VF as overridden by the user. - static unsigned VectorizationFactor; - /// \brief Interleave factor as overridden by the user. - static unsigned VectorizationInterleave; - - /// \\brief When performing memory disambiguation checks at runtime do not - /// make more than this number of comparisons. - static const unsigned RuntimeMemoryCheckThreshold; + const Loop *TheLoop); }; /// \brief Drive the analysis of memory accesses in the loop @@ -100,6 +76,30 @@ struct VectorizerParams { /// RuntimePointerCheck class. class LoopAccessInfo { public: + /// \brief Collection of parameters used from the vectorizer. + struct VectorizerParams { + /// \brief Maximum simd width. + unsigned MaxVectorWidth; + + /// \brief VF as overridden by the user. + unsigned VectorizationFactor; + /// \brief Interleave factor as overridden by the user. + unsigned VectorizationInterleave; + + /// \\brief When performing memory disambiguation checks at runtime do not + /// make more than this number of comparisons. 
+ unsigned RuntimeMemoryCheckThreshold; + + VectorizerParams(unsigned MaxVectorWidth, + unsigned VectorizationFactor, + unsigned VectorizationInterleave, + unsigned RuntimeMemoryCheckThreshold) : + MaxVectorWidth(MaxVectorWidth), + VectorizationFactor(VectorizationFactor), + VectorizationInterleave(VectorizationInterleave), + RuntimeMemoryCheckThreshold(RuntimeMemoryCheckThreshold) {} + }; + /// This struct holds information about the memory runtime legality check that /// a group of pointers do not overlap. struct RuntimePointerCheck { @@ -120,16 +120,10 @@ public: void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, ValueToValueMap &Strides); - /// \brief No run-time memory checking is necessary. - bool empty() const { return Pointers.empty(); } - /// \brief Decide whether we need to issue a run-time check for pointer at /// index \p I and \p J to prove their independence. bool needsChecking(unsigned I, unsigned J) const; - /// \brief Print the list run-time memory checks necessary. - void print(raw_ostream &OS, unsigned Depth = 0) const; - /// This flag indicates if we need to add the runtime check. bool Need; /// Holds the pointers that we need to check. @@ -147,17 +141,19 @@ public: SmallVector AliasSetId; }; - LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout *DL, - const TargetLibraryInfo *TLI, AliasAnalysis *AA, - DominatorTree *DT, ValueToValueMap &Strides); + LoopAccessInfo(Function *F, Loop *L, ScalarEvolution *SE, + const DataLayout *DL, const TargetLibraryInfo *TLI, + AliasAnalysis *AA, DominatorTree *DT, + const VectorizerParams &VectParams) : + TheFunction(F), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), + NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), + VectParams(VectParams) {} /// Return true we can analyze the memory accesses in the loop and there are - /// no memory dependence cycles. - bool canVectorizeMemory() const { return CanVecMem; } + /// no memory dependence cycles. Replaces symbolic strides using Strides. + bool canVectorizeMemory(ValueToValueMap &Strides); - const RuntimePointerCheck *getRuntimePointerCheck() const { - return &PtrRtCheck; - } + RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; } /// Return true if the block BB needs to be predicated in order for the loop /// to be vectorized. @@ -165,7 +161,7 @@ public: DominatorTree *DT); /// Returns true if the value V is uniform within the loop. - bool isUniform(Value *V) const; + bool isUniform(Value *V); unsigned getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; } unsigned getNumStores() const { return NumStores; } @@ -176,34 +172,15 @@ public: /// Returns a pair of instructions where the first element is the first /// instruction generated in possibly a sequence of instructions and the /// second value is the final comparator value or NULL if no check is needed. - std::pair - addRuntimeCheck(Instruction *Loc) const; - - /// \brief The diagnostics report generated for the analysis. E.g. why we - /// couldn't analyze the loop. - const Optional &getReport() const { return Report; } - - /// \brief Print the information about the memory accesses in the loop. - void print(raw_ostream &OS, unsigned Depth = 0) const; - - /// \brief Used to ensure that if the analysis was run with speculating the - /// value of symbolic strides, the client queries it with the same assumption. - /// Only used in DEBUG build but we don't want NDEBUG-depedent ABI. 
- unsigned NumSymbolicStrides; + std::pair addRuntimeCheck(Instruction *Loc); private: - /// \brief Analyze the loop. Substitute symbolic strides using Strides. - void analyzeLoop(ValueToValueMap &Strides); - - /// \brief Check if the structure of the loop allows it to be analyzed by this - /// pass. - bool canAnalyzeLoop(); - - void emitAnalysis(LoopAccessReport &Message); + void emitAnalysis(VectorizationReport &Message); /// We need to check that all of the pointers in this list are disjoint /// at runtime. RuntimePointerCheck PtrRtCheck; + Function *TheFunction; Loop *TheLoop; ScalarEvolution *SE; const DataLayout *DL; @@ -216,12 +193,8 @@ private: unsigned MaxSafeDepDistBytes; - /// \brief Cache the result of analyzeLoop. - bool CanVecMem; - - /// \brief The diagnostics report generated for the analysis. E.g. why we - /// couldn't analyze the loop. - Optional Report; + /// \brief Vectorizer parameters used by the analysis. + VectorizerParams VectParams; }; Value *stripIntegerCast(Value *V); @@ -236,52 +209,6 @@ const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE, ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr = nullptr); -/// \brief This analysis provides dependence information for the memory accesses -/// of a loop. -/// -/// It runs the analysis for a loop on demand. This can be initiated by -/// querying the loop access info via LAA::getInfo. getInfo return a -/// LoopAccessInfo object. See this class for the specifics of what information -/// is provided. -class LoopAccessAnalysis : public FunctionPass { -public: - static char ID; - - LoopAccessAnalysis() : FunctionPass(ID) { - initializeLoopAccessAnalysisPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override; - - /// \brief Query the result of the loop access information for the loop \p L. - /// - /// If the client speculates (and then issues run-time checks) for the values - /// of symbolic strides, \p Strides provides the mapping (see - /// replaceSymbolicStrideSCEV). If there is no cached result available run - /// the analysis. - const LoopAccessInfo &getInfo(Loop *L, ValueToValueMap &Strides); - - void releaseMemory() override { - // Invalidate the cache when the pass is freed. - LoopAccessInfoMap.clear(); - } - - /// \brief Print the result of the analysis when invoked with -analyze. - void print(raw_ostream &OS, const Module *M = nullptr) const override; - -private: - /// \brief The cache. - DenseMap> LoopAccessInfoMap; - - // The used analysis passes. 
- ScalarEvolution *SE; - const DataLayout *DL; - const TargetLibraryInfo *TLI; - AliasAnalysis *AA; - DominatorTree *DT; -}; } // End llvm namespace #endif diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 3c69399aecf9..363937314861 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -281,7 +281,6 @@ void initializeVirtRegRewriterPass(PassRegistry&); void initializeInstSimplifierPass(PassRegistry&); void initializeUnpackMachineBundlesPass(PassRegistry&); void initializeFinalizeMachineBundlesPass(PassRegistry&); -void initializeLoopAccessAnalysisPass(PassRegistry&); void initializeLoopVectorizePass(PassRegistry&); void initializeSLPVectorizerPass(PassRegistry&); void initializeBBVectorizePass(PassRegistry&); diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index a0e88370bc2d..35c5807be085 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -23,16 +23,15 @@ #include "llvm/Transforms/Utils/VectorUtils.h" using namespace llvm; -#define DEBUG_TYPE "loop-accesses" +#define DEBUG_TYPE "loop-vectorize" -void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message, - const Function *TheFunction, - const Loop *TheLoop, - const char *PassName) { +void VectorizationReport::emitAnalysis(VectorizationReport &Message, + const Function *TheFunction, + const Loop *TheLoop) { DebugLoc DL = TheLoop->getStartLoc(); - if (const Instruction *I = Message.getInstr()) + if (Instruction *I = Message.getInstr()) DL = I->getDebugLoc(); - emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName, + emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE, *TheFunction, DL, Message.str()); } @@ -65,7 +64,7 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, const SCEV *ByOne = SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true); - DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne + DEBUG(dbgs() << "LV: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne << "\n"); return ByOne; } @@ -110,23 +109,6 @@ bool LoopAccessInfo::RuntimePointerCheck::needsChecking(unsigned I, return true; } -void LoopAccessInfo::RuntimePointerCheck::print(raw_ostream &OS, - unsigned Depth) const { - unsigned NumPointers = Pointers.size(); - if (NumPointers == 0) - return; - - OS.indent(Depth) << "Run-time memory checks:\n"; - unsigned N = 0; - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J)) { - OS.indent(Depth) << N++ << ":\n"; - OS.indent(Depth + 2) << *Pointers[I] << "\n"; - OS.indent(Depth + 2) << *Pointers[J] << "\n"; - } -} - namespace { /// \brief Analyses memory accesses in a loop. 
/// @@ -282,7 +264,7 @@ bool AccessAnalysis::canCheckPtrAtRT( RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); - DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); + DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n'); } else { CanDoRT = false; } @@ -319,7 +301,7 @@ bool AccessAnalysis::canCheckPtrAtRT( unsigned ASi = PtrI->getType()->getPointerAddressSpace(); unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); if (ASi != ASj) { - DEBUG(dbgs() << "LAA: Runtime check would require comparison between" + DEBUG(dbgs() << "LV: Runtime check would require comparison between" " different address spaces\n"); return false; } @@ -334,9 +316,9 @@ void AccessAnalysis::processMemAccesses() { // process read-only pointers. This allows us to skip dependence tests for // read-only pointers. - DEBUG(dbgs() << "LAA: Processing memory accesses...\n"); + DEBUG(dbgs() << "LV: Processing memory accesses...\n"); DEBUG(dbgs() << " AST: "; AST.dump()); - DEBUG(dbgs() << "LAA: Accesses:\n"); + DEBUG(dbgs() << "LV: Accesses:\n"); DEBUG({ for (auto A : Accesses) dbgs() << "\t" << *A.getPointer() << " (" << @@ -472,9 +454,10 @@ public: typedef PointerIntPair MemAccessInfo; typedef SmallPtrSet MemAccessInfoSet; - MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L) + MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L, + const LoopAccessInfo::VectorizerParams &VectParams) : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0), - ShouldRetryWithRuntimeCheck(false) {} + ShouldRetryWithRuntimeCheck(false), VectParams(VectParams) {} /// \brief Register the location (instructions are given increasing numbers) /// of a write access. @@ -529,6 +512,9 @@ private: /// vectorize this loop with runtime checks. bool ShouldRetryWithRuntimeCheck; + /// \brief Vectorizer parameters used by the analysis. + LoopAccessInfo::VectorizerParams VectParams; + /// \brief Check whether there is a plausible dependence between the two /// accesses. /// @@ -567,8 +553,8 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, // Make sure that the pointer does not point to aggregate types. const PointerType *PtrTy = cast(Ty); if (PtrTy->getElementType()->isAggregateType()) { - DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" - << *Ptr << "\n"); + DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr << + "\n"); return 0; } @@ -576,14 +562,14 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) { - DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " + DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer " << *Ptr << " SCEV: " << *PtrScev << "\n"); return 0; } // The accesss function must stride over the innermost loop. 
if (Lp != AR->getLoop()) { - DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << + DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " << *Ptr << " SCEV: " << *PtrScev << "\n"); } @@ -598,7 +584,7 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask); bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { - DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " + DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space " << *Ptr << " SCEV: " << *PtrScev << "\n"); return 0; } @@ -609,7 +595,7 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, // Calculate the pointer stride and check if it is consecutive. const SCEVConstant *C = dyn_cast(Step); if (!C) { - DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr << + DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr << " SCEV: " << *PtrScev << "\n"); return 0; } @@ -652,8 +638,7 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, // Store-load forwarding distance. const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize; // Maximum vector factor. - unsigned MaxVFWithoutSLForwardIssues = - VectorizerParams::MaxVectorWidth * TypeByteSize; + unsigned MaxVFWithoutSLForwardIssues = VectParams.MaxVectorWidth*TypeByteSize; if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues) MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes; @@ -666,14 +651,13 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, } if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) { - DEBUG(dbgs() << "LAA: Distance " << Distance << + DEBUG(dbgs() << "LV: Distance " << Distance << " that could cause a store-load forwarding conflict\n"); return true; } if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes && - MaxVFWithoutSLForwardIssues != - VectorizerParams::MaxVectorWidth * TypeByteSize) + MaxVFWithoutSLForwardIssues != VectParams.MaxVectorWidth*TypeByteSize) MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues; return false; } @@ -720,9 +704,9 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, const SCEV *Dist = SE->getMinusSCEV(Sink, Src); - DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink + DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink << "(Induction step: " << StrideAPtr << ")\n"); - DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " + DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to " << *InstMap[BIdx] << ": " << *Dist << "\n"); // Need consecutive accesses. 
We don't want to vectorize @@ -735,7 +719,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, const SCEVConstant *C = dyn_cast(Dist); if (!C) { - DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); + DEBUG(dbgs() << "LV: Dependence because of non-constant distance\n"); ShouldRetryWithRuntimeCheck = true; return true; } @@ -753,7 +737,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, ATy != BTy)) return true; - DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n"); + DEBUG(dbgs() << "LV: Dependence is negative: NoDep\n"); return false; } @@ -762,7 +746,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (Val == 0) { if (ATy == BTy) return false; - DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n"); + DEBUG(dbgs() << "LV: Zero dependence difference but different types\n"); return true; } @@ -771,17 +755,17 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Positive distance bigger than max vectorization factor. if (ATy != BTy) { DEBUG(dbgs() << - "LAA: ReadWrite-Write positive dependency with different types\n"); + "LV: ReadWrite-Write positive dependency with different types\n"); return false; } unsigned Distance = (unsigned) Val.getZExtValue(); // Bail out early if passed-in parameters make vectorization not feasible. - unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? - VectorizerParams::VectorizationFactor : 1); - unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ? - VectorizerParams::VectorizationInterleave : 1); + unsigned ForcedFactor = (VectParams.VectorizationFactor ? + VectParams.VectorizationFactor : 1); + unsigned ForcedUnroll = (VectParams.VectorizationInterleave ? + VectParams.VectorizationInterleave : 1); // The distance must be bigger than the size needed for a vectorized version // of the operation and the size of the vectorized operation must not be @@ -789,7 +773,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (Distance < 2*TypeByteSize || 2*TypeByteSize > MaxSafeDepDistBytes || Distance < TypeByteSize * ForcedUnroll * ForcedFactor) { - DEBUG(dbgs() << "LAA: Failure because of Positive distance " + DEBUG(dbgs() << "LV: Failure because of Positive distance " << Val.getSExtValue() << '\n'); return true; } @@ -802,7 +786,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, couldPreventStoreLoadForward(Distance, TypeByteSize)) return true; - DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << + DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() << " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n'); return false; @@ -847,56 +831,7 @@ bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets, return true; } -bool LoopAccessInfo::canAnalyzeLoop() { - // We can only analyze innermost loops. - if (!TheLoop->empty()) { - emitAnalysis(LoopAccessReport() << "loop is not the innermost loop"); - return false; - } - - // We must have a single backedge. - if (TheLoop->getNumBackEdges() != 1) { - emitAnalysis( - LoopAccessReport() << - "loop control flow is not understood by analyzer"); - return false; - } - - // We must have a single exiting block. - if (!TheLoop->getExitingBlock()) { - emitAnalysis( - LoopAccessReport() << - "loop control flow is not understood by analyzer"); - return false; - } - - // We only handle bottom-tested loops, i.e. 
loop in which the condition is - // checked at the end of each iteration. With that we can assume that all - // instructions in the loop are executed the same number of times. - if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { - emitAnalysis( - LoopAccessReport() << - "loop control flow is not understood by analyzer"); - return false; - } - - // We need to have a loop header. - DEBUG(dbgs() << "LAA: Found a loop: " << - TheLoop->getHeader()->getName() << '\n'); - - // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { - emitAnalysis(LoopAccessReport() << - "could not determine number of loop iterations"); - DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); - return false; - } - - return true; -} - -void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { +bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) { typedef SmallVector ValueVector; typedef SmallPtrSet ValueSet; @@ -913,7 +848,7 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { PtrRtCheck.Need = false; const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); - MemoryDepChecker DepChecker(SE, DL, TheLoop); + MemoryDepChecker DepChecker(SE, DL, TheLoop, VectParams); // For each block. for (Loop::block_iterator bb = TheLoop->block_begin(), @@ -936,11 +871,10 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { LoadInst *Ld = dyn_cast(it); if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { - emitAnalysis(LoopAccessReport(Ld) + emitAnalysis(VectorizationReport(Ld) << "read with atomic ordering or volatile read"); - DEBUG(dbgs() << "LAA: Found a non-simple load.\n"); - CanVecMem = false; - return; + DEBUG(dbgs() << "LV: Found a non-simple load.\n"); + return false; } NumLoads++; Loads.push_back(Ld); @@ -952,17 +886,15 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { if (it->mayWriteToMemory()) { StoreInst *St = dyn_cast(it); if (!St) { - emitAnalysis(LoopAccessReport(it) << + emitAnalysis(VectorizationReport(it) << "instruction cannot be vectorized"); - CanVecMem = false; - return; + return false; } if (!St->isSimple() && !IsAnnotatedParallel) { - emitAnalysis(LoopAccessReport(St) + emitAnalysis(VectorizationReport(St) << "write with atomic ordering or volatile write"); - DEBUG(dbgs() << "LAA: Found a non-simple store.\n"); - CanVecMem = false; - return; + DEBUG(dbgs() << "LV: Found a non-simple store.\n"); + return false; } NumStores++; Stores.push_back(St); @@ -977,9 +909,8 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { // Check if we see any stores. If there are no stores, then we don't // care if the pointers are *restrict*. 
if (!Stores.size()) { - DEBUG(dbgs() << "LAA: Found a read-only loop!\n"); - CanVecMem = true; - return; + DEBUG(dbgs() << "LV: Found a read-only loop!\n"); + return true; } AccessAnalysis::DepCandidates DependentAccesses; @@ -999,11 +930,10 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { if (isUniform(Ptr)) { emitAnalysis( - LoopAccessReport(ST) + VectorizationReport(ST) << "write to a loop invariant address could not be vectorized"); - DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n"); - CanVecMem = false; - return; + DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n"); + return false; } // If we did *not* see this pointer before, insert it to the read-write @@ -1024,10 +954,9 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { if (IsAnnotatedParallel) { DEBUG(dbgs() - << "LAA: A loop annotated parallel, ignore memory dependency " + << "LV: A loop annotated parallel, ignore memory dependency " << "checks.\n"); - CanVecMem = true; - return; + return true; } for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) { @@ -1061,9 +990,8 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { // If we write (or read-write) to a single destination and there are no // other reads in this loop then is it safe to vectorize. if (NumReadWrites == 1 && NumReads == 0) { - DEBUG(dbgs() << "LAA: Found a write-only loop!\n"); - CanVecMem = true; - return; + DEBUG(dbgs() << "LV: Found a write-only loop!\n"); + return true; } // Build dependence sets and check whether we need a runtime pointer bounds @@ -1079,7 +1007,7 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop, Strides); - DEBUG(dbgs() << "LAA: We need to do " << NumComparisons << + DEBUG(dbgs() << "LV: We need to do " << NumComparisons << " pointer comparisons.\n"); // If we only have one set of dependences to check pointers among we don't @@ -1089,36 +1017,34 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { // Check that we did not collect too many pointers or found an unsizeable // pointer. - if (!CanDoRT || - NumComparisons > VectorizerParams::RuntimeMemoryCheckThreshold) { + if (!CanDoRT || NumComparisons > VectParams.RuntimeMemoryCheckThreshold) { PtrRtCheck.reset(); CanDoRT = false; } if (CanDoRT) { - DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); + DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n"); } if (NeedRTCheck && !CanDoRT) { - emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); - DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << + emitAnalysis(VectorizationReport() << "cannot identify array bounds"); + DEBUG(dbgs() << "LV: We can't vectorize because we can't find " << "the array bounds.\n"); PtrRtCheck.reset(); - CanVecMem = false; - return; + return false; } PtrRtCheck.Need = NeedRTCheck; - CanVecMem = true; + bool CanVecMem = true; if (Accesses.isDependencyCheckNeeded()) { - DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); + DEBUG(dbgs() << "LV: Checking memory dependencies\n"); CanVecMem = DepChecker.areDepsSafe( DependentAccesses, Accesses.getDependenciesToCheck(), Strides); MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes(); if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) { - DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); + DEBUG(dbgs() << "LV: Retrying with memory checks\n"); NeedRTCheck = true; // Clear the dependency checks. 
We assume they are not needed. @@ -1131,20 +1057,18 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { TheLoop, Strides, true); // Check that we did not collect too many pointers or found an unsizeable // pointer. - if (!CanDoRT || - NumComparisons > VectorizerParams::RuntimeMemoryCheckThreshold) { + if (!CanDoRT || NumComparisons > VectParams.RuntimeMemoryCheckThreshold) { if (!CanDoRT && NumComparisons > 0) - emitAnalysis(LoopAccessReport() + emitAnalysis(VectorizationReport() << "cannot check memory dependencies at runtime"); else - emitAnalysis(LoopAccessReport() + emitAnalysis(VectorizationReport() << NumComparisons << " exceeds limit of " - << VectorizerParams::RuntimeMemoryCheckThreshold + << VectParams.RuntimeMemoryCheckThreshold << " dependent memory operations checked at runtime"); - DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); + DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n"); PtrRtCheck.reset(); - CanVecMem = false; - return; + return false; } CanVecMem = true; @@ -1152,11 +1076,13 @@ void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) { } if (!CanVecMem) - emitAnalysis(LoopAccessReport() << + emitAnalysis(VectorizationReport() << "unsafe dependent memory operations in loop"); - DEBUG(dbgs() << "LAA: We" << (NeedRTCheck ? "" : " don't") << + DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") << " need a runtime memory check.\n"); + + return CanVecMem; } bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, @@ -1168,12 +1094,11 @@ bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, return !DT->dominates(BB, Latch); } -void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) { - assert(!Report && "Multiple report generated"); - Report = Message; +void LoopAccessInfo::emitAnalysis(VectorizationReport &Message) { + VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop); } -bool LoopAccessInfo::isUniform(Value *V) const { +bool LoopAccessInfo::isUniform(Value *V) { return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); } @@ -1189,7 +1114,7 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, } std::pair -LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const { +LoopAccessInfo::addRuntimeCheck(Instruction *Loc) { Instruction *tnullptr = nullptr; if (!PtrRtCheck.Need) return std::pair(tnullptr, tnullptr); @@ -1207,12 +1132,12 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const { const SCEV *Sc = SE->getSCEV(Ptr); if (SE->isLoopInvariant(Sc, TheLoop)) { - DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << + DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" << *Ptr <<"\n"); Starts.push_back(Ptr); Ends.push_back(Ptr); } else { - DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n'); + DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n'); unsigned AS = Ptr->getType()->getPointerAddressSpace(); // Use this type for pointer arithmetic. 
@@ -1272,100 +1197,3 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const { FirstInst = getFirstInst(FirstInst, Check, Loc); return std::make_pair(FirstInst, Check); } - -LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, - const DataLayout *DL, - const TargetLibraryInfo *TLI, AliasAnalysis *AA, - DominatorTree *DT, ValueToValueMap &Strides) - : TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), NumLoads(0), - NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false) { - if (canAnalyzeLoop()) - analyzeLoop(Strides); -} - -void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { - if (CanVecMem) { - if (PtrRtCheck.empty()) - OS.indent(Depth) << "Memory dependences are safe\n"; - else - OS.indent(Depth) << "Memory dependences are safe with run-time checks\n"; - } - - if (Report) - OS.indent(Depth) << "Report: " << Report->str() << "\n"; - - // FIXME: Print unsafe dependences - - // List the pair of accesses need run-time checks to prove independence. - PtrRtCheck.print(OS, Depth); - OS << "\n"; -} - -const LoopAccessInfo &LoopAccessAnalysis::getInfo(Loop *L, - ValueToValueMap &Strides) { - auto &LAI = LoopAccessInfoMap[L]; - -#ifndef NDEBUG - assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) && - "Symbolic strides changed for loop"); -#endif - - if (!LAI) { - LAI = llvm::make_unique(L, SE, DL, TLI, AA, DT, Strides); -#ifndef NDEBUG - LAI->NumSymbolicStrides = Strides.size(); -#endif - } - return *LAI.get(); -} - -void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const { - LoopAccessAnalysis &LAA = *const_cast(this); - - LoopInfo *LI = &getAnalysis().getLoopInfo(); - ValueToValueMap NoSymbolicStrides; - - for (Loop *TopLevelLoop : *LI) - for (Loop *L : depth_first(TopLevelLoop)) { - OS.indent(2) << L->getHeader()->getName() << ":\n"; - auto &LAI = LAA.getInfo(L, NoSymbolicStrides); - LAI.print(OS, 4); - } -} - -bool LoopAccessAnalysis::runOnFunction(Function &F) { - SE = &getAnalysis(); - DL = F.getParent()->getDataLayout(); - auto *TLIP = getAnalysisIfAvailable(); - TLI = TLIP ? 
&TLIP->getTLI() : nullptr; - AA = &getAnalysis(); - DT = &getAnalysis().getDomTree(); - - return false; -} - -void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - - AU.setPreservesAll(); -} - -char LoopAccessAnalysis::ID = 0; -static const char laa_name[] = "Loop Access Analysis"; -#define LAA_NAME "loop-accesses" - -INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) - -namespace llvm { - Pass *createLAAPass() { - return new LoopAccessAnalysis(); - } -} diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index c8a7a2c27b04..f0728dccd24f 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -46,7 +46,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeJumpThreadingPass(Registry); initializeLICMPass(Registry); initializeLoopDeletionPass(Registry); - initializeLoopAccessAnalysisPass(Registry); initializeLoopInstSimplifyPass(Registry); initializeLoopRotatePass(Registry); initializeLoopStrengthReducePass(Registry); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d75eead0683a..1ddb45e5df6d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -106,19 +106,14 @@ using namespace llvm::PatternMatch; STATISTIC(LoopsVectorized, "Number of loops vectorized"); STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); -static cl::opt -VectorizationFactor("force-vector-width", cl::Hidden, - cl::desc("Sets the SIMD width. Zero is autoselect."), - cl::location(VectorizerParams::VectorizationFactor)); -unsigned VectorizerParams::VectorizationFactor = 0; +static cl::opt +VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, + cl::desc("Sets the SIMD width. Zero is autoselect.")); -static cl::opt -VectorizationInterleave("force-vector-interleave", cl::Hidden, - cl::desc("Sets the vectorization interleave count. " - "Zero is autoselect."), - cl::location( - VectorizerParams::VectorizationInterleave)); -unsigned VectorizerParams::VectorizationInterleave = 0; +static cl::opt +VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden, + cl::desc("Sets the vectorization interleave count. " + "Zero is autoselect.")); static cl::opt EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, @@ -152,10 +147,10 @@ static const unsigned TinyTripCountUnrollThreshold = 128; /// When performing memory disambiguation checks at runtime do not make more /// than this number of comparisons. -const unsigned VectorizerParams::RuntimeMemoryCheckThreshold = 8; +static const unsigned RuntimeMemoryCheckThreshold = 8; /// Maximum simd width. -const unsigned VectorizerParams::MaxVectorWidth = 64; +static const unsigned MaxVectorWidth = 64; static cl::opt ForceTargetNumScalarRegs( "force-target-num-scalar-regs", cl::init(0), cl::Hidden, @@ -224,21 +219,6 @@ class LoopVectorizationLegality; class LoopVectorizationCostModel; class LoopVectorizeHints; -/// \brief This modifies LoopAccessReport to initialize message with -/// loop-vectorizer-specific part. 
-class VectorizationReport : public LoopAccessReport { -public: - VectorizationReport(Instruction *I = nullptr) - : LoopAccessReport("loop not vectorized: ", I) {} - - /// \brief This allows promotion of the loop-access analysis report into the - /// loop-vectorizer report. It modifies the message to add the - /// loop-vectorizer-specific part of the message. - explicit VectorizationReport(const LoopAccessReport &R) - : LoopAccessReport(Twine("loop not vectorized: ") + R.str(), - R.getInstr()) {} -}; - /// InnerLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). /// This class performs the widening of scalars into vectors, or multiple @@ -567,11 +547,15 @@ public: LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL, DominatorTree *DT, TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, - const TargetTransformInfo *TTI, - LoopAccessAnalysis *LAA) + const TargetTransformInfo *TTI) : NumPredStores(0), TheLoop(L), SE(SE), DL(DL), - TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), - Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {} + TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), Induction(nullptr), + WidestIndTy(nullptr), + LAI(F, L, SE, DL, TLI, AA, DT, + LoopAccessInfo::VectorizerParams( + MaxVectorWidth, VectorizationFactor, VectorizationInterleave, + RuntimeMemoryCheckThreshold)), + HasFunNoNaNAttr(false) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -756,19 +740,19 @@ public: bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); } /// Returns the information that we collected about runtime memory check. - const LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() const { - return LAI->getRuntimePointerCheck(); + LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() { + return LAI.getRuntimePointerCheck(); } - const LoopAccessInfo *getLAI() const { - return LAI; + LoopAccessInfo *getLAI() { + return &LAI; } /// This function returns the identity element (or neutral element) for /// the operation K. static Constant *getReductionIdentity(ReductionKind K, Type *Tp); - unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } + unsigned getMaxSafeDepDistBytes() { return LAI.getMaxSafeDepDistBytes(); } bool hasStride(Value *V) { return StrideSet.count(V); } bool mustCheckStrides() { return !StrideSet.empty(); } @@ -793,10 +777,10 @@ public: return (MaskedOp.count(I) != 0); } unsigned getNumStores() const { - return LAI->getNumStores(); + return LAI.getNumStores(); } unsigned getNumLoads() const { - return LAI->getNumLoads(); + return LAI.getNumLoads(); } unsigned getNumPredStores() const { return NumPredStores; @@ -850,11 +834,9 @@ private: void collectStridedAccess(Value *LoadOrStoreInst); /// Report an analysis message to assist the user in diagnosing loops that are - /// not vectorized. These are handled as LoopAccessReport rather than - /// VectorizationReport because the << operator of VectorizationReport returns - /// LoopAccessReport. - void emitAnalysis(const LoopAccessReport &Message) { - LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME); + /// not vectorized. + void emitAnalysis(VectorizationReport &Message) { + VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop); } unsigned NumPredStores; @@ -873,11 +855,6 @@ private: const TargetTransformInfo *TTI; /// Dominator Tree. DominatorTree *DT; - // LoopAccess analysis. 
- LoopAccessAnalysis *LAA; - // And the loop-accesses info corresponding to this loop. This pointer is - // null until canVectorizeMemory sets it up. - const LoopAccessInfo *LAI; // --- vectorization state --- // @@ -899,7 +876,7 @@ private: /// This set holds the variables which are known to be uniform after /// vectorization. SmallPtrSet Uniforms; - + LoopAccessInfo LAI; /// Can we assume the absence of NaNs. bool HasFunNoNaNAttr; @@ -989,11 +966,9 @@ private: bool isConsecutiveLoadOrStore(Instruction *I); /// Report an analysis message to assist the user in diagnosing loops that are - /// not vectorized. These are handled as LoopAccessReport rather than - /// VectorizationReport because the << operator of VectorizationReport returns - /// LoopAccessReport. - void emitAnalysis(const LoopAccessReport &Message) { - LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME); + /// not vectorized. + void emitAnalysis(VectorizationReport &Message) { + VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop); } /// Values used only by @llvm.assume calls. @@ -1046,7 +1021,7 @@ class LoopVectorizeHints { bool validate(unsigned Val) { switch (Kind) { case HK_WIDTH: - return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; + return isPowerOf2_32(Val) && Val <= MaxVectorWidth; case HK_UNROLL: return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; case HK_FORCE: @@ -1282,7 +1257,6 @@ struct LoopVectorize : public FunctionPass { TargetLibraryInfo *TLI; AliasAnalysis *AA; AssumptionCache *AC; - LoopAccessAnalysis *LAA; bool DisableUnrolling; bool AlwaysVectorize; @@ -1300,7 +1274,6 @@ struct LoopVectorize : public FunctionPass { TLI = TLIP ? &TLIP->getTLI() : nullptr; AA = &getAnalysis(); AC = &getAnalysis().getAssumptionCache(F); - LAA = &getAnalysis(); // Compute some weights outside of the loop over the loops. Compute this // using a BranchProbability to re-use its scaling math. @@ -1411,7 +1384,7 @@ struct LoopVectorize : public FunctionPass { } // Check if it is legal to vectorize the loop. - LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI, LAA); + LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints); @@ -1516,7 +1489,6 @@ struct LoopVectorize : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -1688,7 +1660,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { } bool LoopVectorizationLegality::isUniform(Value *V) { - return LAI->isUniform(V); + return LAI.isUniform(V); } InnerLoopVectorizer::VectorParts& @@ -3428,7 +3400,7 @@ bool LoopVectorizationLegality::canVectorize() { collectLoopUniforms(); DEBUG(dbgs() << "LV: We can vectorize this loop" << - (LAI->getRuntimePointerCheck()->Need ? " (with a runtime bound check)" : + (LAI.getRuntimePointerCheck()->Need ? 
" (with a runtime bound check)" : "") <<"!\n"); @@ -3853,11 +3825,7 @@ void LoopVectorizationLegality::collectLoopUniforms() { } bool LoopVectorizationLegality::canVectorizeMemory() { - LAI = &LAA->getInfo(TheLoop, Strides); - auto &OptionalReport = LAI->getReport(); - if (OptionalReport) - emitAnalysis(VectorizationReport(*OptionalReport)); - return LAI->canVectorizeMemory(); + return LAI.canVectorizeMemory(Strides); } static bool hasMultipleUsesOf(Instruction *I, @@ -5032,7 +5000,6 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) -INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { diff --git a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks-no-dbg.ll b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks-no-dbg.ll deleted file mode 100644 index 62291d55b4c1..000000000000 --- a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks-no-dbg.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s - -; FIXME: This is the non-debug version of unsafe-and-rt-checks.ll not -; requiring "asserts". Once we can check memory dependences without -debug, -; we should remove this test. - -; Analyze this loop: -; for (i = 0; i < n; i++) -; A[i + 1] = A[i] * B[i] * C[i]; - -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.10.0" - -; CHECK: Report: unsafe dependent memory operations in loop - -; CHECK: Run-time memory checks: -; CHECK-NEXT: 0: -; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add -; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16* %b, i64 %storemerge3 -; CHECK-NEXT: 1: -; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add -; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16* %c, i64 %storemerge3 - -@n = global i32 20, align 4 -@B = common global i16* null, align 8 -@A = common global i16* null, align 8 -@C = common global i16* null, align 8 - -define void @f() { -entry: - %a = load i16** @A, align 8 - %b = load i16** @B, align 8 - %c = load i16** @C, align 8 - br label %for.body - -for.body: ; preds = %for.body, %entry - %storemerge3 = phi i64 [ 0, %entry ], [ %add, %for.body ] - - %arrayidxA = getelementptr inbounds i16* %a, i64 %storemerge3 - %loadA = load i16* %arrayidxA, align 2 - - %arrayidxB = getelementptr inbounds i16* %b, i64 %storemerge3 - %loadB = load i16* %arrayidxB, align 2 - - %arrayidxC = getelementptr inbounds i16* %c, i64 %storemerge3 - %loadC = load i16* %arrayidxC, align 2 - - %mul = mul i16 %loadB, %loadA - %mul1 = mul i16 %mul, %loadC - - %add = add nuw nsw i64 %storemerge3, 1 - %arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add - store i16 %mul1, i16* %arrayidxA_plus_2, align 2 - - %exitcond = icmp eq i64 %add, 20 - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body - ret void -} diff --git a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll deleted file mode 100644 index 4769a3a47a66..000000000000 --- a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll +++ /dev/null @@ -1,61 +0,0 @@ -; RUN: opt -loop-accesses -analyze < %s | FileCheck %s -; RUN: opt -loop-accesses -analyze -debug-only=loop-accesses < %s 2>&1 | FileCheck %s --check-prefix=DEBUG -; REQUIRES: asserts - -; Analyze this loop: -; 
for (i = 0; i < n; i++) -; A[i + 1] = A[i] * B[i] * C[i]; - -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.10.0" - -; CHECK: Report: unsafe dependent memory operations in loop - -; DEBUG: LAA: Distance for %loadA = load i16* %arrayidxA, align 2 to store i16 %mul1, i16* %arrayidxA_plus_2, align 2: 2 -; DEBUG-NEXT: LAA: Failure because of Positive distance 2 - -; CHECK: Run-time memory checks: -; CHECK-NEXT: 0: -; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add -; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16* %b, i64 %storemerge3 -; CHECK-NEXT: 1: -; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add -; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16* %c, i64 %storemerge3 - -@n = global i32 20, align 4 -@B = common global i16* null, align 8 -@A = common global i16* null, align 8 -@C = common global i16* null, align 8 - -define void @f() { -entry: - %a = load i16** @A, align 8 - %b = load i16** @B, align 8 - %c = load i16** @C, align 8 - br label %for.body - -for.body: ; preds = %for.body, %entry - %storemerge3 = phi i64 [ 0, %entry ], [ %add, %for.body ] - - %arrayidxA = getelementptr inbounds i16* %a, i64 %storemerge3 - %loadA = load i16* %arrayidxA, align 2 - - %arrayidxB = getelementptr inbounds i16* %b, i64 %storemerge3 - %loadB = load i16* %arrayidxB, align 2 - - %arrayidxC = getelementptr inbounds i16* %c, i64 %storemerge3 - %loadC = load i16* %arrayidxC, align 2 - - %mul = mul i16 %loadB, %loadA - %mul1 = mul i16 %mul, %loadC - - %add = add nuw nsw i64 %storemerge3, 1 - %arrayidxA_plus_2 = getelementptr inbounds i16* %a, i64 %add - store i16 %mul1, i16* %arrayidxA_plus_2, align 2 - - %exitcond = icmp eq i64 %add, 20 - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body - ret void -}
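
A minimal sketch of how a client is expected to drive the reworked LoopAccessInfo interface from the header hunk above (it mirrors what LoopVectorizationLegality now does in LoopVectorize.cpp). The wrapper function, its name, the concrete parameter values and the insertion point for the checks are illustrative assumptions, not part of the patch:

#include "llvm/Analysis/LoopAccessAnalysis.h"
// ... plus the usual analysis headers already pulled in by the enclosing pass.
using namespace llvm;

// Hypothetical helper: analyze one loop and, if required, materialize the
// run-time pointer checks before branching into the vectorized loop.
static bool analyzeAndPrepare(Function *F, Loop *L, ScalarEvolution *SE,
                              const DataLayout *DL,
                              const TargetLibraryInfo *TLI, AliasAnalysis *AA,
                              DominatorTree *DT, ValueToValueMap &Strides,
                              Instruction *MemCheckInsertPt) {
  // Thresholds are now passed in explicitly instead of read from globals.
  LoopAccessInfo::VectorizerParams Params(/*MaxVectorWidth=*/64,
                                          /*VectorizationFactor=*/0,
                                          /*VectorizationInterleave=*/0,
                                          /*RuntimeMemoryCheckThreshold=*/8);
  LoopAccessInfo LAI(F, L, SE, DL, TLI, AA, DT, Params);

  // Replaces symbolic strides using Strides and runs the dependence checks;
  // on failure a VectorizationReport has already been emitted.
  if (!LAI.canVectorizeMemory(Strides))
    return false;

  if (LAI.getRuntimePointerCheck()->Need) {
    // First element: first instruction of the generated check sequence.
    // Second element: the final comparator value, or null if no check was
    // actually needed.
    std::pair<Instruction *, Instruction *> Check =
        LAI.addRuntimeCheck(MemCheckInsertPt);
    (void)Check;
  }
  return true;
}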
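
A correspondingly small sketch of the reworked reporting path: the VectorizationReport constructor now seeds the message with "loop not vectorized: ", so callers only stream in the specific reason. Here Ld, TheFunction and TheLoop are assumed to be in scope (they match the member names used in the patch):

// On encountering, e.g., a non-simple load Ld:
VectorizationReport Report(Ld); // a null instruction falls back to the loop's
                                // start location inside emitAnalysis()
Report << "read with atomic ordering or volatile read";
VectorizationReport::emitAnalysis(Report, TheFunction, TheLoop);
// Emitted remark: "loop not vectorized: read with atomic ordering or volatile
// read", attached to Ld's debug location when available.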
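
For concreteness, a worked instance of the distance check in MemoryDepChecker::isDependent, using the access pattern from the deleted unsafe-and-rt-checks.ll test above (A[i + 1] = A[i] * B[i] * C[i] over i16). The helper name is made up; the numbers are the ones the deleted -debug-only checks expected:

// Returns true, i.e. the "Failure because of Positive distance 2" case.
static bool isUnsafePositiveDistanceExample() {
  unsigned TypeByteSize = 2;                   // sizeof(i16)
  unsigned Distance = 2;                       // SCEV distance: sink - source
  unsigned ForcedFactor = 1, ForcedUnroll = 1; // no -force-* overrides
  unsigned MaxSafeDepDistBytes = -1U;          // nothing learned yet

  // Same condition as in isDependent(): the distance must leave room for at
  // least two vector elements of this type.
  return Distance < 2 * TypeByteSize ||
         2 * TypeByteSize > MaxSafeDepDistBytes ||
         Distance < TypeByteSize * ForcedUnroll * ForcedFactor; // 2 < 4
}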
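
Lastly, a note on the flag plumbing in LoopVectorize.cpp: with VectorizerParams no longer a set of globals, -force-vector-width and -force-vector-interleave change from externally stored options (cl::opt<unsigned, true> writing through cl::location(...)) to ordinary internally stored ones that are read directly and handed to LoopAccessInfo::VectorizerParams. A minimal before/after sketch with made-up option names:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Before: the option wrote into external storage that the analysis read.
static unsigned WidthStorage = 0; // illustrative stand-in for the old global
static cl::opt<unsigned, true>
    OldForceWidth("example-old-force-width", cl::Hidden,
                  cl::desc("Sets the SIMD width. Zero is autoselect."),
                  cl::location(WidthStorage));

// After: the option owns its storage; it converts implicitly to unsigned at
// the point where VectorizerParams is constructed.
static cl::opt<unsigned>
    NewForceWidth("example-new-force-width", cl::init(0), cl::Hidden,
                  cl::desc("Sets the SIMD width. Zero is autoselect."));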