forked from OSchip/llvm-project
[Transforms] Fix some Clang-tidy modernize and Include What You Use warnings; other minor fixes (NFC).
llvm-svn: 316128
This commit is contained in:
parent
2023e21327
commit
306d29977d
|
@ -1,4 +1,4 @@
|
|||
//===- LoopStrengthReduce.h - Loop Strength Reduce Pass -------*- C++ -*-===//
|
||||
//===- LoopStrengthReduce.h - Loop Strength Reduce Pass ---------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
|
@ -22,18 +22,21 @@
|
|||
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPSTRENGTHREDUCE_H
|
||||
#define LLVM_TRANSFORMS_SCALAR_LOOPSTRENGTHREDUCE_H
|
||||
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/LoopAnalysisManager.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/Transforms/Scalar/LoopPassManager.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class Loop;
|
||||
class LPMUpdater;
|
||||
|
||||
/// Performs Loop Strength Reduce Pass.
|
||||
class LoopStrengthReducePass : public PassInfoMixin<LoopStrengthReducePass> {
|
||||
public:
|
||||
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
|
||||
LoopStandardAnalysisResults &AR, LPMUpdater &U);
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_SCALAR_LOOPSTRENGTHREDUCE_H
|
||||
|
|
|
@ -10,12 +10,15 @@
|
|||
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
|
||||
#define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
|
||||
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/LoopAnalysisManager.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/Transforms/Scalar/LoopPassManager.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class Function;
|
||||
class Loop;
|
||||
class LPMUpdater;
|
||||
|
||||
/// Loop unroll pass that only does full loop unrolling.
|
||||
class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> {
|
||||
const int OptLevel;
|
||||
|
@ -40,6 +43,7 @@ public:
|
|||
|
||||
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//===---- MemCpyOptimizer.h - memcpy optimization ---------------*- C++ -*-===//
|
||||
//===- MemCpyOptimizer.h - memcpy optimization ------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
|
@ -16,20 +16,27 @@
|
|||
#define LLVM_TRANSFORMS_SCALAR_MEMCPYOPTIMIZER_H
|
||||
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AssumptionCache;
|
||||
class CallInst;
|
||||
class DominatorTree;
|
||||
class Function;
|
||||
class Instruction;
|
||||
class MemCpyInst;
|
||||
class MemMoveInst;
|
||||
class MemoryDependenceResults;
|
||||
class MemSetInst;
|
||||
class StoreInst;
|
||||
class TargetLibraryInfo;
|
||||
class Value;
|
||||
|
||||
class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
|
||||
MemoryDependenceResults *MD = nullptr;
|
||||
TargetLibraryInfo *TLI = nullptr;
|
||||
|
@ -41,6 +48,7 @@ public:
|
|||
MemCpyOptPass() = default;
|
||||
|
||||
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
|
||||
|
||||
// Glue for the old PM.
|
||||
bool runImpl(Function &F, MemoryDependenceResults *MD_,
|
||||
TargetLibraryInfo *TLI_,
|
||||
|
|
|
@ -23,22 +23,33 @@
|
|||
#ifndef LLVM_TRANSFORMS_SCALAR_REASSOCIATE_H
|
||||
#define LLVM_TRANSFORMS_SCALAR_REASSOCIATE_H
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class APInt;
|
||||
class BasicBlock;
|
||||
class BinaryOperator;
|
||||
class Function;
|
||||
class Instruction;
|
||||
class Value;
|
||||
|
||||
/// A private "module" namespace for types and utilities used by Reassociate.
|
||||
/// These are implementation details and should not be used by clients.
|
||||
namespace reassociate {
|
||||
|
||||
struct ValueEntry {
|
||||
unsigned Rank;
|
||||
Value *Op;
|
||||
|
||||
ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
|
||||
};
|
||||
|
||||
inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
|
||||
return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start.
|
||||
}
|
||||
|
@ -48,11 +59,13 @@ inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
|
|||
struct Factor {
|
||||
Value *Base;
|
||||
unsigned Power;
|
||||
|
||||
Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}
|
||||
};
|
||||
|
||||
class XorOpnd;
|
||||
}
|
||||
|
||||
} // end namespace reassociate
|
||||
|
||||
/// Reassociate commutative expressions.
|
||||
class ReassociatePass : public PassInfoMixin<ReassociatePass> {
|
||||
|
@ -93,6 +106,7 @@ private:
|
|||
void OptimizeInst(Instruction *I);
|
||||
Instruction *canonicalizeNegConstExpr(Instruction *I);
|
||||
};
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_SCALAR_REASSOCIATE_H
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//===-- LoopReroll.cpp - Loop rerolling pass ------------------------------===//
|
||||
//===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
|
@ -11,22 +11,42 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/AliasSetTracker.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/Use.h"
|
||||
#include "llvm/IR/User.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
@ -34,6 +54,13 @@
|
|||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Transforms/Utils/LoopUtils.h"
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -127,6 +154,7 @@ NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
|
|||
// br %cmp, header, exit
|
||||
|
||||
namespace {
|
||||
|
||||
enum IterationLimits {
|
||||
/// The maximum number of iterations that we'll try and reroll.
|
||||
IL_MaxRerollIterations = 32,
|
||||
|
@ -139,6 +167,7 @@ namespace {
|
|||
class LoopReroll : public LoopPass {
|
||||
public:
|
||||
static char ID; // Pass ID, replacement for typeid
|
||||
|
||||
LoopReroll() : LoopPass(ID) {
|
||||
initializeLoopRerollPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
@ -158,11 +187,12 @@ namespace {
|
|||
DominatorTree *DT;
|
||||
bool PreserveLCSSA;
|
||||
|
||||
typedef SmallVector<Instruction *, 16> SmallInstructionVector;
|
||||
typedef SmallSet<Instruction *, 16> SmallInstructionSet;
|
||||
using SmallInstructionVector = SmallVector<Instruction *, 16>;
|
||||
using SmallInstructionSet = SmallSet<Instruction *, 16>;
|
||||
|
||||
// Map between induction variable and its increment
|
||||
DenseMap<Instruction *, int64_t> IVToIncMap;
|
||||
|
||||
// For loop with multiple induction variable, remember the one used only to
|
||||
// control the loop.
|
||||
Instruction *LoopControlIV;
|
||||
|
@ -171,8 +201,7 @@ namespace {
|
|||
// representing a reduction. Only the last value may be used outside the
|
||||
// loop.
|
||||
struct SimpleLoopReduction {
|
||||
SimpleLoopReduction(Instruction *P, Loop *L)
|
||||
: Valid(false), Instructions(1, P) {
|
||||
SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) {
|
||||
assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
|
||||
add(L);
|
||||
}
|
||||
|
@ -204,8 +233,8 @@ namespace {
|
|||
return Instructions.size()-1;
|
||||
}
|
||||
|
||||
typedef SmallInstructionVector::iterator iterator;
|
||||
typedef SmallInstructionVector::const_iterator const_iterator;
|
||||
using iterator = SmallInstructionVector::iterator;
|
||||
using const_iterator = SmallInstructionVector::const_iterator;
|
||||
|
||||
iterator begin() {
|
||||
assert(Valid && "Using invalid reduction");
|
||||
|
@ -221,7 +250,7 @@ namespace {
|
|||
const_iterator end() const { return Instructions.end(); }
|
||||
|
||||
protected:
|
||||
bool Valid;
|
||||
bool Valid = false;
|
||||
SmallInstructionVector Instructions;
|
||||
|
||||
void add(Loop *L);
|
||||
|
@ -230,7 +259,7 @@ namespace {
|
|||
// The set of all reductions, and state tracking of possible reductions
|
||||
// during loop instruction processing.
|
||||
struct ReductionTracker {
|
||||
typedef SmallVector<SimpleLoopReduction, 16> SmallReductionVector;
|
||||
using SmallReductionVector = SmallVector<SimpleLoopReduction, 16>;
|
||||
|
||||
// Add a new possible reduction.
|
||||
void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
|
||||
|
@ -342,6 +371,7 @@ namespace {
|
|||
struct DAGRootSet {
|
||||
Instruction *BaseInst;
|
||||
SmallInstructionVector Roots;
|
||||
|
||||
// The instructions between IV and BaseInst (but not including BaseInst).
|
||||
SmallInstructionSet SubsumedInsts;
|
||||
};
|
||||
|
@ -361,15 +391,17 @@ namespace {
|
|||
|
||||
/// Stage 1: Find all the DAG roots for the induction variable.
|
||||
bool findRoots();
|
||||
|
||||
/// Stage 2: Validate if the found roots are valid.
|
||||
bool validate(ReductionTracker &Reductions);
|
||||
|
||||
/// Stage 3: Assuming validate() returned true, perform the
|
||||
/// replacement.
|
||||
/// @param IterCount The maximum iteration count of L.
|
||||
void replace(const SCEV *IterCount);
|
||||
|
||||
protected:
|
||||
typedef MapVector<Instruction*, BitVector> UsesTy;
|
||||
using UsesTy = MapVector<Instruction *, BitVector>;
|
||||
|
||||
void findRootsRecursive(Instruction *IVU,
|
||||
SmallInstructionSet SubsumedInsts);
|
||||
|
@ -412,22 +444,29 @@ namespace {
|
|||
|
||||
// The loop induction variable.
|
||||
Instruction *IV;
|
||||
|
||||
// Loop step amount.
|
||||
int64_t Inc;
|
||||
|
||||
// Loop reroll count; if Inc == 1, this records the scaling applied
|
||||
// to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
|
||||
// If Inc is not 1, Scale = Inc.
|
||||
uint64_t Scale;
|
||||
|
||||
// The roots themselves.
|
||||
SmallVector<DAGRootSet,16> RootSets;
|
||||
|
||||
// All increment instructions for IV.
|
||||
SmallInstructionVector LoopIncs;
|
||||
|
||||
// Map of all instructions in the loop (in order) to the iterations
|
||||
// they are used in (or specially, IL_All for instructions
|
||||
// used in the loop increment mechanism).
|
||||
UsesTy Uses;
|
||||
|
||||
// Map between induction variable and its increment
|
||||
DenseMap<Instruction *, int64_t> &IVToIncMap;
|
||||
|
||||
Instruction *LoopControlIV;
|
||||
};
|
||||
|
||||
|
@ -446,9 +485,11 @@ namespace {
|
|||
bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
|
||||
ReductionTracker &Reductions);
|
||||
};
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char LoopReroll::ID = 0;
|
||||
|
||||
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
|
||||
|
@ -1069,7 +1110,6 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po
|
|||
}
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
/// Get the next instruction in "In" that is a member of set Val.
|
||||
|
@ -1124,7 +1164,7 @@ static bool isIgnorableInst(const Instruction *I) {
|
|||
switch (II->getIntrinsicID()) {
|
||||
default:
|
||||
return false;
|
||||
case llvm::Intrinsic::annotation:
|
||||
case Intrinsic::annotation:
|
||||
case Intrinsic::ptr_annotation:
|
||||
case Intrinsic::var_annotation:
|
||||
// TODO: the following intrinsics may also be whitelisted:
|
||||
|
@ -1407,8 +1447,8 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
|
|||
BaseIt = nextInstr(0, Uses, Visited);
|
||||
RootIt = nextInstr(Iter, Uses, Visited);
|
||||
}
|
||||
assert (BaseIt == Uses.end() && RootIt == Uses.end() &&
|
||||
"Mismatched set sizes!");
|
||||
assert(BaseIt == Uses.end() && RootIt == Uses.end() &&
|
||||
"Mismatched set sizes!");
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
|
||||
|
|
|
@ -65,7 +65,9 @@
|
|||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/iterator_range.h"
|
||||
#include "llvm/Analysis/IVUsers.h"
|
||||
#include "llvm/Analysis/LoopAnalysisManager.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
|
@ -80,13 +82,18 @@
|
|||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/OperandTraits.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/Use.h"
|
||||
#include "llvm/IR/User.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
#include "llvm/Pass.h"
|
||||
|
@ -98,7 +105,6 @@
|
|||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Scalar/LoopPassManager.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include <algorithm>
|
||||
|
@ -107,8 +113,8 @@
|
|||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
|
@ -160,15 +166,14 @@ namespace {
|
|||
|
||||
struct MemAccessTy {
|
||||
/// Used in situations where the accessed memory type is unknown.
|
||||
static const unsigned UnknownAddressSpace = ~0u;
|
||||
static const unsigned UnknownAddressSpace =
|
||||
std::numeric_limits<unsigned>::max();
|
||||
|
||||
Type *MemTy;
|
||||
unsigned AddrSpace;
|
||||
Type *MemTy = nullptr;
|
||||
unsigned AddrSpace = UnknownAddressSpace;
|
||||
|
||||
MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {}
|
||||
|
||||
MemAccessTy(Type *Ty, unsigned AS) :
|
||||
MemTy(Ty), AddrSpace(AS) {}
|
||||
MemAccessTy() = default;
|
||||
MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
|
||||
|
||||
bool operator==(MemAccessTy Other) const {
|
||||
return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
|
||||
|
@ -209,7 +214,7 @@ namespace {
|
|||
|
||||
/// Map register candidates to information about how they are used.
|
||||
class RegUseTracker {
|
||||
typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
|
||||
using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
|
||||
|
||||
RegUsesTy RegUsesMap;
|
||||
SmallVector<const SCEV *, 16> RegSequence;
|
||||
|
@ -225,8 +230,9 @@ public:
|
|||
|
||||
void clear();
|
||||
|
||||
typedef SmallVectorImpl<const SCEV *>::iterator iterator;
|
||||
typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
|
||||
using iterator = SmallVectorImpl<const SCEV *>::iterator;
|
||||
using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;
|
||||
|
||||
iterator begin() { return RegSequence.begin(); }
|
||||
iterator end() { return RegSequence.end(); }
|
||||
const_iterator begin() const { return RegSequence.begin(); }
|
||||
|
@ -299,16 +305,16 @@ namespace {
|
|||
/// satisfying a use. It may include broken-out immediates and scaled registers.
|
||||
struct Formula {
|
||||
/// Global base address used for complex addressing.
|
||||
GlobalValue *BaseGV;
|
||||
GlobalValue *BaseGV = nullptr;
|
||||
|
||||
/// Base offset for complex addressing.
|
||||
int64_t BaseOffset;
|
||||
int64_t BaseOffset = 0;
|
||||
|
||||
/// Whether any complex addressing has a base register.
|
||||
bool HasBaseReg;
|
||||
bool HasBaseReg = false;
|
||||
|
||||
/// The scale of any complex addressing.
|
||||
int64_t Scale;
|
||||
int64_t Scale = 0;
|
||||
|
||||
/// The list of "base" registers for this use. When this is non-empty. The
|
||||
/// canonical representation of a formula is
|
||||
|
@ -328,16 +334,14 @@ struct Formula {
|
|||
|
||||
/// The 'scaled' register for this use. This should be non-null when Scale is
|
||||
/// not zero.
|
||||
const SCEV *ScaledReg;
|
||||
const SCEV *ScaledReg = nullptr;
|
||||
|
||||
/// An additional constant offset which added near the use. This requires a
|
||||
/// temporary register, but the offset itself can live in an add immediate
|
||||
/// field rather than a register.
|
||||
int64_t UnfoldedOffset;
|
||||
int64_t UnfoldedOffset = 0;
|
||||
|
||||
Formula()
|
||||
: BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
|
||||
ScaledReg(nullptr), UnfoldedOffset(0) {}
|
||||
Formula() = default;
|
||||
|
||||
void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
|
||||
|
||||
|
@ -955,6 +959,7 @@ class LSRUse;
|
|||
/// accurate cost model.
|
||||
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
|
||||
const LSRUse &LU, const Formula &F);
|
||||
|
||||
// Get the cost of the scaling factor used in F for LU.
|
||||
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
|
||||
const LSRUse &LU, const Formula &F,
|
||||
|
@ -1025,11 +1030,11 @@ private:
|
|||
/// equivalent, possibly strength-reduced, replacement.
|
||||
struct LSRFixup {
|
||||
/// The instruction which will be updated.
|
||||
Instruction *UserInst;
|
||||
Instruction *UserInst = nullptr;
|
||||
|
||||
/// The operand of the instruction which will be replaced. The operand may be
|
||||
/// used more than once; every instance will be replaced.
|
||||
Value *OperandValToReplace;
|
||||
Value *OperandValToReplace = nullptr;
|
||||
|
||||
/// If this user is to use the post-incremented value of an induction
|
||||
/// variable, this variable is non-null and holds the loop associated with the
|
||||
|
@ -1039,12 +1044,12 @@ struct LSRFixup {
|
|||
/// A constant offset to be added to the LSRUse expression. This allows
|
||||
/// multiple fixups to share the same LSRUse with different offsets, for
|
||||
/// example in an unrolled loop.
|
||||
int64_t Offset;
|
||||
int64_t Offset = 0;
|
||||
|
||||
LSRFixup() = default;
|
||||
|
||||
bool isUseFullyOutsideLoop(const Loop *L) const;
|
||||
|
||||
LSRFixup();
|
||||
|
||||
void print(raw_ostream &OS) const;
|
||||
void dump() const;
|
||||
};
|
||||
|
@ -1093,7 +1098,7 @@ public:
|
|||
// TODO: Add a generic icmp too?
|
||||
};
|
||||
|
||||
typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
|
||||
using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
|
||||
|
||||
KindType Kind;
|
||||
MemAccessTy AccessTy;
|
||||
|
@ -1102,25 +1107,25 @@ public:
|
|||
SmallVector<LSRFixup, 8> Fixups;
|
||||
|
||||
/// Keep track of the min and max offsets of the fixups.
|
||||
int64_t MinOffset;
|
||||
int64_t MaxOffset;
|
||||
int64_t MinOffset = std::numeric_limits<int64_t>::max();
|
||||
int64_t MaxOffset = std::numeric_limits<int64_t>::min();
|
||||
|
||||
/// This records whether all of the fixups using this LSRUse are outside of
|
||||
/// the loop, in which case some special-case heuristics may be used.
|
||||
bool AllFixupsOutsideLoop;
|
||||
bool AllFixupsOutsideLoop = true;
|
||||
|
||||
/// RigidFormula is set to true to guarantee that this use will be associated
|
||||
/// with a single formula--the one that initially matched. Some SCEV
|
||||
/// expressions cannot be expanded. This allows LSR to consider the registers
|
||||
/// used by those expressions without the need to expand them later after
|
||||
/// changing the formula.
|
||||
bool RigidFormula;
|
||||
bool RigidFormula = false;
|
||||
|
||||
/// This records the widest use type for any fixup using this
|
||||
/// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
|
||||
/// fixup widths to be equivalent, because the narrower one may be relying on
|
||||
/// the implicit truncation to truncate away bogus bits.
|
||||
Type *WidestFixupType;
|
||||
Type *WidestFixupType = nullptr;
|
||||
|
||||
/// A list of ways to build a value that can satisfy this user. After the
|
||||
/// list is populated, one of these is selected heuristically and used to
|
||||
|
@ -1130,10 +1135,7 @@ public:
|
|||
/// The set of register candidates used by all formulae in this LSRUse.
|
||||
SmallPtrSet<const SCEV *, 4> Regs;
|
||||
|
||||
LSRUse(KindType K, MemAccessTy AT)
|
||||
: Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
|
||||
AllFixupsOutsideLoop(true), RigidFormula(false),
|
||||
WidestFixupType(nullptr) {}
|
||||
LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}
|
||||
|
||||
LSRFixup &getNewFixup() {
|
||||
Fixups.push_back(LSRFixup());
|
||||
|
@ -1339,14 +1341,14 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
|
|||
|
||||
/// Set this cost to a losing value.
|
||||
void Cost::Lose() {
|
||||
C.Insns = ~0u;
|
||||
C.NumRegs = ~0u;
|
||||
C.AddRecCost = ~0u;
|
||||
C.NumIVMuls = ~0u;
|
||||
C.NumBaseAdds = ~0u;
|
||||
C.ImmCost = ~0u;
|
||||
C.SetupCost = ~0u;
|
||||
C.ScaleCost = ~0u;
|
||||
C.Insns = std::numeric_limits<unsigned>::max();
|
||||
C.NumRegs = std::numeric_limits<unsigned>::max();
|
||||
C.AddRecCost = std::numeric_limits<unsigned>::max();
|
||||
C.NumIVMuls = std::numeric_limits<unsigned>::max();
|
||||
C.NumBaseAdds = std::numeric_limits<unsigned>::max();
|
||||
C.ImmCost = std::numeric_limits<unsigned>::max();
|
||||
C.SetupCost = std::numeric_limits<unsigned>::max();
|
||||
C.ScaleCost = std::numeric_limits<unsigned>::max();
|
||||
}
|
||||
|
||||
/// Choose the lower cost.
|
||||
|
@ -1383,10 +1385,6 @@ LLVM_DUMP_METHOD void Cost::dump() const {
|
|||
}
|
||||
#endif
|
||||
|
||||
LSRFixup::LSRFixup()
|
||||
: UserInst(nullptr), OperandValToReplace(nullptr),
|
||||
Offset(0) {}
|
||||
|
||||
/// Test whether this fixup always uses its value outside of the given loop.
|
||||
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
|
||||
// PHI nodes use their value in their incoming blocks.
|
||||
|
@ -1579,7 +1577,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
|
|||
// ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
|
||||
// Offs is the ICmp immediate.
|
||||
if (Scale == 0)
|
||||
// The cast does the right thing with INT64_MIN.
|
||||
// The cast does the right thing with
|
||||
// std::numeric_limits<int64_t>::min().
|
||||
BaseOffset = -(uint64_t)BaseOffset;
|
||||
return TTI.isLegalICmpImmediate(BaseOffset);
|
||||
}
|
||||
|
@ -1777,22 +1776,21 @@ struct IVInc {
|
|||
Value* IVOperand;
|
||||
const SCEV *IncExpr;
|
||||
|
||||
IVInc(Instruction *U, Value *O, const SCEV *E):
|
||||
UserInst(U), IVOperand(O), IncExpr(E) {}
|
||||
IVInc(Instruction *U, Value *O, const SCEV *E)
|
||||
: UserInst(U), IVOperand(O), IncExpr(E) {}
|
||||
};
|
||||
|
||||
// The list of IV increments in program order. We typically add the head of a
|
||||
// chain without finding subsequent links.
|
||||
struct IVChain {
|
||||
SmallVector<IVInc,1> Incs;
|
||||
const SCEV *ExprBase;
|
||||
|
||||
IVChain() : ExprBase(nullptr) {}
|
||||
SmallVector<IVInc, 1> Incs;
|
||||
const SCEV *ExprBase = nullptr;
|
||||
|
||||
IVChain() = default;
|
||||
IVChain(const IVInc &Head, const SCEV *Base)
|
||||
: Incs(1, Head), ExprBase(Base) {}
|
||||
: Incs(1, Head), ExprBase(Base) {}
|
||||
|
||||
typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
|
||||
using const_iterator = SmallVectorImpl<IVInc>::const_iterator;
|
||||
|
||||
// Return the first increment in the chain.
|
||||
const_iterator begin() const {
|
||||
|
@ -1834,13 +1832,13 @@ class LSRInstance {
|
|||
LoopInfo &LI;
|
||||
const TargetTransformInfo &TTI;
|
||||
Loop *const L;
|
||||
bool Changed;
|
||||
bool Changed = false;
|
||||
|
||||
/// This is the insert position that the current loop's induction variable
|
||||
/// increment should be placed. In simple loops, this is the latch block's
|
||||
/// terminator. But in more complicated cases, this is a position which will
|
||||
/// dominate all the in-loop post-increment users.
|
||||
Instruction *IVIncInsertPos;
|
||||
Instruction *IVIncInsertPos = nullptr;
|
||||
|
||||
/// Interesting factors between use strides.
|
||||
///
|
||||
|
@ -1886,7 +1884,7 @@ class LSRInstance {
|
|||
void CollectFixupsAndInitialFormulae();
|
||||
|
||||
// Support for sharing of LSRUses between LSRFixups.
|
||||
typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
|
||||
using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
|
||||
UseMapTy UseMap;
|
||||
|
||||
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
|
||||
|
@ -2127,7 +2125,7 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
|
|||
/// unfortunately this can come up even for loops where the user didn't use
|
||||
/// a C do-while loop. For example, seemingly well-behaved top-test loops
|
||||
/// will commonly be lowered like this:
|
||||
//
|
||||
///
|
||||
/// if (n > 0) {
|
||||
/// i = 0;
|
||||
/// do {
|
||||
|
@ -2161,7 +2159,6 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
|
|||
/// This function solves this problem by detecting this type of loop and
|
||||
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
|
||||
/// the instructions for the maximum computation.
|
||||
///
|
||||
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
|
||||
// Check that the loop matches the pattern we're looking for.
|
||||
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
|
||||
|
@ -2301,7 +2298,6 @@ LSRInstance::OptimizeLoopTermCond() {
|
|||
|
||||
// Otherwise treat this as a rotated loop.
|
||||
for (BasicBlock *ExitingBlock : ExitingBlocks) {
|
||||
|
||||
// Get the terminating condition for the loop if possible. If we
|
||||
// can, we want to change it to use a post-incremented version of its
|
||||
// induction variable, to allow coalescing the live ranges for the IV into
|
||||
|
@ -3468,7 +3464,6 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
|
|||
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
|
||||
JE = AddOps.end();
|
||||
J != JE; ++J) {
|
||||
|
||||
// Loop-variant "unknown" values are uninteresting; we won't be able to
|
||||
// do anything meaningful with them.
|
||||
if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
|
||||
|
@ -3701,7 +3696,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
|
|||
// Check each interesting stride.
|
||||
for (int64_t Factor : Factors) {
|
||||
// Check that the multiplication doesn't overflow.
|
||||
if (Base.BaseOffset == INT64_MIN && Factor == -1)
|
||||
if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
|
||||
continue;
|
||||
int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
|
||||
if (NewBaseOffset / Factor != Base.BaseOffset)
|
||||
|
@ -3713,7 +3708,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
|
|||
|
||||
// Check that multiplying with the use offset doesn't overflow.
|
||||
int64_t Offset = LU.MinOffset;
|
||||
if (Offset == INT64_MIN && Factor == -1)
|
||||
if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
|
||||
continue;
|
||||
Offset = (uint64_t)Offset * Factor;
|
||||
if (Offset / Factor != LU.MinOffset)
|
||||
|
@ -3751,7 +3746,8 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
|
|||
|
||||
// Check that multiplying with the unfolded offset doesn't overflow.
|
||||
if (F.UnfoldedOffset != 0) {
|
||||
if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
|
||||
if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
|
||||
Factor == -1)
|
||||
continue;
|
||||
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
|
||||
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
|
||||
|
@ -3875,7 +3871,7 @@ struct WorkItem {
|
|||
const SCEV *OrigReg;
|
||||
|
||||
WorkItem(size_t LI, int64_t I, const SCEV *R)
|
||||
: LUIdx(LI), Imm(I), OrigReg(R) {}
|
||||
: LUIdx(LI), Imm(I), OrigReg(R) {}
|
||||
|
||||
void print(raw_ostream &OS) const;
|
||||
void dump() const;
|
||||
|
@ -3898,7 +3894,8 @@ LLVM_DUMP_METHOD void WorkItem::dump() const {
|
|||
/// opportunities between them.
|
||||
void LSRInstance::GenerateCrossUseConstantOffsets() {
|
||||
// Group the registers by their value without any added constant offset.
|
||||
typedef std::map<int64_t, const SCEV *> ImmMapTy;
|
||||
using ImmMapTy = std::map<int64_t, const SCEV *>;
|
||||
|
||||
DenseMap<const SCEV *, ImmMapTy> Map;
|
||||
DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
|
||||
SmallVector<const SCEV *, 8> Sequence;
|
||||
|
@ -4102,8 +4099,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
|
|||
|
||||
// Collect the best formula for each unique set of shared registers. This
|
||||
// is reset for each use.
|
||||
typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
|
||||
BestFormulaeTy;
|
||||
using BestFormulaeTy =
|
||||
DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;
|
||||
|
||||
BestFormulaeTy BestFormulae;
|
||||
|
||||
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
|
||||
|
@ -4190,7 +4188,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
|
|||
}
|
||||
|
||||
// This is a rough guess that seems to work fairly well.
|
||||
static const size_t ComplexityLimit = UINT16_MAX;
|
||||
static const size_t ComplexityLimit = std::numeric_limits<uint16_t>::max();
|
||||
|
||||
/// Estimate the worst-case number of solutions the solver might have to
|
||||
/// consider. It almost never considers this many solutions because it prune the
|
||||
|
@ -4374,7 +4372,8 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
|
|||
"from the Formulae with the same Scale and ScaledReg.\n");
|
||||
|
||||
// Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
|
||||
typedef DenseMap<std::pair<const SCEV *, int64_t>, size_t> BestFormulaeTy;
|
||||
using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
|
||||
|
||||
BestFormulaeTy BestFormulae;
|
||||
#ifndef NDEBUG
|
||||
bool ChangedFormulae = false;
|
||||
|
@ -4496,7 +4495,6 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
|
|||
/// Use3:
|
||||
/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
|
||||
/// reg(c) + reg({b,+,1}) 1 + 2/3
|
||||
|
||||
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
|
||||
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
|
||||
return;
|
||||
|
@ -4591,7 +4589,6 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
|
|||
print_uses(dbgs()));
|
||||
}
|
||||
|
||||
|
||||
/// Pick a register which seems likely to be profitable, and then in any use
|
||||
/// which has any reference to that register, delete all formulae which do not
|
||||
/// reference that register.
|
||||
|
@ -5238,8 +5235,7 @@ void LSRInstance::ImplementSolution(
|
|||
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
|
||||
DominatorTree &DT, LoopInfo &LI,
|
||||
const TargetTransformInfo &TTI)
|
||||
: IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false),
|
||||
IVIncInsertPos(nullptr) {
|
||||
: IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L) {
|
||||
// If LoopSimplify form is not available, stay out of trouble.
|
||||
if (!L->isLoopSimplifyForm())
|
||||
return;
|
||||
|
@ -5490,6 +5486,7 @@ PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
|
|||
}
|
||||
|
||||
char LoopStrengthReduce::ID = 0;
|
||||
|
||||
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
|
||||
"Loop Strength Reduction", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
|
||||
//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
|
@ -13,30 +13,55 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseMapInfo.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/None.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/CodeMetrics.h"
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/LoopAnalysisManager.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
|
||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
#include "llvm/IR/Constant.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/InstVisitor.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Metadata.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Scalar/LoopPassManager.h"
|
||||
#include "llvm/Transforms/Utils/LoopSimplify.h"
|
||||
#include "llvm/Transforms/Utils/LoopUtils.h"
|
||||
#include "llvm/Transforms/Utils/UnrollLoop.h"
|
||||
#include <climits>
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
|
@ -135,7 +160,7 @@ static cl::opt<bool> UnrollRevisitChildLoops(
|
|||
/// A magic value for use with the Threshold parameter to indicate
|
||||
/// that the loop unroll should be performed regardless of how much
|
||||
/// code expansion would result.
|
||||
static const unsigned NoThreshold = UINT_MAX;
|
||||
static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
|
||||
|
||||
/// Gather the various unrolling parameters based on the defaults, compiler
|
||||
/// flags, TTI overrides and user specified parameters.
|
||||
|
@ -155,8 +180,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
|
|||
UP.Count = 0;
|
||||
UP.PeelCount = 0;
|
||||
UP.DefaultUnrollRuntimeCount = 8;
|
||||
UP.MaxCount = UINT_MAX;
|
||||
UP.FullUnrollMaxCount = UINT_MAX;
|
||||
UP.MaxCount = std::numeric_limits<unsigned>::max();
|
||||
UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
|
||||
UP.BEInsns = 2;
|
||||
UP.Partial = false;
|
||||
UP.Runtime = false;
|
||||
|
@ -222,6 +247,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
|
|||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/// A struct to densely store the state of an instruction after unrolling at
|
||||
/// each iteration.
|
||||
///
|
||||
|
@ -237,25 +263,27 @@ struct UnrolledInstState {
|
|||
|
||||
/// Hashing and equality testing for a set of the instruction states.
|
||||
struct UnrolledInstStateKeyInfo {
|
||||
typedef DenseMapInfo<Instruction *> PtrInfo;
|
||||
typedef DenseMapInfo<std::pair<Instruction *, int>> PairInfo;
|
||||
using PtrInfo = DenseMapInfo<Instruction *>;
|
||||
using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>;
|
||||
|
||||
static inline UnrolledInstState getEmptyKey() {
|
||||
return {PtrInfo::getEmptyKey(), 0, 0, 0};
|
||||
}
|
||||
|
||||
static inline UnrolledInstState getTombstoneKey() {
|
||||
return {PtrInfo::getTombstoneKey(), 0, 0, 0};
|
||||
}
|
||||
|
||||
static inline unsigned getHashValue(const UnrolledInstState &S) {
|
||||
return PairInfo::getHashValue({S.I, S.Iteration});
|
||||
}
|
||||
|
||||
static inline bool isEqual(const UnrolledInstState &LHS,
|
||||
const UnrolledInstState &RHS) {
|
||||
return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct EstimatedUnrollCost {
|
||||
/// \brief The estimated cost after unrolling.
|
||||
unsigned UnrolledCost;
|
||||
|
@ -264,7 +292,8 @@ struct EstimatedUnrollCost {
|
|||
/// rolled form.
|
||||
unsigned RolledDynamicCost;
|
||||
};
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// \brief Figure out if the loop is worth full unrolling.
|
||||
///
|
||||
|
@ -286,7 +315,8 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
|
|||
// We want to be able to scale offsets by the trip count and add more offsets
|
||||
// to them without checking for overflows, and we already don't want to
|
||||
// analyze *massive* trip counts, so we force the max to be reasonably small.
|
||||
assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
|
||||
assert(UnrollMaxIterationsCountToAnalyze <
|
||||
(std::numeric_limits<int>::max() / 2) &&
|
||||
"The unroll iterations max is too large!");
|
||||
|
||||
// Only analyze inner loops. We can't properly estimate cost of nested loops
|
||||
|
@ -656,7 +686,7 @@ static unsigned UnrollCountPragmaValue(const Loop *L) {
|
|||
// the unroll threshold.
|
||||
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
|
||||
unsigned MaxPercentThresholdBoost) {
|
||||
if (Cost.RolledDynamicCost >= UINT_MAX / 100)
|
||||
if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
|
||||
return 100;
|
||||
else if (Cost.UnrolledCost != 0)
|
||||
// The boosting factor is RolledDynamicCost / UnrolledCost
|
||||
|
@ -891,7 +921,9 @@ static bool computeUnrollCount(
|
|||
"multiple, "
|
||||
<< TripMultiple << ". Reducing unroll count from "
|
||||
<< OrigCount << " to " << UP.Count << ".\n");
|
||||
|
||||
using namespace ore;
|
||||
|
||||
if (PragmaCount > 0 && !UP.AllowRemainder)
|
||||
ORE->emit([&]() {
|
||||
return OptimizationRemarkMissed(DEBUG_TYPE,
|
||||
|
@ -927,7 +959,7 @@ static LoopUnrollResult tryToUnrollLoop(
|
|||
<< "] Loop %" << L->getHeader()->getName() << "\n");
|
||||
if (HasUnrollDisablePragma(L))
|
||||
return LoopUnrollResult::Unmodified;
|
||||
if (!L->isLoopSimplifyForm()) {
|
||||
if (!L->isLoopSimplifyForm()) {
|
||||
DEBUG(
|
||||
dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
|
||||
return LoopUnrollResult::Unmodified;
|
||||
|
@ -1037,9 +1069,19 @@ static LoopUnrollResult tryToUnrollLoop(
|
|||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class LoopUnroll : public LoopPass {
|
||||
public:
|
||||
static char ID; // Pass ID, replacement for typeid
|
||||
|
||||
int OptLevel;
|
||||
Optional<unsigned> ProvidedCount;
|
||||
Optional<unsigned> ProvidedThreshold;
|
||||
Optional<bool> ProvidedAllowPartial;
|
||||
Optional<bool> ProvidedRuntime;
|
||||
Optional<bool> ProvidedUpperBound;
|
||||
Optional<bool> ProvidedAllowPeeling;
|
||||
|
||||
LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
|
||||
Optional<unsigned> Count = None,
|
||||
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
|
||||
|
@ -1052,14 +1094,6 @@ public:
|
|||
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
int OptLevel;
|
||||
Optional<unsigned> ProvidedCount;
|
||||
Optional<unsigned> ProvidedThreshold;
|
||||
Optional<bool> ProvidedAllowPartial;
|
||||
Optional<bool> ProvidedRuntime;
|
||||
Optional<bool> ProvidedUpperBound;
|
||||
Optional<bool> ProvidedAllowPeeling;
|
||||
|
||||
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
|
||||
if (skipLoop(L))
|
||||
return false;
|
||||
|
@ -1091,7 +1125,6 @@ public:
|
|||
|
||||
/// This transformation requires natural loop information & requires that
|
||||
/// loop preheaders be inserted into the CFG...
|
||||
///
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<AssumptionCacheTracker>();
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
|
@ -1100,9 +1133,11 @@ public:
|
|||
getLoopAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char LoopUnroll::ID = 0;
|
||||
|
||||
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopPass)
|
||||
|
@ -1125,7 +1160,7 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
|
|||
}
|
||||
|
||||
Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) {
|
||||
return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0);
|
||||
return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
|
||||
|
|
|
@ -14,10 +14,12 @@
|
|||
|
||||
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/None.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/iterator_range.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
|
||||
|
@ -25,6 +27,8 @@
|
|||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/Argument.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
|
@ -41,6 +45,7 @@
|
|||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/User.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
|
@ -54,6 +59,7 @@
|
|||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -225,15 +231,18 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
|
|||
namespace {
|
||||
|
||||
class MemsetRanges {
|
||||
using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
|
||||
|
||||
/// A sorted list of the memset ranges.
|
||||
SmallVector<MemsetRange, 8> Ranges;
|
||||
typedef SmallVectorImpl<MemsetRange>::iterator range_iterator;
|
||||
|
||||
const DataLayout &DL;
|
||||
|
||||
public:
|
||||
MemsetRanges(const DataLayout &DL) : DL(DL) {}
|
||||
|
||||
typedef SmallVectorImpl<MemsetRange>::const_iterator const_iterator;
|
||||
using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
|
||||
|
||||
const_iterator begin() const { return Ranges.begin(); }
|
||||
const_iterator end() const { return Ranges.end(); }
|
||||
bool empty() const { return Ranges.empty(); }
|
||||
|
@ -259,7 +268,6 @@ public:
|
|||
|
||||
void addRange(int64_t Start, int64_t Size, Value *Ptr,
|
||||
unsigned Alignment, Instruction *Inst);
|
||||
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
@ -356,10 +364,10 @@ private:
|
|||
}
|
||||
};
|
||||
|
||||
char MemCpyOptLegacyPass::ID = 0;
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char MemCpyOptLegacyPass::ID = 0;
|
||||
|
||||
/// The public interface to this file...
|
||||
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
|
||||
|
||||
|
@ -450,7 +458,6 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
|
|||
// emit memset's for anything big enough to be worthwhile.
|
||||
Instruction *AMemSet = nullptr;
|
||||
for (const MemsetRange &Range : Ranges) {
|
||||
|
||||
if (Range.TheStores.size() == 1) continue;
|
||||
|
||||
// If it is profitable to lower this range to memset, do so now.
|
||||
|
|
|
@ -21,28 +21,44 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Transforms/Scalar/Reassociate.h"
|
||||
#include "llvm/ADT/APFloat.h"
|
||||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/Argument.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
#include "llvm/IR/Constant.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/User.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace reassociate;
|
||||
|
||||
|
@ -54,7 +70,6 @@ STATISTIC(NumFactor , "Number of multiplies factored");
|
|||
|
||||
#ifndef NDEBUG
|
||||
/// Print out the expression identified in the Ops list.
|
||||
///
|
||||
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
|
||||
Module *M = I->getModule();
|
||||
dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
|
||||
|
@ -354,7 +369,7 @@ static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
|
|||
}
|
||||
}
|
||||
|
||||
typedef std::pair<Value*, APInt> RepeatedValue;
|
||||
using RepeatedValue = std::pair<Value*, APInt>;
|
||||
|
||||
/// Given an associative binary expression, return the leaf
|
||||
/// nodes in Ops along with their weights (how many times the leaf occurs). The
|
||||
|
@ -429,7 +444,6 @@ typedef std::pair<Value*, APInt> RepeatedValue;
|
|||
/// that have all uses inside the expression (i.e. only used by non-leaf nodes
|
||||
/// of the expression) if it can turn them into binary operators of the right
|
||||
/// type and thus make the expression bigger.
|
||||
|
||||
static bool LinearizeExprTree(BinaryOperator *I,
|
||||
SmallVectorImpl<RepeatedValue> &Ops) {
|
||||
DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
|
||||
|
@ -467,12 +481,12 @@ static bool LinearizeExprTree(BinaryOperator *I,
|
|||
|
||||
// Leaves - Keeps track of the set of putative leaves as well as the number of
|
||||
// paths to each leaf seen so far.
|
||||
typedef DenseMap<Value*, APInt> LeafMap;
|
||||
using LeafMap = DenseMap<Value *, APInt>;
|
||||
LeafMap Leaves; // Leaf -> Total weight so far.
|
||||
SmallVector<Value*, 8> LeafOrder; // Ensure deterministic leaf output order.
|
||||
SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
|
||||
|
||||
#ifndef NDEBUG
|
||||
SmallPtrSet<Value*, 8> Visited; // For sanity checking the iteration scheme.
|
||||
SmallPtrSet<Value *, 8> Visited; // For sanity checking the iteration scheme.
|
||||
#endif
|
||||
while (!Worklist.empty()) {
|
||||
std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val();
|
||||
|
@ -770,7 +784,7 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
|
|||
break;
|
||||
ExpressionChanged->moveBefore(I);
|
||||
ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
|
||||
} while (1);
|
||||
} while (true);
|
||||
|
||||
// Throw away any left over nodes from the original expression.
|
||||
for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
|
||||
|
@ -793,7 +807,6 @@ static Value *NegateValue(Value *V, Instruction *BI,
|
|||
return ConstantExpr::getNeg(C);
|
||||
}
|
||||
|
||||
|
||||
// We are trying to expose opportunity for reassociation. One of the things
|
||||
// that we want to do to achieve this is to push a negation as deep into an
|
||||
// expression chain as possible, to expose the add instructions. In practice,
|
||||
|
@ -910,7 +923,6 @@ BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
|
|||
//
|
||||
// Calculate the negative value of Operand 1 of the sub instruction,
|
||||
// and set it as the RHS of the add instruction we just made.
|
||||
//
|
||||
Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
|
||||
BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
|
||||
Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
|
||||
|
@ -1154,7 +1166,6 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
|
|||
// If it was successful, true is returned, and the "R" and "C" is returned
|
||||
// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
|
||||
// and both "Res" and "ConstOpnd" remain unchanged.
|
||||
//
|
||||
bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
|
||||
APInt &ConstOpnd, Value *&Res) {
|
||||
// Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
|
||||
|
@ -1180,7 +1191,6 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
|
|||
RedoInsts.insert(T);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Helper function of OptimizeXor(). It tries to simplify
|
||||
// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
|
||||
|
@ -1227,7 +1237,6 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
|
|||
|
||||
Res = createAndInstr(I, X, C3);
|
||||
ConstOpnd ^= C1;
|
||||
|
||||
} else if (Opnd1->isOrExpr()) {
|
||||
// Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
|
||||
//
|
||||
|
@ -1346,7 +1355,6 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
|
|||
|
||||
// step 3.2: When previous and current operands share the same symbolic
|
||||
// value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
|
||||
//
|
||||
if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
|
||||
// Remove previous operand
|
||||
PrevOpnd->Invalidate();
|
||||
|
@ -2251,10 +2259,13 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
|
|||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class ReassociateLegacyPass : public FunctionPass {
|
||||
ReassociatePass Impl;
|
||||
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid
|
||||
|
||||
ReassociateLegacyPass() : FunctionPass(ID) {
|
||||
initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
@ -2273,9 +2284,11 @@ namespace {
|
|||
AU.addPreserved<GlobalsAAWrapperPass>();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char ReassociateLegacyPass::ID = 0;
|
||||
|
||||
INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
|
||||
"Reassociate expressions", false, false)
|
||||
|
||||
|
|
Loading…
Reference in New Issue