[LSR] Allow giving priority to post-incrementing addressing modes
Implement a TTI interface for targets to indicate that LSR should give
priority to post-incrementing addressing modes.

Combination of patches by Sebastian Pop and Brendon Cahoon.

Differential Revision: https://reviews.llvm.org/D44758

llvm-svn: 328490
commit 0b377e0ae9 (parent 22dccd008e)
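For readers unfamiliar with the new hooks, here is a minimal sketch (not part of this commit) of how a pass could combine them, in the same way LSR does in the diff below. The helper name preferPostInc is hypothetical; shouldFavorPostInc, isIndexedLoadLegal, isIndexedStoreLegal and MIM_PostInc are the interfaces this patch adds.

// Illustrative sketch only: query the new TTI hooks the way LSR does below.
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Returns true when the target asks LSR to favor post-increment addressing
// and can actually fold a post-indexed load or store of this access type.
static bool preferPostInc(const TargetTransformInfo &TTI, Type *AccessTy) {
  if (!TTI.shouldFavorPostInc())
    return false;
  return TTI.isIndexedLoadLegal(TargetTransformInfo::MIM_PostInc, AccessTy) ||
         TTI.isIndexedStoreLegal(TargetTransformInfo::MIM_PostInc, AccessTy);
}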
@@ -476,6 +476,10 @@ public:
   /// calculation for the instructions in a loop.
   bool canMacroFuseCmp() const;
 
+  /// \return True if LSR should make efforts to create/preserve post-inc
+  /// addressing mode expressions.
+  bool shouldFavorPostInc() const;
+
   /// \brief Return true if the target supports masked load/store
   /// AVX2 and AVX-512 targets allow masks for consecutive load and store
   bool isLegalMaskedStore(Type *DataType) const;
@@ -886,6 +890,21 @@ public:
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
 
+  /// \brief The type of load/store indexing.
+  enum MemIndexedMode {
+    MIM_Unindexed,  ///< No indexing.
+    MIM_PreInc,     ///< Pre-incrementing.
+    MIM_PreDec,     ///< Pre-decrementing.
+    MIM_PostInc,    ///< Post-incrementing.
+    MIM_PostDec     ///< Post-decrementing.
+  };
+
+  /// \returns True if the specified indexed load for the given type is legal.
+  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
+
+  /// \returns True if the specified indexed store for the given type is legal.
+  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
+
   /// \returns The bitwidth of the largest vector type that should be used to
   /// load/store in the given address space.
   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
@@ -994,6 +1013,7 @@ public:
   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                              TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool canMacroFuseCmp() = 0;
+  virtual bool shouldFavorPostInc() const = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
@@ -1109,6 +1129,8 @@ public:
                                   unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
   virtual bool areInlineCompatible(const Function *Caller,
                                    const Function *Callee) const = 0;
+  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
+  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
   virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
   virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
@@ -1216,6 +1238,9 @@ public:
   bool canMacroFuseCmp() override {
     return Impl.canMacroFuseCmp();
   }
+  bool shouldFavorPostInc() const override {
+    return Impl.shouldFavorPostInc();
+  }
   bool isLegalMaskedStore(Type *DataType) override {
     return Impl.isLegalMaskedStore(DataType);
   }
@@ -1470,6 +1495,12 @@ public:
                            const Function *Callee) const override {
     return Impl.areInlineCompatible(Caller, Callee);
   }
+  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
+    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
+  }
+  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
+    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
+  }
   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
     return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
   }
@@ -248,6 +248,8 @@ public:
 
   bool canMacroFuseCmp() { return false; }
 
+  bool shouldFavorPostInc() const { return false; }
+
   bool isLegalMaskedStore(Type *DataType) { return false; }
 
   bool isLegalMaskedLoad(Type *DataType) { return false; }
@@ -511,6 +513,16 @@ public:
                                       Callee->getFnAttribute("target-features"));
   }
 
+  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
+                          const DataLayout &DL) const {
+    return false;
+  }
+
+  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
+                           const DataLayout &DL) const {
+    return false;
+  }
+
   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
 
   bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
@@ -111,6 +111,22 @@ private:
     return static_cast<const T *>(this)->getTLI();
   }
 
+  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
+    switch (M) {
+    case TTI::MIM_Unindexed:
+      return ISD::UNINDEXED;
+    case TTI::MIM_PreInc:
+      return ISD::PRE_INC;
+    case TTI::MIM_PreDec:
+      return ISD::PRE_DEC;
+    case TTI::MIM_PostInc:
+      return ISD::POST_INC;
+    case TTI::MIM_PostDec:
+      return ISD::POST_DEC;
+    }
+    llvm_unreachable("Unexpected MemIndexedMode");
+  }
+
 protected:
   explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
       : BaseT(DL) {}
@@ -157,6 +173,18 @@ public:
     return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
   }
 
+  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
+                          const DataLayout &DL) const {
+    EVT VT = getTLI()->getValueType(DL, Ty);
+    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
+  }
+
+  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
+                           const DataLayout &DL) const {
+    EVT VT = getTLI()->getValueType(DL, Ty);
+    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
+  }
+
   bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
     return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
   }
@@ -159,6 +159,10 @@ bool TargetTransformInfo::canMacroFuseCmp() const {
   return TTIImpl->canMacroFuseCmp();
 }
 
+bool TargetTransformInfo::shouldFavorPostInc() const {
+  return TTIImpl->shouldFavorPostInc();
+}
+
 bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
   return TTIImpl->isLegalMaskedStore(DataType);
 }
@@ -555,6 +559,16 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
   return TTIImpl->areInlineCompatible(Caller, Callee);
 }
 
+bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
+                                             Type *Ty) const {
+  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
+}
+
+bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
+                                              Type *Ty) const {
+  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
+}
+
 unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
   return TTIImpl->getLoadStoreVecRegBitWidth(AS);
 }
@@ -185,6 +185,8 @@ struct MemAccessTy {
                                 unsigned AS = UnknownAddressSpace) {
     return MemAccessTy(Type::getVoidTy(Ctx), AS);
   }
+
+  Type *getType() { return MemTy; }
 };
 
 /// This class holds data which is used to order reuse candidates.
@@ -1040,12 +1042,14 @@ private:
   void RateRegister(const SCEV *Reg,
                     SmallPtrSetImpl<const SCEV *> &Regs,
                     const Loop *L,
-                    ScalarEvolution &SE, DominatorTree &DT);
+                    ScalarEvolution &SE, DominatorTree &DT,
+                    const TargetTransformInfo &TTI);
   void RatePrimaryRegister(const SCEV *Reg,
                            SmallPtrSetImpl<const SCEV *> &Regs,
                            const Loop *L,
                            ScalarEvolution &SE, DominatorTree &DT,
-                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
+                           SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                           const TargetTransformInfo &TTI);
 };
 
 /// An operand value in an instruction which is to be replaced with some
|
@ -1194,7 +1198,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
|
|||
void Cost::RateRegister(const SCEV *Reg,
|
||||
SmallPtrSetImpl<const SCEV *> &Regs,
|
||||
const Loop *L,
|
||||
ScalarEvolution &SE, DominatorTree &DT) {
|
||||
ScalarEvolution &SE, DominatorTree &DT,
|
||||
const TargetTransformInfo &TTI) {
|
||||
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
|
||||
// If this is an addrec for another loop, it should be an invariant
|
||||
// with respect to L since L is the innermost loop (at least
|
||||
|
@@ -1215,13 +1220,28 @@ void Cost::RateRegister(const SCEV *Reg,
       ++C.NumRegs;
       return;
     }
-    C.AddRecCost += 1; /// TODO: This should be a function of the stride.
+
+    unsigned LoopCost = 1;
+    if (TTI.shouldFavorPostInc()) {
+      const SCEV *LoopStep = AR->getStepRecurrence(SE);
+      if (isa<SCEVConstant>(LoopStep)) {
+        // Check if a post-indexed load/store can be used.
+        if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
+            TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+          const SCEV *LoopStart = AR->getStart();
+          if (!isa<SCEVConstant>(LoopStart) &&
+              SE.isLoopInvariant(LoopStart, L))
+            LoopCost = 0;
+        }
+      }
+    }
+    C.AddRecCost += LoopCost;
 
     // Add the step value register, if it needs one.
     // TODO: The non-affine case isn't precisely modeled here.
     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
       if (!Regs.count(AR->getOperand(1))) {
-        RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+        RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);
         if (isLoser())
           return;
       }
@@ -1249,13 +1269,14 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
                                SmallPtrSetImpl<const SCEV *> &Regs,
                                const Loop *L,
                                ScalarEvolution &SE, DominatorTree &DT,
-                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
+                               SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                               const TargetTransformInfo &TTI) {
   if (LoserRegs && LoserRegs->count(Reg)) {
     Lose();
     return;
   }
   if (Regs.insert(Reg).second) {
-    RateRegister(Reg, Regs, L, SE, DT);
+    RateRegister(Reg, Regs, L, SE, DT, TTI);
     if (LoserRegs && isLoser())
       LoserRegs->insert(Reg);
   }
@@ -1279,7 +1300,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
       Lose();
       return;
     }
-    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
     if (isLoser())
       return;
   }
@@ -1288,7 +1309,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
       Lose();
       return;
     }
-    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);
     if (isLoser())
       return;
   }
@@ -3465,12 +3486,45 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
   return S;
 }
 
+/// Return true if the SCEV represents a value that may end up as a
+/// post-increment operation.
+static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
+                              LSRUse &LU, const SCEV *S, const Loop *L,
+                              ScalarEvolution &SE) {
+  if (LU.Kind != LSRUse::Address ||
+      !LU.AccessTy.getType()->isIntOrIntVectorTy())
+    return false;
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+  if (!AR)
+    return false;
+  const SCEV *LoopStep = AR->getStepRecurrence(SE);
+  if (!isa<SCEVConstant>(LoopStep))
+    return false;
+  if (LU.AccessTy.getType()->getScalarSizeInBits() !=
+      LoopStep->getType()->getScalarSizeInBits())
+    return false;
+  // Check if a post-indexed load/store can be used.
+  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
+      TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+    const SCEV *LoopStart = AR->getStart();
+    if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
+      return true;
+  }
+  return false;
+}
+
 /// \brief Helper function for LSRInstance::GenerateReassociations.
 void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base,
                                              unsigned Depth, size_t Idx,
                                              bool IsScaledReg) {
   const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+  // Don't generate reassociations for the base register of a value that
+  // may generate a post-increment operator. The reason is that the
+  // reassociations cause extra base+register formula to be created,
+  // and possibly chosen, but the post-increment is more efficient.
+  if (TTI.shouldFavorPostInc() && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
+    return;
   SmallVector<const SCEV *, 8> AddOps;
   const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
   if (Remainder)
@@ -4039,6 +4093,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
         NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
         if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
                         LU.Kind, LU.AccessTy, NewF)) {
+          if (TTI.shouldFavorPostInc() &&
+              mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
+            continue;
           if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
             continue;
           NewF = F;
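On the target side, opting in takes two pieces, shown here as a sketch under assumptions (neither appears in this diff): the target's TargetTransformInfo implementation overrides shouldFavorPostInc() to return true, and its TargetLowering marks post-indexed accesses legal so that the BasicTTIImplBase mapping above (MIM_PostInc to ISD::POST_INC) reports them as foldable. The MyTargetLowering class is hypothetical; setIndexedLoadAction, setIndexedStoreAction and ISD::POST_INC are existing LLVM interfaces.

// Hypothetical target lowering ("MyTarget" does not exist); the matching TTI
// override would simply be: bool shouldFavorPostInc() const { return true; }
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

class MyTargetLowering : public TargetLowering {
public:
  explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {
    // Declare post-indexed loads and stores legal for common integer types,
    // so isIndexedLoadLegal/isIndexedStoreLegal(ISD::POST_INC, VT) succeed.
    for (MVT VT : {MVT::i8, MVT::i16, MVT::i32}) {
      setIndexedLoadAction(ISD::POST_INC, VT, Legal);
      setIndexedStoreAction(ISD::POST_INC, VT, Legal);
    }
  }
};

} // namespace llvm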