[LSR] Allow giving priority to post-incrementing addressing modes

Implement TTI interface for targets to indicate that the LSR should give
priority to post-incrementing addressing modes.

Combination of patches by Sebastian Pop and Brendon Cahoon.

Differential Revision: https://reviews.llvm.org/D44758

llvm-svn: 328490
Krzysztof Parzyszek 2018-03-26 13:10:09 +00:00
parent 22dccd008e
commit 0b377e0ae9
5 changed files with 151 additions and 9 deletions
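For illustration only (nothing below is part of this patch): a target opts in by overriding the new TTI hook and by marking post-indexed loads/stores as legal in its TargetLowering, since BasicTTIImplBase forwards isIndexedLoadLegal/isIndexedStoreLegal to those legality tables. A minimal sketch with placeholder names (MyTargetTTIImpl, MyTargetTargetLowering), assuming a target that supports post-incremented i32 accesses:

// Sketch only; placeholder names, and the usual TTI/CRTP boilerplate is omitted.
class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
public:
  // Ask LSR to create/preserve post-increment addressing expressions.
  bool shouldFavorPostInc() const { return true; }
};

// In the target's TargetLowering constructor: these legality tables are
// what BasicTTIImplBase::isIndexedLoadLegal/isIndexedStoreLegal query.
MyTargetTargetLowering::MyTargetTargetLowering(const TargetMachine &TM)
    : TargetLowering(TM) {
  setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
}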


@@ -476,6 +476,10 @@ public:
/// calculation for the instructions in a loop.
bool canMacroFuseCmp() const;
/// \return True if LSR should make efforts to create/preserve post-inc
/// addressing mode expressions.
bool shouldFavorPostInc() const;
/// \brief Return true if the target supports masked load/store
/// AVX2 and AVX-512 targets allow masks for consecutive load and store
bool isLegalMaskedStore(Type *DataType) const;
@@ -886,6 +890,21 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
/// \brief The type of load/store indexing.
enum MemIndexedMode {
MIM_Unindexed, ///< No indexing.
MIM_PreInc, ///< Pre-incrementing.
MIM_PreDec, ///< Pre-decrementing.
MIM_PostInc, ///< Post-incrementing.
MIM_PostDec ///< Post-decrementing.
};
/// \returns True if the specified indexed load for the given type is legal.
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
/// \returns True if the specified indexed store for the given type is legal.
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
/// \returns The bitwidth of the largest vector type that should be used to
/// load/store in the given address space.
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
@@ -994,6 +1013,7 @@ public:
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) = 0;
virtual bool canMacroFuseCmp() = 0;
virtual bool shouldFavorPostInc() const = 0;
virtual bool isLegalMaskedStore(Type *DataType) = 0;
virtual bool isLegalMaskedLoad(Type *DataType) = 0;
virtual bool isLegalMaskedScatter(Type *DataType) = 0;
@@ -1109,6 +1129,8 @@ public:
unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
@@ -1216,6 +1238,9 @@ public:
bool canMacroFuseCmp() override {
return Impl.canMacroFuseCmp();
}
bool shouldFavorPostInc() const override {
return Impl.shouldFavorPostInc();
}
bool isLegalMaskedStore(Type *DataType) override {
return Impl.isLegalMaskedStore(DataType);
}
@@ -1470,6 +1495,12 @@ public:
const Function *Callee) const override {
return Impl.areInlineCompatible(Caller, Callee);
}
bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
}
bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
}
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
}


@@ -248,6 +248,8 @@ public:
bool canMacroFuseCmp() { return false; }
bool shouldFavorPostInc() const { return false; }
bool isLegalMaskedStore(Type *DataType) { return false; }
bool isLegalMaskedLoad(Type *DataType) { return false; }
@@ -511,6 +513,16 @@ public:
Callee->getFnAttribute("target-features"));
}
bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
const DataLayout &DL) const {
return false;
}
bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
const DataLayout &DL) const {
return false;
}
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }


@@ -111,6 +111,22 @@ private:
return static_cast<const T *>(this)->getTLI();
}
static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
switch (M) {
case TTI::MIM_Unindexed:
return ISD::UNINDEXED;
case TTI::MIM_PreInc:
return ISD::PRE_INC;
case TTI::MIM_PreDec:
return ISD::PRE_DEC;
case TTI::MIM_PostInc:
return ISD::POST_INC;
case TTI::MIM_PostDec:
return ISD::POST_DEC;
}
llvm_unreachable("Unexpected MemIndexedMode");
}
protected:
explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
: BaseT(DL) {}
@@ -157,6 +173,18 @@ public:
return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
}
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
const DataLayout &DL) const {
EVT VT = getTLI()->getValueType(DL, Ty);
return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
}
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
const DataLayout &DL) const {
EVT VT = getTLI()->getValueType(DL, Ty);
return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
}
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}


@@ -159,6 +159,10 @@ bool TargetTransformInfo::canMacroFuseCmp() const {
return TTIImpl->canMacroFuseCmp();
}
bool TargetTransformInfo::shouldFavorPostInc() const {
return TTIImpl->shouldFavorPostInc();
}
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
return TTIImpl->isLegalMaskedStore(DataType);
}
@@ -555,6 +559,16 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
return TTIImpl->areInlineCompatible(Caller, Callee);
}
bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
Type *Ty) const {
return TTIImpl->isIndexedLoadLegal(Mode, Ty);
}
bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
Type *Ty) const {
return TTIImpl->isIndexedStoreLegal(Mode, Ty);
}
unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}

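Context for the LoopStrengthReduce changes below (illustrative, not taken from the patch): the new heuristic fires for address recurrences with a constant step and a loop-invariant, non-constant start, which is exactly the shape produced by a simple pointer-bumping loop. A hypothetical example:

// The store address forms an AddRec whose start `p` is loop-invariant but
// not constant and whose step (the element size) is constant, so with
// shouldFavorPostInc() and a legal post-indexed store the pointer update
// can fold into the store instead of needing extra base+offset formulas.
void fill(int *p, int n, int v) {
  for (int i = 0; i < n; ++i)
    *p++ = v;
}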

@@ -185,6 +185,8 @@ struct MemAccessTy {
unsigned AS = UnknownAddressSpace) {
return MemAccessTy(Type::getVoidTy(Ctx), AS);
}
Type *getType() { return MemTy; }
};
/// This class holds data which is used to order reuse candidates.
@@ -1040,12 +1042,14 @@ private:
void RateRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT);
ScalarEvolution &SE, DominatorTree &DT,
const TargetTransformInfo &TTI);
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT,
SmallPtrSetImpl<const SCEV *> *LoserRegs);
SmallPtrSetImpl<const SCEV *> *LoserRegs,
const TargetTransformInfo &TTI);
};
/// An operand value in an instruction which is to be replaced with some
@@ -1194,7 +1198,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
void Cost::RateRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
ScalarEvolution &SE, DominatorTree &DT,
const TargetTransformInfo &TTI) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
// If this is an addrec for another loop, it should be an invariant
// with respect to L since L is the innermost loop (at least
@@ -1215,13 +1220,28 @@ void Cost::RateRegister(const SCEV *Reg,
++C.NumRegs;
return;
}
C.AddRecCost += 1; /// TODO: This should be a function of the stride.
unsigned LoopCost = 1;
if (TTI.shouldFavorPostInc()) {
const SCEV *LoopStep = AR->getStepRecurrence(SE);
if (isa<SCEVConstant>(LoopStep)) {
// Check if a post-indexed load/store can be used.
if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
const SCEV *LoopStart = AR->getStart();
if (!isa<SCEVConstant>(LoopStart) &&
SE.isLoopInvariant(LoopStart, L))
LoopCost = 0;
}
}
}
C.AddRecCost += LoopCost;
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
if (!Regs.count(AR->getOperand(1))) {
RateRegister(AR->getOperand(1), Regs, L, SE, DT);
RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);
if (isLoser())
return;
}
@@ -1249,13 +1269,14 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT,
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
SmallPtrSetImpl<const SCEV *> *LoserRegs,
const TargetTransformInfo &TTI) {
if (LoserRegs && LoserRegs->count(Reg)) {
Lose();
return;
}
if (Regs.insert(Reg).second) {
RateRegister(Reg, Regs, L, SE, DT);
RateRegister(Reg, Regs, L, SE, DT, TTI);
if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
@@ -1279,7 +1300,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
Lose();
return;
}
RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
if (isLoser())
return;
}
@@ -1288,7 +1309,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
Lose();
return;
}
RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);
if (isLoser())
return;
}
@@ -3465,12 +3486,45 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
return S;
}
/// Return true if the SCEV represents a value that may end up as a
/// post-increment operation.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
LSRUse &LU, const SCEV *S, const Loop *L,
ScalarEvolution &SE) {
if (LU.Kind != LSRUse::Address ||
!LU.AccessTy.getType()->isIntOrIntVectorTy())
return false;
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
if (!AR)
return false;
const SCEV *LoopStep = AR->getStepRecurrence(SE);
if (!isa<SCEVConstant>(LoopStep))
return false;
if (LU.AccessTy.getType()->getScalarSizeInBits() !=
LoopStep->getType()->getScalarSizeInBits())
return false;
// Check if a post-indexed load/store can be used.
if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
const SCEV *LoopStart = AR->getStart();
if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
return true;
}
return false;
}
/// \brief Helper function for LSRInstance::GenerateReassociations.
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base,
unsigned Depth, size_t Idx,
bool IsScaledReg) {
const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
// Don't generate reassociations for the base register of a value that
// may generate a post-increment operator. The reason is that the
// reassociations cause extra base+register formulas to be created,
// and possibly chosen, but the post-increment is more efficient.
if (TTI.shouldFavorPostInc() && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
return;
SmallVector<const SCEV *, 8> AddOps;
const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
if (Remainder)
@@ -4039,6 +4093,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, NewF)) {
if (TTI.shouldFavorPostInc() &&
mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
continue;
if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;