forked from OSchip/llvm-project
[Alignment][NFC] getMemoryOpCost uses MaybeAlign
Summary: This patch is part of a series to introduce an Alignment type. See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html See this patch for the introduction of the type: https://reviews.llvm.org/D64790 Reviewers: courbet Subscribers: nemanjai, hiraditya, kbarton, MaskRay, jsji, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D69307
This commit is contained in:
parent
3c7c371793
commit
a4783ef58d
|
@ -930,8 +930,9 @@ public:
|
||||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
|
||||||
|
|
||||||
/// \return The cost of Load and Store instructions.
|
/// \return The cost of Load and Store instructions.
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
||||||
unsigned AddressSpace, const Instruction *I = nullptr) const;
|
unsigned AddressSpace,
|
||||||
|
const Instruction *I = nullptr) const;
|
||||||
|
|
||||||
/// \return The cost of masked Load and Store instructions.
|
/// \return The cost of masked Load and Store instructions.
|
||||||
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
||||||
|
@ -1305,7 +1306,7 @@ public:
|
||||||
Type *CondTy, const Instruction *I) = 0;
|
Type *CondTy, const Instruction *I) = 0;
|
||||||
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
|
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||||
unsigned Index) = 0;
|
unsigned Index) = 0;
|
||||||
virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
||||||
unsigned AddressSpace, const Instruction *I) = 0;
|
unsigned AddressSpace, const Instruction *I) = 0;
|
||||||
virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
|
virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned Alignment,
|
unsigned Alignment,
|
||||||
|
@ -1711,7 +1712,7 @@ public:
|
||||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
|
||||||
return Impl.getVectorInstrCost(Opcode, Val, Index);
|
return Impl.getVectorInstrCost(Opcode, Val, Index);
|
||||||
}
|
}
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
||||||
unsigned AddressSpace, const Instruction *I) override {
|
unsigned AddressSpace, const Instruction *I) override {
|
||||||
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
|
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
|
||||||
}
|
}
|
||||||
|
|
|
@ -447,7 +447,7 @@ public:
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
||||||
unsigned AddressSpace, const Instruction *I) {
|
unsigned AddressSpace, const Instruction *I) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -869,8 +869,9 @@ public:
|
||||||
return LT.first;
|
return LT.first;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
||||||
unsigned AddressSpace, const Instruction *I = nullptr) {
|
unsigned AddressSpace,
|
||||||
|
const Instruction *I = nullptr) {
|
||||||
assert(!Src->isVoidTy() && "Invalid type");
|
assert(!Src->isVoidTy() && "Invalid type");
|
||||||
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
|
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
|
||||||
|
|
||||||
|
@ -921,8 +922,8 @@ public:
|
||||||
Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
|
Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
|
||||||
Opcode, VecTy, Alignment, AddressSpace);
|
Opcode, VecTy, Alignment, AddressSpace);
|
||||||
else
|
else
|
||||||
Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
|
Cost = static_cast<T *>(this)->getMemoryOpCost(
|
||||||
AddressSpace);
|
Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
|
||||||
|
|
||||||
// Legalize the vector type, and get the legalized and unlegalized type
|
// Legalize the vector type, and get the legalized and unlegalized type
|
||||||
// sizes.
|
// sizes.
|
||||||
|
|
|
@ -639,7 +639,7 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||||
}
|
}
|
||||||
|
|
||||||
int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
|
int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned Alignment,
|
MaybeAlign Alignment,
|
||||||
unsigned AddressSpace,
|
unsigned AddressSpace,
|
||||||
const Instruction *I) const {
|
const Instruction *I) const {
|
||||||
assert ((I == nullptr || I->getOpcode() == Opcode) &&
|
assert ((I == nullptr || I->getOpcode() == Opcode) &&
|
||||||
|
@ -1201,13 +1201,13 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
|
||||||
const StoreInst *SI = cast<StoreInst>(I);
|
const StoreInst *SI = cast<StoreInst>(I);
|
||||||
Type *ValTy = SI->getValueOperand()->getType();
|
Type *ValTy = SI->getValueOperand()->getType();
|
||||||
return getMemoryOpCost(I->getOpcode(), ValTy,
|
return getMemoryOpCost(I->getOpcode(), ValTy,
|
||||||
SI->getAlignment(),
|
MaybeAlign(SI->getAlignment()),
|
||||||
SI->getPointerAddressSpace(), I);
|
SI->getPointerAddressSpace(), I);
|
||||||
}
|
}
|
||||||
case Instruction::Load: {
|
case Instruction::Load: {
|
||||||
const LoadInst *LI = cast<LoadInst>(I);
|
const LoadInst *LI = cast<LoadInst>(I);
|
||||||
return getMemoryOpCost(I->getOpcode(), I->getType(),
|
return getMemoryOpCost(I->getOpcode(), I->getType(),
|
||||||
LI->getAlignment(),
|
MaybeAlign(LI->getAlignment()),
|
||||||
LI->getPointerAddressSpace(), I);
|
LI->getPointerAddressSpace(), I);
|
||||||
}
|
}
|
||||||
case Instruction::ZExt:
|
case Instruction::ZExt:
|
||||||
|
|
|
@ -632,12 +632,12 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
|
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
|
||||||
unsigned Alignment, unsigned AddressSpace,
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
const Instruction *I) {
|
const Instruction *I) {
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, Ty);
|
auto LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||||
|
|
||||||
if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
|
if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
|
||||||
LT.second.is128BitVector() && Alignment < 16) {
|
LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
|
||||||
// Unaligned stores are extremely inefficient. We don't split all
|
// Unaligned stores are extremely inefficient. We don't split all
|
||||||
// unaligned 128-bit stores because the negative impact that has shown in
|
// unaligned 128-bit stores because the negative impact that has shown in
|
||||||
// practice on inlined block copy code.
|
// practice on inlined block copy code.
|
||||||
|
@ -703,8 +703,8 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
|
||||||
if (!I->isVectorTy())
|
if (!I->isVectorTy())
|
||||||
continue;
|
continue;
|
||||||
if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
|
if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
|
||||||
Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
|
Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
|
||||||
getMemoryOpCost(Instruction::Load, I, 128, 0);
|
getMemoryOpCost(Instruction::Load, I, Align(128), 0);
|
||||||
}
|
}
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
|
@ -134,7 +134,7 @@ public:
|
||||||
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
|
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
|
||||||
bool IsZeroCmp) const;
|
bool IsZeroCmp) const;
|
||||||
|
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
||||||
unsigned AddressSpace, const Instruction *I = nullptr);
|
unsigned AddressSpace, const Instruction *I = nullptr);
|
||||||
|
|
||||||
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
|
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
|
||||||
|
|
|
@ -735,11 +735,13 @@ int ARMTTIImpl::getArithmeticInstrCost(
|
||||||
return BaseCost;
|
return BaseCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned AddressSpace, const Instruction *I) {
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
|
const Instruction *I) {
|
||||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
||||||
|
|
||||||
if (ST->hasNEON() && Src->isVectorTy() && Alignment != 16 &&
|
if (ST->hasNEON() && Src->isVectorTy() &&
|
||||||
|
(Alignment && *Alignment != Align(16)) &&
|
||||||
Src->getVectorElementType()->isDoubleTy()) {
|
Src->getVectorElementType()->isDoubleTy()) {
|
||||||
// Unaligned loads/stores are extremely inefficient.
|
// Unaligned loads/stores are extremely inefficient.
|
||||||
// We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
|
// We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
|
||||||
|
|
|
@ -189,7 +189,7 @@ public:
|
||||||
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
|
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
|
||||||
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
|
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
|
||||||
|
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
||||||
unsigned AddressSpace, const Instruction *I = nullptr);
|
unsigned AddressSpace, const Instruction *I = nullptr);
|
||||||
|
|
||||||
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
|
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
|
||||||
|
|
|
@ -152,7 +152,9 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
|
MaybeAlign Alignment,
|
||||||
|
unsigned AddressSpace,
|
||||||
|
const Instruction *I) {
|
||||||
assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
|
assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
|
||||||
if (Opcode == Instruction::Store)
|
if (Opcode == Instruction::Store)
|
||||||
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
|
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
|
||||||
|
@ -166,24 +168,30 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
// Cost of HVX loads.
|
// Cost of HVX loads.
|
||||||
if (VecWidth % RegWidth == 0)
|
if (VecWidth % RegWidth == 0)
|
||||||
return VecWidth / RegWidth;
|
return VecWidth / RegWidth;
|
||||||
// Cost of constructing HVX vector from scalar loads.
|
// Cost of constructing HVX vector from scalar loads
|
||||||
Alignment = std::min(Alignment, RegWidth / 8);
|
const Align RegAlign(RegWidth / 8);
|
||||||
unsigned AlignWidth = 8 * std::max(1u, Alignment);
|
if (!Alignment || *Alignment > RegAlign)
|
||||||
|
Alignment = RegAlign;
|
||||||
|
assert(Alignment);
|
||||||
|
unsigned AlignWidth = 8 * Alignment->value();
|
||||||
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
|
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
|
||||||
return 3 * NumLoads;
|
return 3 * NumLoads;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Non-HVX vectors.
|
// Non-HVX vectors.
|
||||||
// Add extra cost for floating point types.
|
// Add extra cost for floating point types.
|
||||||
unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
|
unsigned Cost =
|
||||||
: 1;
|
VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;
|
||||||
Alignment = std::min(Alignment, 8u);
|
|
||||||
unsigned AlignWidth = 8 * std::max(1u, Alignment);
|
// At this point unspecified alignment is considered as Align::None().
|
||||||
|
const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8));
|
||||||
|
unsigned AlignWidth = 8 * BoundAlignment.value();
|
||||||
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
|
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
|
||||||
if (Alignment == 4 || Alignment == 8)
|
if (Alignment == Align(4) || Alignment == Align(8))
|
||||||
return Cost * NumLoads;
|
return Cost * NumLoads;
|
||||||
// Loads of less than 32 bits will need extra inserts to compose a vector.
|
// Loads of less than 32 bits will need extra inserts to compose a vector.
|
||||||
unsigned LogA = Log2_32(Alignment);
|
assert(BoundAlignment <= Align(8));
|
||||||
|
unsigned LogA = Log2(BoundAlignment);
|
||||||
return (3 - LogA) * Cost * NumLoads;
|
return (3 - LogA) * Cost * NumLoads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -214,7 +222,8 @@ unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
|
||||||
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
|
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
|
||||||
Alignment, AddressSpace,
|
Alignment, AddressSpace,
|
||||||
UseMaskForCond, UseMaskForGaps);
|
UseMaskForCond, UseMaskForGaps);
|
||||||
return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
|
return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
|
||||||
|
nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||||
|
|
|
@ -112,8 +112,9 @@ public:
|
||||||
unsigned ScalarizationCostPassed = UINT_MAX);
|
unsigned ScalarizationCostPassed = UINT_MAX);
|
||||||
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
|
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
|
||||||
const SCEV *S);
|
const SCEV *S);
|
||||||
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
||||||
unsigned AddressSpace, const Instruction *I = nullptr);
|
unsigned AddressSpace,
|
||||||
|
const Instruction *I = nullptr);
|
||||||
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
||||||
unsigned AddressSpace);
|
unsigned AddressSpace);
|
||||||
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||||
|
|
|
@ -829,8 +829,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned AddressSpace, const Instruction *I) {
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
|
const Instruction *I) {
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
||||||
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
|
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
|
||||||
|
@ -888,7 +889,8 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
||||||
// to be decomposed based on the alignment factor.
|
// to be decomposed based on the alignment factor.
|
||||||
|
|
||||||
// Add the cost of each scalar load or store.
|
// Add the cost of each scalar load or store.
|
||||||
Cost += LT.first*(SrcBytes/Alignment-1);
|
assert(Alignment);
|
||||||
|
Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);
|
||||||
|
|
||||||
// For a vector type, there is also scalarization overhead (only for
|
// For a vector type, there is also scalarization overhead (only for
|
||||||
// stores, loads are expanded using the vector-load + permutation sequence,
|
// stores, loads are expanded using the vector-load + permutation sequence,
|
||||||
|
@ -919,7 +921,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
|
||||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
|
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
|
||||||
|
|
||||||
// Firstly, the cost of load/store operation.
|
// Firstly, the cost of load/store operation.
|
||||||
int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
|
int Cost =
|
||||||
|
getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
|
||||||
|
|
||||||
// PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
|
// PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
|
||||||
// (at least in the sense that there need only be one non-loop-invariant
|
// (at least in the sense that there need only be one non-loop-invariant
|
||||||
|
|
|
@ -97,7 +97,7 @@ public:
|
||||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||||
const Instruction *I = nullptr);
|
const Instruction *I = nullptr);
|
||||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
||||||
unsigned AddressSpace, const Instruction *I = nullptr);
|
unsigned AddressSpace, const Instruction *I = nullptr);
|
||||||
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
|
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
|
||||||
unsigned Factor,
|
unsigned Factor,
|
||||||
|
|
|
@ -259,7 +259,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||||
}
|
}
|
||||||
if (isa<StoreInst>(&I)) {
|
if (isa<StoreInst>(&I)) {
|
||||||
Type *MemAccessTy = I.getOperand(0)->getType();
|
Type *MemAccessTy = I.getOperand(0)->getType();
|
||||||
NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
|
NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -995,7 +995,7 @@ static bool isBswapIntrinsicCall(const Value *V) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned Alignment, unsigned AddressSpace,
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
const Instruction *I) {
|
const Instruction *I) {
|
||||||
assert(!Src->isVoidTy() && "Invalid type");
|
assert(!Src->isVoidTy() && "Invalid type");
|
||||||
|
|
||||||
|
|
|
@ -87,7 +87,7 @@ public:
|
||||||
const Instruction *I = nullptr);
|
const Instruction *I = nullptr);
|
||||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
||||||
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
|
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
||||||
unsigned AddressSpace, const Instruction *I = nullptr);
|
unsigned AddressSpace, const Instruction *I = nullptr);
|
||||||
|
|
||||||
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
|
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
|
||||||
|
|
|
@ -2404,8 +2404,9 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
|
||||||
return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
|
return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
unsigned AddressSpace, const Instruction *I) {
|
MaybeAlign Alignment, unsigned AddressSpace,
|
||||||
|
const Instruction *I) {
|
||||||
// Handle non-power-of-two vectors such as <3 x float>
|
// Handle non-power-of-two vectors such as <3 x float>
|
||||||
if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
|
if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
|
||||||
unsigned NumElem = VTy->getVectorNumElements();
|
unsigned NumElem = VTy->getVectorNumElements();
|
||||||
|
@ -2456,7 +2457,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
|
||||||
VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
|
VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
|
||||||
if (!SrcVTy)
|
if (!SrcVTy)
|
||||||
// To calculate scalar take the regular cost, without mask
|
// To calculate scalar take the regular cost, without mask
|
||||||
return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace);
|
return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace);
|
||||||
|
|
||||||
unsigned NumElem = SrcVTy->getVectorNumElements();
|
unsigned NumElem = SrcVTy->getVectorNumElements();
|
||||||
VectorType *MaskTy =
|
VectorType *MaskTy =
|
||||||
|
@ -2474,7 +2475,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
|
||||||
int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
|
int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
|
||||||
int MemopCost =
|
int MemopCost =
|
||||||
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
|
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
|
||||||
Alignment, AddressSpace);
|
MaybeAlign(Alignment), AddressSpace);
|
||||||
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
|
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3164,7 +3165,7 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
|
||||||
? ST->getGatherOverhead()
|
? ST->getGatherOverhead()
|
||||||
: ST->getScatterOverhead();
|
: ST->getScatterOverhead();
|
||||||
return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
|
return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
|
||||||
Alignment, AddressSpace);
|
MaybeAlign(Alignment), AddressSpace);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the cost of full scalarization of gather / scatter operation.
|
/// Return the cost of full scalarization of gather / scatter operation.
|
||||||
|
@ -3194,7 +3195,7 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
|
||||||
|
|
||||||
// The cost of the scalar loads/stores.
|
// The cost of the scalar loads/stores.
|
||||||
int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
|
int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
|
||||||
Alignment, AddressSpace);
|
MaybeAlign(Alignment), AddressSpace);
|
||||||
|
|
||||||
int InsertExtractCost = 0;
|
int InsertExtractCost = 0;
|
||||||
if (Opcode == Instruction::Load)
|
if (Opcode == Instruction::Load)
|
||||||
|
@ -3520,8 +3521,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
|
||||||
// Get the cost of one memory operation.
|
// Get the cost of one memory operation.
|
||||||
Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
|
Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
|
||||||
LegalVT.getVectorNumElements());
|
LegalVT.getVectorNumElements());
|
||||||
unsigned MemOpCost =
|
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
|
||||||
getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
|
MaybeAlign(Alignment), AddressSpace);
|
||||||
|
|
||||||
VectorType *VT = VectorType::get(ScalarTy, VF);
|
VectorType *VT = VectorType::get(ScalarTy, VF);
|
||||||
EVT ETy = TLI->getValueType(DL, VT);
|
EVT ETy = TLI->getValueType(DL, VT);
|
||||||
|
@ -3620,8 +3621,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
|
||||||
// Get the cost of one memory operation.
|
// Get the cost of one memory operation.
|
||||||
Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
|
Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
|
||||||
LegalVT.getVectorNumElements());
|
LegalVT.getVectorNumElements());
|
||||||
unsigned MemOpCost =
|
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
|
||||||
getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
|
MaybeAlign(Alignment), AddressSpace);
|
||||||
|
|
||||||
unsigned VF = VecTy->getVectorNumElements() / Factor;
|
unsigned VF = VecTy->getVectorNumElements() / Factor;
|
||||||
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
|
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
|
||||||
|
|
|
@ -133,7 +133,7 @@ public:
|
||||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||||
const Instruction *I = nullptr);
|
const Instruction *I = nullptr);
|
||||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
||||||
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
||||||
unsigned AddressSpace, const Instruction *I = nullptr);
|
unsigned AddressSpace, const Instruction *I = nullptr);
|
||||||
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
||||||
unsigned AddressSpace);
|
unsigned AddressSpace);
|
||||||
|
|
|
@ -5746,7 +5746,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
|
||||||
// vectorized loop where the user of it is a vectorized instruction.
|
// vectorized loop where the user of it is a vectorized instruction.
|
||||||
const MaybeAlign Alignment = getLoadStoreAlignment(I);
|
const MaybeAlign Alignment = getLoadStoreAlignment(I);
|
||||||
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
|
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
|
||||||
Alignment ? Alignment->value() : 0, AS);
|
Alignment, AS);
|
||||||
|
|
||||||
// Get the overhead of the extractelement and insertelement instructions
|
// Get the overhead of the extractelement and insertelement instructions
|
||||||
// we might create due to scalarization.
|
// we might create due to scalarization.
|
||||||
|
@ -5783,8 +5783,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
|
||||||
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
|
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
|
||||||
Alignment ? Alignment->value() : 0, AS);
|
Alignment ? Alignment->value() : 0, AS);
|
||||||
else
|
else
|
||||||
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
|
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I);
|
||||||
Alignment ? Alignment->value() : 0, AS, I);
|
|
||||||
|
|
||||||
bool Reverse = ConsecutiveStride < 0;
|
bool Reverse = ConsecutiveStride < 0;
|
||||||
if (Reverse)
|
if (Reverse)
|
||||||
|
@ -5800,16 +5799,14 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
|
||||||
unsigned AS = getLoadStoreAddressSpace(I);
|
unsigned AS = getLoadStoreAddressSpace(I);
|
||||||
if (isa<LoadInst>(I)) {
|
if (isa<LoadInst>(I)) {
|
||||||
return TTI.getAddressComputationCost(ValTy) +
|
return TTI.getAddressComputationCost(ValTy) +
|
||||||
TTI.getMemoryOpCost(Instruction::Load, ValTy,
|
TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
|
||||||
Alignment ? Alignment->value() : 0, AS) +
|
|
||||||
TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
|
TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
|
||||||
}
|
}
|
||||||
StoreInst *SI = cast<StoreInst>(I);
|
StoreInst *SI = cast<StoreInst>(I);
|
||||||
|
|
||||||
bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
|
bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
|
||||||
return TTI.getAddressComputationCost(ValTy) +
|
return TTI.getAddressComputationCost(ValTy) +
|
||||||
TTI.getMemoryOpCost(Instruction::Store, ValTy,
|
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
|
||||||
Alignment ? Alignment->value() : 0, AS) +
|
|
||||||
(isLoopInvariantStoreValue
|
(isLoopInvariantStoreValue
|
||||||
? 0
|
? 0
|
||||||
: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
|
: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
|
||||||
|
@ -5877,8 +5874,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
|
||||||
unsigned AS = getLoadStoreAddressSpace(I);
|
unsigned AS = getLoadStoreAddressSpace(I);
|
||||||
|
|
||||||
return TTI.getAddressComputationCost(ValTy) +
|
return TTI.getAddressComputationCost(ValTy) +
|
||||||
TTI.getMemoryOpCost(I->getOpcode(), ValTy,
|
TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
|
||||||
Alignment ? Alignment->value() : 0, AS, I);
|
|
||||||
}
|
}
|
||||||
return getWideningCost(I, VF);
|
return getWideningCost(I, VF);
|
||||||
}
|
}
|
||||||
|
|
|
@ -3162,7 +3162,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||||
}
|
}
|
||||||
case Instruction::Load: {
|
case Instruction::Load: {
|
||||||
// Cost of wide load - cost of scalar loads.
|
// Cost of wide load - cost of scalar loads.
|
||||||
unsigned alignment = cast<LoadInst>(VL0)->getAlignment();
|
MaybeAlign alignment(cast<LoadInst>(VL0)->getAlignment());
|
||||||
int ScalarEltCost =
|
int ScalarEltCost =
|
||||||
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
|
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
|
||||||
if (NeedToShuffleReuses) {
|
if (NeedToShuffleReuses) {
|
||||||
|
@ -3180,7 +3180,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||||
}
|
}
|
||||||
case Instruction::Store: {
|
case Instruction::Store: {
|
||||||
// We know that we can merge the stores. Calculate the cost.
|
// We know that we can merge the stores. Calculate the cost.
|
||||||
unsigned alignment = cast<StoreInst>(VL0)->getAlignment();
|
MaybeAlign alignment(cast<StoreInst>(VL0)->getAlignment());
|
||||||
int ScalarEltCost =
|
int ScalarEltCost =
|
||||||
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
|
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
|
||||||
if (NeedToShuffleReuses) {
|
if (NeedToShuffleReuses) {
|
||||||
|
|
Loading…
Reference in New Issue