[Alignment][NFC] getMemoryOpCost uses MaybeAlign
Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: nemanjai, hiraditya, kbarton, MaskRay, jsji, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69307
parent 3c7c371793
commit a4783ef58d
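Not part of the patch: a minimal sketch of the calling convention the new overload expects, assuming the llvm/Support/Alignment.h semantics used throughout the diff (a getAlignment() of 0 wraps into an unset MaybeAlign rather than a pretend alignment of 1). The computeLoadCost helper below is hypothetical and only illustrates the pattern.

// Sketch only: hypothetical helper, not part of this commit.
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

static unsigned computeLoadCost(const LoadInst &LI) {
  // getAlignment() returns 0 when the IR carries no alignment; MaybeAlign
  // turns that 0 into "unknown" instead of a fake alignment value.
  const MaybeAlign Alignment(LI.getAlignment());

  // Targets can now tell "unknown" apart from a genuinely small alignment,
  // as the AArch64 and ARM hunks below do.
  if (!Alignment || *Alignment < Align(16))
    return 4; // penalize possibly under-aligned accesses
  return 1;   // known 16-byte (or better) alignment is assumed cheap
}

Callers that still have a plain unsigned alignment, as in the X86 and PPC hunks below, wrap it at the call site with MaybeAlign(Alignment).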
@@ -930,8 +930,9 @@ public:
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
 
   /// \return The cost of Load and Store instructions.
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr) const;
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
+                      unsigned AddressSpace,
+                      const Instruction *I = nullptr) const;
 
   /// \return The cost of masked Load and Store instructions.
   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@@ -1305,7 +1306,7 @@ public:
                                  Type *CondTy, const Instruction *I) = 0;
   virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                  unsigned Index) = 0;
-  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                               unsigned AddressSpace, const Instruction *I) = 0;
   virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                     unsigned Alignment,
@@ -1711,7 +1712,7 @@ public:
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
     return Impl.getVectorInstrCost(Opcode, Val, Index);
   }
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I) override {
     return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
   }
@@ -447,7 +447,7 @@ public:
     return 1;
   }
 
-  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                            unsigned AddressSpace, const Instruction *I) {
     return 1;
   }
@@ -869,8 +869,9 @@ public:
     return LT.first;
   }
 
-  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                           unsigned AddressSpace, const Instruction *I = nullptr) {
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
+                           unsigned AddressSpace,
+                           const Instruction *I = nullptr) {
     assert(!Src->isVoidTy() && "Invalid type");
     std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
 
@@ -921,8 +922,8 @@ public:
       Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
           Opcode, VecTy, Alignment, AddressSpace);
     else
-      Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
-                                                     AddressSpace);
+      Cost = static_cast<T *>(this)->getMemoryOpCost(
+          Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
 
     // Legalize the vector type, and get the legalized and unlegalized type
     // sizes.
@@ -639,7 +639,7 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
 }
 
 int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
-                                         unsigned Alignment,
+                                         MaybeAlign Alignment,
                                          unsigned AddressSpace,
                                          const Instruction *I) const {
   assert ((I == nullptr || I->getOpcode() == Opcode) &&
@@ -1201,14 +1201,14 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     const StoreInst *SI = cast<StoreInst>(I);
     Type *ValTy = SI->getValueOperand()->getType();
     return getMemoryOpCost(I->getOpcode(), ValTy,
-                           SI->getAlignment(),
-                           SI->getPointerAddressSpace(), I);
+                           MaybeAlign(SI->getAlignment()),
+                           SI->getPointerAddressSpace(), I);
   }
   case Instruction::Load: {
     const LoadInst *LI = cast<LoadInst>(I);
     return getMemoryOpCost(I->getOpcode(), I->getType(),
-                           LI->getAlignment(),
-                           LI->getPointerAddressSpace(), I);
+                           MaybeAlign(LI->getAlignment()),
+                           LI->getPointerAddressSpace(), I);
   }
   case Instruction::ZExt:
   case Instruction::SExt:
@@ -632,12 +632,12 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
 }
 
 int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
-                                    unsigned Alignment, unsigned AddressSpace,
+                                    MaybeAlign Alignment, unsigned AddressSpace,
                                     const Instruction *I) {
   auto LT = TLI->getTypeLegalizationCost(DL, Ty);
 
   if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
-      LT.second.is128BitVector() && Alignment < 16) {
+      LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
     // Unaligned stores are extremely inefficient. We don't split all
     // unaligned 128-bit stores because the negative impact that has shown in
     // practice on inlined block copy code.
@@ -703,8 +703,8 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
     if (!I->isVectorTy())
       continue;
     if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
-      Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
-              getMemoryOpCost(Instruction::Load, I, 128, 0);
+      Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
+              getMemoryOpCost(Instruction::Load, I, Align(128), 0);
   }
   return Cost;
 }
@@ -134,7 +134,7 @@ public:
   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                     bool IsZeroCmp) const;
 
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
 
   int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
@@ -735,11 +735,13 @@ int ARMTTIImpl::getArithmeticInstrCost(
   return BaseCost;
 }
 
-int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                unsigned AddressSpace, const Instruction *I) {
+int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                MaybeAlign Alignment, unsigned AddressSpace,
+                                const Instruction *I) {
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
 
-  if (ST->hasNEON() && Src->isVectorTy() && Alignment != 16 &&
+  if (ST->hasNEON() && Src->isVectorTy() &&
+      (Alignment && *Alignment != Align(16)) &&
       Src->getVectorElementType()->isDoubleTy()) {
     // Unaligned loads/stores are extremely inefficient.
     // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
@@ -189,7 +189,7 @@ public:
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
       ArrayRef<const Value *> Args = ArrayRef<const Value *>());
 
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
 
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
@@ -152,7 +152,9 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
 }
 
 unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
-      unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
+                                         MaybeAlign Alignment,
+                                         unsigned AddressSpace,
+                                         const Instruction *I) {
   assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
   if (Opcode == Instruction::Store)
     return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
@@ -166,24 +168,30 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
     // Cost of HVX loads.
     if (VecWidth % RegWidth == 0)
       return VecWidth / RegWidth;
-    // Cost of constructing HVX vector from scalar loads.
-    Alignment = std::min(Alignment, RegWidth / 8);
-    unsigned AlignWidth = 8 * std::max(1u, Alignment);
+    // Cost of constructing HVX vector from scalar loads
+    const Align RegAlign(RegWidth / 8);
+    if (!Alignment || *Alignment > RegAlign)
+      Alignment = RegAlign;
+    assert(Alignment);
+    unsigned AlignWidth = 8 * Alignment->value();
     unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
     return 3 * NumLoads;
   }
 
   // Non-HVX vectors.
   // Add extra cost for floating point types.
-  unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
-                                                               : 1;
-  Alignment = std::min(Alignment, 8u);
-  unsigned AlignWidth = 8 * std::max(1u, Alignment);
+  unsigned Cost =
+      VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;
+
+  // At this point unspecified alignment is considered as Align::None().
+  const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8));
+  unsigned AlignWidth = 8 * BoundAlignment.value();
   unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
-  if (Alignment == 4 || Alignment == 8)
+  if (Alignment == Align(4) || Alignment == Align(8))
     return Cost * NumLoads;
   // Loads of less than 32 bits will need extra inserts to compose a vector.
-  unsigned LogA = Log2_32(Alignment);
+  assert(BoundAlignment <= Align(8));
+  unsigned LogA = Log2(BoundAlignment);
   return (3 - LogA) * Cost * NumLoads;
 }
 
@@ -214,7 +222,8 @@ unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                              Alignment, AddressSpace,
                                              UseMaskForCond, UseMaskForGaps);
-  return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
+  return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
+                         nullptr);
 }
 
 unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
@@ -112,8 +112,9 @@ public:
                                  unsigned ScalarizationCostPassed = UINT_MAX);
   unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
                                      const SCEV *S);
-  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                           unsigned AddressSpace, const Instruction *I = nullptr);
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
+                           unsigned AddressSpace,
+                           const Instruction *I = nullptr);
   unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                  unsigned AddressSpace);
   unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
@@ -829,8 +829,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   return Cost;
 }
 
-int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                unsigned AddressSpace, const Instruction *I) {
+int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                MaybeAlign Alignment, unsigned AddressSpace,
+                                const Instruction *I) {
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
   assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
@@ -888,7 +889,8 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
   // to be decomposed based on the alignment factor.
 
   // Add the cost of each scalar load or store.
-  Cost += LT.first*(SrcBytes/Alignment-1);
+  assert(Alignment);
+  Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);
 
   // For a vector type, there is also scalarization overhead (only for
   // stores, loads are expanded using the vector-load + permutation sequence,
@@ -919,7 +921,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
 
   // Firstly, the cost of load/store operation.
-  int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+  int Cost =
+      getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
 
   // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
   // (at least in the sense that there need only be one non-loop-invariant
@@ -97,7 +97,7 @@ public:
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                  unsigned Factor,
@@ -259,7 +259,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
       }
       if (isa<StoreInst>(&I)) {
         Type *MemAccessTy = I.getOperand(0)->getType();
-        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
+        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
       }
     }
 
@@ -995,7 +995,7 @@ static bool isBswapIntrinsicCall(const Value *V) {
 }
 
 int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
-                                    unsigned Alignment, unsigned AddressSpace,
+                                    MaybeAlign Alignment, unsigned AddressSpace,
                                     const Instruction *I) {
   assert(!Src->isVoidTy() && "Invalid type");
 
@@ -87,7 +87,7 @@ public:
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
 
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
@@ -2404,8 +2404,9 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
 }
 
-int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                unsigned AddressSpace, const Instruction *I) {
+int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                MaybeAlign Alignment, unsigned AddressSpace,
+                                const Instruction *I) {
   // Handle non-power-of-two vectors such as <3 x float>
   if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
     unsigned NumElem = VTy->getVectorNumElements();
@@ -2456,7 +2457,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
   VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
   if (!SrcVTy)
     // To calculate scalar take the regular cost, without mask
-    return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace);
+    return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace);
 
   unsigned NumElem = SrcVTy->getVectorNumElements();
   VectorType *MaskTy =
@@ -2474,7 +2475,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
   int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
   int MemopCost =
       NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                       Alignment, AddressSpace);
+                                       MaybeAlign(Alignment), AddressSpace);
   return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
 }
 
@@ -3164,7 +3165,7 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
                          ? ST->getGatherOverhead()
                          : ST->getScatterOverhead();
   return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                           Alignment, AddressSpace);
+                                           MaybeAlign(Alignment), AddressSpace);
 }
 
 /// Return the cost of full scalarization of gather / scatter operation.
@@ -3194,7 +3195,7 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
 
   // The cost of the scalar loads/stores.
   int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                          Alignment, AddressSpace);
+                                          MaybeAlign(Alignment), AddressSpace);
 
   int InsertExtractCost = 0;
   if (Opcode == Instruction::Load)
@@ -3520,8 +3521,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
   // Get the cost of one memory operation.
   Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
                                         LegalVT.getVectorNumElements());
-  unsigned MemOpCost =
-      getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
+  unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
+                                       MaybeAlign(Alignment), AddressSpace);
 
   VectorType *VT = VectorType::get(ScalarTy, VF);
   EVT ETy = TLI->getValueType(DL, VT);
@@ -3620,8 +3621,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
   // Get the cost of one memory operation.
   Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
                                         LegalVT.getVectorNumElements());
-  unsigned MemOpCost =
-      getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
+  unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
+                                       MaybeAlign(Alignment), AddressSpace);
 
   unsigned VF = VecTy->getVectorNumElements() / Factor;
   MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
@@ -133,7 +133,7 @@ public:
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                             unsigned AddressSpace);
@@ -5746,7 +5746,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // vectorized loop where the user of it is a vectorized instruction.
   const MaybeAlign Alignment = getLoadStoreAlignment(I);
   Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
-                                   Alignment ? Alignment->value() : 0, AS);
+                                   Alignment, AS);
 
   // Get the overhead of the extractelement and insertelement instructions
   // we might create due to scalarization.
@@ -5783,8 +5783,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
                                       Alignment ? Alignment->value() : 0, AS);
   else
-    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
-                                Alignment ? Alignment->value() : 0, AS, I);
+    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I);
 
   bool Reverse = ConsecutiveStride < 0;
   if (Reverse)
@@ -5800,16 +5799,14 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
   unsigned AS = getLoadStoreAddressSpace(I);
   if (isa<LoadInst>(I)) {
     return TTI.getAddressComputationCost(ValTy) +
-           TTI.getMemoryOpCost(Instruction::Load, ValTy,
-                               Alignment ? Alignment->value() : 0, AS) +
+           TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
            TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
   }
   StoreInst *SI = cast<StoreInst>(I);
 
   bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
   return TTI.getAddressComputationCost(ValTy) +
-         TTI.getMemoryOpCost(Instruction::Store, ValTy,
-                             Alignment ? Alignment->value() : 0, AS) +
+         TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
          (isLoopInvariantStoreValue
               ? 0
               : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
@@ -5877,8 +5874,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
     unsigned AS = getLoadStoreAddressSpace(I);
 
     return TTI.getAddressComputationCost(ValTy) +
-           TTI.getMemoryOpCost(I->getOpcode(), ValTy,
-                               Alignment ? Alignment->value() : 0, AS, I);
+           TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
   }
   return getWideningCost(I, VF);
 }
@@ -3162,7 +3162,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     }
     case Instruction::Load: {
      // Cost of wide load - cost of scalar loads.
-      unsigned alignment = cast<LoadInst>(VL0)->getAlignment();
+      MaybeAlign alignment(cast<LoadInst>(VL0)->getAlignment());
       int ScalarEltCost =
           TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
       if (NeedToShuffleReuses) {
@@ -3180,7 +3180,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     }
     case Instruction::Store: {
       // We know that we can merge the stores. Calculate the cost.
-      unsigned alignment = cast<StoreInst>(VL0)->getAlignment();
+      MaybeAlign alignment(cast<StoreInst>(VL0)->getAlignment());
       int ScalarEltCost =
           TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
       if (NeedToShuffleReuses) {