[Alignment][NFC] getMemoryOpCost uses MaybeAlign

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: nemanjai, hiraditya, kbarton, MaskRay, jsji, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69307
This commit is contained in:
Guillaume Chatelet 2019-10-22 17:16:52 +02:00
parent 3c7c371793
commit a4783ef58d
18 changed files with 80 additions and 66 deletions

View File

@ -930,8 +930,9 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
/// \return The cost of Load and Store instructions. /// \return The cost of Load and Store instructions.
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr) const; unsigned AddressSpace,
const Instruction *I = nullptr) const;
/// \return The cost of masked Load and Store instructions. /// \return The cost of masked Load and Store instructions.
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@ -1305,7 +1306,7 @@ public:
Type *CondTy, const Instruction *I) = 0; Type *CondTy, const Instruction *I) = 0;
virtual int getVectorInstrCost(unsigned Opcode, Type *Val, virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0; unsigned Index) = 0;
virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I) = 0; unsigned AddressSpace, const Instruction *I) = 0;
virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned Alignment,
@ -1711,7 +1712,7 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index); return Impl.getVectorInstrCost(Opcode, Val, Index);
} }
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I) override { unsigned AddressSpace, const Instruction *I) override {
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
} }

View File

@ -447,7 +447,7 @@ public:
return 1; return 1;
} }
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I) { unsigned AddressSpace, const Instruction *I) {
return 1; return 1;
} }

View File

@ -869,8 +869,9 @@ public:
return LT.first; return LT.first;
} }
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr) { unsigned AddressSpace,
const Instruction *I = nullptr) {
assert(!Src->isVoidTy() && "Invalid type"); assert(!Src->isVoidTy() && "Invalid type");
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
@ -921,8 +922,8 @@ public:
Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
Opcode, VecTy, Alignment, AddressSpace); Opcode, VecTy, Alignment, AddressSpace);
else else
Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment, Cost = static_cast<T *>(this)->getMemoryOpCost(
AddressSpace); Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
// Legalize the vector type, and get the legalized and unlegalized type // Legalize the vector type, and get the legalized and unlegalized type
// sizes. // sizes.

View File

@ -639,7 +639,7 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
} }
int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, MaybeAlign Alignment,
unsigned AddressSpace, unsigned AddressSpace,
const Instruction *I) const { const Instruction *I) const {
assert ((I == nullptr || I->getOpcode() == Opcode) && assert ((I == nullptr || I->getOpcode() == Opcode) &&
@ -1201,13 +1201,13 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
const StoreInst *SI = cast<StoreInst>(I); const StoreInst *SI = cast<StoreInst>(I);
Type *ValTy = SI->getValueOperand()->getType(); Type *ValTy = SI->getValueOperand()->getType();
return getMemoryOpCost(I->getOpcode(), ValTy, return getMemoryOpCost(I->getOpcode(), ValTy,
SI->getAlignment(), MaybeAlign(SI->getAlignment()),
SI->getPointerAddressSpace(), I); SI->getPointerAddressSpace(), I);
} }
case Instruction::Load: { case Instruction::Load: {
const LoadInst *LI = cast<LoadInst>(I); const LoadInst *LI = cast<LoadInst>(I);
return getMemoryOpCost(I->getOpcode(), I->getType(), return getMemoryOpCost(I->getOpcode(), I->getType(),
LI->getAlignment(), MaybeAlign(LI->getAlignment()),
LI->getPointerAddressSpace(), I); LI->getPointerAddressSpace(), I);
} }
case Instruction::ZExt: case Instruction::ZExt:

View File

@ -632,12 +632,12 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
} }
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
unsigned Alignment, unsigned AddressSpace, MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) { const Instruction *I) {
auto LT = TLI->getTypeLegalizationCost(DL, Ty); auto LT = TLI->getTypeLegalizationCost(DL, Ty);
if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store && if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
LT.second.is128BitVector() && Alignment < 16) { LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
// Unaligned stores are extremely inefficient. We don't split all // Unaligned stores are extremely inefficient. We don't split all
// unaligned 128-bit stores because the negative impact that has shown in // unaligned 128-bit stores because the negative impact that has shown in
// practice on inlined block copy code. // practice on inlined block copy code.
@ -703,8 +703,8 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
if (!I->isVectorTy()) if (!I->isVectorTy())
continue; continue;
if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128) if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) + Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
getMemoryOpCost(Instruction::Load, I, 128, 0); getMemoryOpCost(Instruction::Load, I, Align(128), 0);
} }
return Cost; return Cost;
} }

View File

@ -134,7 +134,7 @@ public:
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const; bool IsZeroCmp) const;
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr); unsigned AddressSpace, const Instruction *I = nullptr);
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys); int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

View File

@ -735,11 +735,13 @@ int ARMTTIImpl::getArithmeticInstrCost(
return BaseCost; return BaseCost;
} }
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned AddressSpace, const Instruction *I) { MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (ST->hasNEON() && Src->isVectorTy() && Alignment != 16 && if (ST->hasNEON() && Src->isVectorTy() &&
(Alignment && *Alignment != Align(16)) &&
Src->getVectorElementType()->isDoubleTy()) { Src->getVectorElementType()->isDoubleTy()) {
// Unaligned loads/stores are extremely inefficient. // Unaligned loads/stores are extremely inefficient.
// We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr. // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.

View File

@ -189,7 +189,7 @@ public:
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>()); ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr); unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,

View File

@ -152,7 +152,9 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
} }
unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace, const Instruction *I) { MaybeAlign Alignment,
unsigned AddressSpace,
const Instruction *I) {
assert(Opcode == Instruction::Load || Opcode == Instruction::Store); assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
if (Opcode == Instruction::Store) if (Opcode == Instruction::Store)
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
@ -166,24 +168,30 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// Cost of HVX loads. // Cost of HVX loads.
if (VecWidth % RegWidth == 0) if (VecWidth % RegWidth == 0)
return VecWidth / RegWidth; return VecWidth / RegWidth;
// Cost of constructing HVX vector from scalar loads. // Cost of constructing HVX vector from scalar loads
Alignment = std::min(Alignment, RegWidth / 8); const Align RegAlign(RegWidth / 8);
unsigned AlignWidth = 8 * std::max(1u, Alignment); if (!Alignment || *Alignment > RegAlign)
Alignment = RegAlign;
assert(Alignment);
unsigned AlignWidth = 8 * Alignment->value();
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
return 3 * NumLoads; return 3 * NumLoads;
} }
// Non-HVX vectors. // Non-HVX vectors.
// Add extra cost for floating point types. // Add extra cost for floating point types.
unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor unsigned Cost =
: 1; VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;
Alignment = std::min(Alignment, 8u);
unsigned AlignWidth = 8 * std::max(1u, Alignment); // At this point unspecified alignment is considered as Align::None().
const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8));
unsigned AlignWidth = 8 * BoundAlignment.value();
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
if (Alignment == 4 || Alignment == 8) if (Alignment == Align(4) || Alignment == Align(8))
return Cost * NumLoads; return Cost * NumLoads;
// Loads of less than 32 bits will need extra inserts to compose a vector. // Loads of less than 32 bits will need extra inserts to compose a vector.
unsigned LogA = Log2_32(Alignment); assert(BoundAlignment <= Align(8));
unsigned LogA = Log2(BoundAlignment);
return (3 - LogA) * Cost * NumLoads; return (3 - LogA) * Cost * NumLoads;
} }
@ -214,7 +222,8 @@ unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, Alignment, AddressSpace,
UseMaskForCond, UseMaskForGaps); UseMaskForCond, UseMaskForGaps);
return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr); return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
nullptr);
} }
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,

View File

@ -112,8 +112,9 @@ public:
unsigned ScalarizationCostPassed = UINT_MAX); unsigned ScalarizationCostPassed = UINT_MAX);
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE, unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
const SCEV *S); const SCEV *S);
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr); unsigned AddressSpace,
const Instruction *I = nullptr);
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace); unsigned AddressSpace);
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,

View File

@ -829,8 +829,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return Cost; return Cost;
} }
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned AddressSpace, const Instruction *I) { MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) {
// Legalize the type. // Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
@ -888,7 +889,8 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
// to be decomposed based on the alignment factor. // to be decomposed based on the alignment factor.
// Add the cost of each scalar load or store. // Add the cost of each scalar load or store.
Cost += LT.first*(SrcBytes/Alignment-1); assert(Alignment);
Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);
// For a vector type, there is also scalarization overhead (only for // For a vector type, there is also scalarization overhead (only for
// stores, loads are expanded using the vector-load + permutation sequence, // stores, loads are expanded using the vector-load + permutation sequence,
@ -919,7 +921,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy); std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
// Firstly, the cost of load/store operation. // Firstly, the cost of load/store operation.
int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace); int Cost =
getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
// PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant // (at least in the sense that there need only be one non-loop-invariant

View File

@ -97,7 +97,7 @@ public:
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr); const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr); unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor, unsigned Factor,

View File

@ -259,7 +259,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
} }
if (isa<StoreInst>(&I)) { if (isa<StoreInst>(&I)) {
Type *MemAccessTy = I.getOperand(0)->getType(); Type *MemAccessTy = I.getOperand(0)->getType();
NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0); NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
} }
} }
@ -995,7 +995,7 @@ static bool isBswapIntrinsicCall(const Value *V) {
} }
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace, MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) { const Instruction *I) {
assert(!Src->isVoidTy() && "Invalid type"); assert(!Src->isVoidTy() && "Invalid type");

View File

@ -87,7 +87,7 @@ public:
const Instruction *I = nullptr); const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr); unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,

View File

@ -2404,8 +2404,9 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost; return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
} }
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned AddressSpace, const Instruction *I) { MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) {
// Handle non-power-of-two vectors such as <3 x float> // Handle non-power-of-two vectors such as <3 x float>
if (VectorType *VTy = dyn_cast<VectorType>(Src)) { if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
unsigned NumElem = VTy->getVectorNumElements(); unsigned NumElem = VTy->getVectorNumElements();
@ -2456,7 +2457,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy); VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
if (!SrcVTy) if (!SrcVTy)
// To calculate scalar take the regular cost, without mask // To calculate scalar take the regular cost, without mask
return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace); return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace);
unsigned NumElem = SrcVTy->getVectorNumElements(); unsigned NumElem = SrcVTy->getVectorNumElements();
VectorType *MaskTy = VectorType *MaskTy =
@ -2474,7 +2475,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore); int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
int MemopCost = int MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(), NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace); MaybeAlign(Alignment), AddressSpace);
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost; return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
} }
@ -3164,7 +3165,7 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
? ST->getGatherOverhead() ? ST->getGatherOverhead()
: ST->getScatterOverhead(); : ST->getScatterOverhead();
return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace); MaybeAlign(Alignment), AddressSpace);
} }
/// Return the cost of full scalarization of gather / scatter operation. /// Return the cost of full scalarization of gather / scatter operation.
@ -3194,7 +3195,7 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
// The cost of the scalar loads/stores. // The cost of the scalar loads/stores.
int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace); MaybeAlign(Alignment), AddressSpace);
int InsertExtractCost = 0; int InsertExtractCost = 0;
if (Opcode == Instruction::Load) if (Opcode == Instruction::Load)
@ -3520,8 +3521,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
// Get the cost of one memory operation. // Get the cost of one memory operation.
Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(), Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
LegalVT.getVectorNumElements()); LegalVT.getVectorNumElements());
unsigned MemOpCost = unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace); MaybeAlign(Alignment), AddressSpace);
VectorType *VT = VectorType::get(ScalarTy, VF); VectorType *VT = VectorType::get(ScalarTy, VF);
EVT ETy = TLI->getValueType(DL, VT); EVT ETy = TLI->getValueType(DL, VT);
@ -3620,8 +3621,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
// Get the cost of one memory operation. // Get the cost of one memory operation.
Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(), Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
LegalVT.getVectorNumElements()); LegalVT.getVectorNumElements());
unsigned MemOpCost = unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace); MaybeAlign(Alignment), AddressSpace);
unsigned VF = VecTy->getVectorNumElements() / Factor; unsigned VF = VecTy->getVectorNumElements() / Factor;
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);

View File

@ -133,7 +133,7 @@ public:
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr); const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr); unsigned AddressSpace, const Instruction *I = nullptr);
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace); unsigned AddressSpace);

View File

@ -5746,7 +5746,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// vectorized loop where the user of it is a vectorized instruction. // vectorized loop where the user of it is a vectorized instruction.
const MaybeAlign Alignment = getLoadStoreAlignment(I); const MaybeAlign Alignment = getLoadStoreAlignment(I);
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment ? Alignment->value() : 0, AS); Alignment, AS);
// Get the overhead of the extractelement and insertelement instructions // Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization. // we might create due to scalarization.
@ -5783,8 +5783,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
Alignment ? Alignment->value() : 0, AS); Alignment ? Alignment->value() : 0, AS);
else else
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I);
Alignment ? Alignment->value() : 0, AS, I);
bool Reverse = ConsecutiveStride < 0; bool Reverse = ConsecutiveStride < 0;
if (Reverse) if (Reverse)
@ -5800,16 +5799,14 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
unsigned AS = getLoadStoreAddressSpace(I); unsigned AS = getLoadStoreAddressSpace(I);
if (isa<LoadInst>(I)) { if (isa<LoadInst>(I)) {
return TTI.getAddressComputationCost(ValTy) + return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Load, ValTy, TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
Alignment ? Alignment->value() : 0, AS) +
TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy); TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
} }
StoreInst *SI = cast<StoreInst>(I); StoreInst *SI = cast<StoreInst>(I);
bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand()); bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
return TTI.getAddressComputationCost(ValTy) + return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Store, ValTy, TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
Alignment ? Alignment->value() : 0, AS) +
(isLoopInvariantStoreValue (isLoopInvariantStoreValue
? 0 ? 0
: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy, : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
@ -5877,8 +5874,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
unsigned AS = getLoadStoreAddressSpace(I); unsigned AS = getLoadStoreAddressSpace(I);
return TTI.getAddressComputationCost(ValTy) + return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(I->getOpcode(), ValTy, TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
Alignment ? Alignment->value() : 0, AS, I);
} }
return getWideningCost(I, VF); return getWideningCost(I, VF);
} }

View File

@ -3162,7 +3162,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
} }
case Instruction::Load: { case Instruction::Load: {
// Cost of wide load - cost of scalar loads. // Cost of wide load - cost of scalar loads.
unsigned alignment = cast<LoadInst>(VL0)->getAlignment(); MaybeAlign alignment(cast<LoadInst>(VL0)->getAlignment());
int ScalarEltCost = int ScalarEltCost =
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0); TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
if (NeedToShuffleReuses) { if (NeedToShuffleReuses) {
@ -3180,7 +3180,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
} }
case Instruction::Store: { case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost. // We know that we can merge the stores. Calculate the cost.
unsigned alignment = cast<StoreInst>(VL0)->getAlignment(); MaybeAlign alignment(cast<StoreInst>(VL0)->getAlignment());
int ScalarEltCost = int ScalarEltCost =
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0); TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
if (NeedToShuffleReuses) { if (NeedToShuffleReuses) {