[Alignment][NFC] getMemoryOpCost uses MaybeAlign

Summary:
This is patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: nemanjai, hiraditya, kbarton, MaskRay, jsji, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69307
This commit is contained in:
Guillaume Chatelet 2019-10-22 17:16:52 +02:00
parent 3c7c371793
commit a4783ef58d
18 changed files with 80 additions and 66 deletions

View File

@ -930,8 +930,9 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
/// \return The cost of Load and Store instructions.
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr) const;
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
const Instruction *I = nullptr) const;
/// \return The cost of masked Load and Store instructions.
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@ -1305,7 +1306,7 @@ public:
Type *CondTy, const Instruction *I) = 0;
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0;
virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I) = 0;
virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
@ -1711,7 +1712,7 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I) override {
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
}

View File

@ -447,7 +447,7 @@ public:
return 1;
}
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I) {
return 1;
}

View File

@ -869,8 +869,9 @@ public:
return LT.first;
}
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr) {
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
const Instruction *I = nullptr) {
assert(!Src->isVoidTy() && "Invalid type");
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
@ -921,8 +922,8 @@ public:
Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
Opcode, VecTy, Alignment, AddressSpace);
else
Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
AddressSpace);
Cost = static_cast<T *>(this)->getMemoryOpCost(
Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
// Legalize the vector type, and get the legalized and unlegalized type
// sizes.

View File

@ -639,7 +639,7 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
}
int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
MaybeAlign Alignment,
unsigned AddressSpace,
const Instruction *I) const {
assert ((I == nullptr || I->getOpcode() == Opcode) &&
@ -1201,13 +1201,13 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
const StoreInst *SI = cast<StoreInst>(I);
Type *ValTy = SI->getValueOperand()->getType();
return getMemoryOpCost(I->getOpcode(), ValTy,
SI->getAlignment(),
MaybeAlign(SI->getAlignment()),
SI->getPointerAddressSpace(), I);
}
case Instruction::Load: {
const LoadInst *LI = cast<LoadInst>(I);
return getMemoryOpCost(I->getOpcode(), I->getType(),
LI->getAlignment(),
MaybeAlign(LI->getAlignment()),
LI->getPointerAddressSpace(), I);
}
case Instruction::ZExt:

View File

@ -632,12 +632,12 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
}
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
unsigned Alignment, unsigned AddressSpace,
MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) {
auto LT = TLI->getTypeLegalizationCost(DL, Ty);
if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
LT.second.is128BitVector() && Alignment < 16) {
LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
// Unaligned stores are extremely inefficient. We don't split all
// unaligned 128-bit stores because the negative impact that has shown in
// practice on inlined block copy code.
@ -703,8 +703,8 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
if (!I->isVectorTy())
continue;
if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
getMemoryOpCost(Instruction::Load, I, 128, 0);
Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
getMemoryOpCost(Instruction::Load, I, Align(128), 0);
}
return Cost;
}

View File

@ -134,7 +134,7 @@ public:
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

View File

@ -735,11 +735,13 @@ int ARMTTIImpl::getArithmeticInstrCost(
return BaseCost;
}
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I) {
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (ST->hasNEON() && Src->isVectorTy() && Alignment != 16 &&
if (ST->hasNEON() && Src->isVectorTy() &&
(Alignment && *Alignment != Align(16)) &&
Src->getVectorElementType()->isDoubleTy()) {
// Unaligned loads/stores are extremely inefficient.
// We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.

View File

@ -189,7 +189,7 @@ public:
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,

View File

@ -152,7 +152,9 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
}
unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
MaybeAlign Alignment,
unsigned AddressSpace,
const Instruction *I) {
assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
if (Opcode == Instruction::Store)
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
@ -166,24 +168,30 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// Cost of HVX loads.
if (VecWidth % RegWidth == 0)
return VecWidth / RegWidth;
// Cost of constructing HVX vector from scalar loads.
Alignment = std::min(Alignment, RegWidth / 8);
unsigned AlignWidth = 8 * std::max(1u, Alignment);
// Cost of constructing HVX vector from scalar loads
const Align RegAlign(RegWidth / 8);
if (!Alignment || *Alignment > RegAlign)
Alignment = RegAlign;
assert(Alignment);
unsigned AlignWidth = 8 * Alignment->value();
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
return 3 * NumLoads;
}
// Non-HVX vectors.
// Add extra cost for floating point types.
unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
: 1;
Alignment = std::min(Alignment, 8u);
unsigned AlignWidth = 8 * std::max(1u, Alignment);
unsigned Cost =
VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;
// At this point unspecified alignment is considered as Align::None().
const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8));
unsigned AlignWidth = 8 * BoundAlignment.value();
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
if (Alignment == 4 || Alignment == 8)
if (Alignment == Align(4) || Alignment == Align(8))
return Cost * NumLoads;
// Loads of less than 32 bits will need extra inserts to compose a vector.
unsigned LogA = Log2_32(Alignment);
assert(BoundAlignment <= Align(8));
unsigned LogA = Log2(BoundAlignment);
return (3 - LogA) * Cost * NumLoads;
}
@ -214,7 +222,8 @@ unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace,
UseMaskForCond, UseMaskForGaps);
return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
nullptr);
}
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,

View File

@ -112,8 +112,9 @@ public:
unsigned ScalarizationCostPassed = UINT_MAX);
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
const SCEV *S);
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
const Instruction *I = nullptr);
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,

View File

@ -829,8 +829,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return Cost;
}
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I) {
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
@ -888,7 +889,8 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
// to be decomposed based on the alignment factor.
// Add the cost of each scalar load or store.
Cost += LT.first*(SrcBytes/Alignment-1);
assert(Alignment);
Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);
// For a vector type, there is also scalarization overhead (only for
// stores, loads are expanded using the vector-load + permutation sequence,
@ -919,7 +921,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
// Firstly, the cost of load/store operation.
int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
int Cost =
getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
// PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant

View File

@ -97,7 +97,7 @@ public:
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,

View File

@ -259,7 +259,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
if (isa<StoreInst>(&I)) {
Type *MemAccessTy = I.getOperand(0)->getType();
NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
}
}
@ -995,7 +995,7 @@ static bool isBswapIntrinsicCall(const Value *V) {
}
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace,
MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) {
assert(!Src->isVoidTy() && "Invalid type");

View File

@ -87,7 +87,7 @@ public:
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,

View File

@ -2404,8 +2404,9 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
}
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I) {
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) {
// Handle non-power-of-two vectors such as <3 x float>
if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
unsigned NumElem = VTy->getVectorNumElements();
@ -2456,7 +2457,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
if (!SrcVTy)
// To calculate scalar take the regular cost, without mask
return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace);
return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace);
unsigned NumElem = SrcVTy->getVectorNumElements();
VectorType *MaskTy =
@ -2474,7 +2475,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
int MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace);
MaybeAlign(Alignment), AddressSpace);
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
}
@ -3164,7 +3165,7 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
? ST->getGatherOverhead()
: ST->getScatterOverhead();
return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace);
MaybeAlign(Alignment), AddressSpace);
}
/// Return the cost of full scalarization of gather / scatter operation.
@ -3194,7 +3195,7 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
// The cost of the scalar loads/stores.
int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace);
MaybeAlign(Alignment), AddressSpace);
int InsertExtractCost = 0;
if (Opcode == Instruction::Load)
@ -3520,8 +3521,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
// Get the cost of one memory operation.
Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
LegalVT.getVectorNumElements());
unsigned MemOpCost =
getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
MaybeAlign(Alignment), AddressSpace);
VectorType *VT = VectorType::get(ScalarTy, VF);
EVT ETy = TLI->getValueType(DL, VT);
@ -3620,8 +3621,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
// Get the cost of one memory operation.
Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
LegalVT.getVectorNumElements());
unsigned MemOpCost =
getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
MaybeAlign(Alignment), AddressSpace);
unsigned VF = VecTy->getVectorNumElements() / Factor;
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);

View File

@ -133,7 +133,7 @@ public:
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);

View File

@ -5746,7 +5746,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// vectorized loop where the user of it is a vectorized instruction.
const MaybeAlign Alignment = getLoadStoreAlignment(I);
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment ? Alignment->value() : 0, AS);
Alignment, AS);
// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
@ -5783,8 +5783,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
Alignment ? Alignment->value() : 0, AS);
else
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
Alignment ? Alignment->value() : 0, AS, I);
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I);
bool Reverse = ConsecutiveStride < 0;
if (Reverse)
@ -5800,16 +5799,14 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
unsigned AS = getLoadStoreAddressSpace(I);
if (isa<LoadInst>(I)) {
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Load, ValTy,
Alignment ? Alignment->value() : 0, AS) +
TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
}
StoreInst *SI = cast<StoreInst>(I);
bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Store, ValTy,
Alignment ? Alignment->value() : 0, AS) +
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
(isLoopInvariantStoreValue
? 0
: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
@ -5877,8 +5874,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
unsigned AS = getLoadStoreAddressSpace(I);
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(I->getOpcode(), ValTy,
Alignment ? Alignment->value() : 0, AS, I);
TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
}
return getWideningCost(I, VF);
}

View File

@ -3162,7 +3162,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
unsigned alignment = cast<LoadInst>(VL0)->getAlignment();
MaybeAlign alignment(cast<LoadInst>(VL0)->getAlignment());
int ScalarEltCost =
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
if (NeedToShuffleReuses) {
@ -3180,7 +3180,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
unsigned alignment = cast<StoreInst>(VL0)->getAlignment();
MaybeAlign alignment(cast<StoreInst>(VL0)->getAlignment());
int ScalarEltCost =
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
if (NeedToShuffleReuses) {