forked from OSchip/llvm-project
Loop Vectorizer: Refactor code to compute vectorized memory instruction cost
Introduce a helper class that computes the cost of memory access instructions. No functionality change intended. llvm-svn: 174422
This commit is contained in:
parent
61f39ce8b8
commit
3be40b56c5
|
@ -560,6 +560,11 @@ public:
|
|||
/// \return information about the register usage of the loop.
|
||||
RegisterUsage calculateRegisterUsage();
|
||||
|
||||
/// A helper function for converting Scalar types to vector types.
|
||||
/// If the incoming type is void, we return void. If the VF is 1, we return
|
||||
/// the scalar type.
|
||||
static Type* ToVectorTy(Type *Scalar, unsigned VF);
|
||||
|
||||
private:
|
||||
/// Returns the expected execution cost. The unit of the cost does
|
||||
/// not matter because we use the 'cost' units to compare different
|
||||
|
@ -571,11 +576,6 @@ private:
|
|||
/// width. Vector width of one means scalar.
|
||||
unsigned getInstructionCost(Instruction *I, unsigned VF);
|
||||
|
||||
/// A helper function for converting Scalar types to vector types.
|
||||
/// If the incoming type is void, we return void. If the VF is 1, we return
|
||||
/// the scalar type.
|
||||
static Type* ToVectorTy(Type *Scalar, unsigned VF);
|
||||
|
||||
/// Returns whether the instruction is a load or store and will be emitted
|
||||
/// as a vector operation.
|
||||
bool isConsecutiveLoadOrStore(Instruction *I);
|
||||
|
@ -594,6 +594,177 @@ private:
|
|||
DataLayout *DL;
|
||||
};
|
||||
|
||||
/// A helper class to compute the cost of a memory operation (load or store).
|
||||
class MemoryCostComputation {
|
||||
public:
|
||||
/// \brief This function computes the cost of a memory instruction, either of
|
||||
/// a load or of a store.
|
||||
/// \param Inst a pointer to a LoadInst or a StoreInst.
|
||||
/// \param VF the vector factor to use.
|
||||
/// \param TTI the target transform information used to obtain costs.
|
||||
/// \param Legality the legality class used by this function to obtain the
|
||||
/// access strid of the memory operation.
|
||||
/// \returns the estimated cost of the memory instruction.
|
||||
static unsigned computeCost(Value *Inst, unsigned VF,
|
||||
const TargetTransformInfo &TTI,
|
||||
LoopVectorizationLegality *Legality) {
|
||||
if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
|
||||
return StoreCost(Store, VF, TTI, Legality).cost();
|
||||
|
||||
return LoadCost(cast<LoadInst>(Inst), VF, TTI, Legality).cost();
|
||||
}
|
||||
|
||||
private:
|
||||
/// An helper class to compute the cost of vectorize memory instruction. It is
|
||||
/// subclassed by load and store cost computation classes who fill the fields
|
||||
/// with values that require knowing about the concrete Load/StoreInst class.
|
||||
class MemoryOpCost {
|
||||
public:
|
||||
/// \return the cost of vectorizing the memory access instruction.
|
||||
unsigned cost() {
|
||||
if (VectorFactor == 1)
|
||||
return TTI.getMemoryOpCost(Opcode, VectorTy, Alignment, AddressSpace);
|
||||
|
||||
if ((Stride = Legality->isConsecutivePtr(PointerOperand)))
|
||||
return costOfWideMemInst();
|
||||
|
||||
return costOfScalarizedMemInst();
|
||||
}
|
||||
|
||||
protected:
|
||||
/// The pointer operand of the memory instruction.
|
||||
Value *PointerOperand;
|
||||
/// The scalar type of the memory access.
|
||||
Type *ScalarTy;
|
||||
/// The vector type of the memory access.
|
||||
Type *VectorTy;
|
||||
/// The vector factor by which we vectorize.
|
||||
unsigned VectorFactor;
|
||||
/// The stride of the memory access.
|
||||
int Stride;
|
||||
/// The alignment of the memory operation.
|
||||
unsigned Alignment;
|
||||
/// The address space of the memory operation.
|
||||
unsigned AddressSpace;
|
||||
/// The opcode of the memory instruction.
|
||||
unsigned Opcode;
|
||||
/// Are we looking at a load or store instruction.
|
||||
bool IsLoadInst;
|
||||
const TargetTransformInfo &TTI;
|
||||
LoopVectorizationLegality *Legality;
|
||||
|
||||
/// Constructs a helper class to compute the cost of a memory instruction.
|
||||
/// \param VF the vector factor (the length of the vector).
|
||||
/// \param TI the target transform information used by this class to obtain
|
||||
/// costs.
|
||||
/// \param L the legality class used by this class to obtain the access
|
||||
/// stride of the memory operation.
|
||||
MemoryOpCost(unsigned VF, const TargetTransformInfo &TI,
|
||||
LoopVectorizationLegality *L) :
|
||||
VectorFactor(VF), TTI(TI), Legality(L) {
|
||||
}
|
||||
|
||||
private:
|
||||
/// \return the cost if the memory instruction is scalarized.
|
||||
unsigned costOfScalarizedMemInst() {
|
||||
unsigned Cost = 0;
|
||||
Cost += costOfExtractFromPointerVector();
|
||||
Cost += costOfExtractFromValueVector();
|
||||
Cost += VectorFactor * TTI.getMemoryOpCost(Opcode, ScalarTy, Alignment,
|
||||
AddressSpace);
|
||||
Cost += costOfInsertIntoValueVector();
|
||||
return Cost;
|
||||
}
|
||||
|
||||
/// \return the cost of extracting the pointers out of the pointer vector.
|
||||
unsigned costOfExtractFromPointerVector() {
|
||||
Type *PtrTy = getVectorizedPointerOperandType();
|
||||
return costOfVectorInstForAllElems(Instruction::ExtractElement, PtrTy);
|
||||
}
|
||||
|
||||
/// \return the cost for extracting values out of the value vector if the
|
||||
/// memory instruction is a store and zero otherwise.
|
||||
unsigned costOfExtractFromValueVector() {
|
||||
if (IsLoadInst)
|
||||
return 0;
|
||||
|
||||
return costOfVectorInstForAllElems(Instruction::ExtractElement, VectorTy);
|
||||
}
|
||||
|
||||
/// \return the cost of insert values into the value vector if the memory
|
||||
/// instruction was a load and zero otherwise.
|
||||
unsigned costOfInsertIntoValueVector() {
|
||||
if (!IsLoadInst)
|
||||
return 0;
|
||||
|
||||
return costOfVectorInstForAllElems(Instruction::InsertElement, VectorTy);
|
||||
}
|
||||
|
||||
/// \return the cost of a vector memory instruction.
|
||||
unsigned costOfWideMemInst() {
|
||||
unsigned Cost = TTI.getMemoryOpCost(Opcode, VectorTy, Alignment,
|
||||
AddressSpace);
|
||||
// Reverse stride.
|
||||
if (Stride < 0)
|
||||
Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,
|
||||
0);
|
||||
return Cost;
|
||||
}
|
||||
|
||||
/// Helper function to compute the cost of one insert- or extractelement
|
||||
/// instruction per vector element.
|
||||
/// \param VecOpcode the vector instruction opcode (Can be either
|
||||
/// InsertElement or an ExtractElement).
|
||||
/// \param Ty the vector type the vector instruction operates on.
|
||||
/// \return the cost of an vector instruction applied to each vector
|
||||
/// element.
|
||||
unsigned costOfVectorInstForAllElems(unsigned VecOpcode, Type *Ty) {
|
||||
unsigned Cost = 0;
|
||||
for (unsigned i = 0; i < VectorFactor; ++i)
|
||||
Cost += TTI.getVectorInstrCost(VecOpcode, Ty, i);
|
||||
return Cost;
|
||||
}
|
||||
|
||||
/// \return a vectorized type for the pointer operand.
|
||||
Type * getVectorizedPointerOperandType() {
|
||||
Type *PointerOpTy = PointerOperand->getType();
|
||||
return LoopVectorizationCostModel::ToVectorTy(PointerOpTy, VectorFactor);
|
||||
}
|
||||
};
|
||||
|
||||
/// Implementation of the abstract memory cost base class. Sets field of base
|
||||
/// class whose value depends on the LoadInst.
|
||||
class LoadCost : public MemoryOpCost {
|
||||
public:
|
||||
LoadCost(LoadInst *Load, unsigned VF, const TargetTransformInfo &TI,
|
||||
LoopVectorizationLegality *L) : MemoryOpCost(VF, TI, L) {
|
||||
PointerOperand = Load->getPointerOperand();
|
||||
ScalarTy = Load->getType();
|
||||
VectorTy = LoopVectorizationCostModel::ToVectorTy(ScalarTy, VF);
|
||||
Alignment = Load->getAlignment();
|
||||
AddressSpace = Load->getPointerAddressSpace();
|
||||
Opcode = Load->getOpcode();
|
||||
IsLoadInst = true;
|
||||
}
|
||||
};
|
||||
|
||||
/// Implementation of the abstract memory cost base class. Sets field of base
|
||||
/// class whose value depends on the StoreInst.
|
||||
class StoreCost : public MemoryOpCost {
|
||||
public:
|
||||
StoreCost(StoreInst *Store, unsigned VF, const TargetTransformInfo &TI,
|
||||
LoopVectorizationLegality *L) : MemoryOpCost(VF, TI, L) {
|
||||
PointerOperand = Store->getPointerOperand();
|
||||
ScalarTy = Store->getValueOperand()->getType();
|
||||
VectorTy = LoopVectorizationCostModel::ToVectorTy(ScalarTy, VF);
|
||||
Alignment = Store->getAlignment();
|
||||
AddressSpace = Store->getPointerAddressSpace();
|
||||
Opcode = Store->getOpcode();
|
||||
IsLoadInst = false;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
/// The LoopVectorize Pass.
|
||||
struct LoopVectorize : public LoopPass {
|
||||
/// Pass identification, replacement for typeid
|
||||
|
@ -3097,83 +3268,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
|||
VectorTy = ToVectorTy(ValTy, VF);
|
||||
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
|
||||
}
|
||||
case Instruction::Load:
|
||||
case Instruction::Store: {
|
||||
StoreInst *SI = cast<StoreInst>(I);
|
||||
Type *ValTy = SI->getValueOperand()->getType();
|
||||
VectorTy = ToVectorTy(ValTy, VF);
|
||||
|
||||
if (VF == 1)
|
||||
return TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
|
||||
SI->getAlignment(),
|
||||
SI->getPointerAddressSpace());
|
||||
|
||||
// Scalarized stores.
|
||||
int Stride = Legal->isConsecutivePtr(SI->getPointerOperand());
|
||||
bool Reverse = Stride < 0;
|
||||
if (0 == Stride) {
|
||||
unsigned Cost = 0;
|
||||
|
||||
// The cost of extracting from the value vector and pointer vector.
|
||||
Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
|
||||
for (unsigned i = 0; i < VF; ++i) {
|
||||
Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
|
||||
i);
|
||||
Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
|
||||
}
|
||||
|
||||
// The cost of the scalar stores.
|
||||
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
|
||||
SI->getAlignment(),
|
||||
SI->getPointerAddressSpace());
|
||||
return Cost;
|
||||
}
|
||||
|
||||
// Wide stores.
|
||||
unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
|
||||
SI->getAlignment(),
|
||||
SI->getPointerAddressSpace());
|
||||
if (Reverse)
|
||||
Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
|
||||
VectorTy, 0);
|
||||
return Cost;
|
||||
return MemoryCostComputation::computeCost(I, VF, TTI, Legal);
|
||||
}
|
||||
case Instruction::Load: {
|
||||
LoadInst *LI = cast<LoadInst>(I);
|
||||
|
||||
if (VF == 1)
|
||||
return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
|
||||
LI->getPointerAddressSpace());
|
||||
|
||||
// Scalarized loads.
|
||||
int Stride = Legal->isConsecutivePtr(LI->getPointerOperand());
|
||||
bool Reverse = Stride < 0;
|
||||
if (0 == Stride) {
|
||||
unsigned Cost = 0;
|
||||
Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
|
||||
|
||||
// The cost of extracting from the pointer vector.
|
||||
for (unsigned i = 0; i < VF; ++i)
|
||||
Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
|
||||
|
||||
// The cost of inserting data to the result vector.
|
||||
for (unsigned i = 0; i < VF; ++i)
|
||||
Cost += TTI.getVectorInstrCost(Instruction::InsertElement, VectorTy, i);
|
||||
|
||||
// The cost of the scalar loads.
|
||||
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), RetTy->getScalarType(),
|
||||
LI->getAlignment(),
|
||||
LI->getPointerAddressSpace());
|
||||
return Cost;
|
||||
}
|
||||
|
||||
// Wide loads.
|
||||
unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
|
||||
LI->getAlignment(),
|
||||
LI->getPointerAddressSpace());
|
||||
if (Reverse)
|
||||
Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
|
||||
return Cost;
|
||||
}
|
||||
case Instruction::ZExt:
|
||||
case Instruction::SExt:
|
||||
case Instruction::FPToUI:
|
||||
|
|
Loading…
Reference in New Issue