forked from OSchip/llvm-project
LoopVectorize: Teach the cost model to query scalar costs as scalar types and not vectors of 1.
llvm-svn: 166715
This commit is contained in:
parent
f0e8720054
commit
579042f71b
|
@ -324,6 +324,11 @@ private:
|
||||||
/// width. Vector width of one means scalar.
|
/// width. Vector width of one means scalar.
|
||||||
unsigned getInstructionCost(Instruction *I, unsigned VF);
|
unsigned getInstructionCost(Instruction *I, unsigned VF);
|
||||||
|
|
||||||
|
/// A helper function for converting Scalar types to vector types.
|
||||||
|
/// If the incoming type is void, we return void. If the VF is 1, we return
|
||||||
|
/// the scalar type.
|
||||||
|
static Type* ToVectorTy(Type *Scalar, unsigned VF);
|
||||||
|
|
||||||
/// The loop that we evaluate.
|
/// The loop that we evaluate.
|
||||||
Loop *TheLoop;
|
Loop *TheLoop;
|
||||||
/// Scev analysis.
|
/// Scev analysis.
|
||||||
|
@ -1478,8 +1483,16 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
|
||||||
unsigned
|
unsigned
|
||||||
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||||
assert(VTTI && "Invalid vector target transformation info");
|
assert(VTTI && "Invalid vector target transformation info");
|
||||||
|
|
||||||
|
Type *RetTy = I->getType();
|
||||||
|
Type *VectorTy = ToVectorTy(RetTy, VF);
|
||||||
|
|
||||||
|
// TODO: We need to estimate the cost of intrinsic calls.
|
||||||
switch (I->getOpcode()) {
|
switch (I->getOpcode()) {
|
||||||
case Instruction::GetElementPtr:
|
case Instruction::GetElementPtr:
|
||||||
|
// We mark this instruction as zero-cost because scalar GEPs are usually
|
||||||
|
// lowered to the instruction addressing mode. At the moment we don't
|
||||||
|
// generate vector geps.
|
||||||
return 0;
|
return 0;
|
||||||
case Instruction::Br: {
|
case Instruction::Br: {
|
||||||
return VTTI->getInstrCost(I->getOpcode());
|
return VTTI->getInstrCost(I->getOpcode());
|
||||||
|
@ -1504,74 +1517,76 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||||
case Instruction::And:
|
case Instruction::And:
|
||||||
case Instruction::Or:
|
case Instruction::Or:
|
||||||
case Instruction::Xor: {
|
case Instruction::Xor: {
|
||||||
Type *VTy = VectorType::get(I->getType(), VF);
|
return VTTI->getInstrCost(I->getOpcode(), VectorTy);
|
||||||
return VTTI->getInstrCost(I->getOpcode(), VTy);
|
|
||||||
}
|
}
|
||||||
case Instruction::Select: {
|
case Instruction::Select: {
|
||||||
SelectInst *SI = cast<SelectInst>(I);
|
SelectInst *SI = cast<SelectInst>(I);
|
||||||
Type *VTy = VectorType::get(I->getType(), VF);
|
|
||||||
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
|
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
|
||||||
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
|
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
|
||||||
Type *CondTy = SI->getCondition()->getType();
|
Type *CondTy = SI->getCondition()->getType();
|
||||||
if (ScalarCond)
|
if (ScalarCond)
|
||||||
CondTy = VectorType::get(CondTy, VF);
|
CondTy = VectorType::get(CondTy, VF);
|
||||||
|
|
||||||
return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy);
|
return VTTI->getInstrCost(I->getOpcode(), VectorTy, CondTy);
|
||||||
}
|
}
|
||||||
case Instruction::ICmp:
|
case Instruction::ICmp:
|
||||||
case Instruction::FCmp: {
|
case Instruction::FCmp: {
|
||||||
Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF);
|
Type *ValTy = I->getOperand(0)->getType();
|
||||||
return VTTI->getInstrCost(I->getOpcode(), VTy);
|
VectorTy = ToVectorTy(ValTy, VF);
|
||||||
|
return VTTI->getInstrCost(I->getOpcode(), VectorTy);
|
||||||
}
|
}
|
||||||
case Instruction::Store: {
|
case Instruction::Store: {
|
||||||
StoreInst *SI = cast<StoreInst>(I);
|
StoreInst *SI = cast<StoreInst>(I);
|
||||||
Type *VTy = VectorType::get(SI->getValueOperand()->getType(), VF);
|
Type *ValTy = SI->getValueOperand()->getType();
|
||||||
|
VectorTy = ToVectorTy(ValTy, VF);
|
||||||
|
|
||||||
|
if (VF == 1)
|
||||||
|
return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
|
||||||
|
SI->getAlignment(), SI->getPointerAddressSpace());
|
||||||
|
|
||||||
// Scalarized stores.
|
// Scalarized stores.
|
||||||
if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
|
if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
|
||||||
unsigned Cost = 0;
|
unsigned Cost = 0;
|
||||||
if (VF != 1) {
|
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
|
||||||
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
|
ValTy);
|
||||||
VTy);
|
// The cost of extracting from the value vector.
|
||||||
// The cost of extracting from the value vector and pointer vector.
|
Cost += VF * (ExtCost);
|
||||||
Cost += VF * (ExtCost * 2);
|
|
||||||
}
|
|
||||||
// The cost of the scalar stores.
|
// The cost of the scalar stores.
|
||||||
Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
|
Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
|
||||||
VTy->getScalarType(),
|
ValTy->getScalarType(),
|
||||||
SI->getAlignment(),
|
SI->getAlignment(),
|
||||||
SI->getPointerAddressSpace());
|
SI->getPointerAddressSpace());
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wide stores.
|
// Wide stores.
|
||||||
return VTTI->getMemoryOpCost(I->getOpcode(), VTy, SI->getAlignment(),
|
return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(),
|
||||||
SI->getPointerAddressSpace());
|
SI->getPointerAddressSpace());
|
||||||
}
|
}
|
||||||
case Instruction::Load: {
|
case Instruction::Load: {
|
||||||
LoadInst *LI = cast<LoadInst>(I);
|
LoadInst *LI = cast<LoadInst>(I);
|
||||||
Type *VTy = VectorType::get(I->getType(), VF);
|
|
||||||
|
if (VF == 1)
|
||||||
|
return VTTI->getMemoryOpCost(I->getOpcode(), RetTy,
|
||||||
|
LI->getAlignment(),
|
||||||
|
LI->getPointerAddressSpace());
|
||||||
|
|
||||||
// Scalarized loads.
|
// Scalarized loads.
|
||||||
if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
|
if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
|
||||||
unsigned Cost = 0;
|
unsigned Cost = 0;
|
||||||
if (VF != 1) {
|
unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
|
||||||
unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
|
// The cost of inserting the loaded value into the result vector.
|
||||||
unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractValue, VTy);
|
Cost += VF * (InCost);
|
||||||
|
|
||||||
// The cost of inserting the loaded value into the result vector, and
|
|
||||||
// extracting from a vector of pointers.
|
|
||||||
Cost += VF * (InCost + ExCost);
|
|
||||||
}
|
|
||||||
// The cost of the scalar loads.
|
// The cost of the scalar loads.
|
||||||
Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(),
|
Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
|
||||||
|
RetTy->getScalarType(),
|
||||||
LI->getAlignment(),
|
LI->getAlignment(),
|
||||||
LI->getPointerAddressSpace());
|
LI->getPointerAddressSpace());
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wide loads.
|
// Wide loads.
|
||||||
return VTTI->getMemoryOpCost(I->getOpcode(), VTy, LI->getAlignment(),
|
return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
|
||||||
LI->getPointerAddressSpace());
|
LI->getPointerAddressSpace());
|
||||||
}
|
}
|
||||||
case Instruction::ZExt:
|
case Instruction::ZExt:
|
||||||
|
@ -1586,35 +1601,40 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||||
case Instruction::Trunc:
|
case Instruction::Trunc:
|
||||||
case Instruction::FPTrunc:
|
case Instruction::FPTrunc:
|
||||||
case Instruction::BitCast: {
|
case Instruction::BitCast: {
|
||||||
Type *SrcTy = VectorType::get(I->getOperand(0)->getType(), VF);
|
Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
|
||||||
Type *DstTy = VectorType::get(I->getType(), VF);
|
return VTTI->getInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
|
||||||
return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy);
|
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
// We are scalarizing the instruction. Return the cost of the scalar
|
// We are scalarizing the instruction. Return the cost of the scalar
|
||||||
// instruction, plus the cost of insert and extract into vector
|
// instruction, plus the cost of insert and extract into vector
|
||||||
// elements, times the vector width.
|
// elements, times the vector width.
|
||||||
unsigned Cost = 0;
|
unsigned Cost = 0;
|
||||||
Type *Ty = I->getType();
|
|
||||||
|
|
||||||
if (!Ty->isVoidTy()) {
|
bool IsVoid = RetTy->isVoidTy();
|
||||||
Type *VTy = VectorType::get(Ty, VF);
|
|
||||||
unsigned InsCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
|
|
||||||
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy);
|
|
||||||
Cost += VF * (InsCost + ExtCost);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// We don't have any information on the scalar instruction, but maybe
|
unsigned InsCost = (IsVoid ? 0 :
|
||||||
/// the target has.
|
VTTI->getInstrCost(Instruction::InsertElement,
|
||||||
/// TODO: This may be a target-specific intrinsic.
|
VectorTy));
|
||||||
/// Need to add API for that.
|
|
||||||
Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty);
|
|
||||||
|
|
||||||
|
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
|
||||||
|
VectorTy);
|
||||||
|
|
||||||
|
// The cost of inserting the results plus extracting each one of the
|
||||||
|
// operands.
|
||||||
|
Cost += VF * (InsCost + ExtCost * I->getNumOperands());
|
||||||
|
|
||||||
|
// The cost of executing VF copies of the scalar instruction.
|
||||||
|
Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy);
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
}// end of switch.
|
}// end of switch.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Map a scalar type to the type used when costing it at vector width \p VF.
///
/// \param Scalar the element type of the instruction being costed.
/// \param VF     the vectorization factor; a factor of one means scalar.
/// \returns \p Scalar unchanged when it is void or when \p VF is 1, otherwise
///          the vector type with \p VF elements of \p Scalar.
Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
  // Only non-void types at a width other than one are widened.
  const bool Widen = !Scalar->isVoidTy() && VF != 1;
  if (Widen)
    return VectorType::get(Scalar, VF);

  // Void results and the scalar (VF == 1) case are passed through as-is.
  return Scalar;
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue