LoopVectorize: Teach the cost model to query scalar costs as scalar types and not vectors of 1.

llvm-svn: 166715
This commit is contained in:
Nadav Rotem 2012-10-25 21:03:48 +00:00
parent f0e8720054
commit 579042f71b
1 changed files with 61 additions and 41 deletions

View File

@ -324,6 +324,11 @@ private:
/// width. Vector width of one means scalar. /// width. Vector width of one means scalar.
unsigned getInstructionCost(Instruction *I, unsigned VF); unsigned getInstructionCost(Instruction *I, unsigned VF);
/// A helper function for converting scalar types to vector types.
///
/// \param Scalar the element type to widen; may be void.
/// \param VF the requested vector width. A width of one means scalar.
/// \returns \p Scalar unchanged when it is void or when \p VF is 1;
/// otherwise the result of VectorType::get(Scalar, VF).
static Type* ToVectorTy(Type *Scalar, unsigned VF);
/// The loop that we evaluate. /// The loop that we evaluate.
Loop *TheLoop; Loop *TheLoop;
/// Scev analysis. /// Scev analysis.
@ -1478,8 +1483,16 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
unsigned unsigned
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
assert(VTTI && "Invalid vector target transformation info"); assert(VTTI && "Invalid vector target transformation info");
Type *RetTy = I->getType();
Type *VectorTy = ToVectorTy(RetTy, VF);
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) { switch (I->getOpcode()) {
case Instruction::GetElementPtr: case Instruction::GetElementPtr:
// We mark this instruction as zero-cost because scalar GEPs are usually
// lowered to the instruction addressing mode. At the moment we don't
// generate vector geps.
return 0; return 0;
case Instruction::Br: { case Instruction::Br: {
return VTTI->getInstrCost(I->getOpcode()); return VTTI->getInstrCost(I->getOpcode());
@ -1504,74 +1517,76 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::And: case Instruction::And:
case Instruction::Or: case Instruction::Or:
case Instruction::Xor: { case Instruction::Xor: {
Type *VTy = VectorType::get(I->getType(), VF); return VTTI->getInstrCost(I->getOpcode(), VectorTy);
return VTTI->getInstrCost(I->getOpcode(), VTy);
} }
case Instruction::Select: { case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I); SelectInst *SI = cast<SelectInst>(I);
Type *VTy = VectorType::get(I->getType(), VF);
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
Type *CondTy = SI->getCondition()->getType(); Type *CondTy = SI->getCondition()->getType();
if (ScalarCond) if (ScalarCond)
CondTy = VectorType::get(CondTy, VF); CondTy = VectorType::get(CondTy, VF);
return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy); return VTTI->getInstrCost(I->getOpcode(), VectorTy, CondTy);
} }
case Instruction::ICmp: case Instruction::ICmp:
case Instruction::FCmp: { case Instruction::FCmp: {
Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF); Type *ValTy = I->getOperand(0)->getType();
return VTTI->getInstrCost(I->getOpcode(), VTy); VectorTy = ToVectorTy(ValTy, VF);
return VTTI->getInstrCost(I->getOpcode(), VectorTy);
} }
case Instruction::Store: { case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(I); StoreInst *SI = cast<StoreInst>(I);
Type *VTy = VectorType::get(SI->getValueOperand()->getType(), VF); Type *ValTy = SI->getValueOperand()->getType();
VectorTy = ToVectorTy(ValTy, VF);
if (VF == 1)
return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
SI->getAlignment(), SI->getPointerAddressSpace());
// Scalarized stores. // Scalarized stores.
if (!Legal->isConsecutiveGep(SI->getPointerOperand())) { if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
unsigned Cost = 0; unsigned Cost = 0;
if (VF != 1) {
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
VTy); ValTy);
// The cost of extracting from the value vector and pointer vector. // The cost of extracting from the value vector.
Cost += VF * (ExtCost * 2); Cost += VF * (ExtCost);
}
// The cost of the scalar stores. // The cost of the scalar stores.
Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
VTy->getScalarType(), ValTy->getScalarType(),
SI->getAlignment(), SI->getAlignment(),
SI->getPointerAddressSpace()); SI->getPointerAddressSpace());
return Cost; return Cost;
} }
// Wide stores. // Wide stores.
return VTTI->getMemoryOpCost(I->getOpcode(), VTy, SI->getAlignment(), return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(),
SI->getPointerAddressSpace()); SI->getPointerAddressSpace());
} }
case Instruction::Load: { case Instruction::Load: {
LoadInst *LI = cast<LoadInst>(I); LoadInst *LI = cast<LoadInst>(I);
Type *VTy = VectorType::get(I->getType(), VF);
if (VF == 1)
return VTTI->getMemoryOpCost(I->getOpcode(), RetTy,
LI->getAlignment(),
LI->getPointerAddressSpace());
// Scalarized loads. // Scalarized loads.
if (!Legal->isConsecutiveGep(LI->getPointerOperand())) { if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
unsigned Cost = 0; unsigned Cost = 0;
if (VF != 1) { unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy); // The cost of inserting the loaded value into the result vector.
unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractValue, VTy); Cost += VF * (InCost);
// The cost of inserting the loaded value into the result vector, and
// extracting from a vector of pointers.
Cost += VF * (InCost + ExCost);
}
// The cost of the scalar loads. // The cost of the scalar loads.
Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(), Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
RetTy->getScalarType(),
LI->getAlignment(), LI->getAlignment(),
LI->getPointerAddressSpace()); LI->getPointerAddressSpace());
return Cost; return Cost;
} }
// Wide loads. // Wide loads.
return VTTI->getMemoryOpCost(I->getOpcode(), VTy, LI->getAlignment(), return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
LI->getPointerAddressSpace()); LI->getPointerAddressSpace());
} }
case Instruction::ZExt: case Instruction::ZExt:
@ -1586,35 +1601,40 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::Trunc: case Instruction::Trunc:
case Instruction::FPTrunc: case Instruction::FPTrunc:
case Instruction::BitCast: { case Instruction::BitCast: {
Type *SrcTy = VectorType::get(I->getOperand(0)->getType(), VF); Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
Type *DstTy = VectorType::get(I->getType(), VF); return VTTI->getInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy);
} }
default: { default: {
// We are scalarizing the instruction. Return the cost of the scalar // We are scalarizing the instruction. Return the cost of the scalar
// instruction, plus the cost of insert and extract into vector // instruction, plus the cost of insert and extract into vector
// elements, times the vector width. // elements, times the vector width.
unsigned Cost = 0; unsigned Cost = 0;
Type *Ty = I->getType();
if (!Ty->isVoidTy()) { bool IsVoid = RetTy->isVoidTy();
Type *VTy = VectorType::get(Ty, VF);
unsigned InsCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy);
Cost += VF * (InsCost + ExtCost);
}
/// We don't have any information on the scalar instruction, but maybe unsigned InsCost = (IsVoid ? 0 :
/// the target has. VTTI->getInstrCost(Instruction::InsertElement,
/// TODO: This may be a target-specific intrinsic. VectorTy));
/// Need to add API for that.
Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty);
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
VectorTy);
// The cost of inserting the results plus extracting each one of the
// operands.
Cost += VF * (InsCost + ExtCost * I->getNumOperands());
// The cost of executing VF copies of the scalar instruction.
Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy);
return Cost; return Cost;
} }
}// end of switch. }// end of switch.
} }
/// Map \p Scalar to the type used for cost queries at vector width \p VF.
/// Void types and a width of one are passed through untouched; any other
/// combination yields VectorType::get(Scalar, VF).
Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
  // Widening only applies to a non-void type at a width other than one.
  const bool NeedsWidening = !Scalar->isVoidTy() && VF != 1;
  if (NeedsWidening)
    return VectorType::get(Scalar, VF);
  return Scalar;
}
} // namespace } // namespace