forked from OSchip/llvm-project
Revert "[LoopVectorize] Simplify scalar cost calculation in getInstructionCost"
This reverts commit 4afeda9157.
This commit is contained in:
parent
4abba775a3
commit
6968520c3b
|
@ -7316,37 +7316,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
|
|||
Type *RetTy = I->getType();
|
||||
if (canTruncateToMinimalBitwidth(I, VF))
|
||||
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
|
||||
VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF);
|
||||
auto SE = PSE.getSE();
|
||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
|
||||
|
||||
auto hasSingleCopyAfterVectorization = [this](Instruction *I,
|
||||
ElementCount VF) -> bool {
|
||||
if (VF.isScalar())
|
||||
return true;
|
||||
|
||||
auto Scalarized = InstsToScalarize.find(VF);
|
||||
assert(Scalarized != InstsToScalarize.end() &&
|
||||
"VF not yet analyzed for scalarization profitability");
|
||||
return !Scalarized->second.count(I) &&
|
||||
llvm::all_of(I->users(), [&](User *U) {
|
||||
auto *UI = cast<Instruction>(U);
|
||||
return !Scalarized->second.count(UI);
|
||||
});
|
||||
};
|
||||
|
||||
if (isScalarAfterVectorization(I, VF)) {
|
||||
// With the exception of GEPs and PHIs, after scalarization there should
|
||||
// only be one copy of the instruction generated in the loop. This is
|
||||
// because the VF is either 1, or any instructions that need scalarizing
|
||||
// have already been dealt with by the time we get here. As a result,
|
||||
// it means we don't have to multiply the instruction cost by VF.
|
||||
assert(I->getOpcode() == Instruction::GetElementPtr ||
|
||||
I->getOpcode() == Instruction::PHI ||
|
||||
hasSingleCopyAfterVectorization(I, VF));
|
||||
VectorTy = RetTy;
|
||||
} else
|
||||
VectorTy = ToVectorTy(RetTy, VF);
|
||||
|
||||
// TODO: We need to estimate the cost of intrinsic calls.
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::GetElementPtr:
|
||||
|
@ -7474,16 +7447,21 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
|
|||
Op2VK = TargetTransformInfo::OK_UniformValue;
|
||||
|
||||
SmallVector<const Value *, 4> Operands(I->operand_values());
|
||||
return TTI.getArithmeticInstrCost(
|
||||
I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue,
|
||||
Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
|
||||
unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
|
||||
return N * TTI.getArithmeticInstrCost(
|
||||
I->getOpcode(), VectorTy, CostKind,
|
||||
TargetTransformInfo::OK_AnyValue,
|
||||
Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
|
||||
}
|
||||
case Instruction::FNeg: {
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
return TTI.getArithmeticInstrCost(
|
||||
I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue,
|
||||
TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None,
|
||||
TargetTransformInfo::OP_None, I->getOperand(0), I);
|
||||
unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
|
||||
return N * TTI.getArithmeticInstrCost(
|
||||
I->getOpcode(), VectorTy, CostKind,
|
||||
TargetTransformInfo::OK_AnyValue,
|
||||
TargetTransformInfo::OK_AnyValue,
|
||||
TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
|
||||
I->getOperand(0), I);
|
||||
}
|
||||
case Instruction::Select: {
|
||||
SelectInst *SI = cast<SelectInst>(I);
|
||||
|
@ -7627,7 +7605,14 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
|
|||
}
|
||||
}
|
||||
|
||||
return TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
|
||||
unsigned N;
|
||||
if (isScalarAfterVectorization(I, VF)) {
|
||||
assert(!VF.isScalable() && "VF is assumed to be non scalable");
|
||||
N = VF.getKnownMinValue();
|
||||
} else
|
||||
N = 1;
|
||||
return N *
|
||||
TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
|
||||
}
|
||||
case Instruction::Call: {
|
||||
bool NeedToScalarize;
|
||||
|
@ -7642,8 +7627,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
|
|||
case Instruction::ExtractValue:
|
||||
return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput);
|
||||
default:
|
||||
// This opcode is unknown. Assume that it is the same as 'mul'.
|
||||
return TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
|
||||
// The cost of executing VF copies of the scalar instruction. This opcode
|
||||
// is unknown. Assume that it is the same as 'mul'.
|
||||
return VF.getKnownMinValue() * TTI.getArithmeticInstrCost(
|
||||
Instruction::Mul, VectorTy, CostKind) +
|
||||
getScalarizationOverhead(I, VF);
|
||||
} // end of switch.
|
||||
}
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ target triple = "aarch64--linux-gnu"
|
|||
|
||||
; CHECK-LABEL: all_scalar
|
||||
; CHECK: LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2
|
||||
; CHECK: LV: Not considering vector loop of width 2 because it will not generate any vector instructions
|
||||
;
|
||||
define void @all_scalar(i64* %a, i64 %n) {
|
||||
|
|
|
@ -86,41 +86,6 @@ for.end:
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: predicated_store_phi
|
||||
;
|
||||
; Same as predicate_store except we use a pointer PHI to maintain the address
|
||||
;
|
||||
; CHECK: Found new scalar instruction: %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ]
|
||||
; CHECK: Found new scalar instruction: %addr.next = getelementptr inbounds i32, i32* %addr, i64 1
|
||||
; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %addr, align 4
|
||||
; CHECK: Found an estimated cost of 0 for VF 2 For instruction: %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ]
|
||||
; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, i32* %addr, align 4
|
||||
;
|
||||
define void @predicated_store_phi(i32* %a, i1 %c, i32 %x, i64 %n) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
|
||||
%addr = phi i32 * [ %a, %entry ], [ %addr.next, %for.inc ]
|
||||
%tmp1 = load i32, i32* %addr, align 4
|
||||
%tmp2 = add nsw i32 %tmp1, %x
|
||||
br i1 %c, label %if.then, label %for.inc
|
||||
|
||||
if.then:
|
||||
store i32 %tmp2, i32* %addr, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc:
|
||||
%i.next = add nuw nsw i64 %i, 1
|
||||
%cond = icmp slt i64 %i.next, %n
|
||||
%addr.next = getelementptr inbounds i32, i32* %addr, i64 1
|
||||
br i1 %cond, label %for.body, label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: predicated_udiv_scalarized_operand
|
||||
;
|
||||
; This test checks that we correctly compute the cost of the predicated udiv
|
||||
|
|
Loading…
Reference in New Issue