forked from OSchip/llvm-project
[LV] Ignore candidate VFs with invalid costs.
This follows on from the discussion on the mailing list (https://lists.llvm.org/pipermail/llvm-dev/2021-June/151047.html) and interprets an Invalid cost as 'infinitely expensive', which simplifies some of the legalization issues with scalable vectors. Reviewed By: dmgreen. Differential Revision: https://reviews.llvm.org/D105473
This commit is contained in:
parent
e4aa6ad132
commit
d2e4ccc790
|
@ -1261,9 +1261,11 @@ public:
|
|||
const LoopVectorizationPlanner &LVP);
|
||||
|
||||
/// Setup cost-based decisions for user vectorization factor.
|
||||
void selectUserVectorizationFactor(ElementCount UserVF) {
|
||||
/// \return true if the UserVF is a feasible VF to be chosen.
|
||||
bool selectUserVectorizationFactor(ElementCount UserVF) {
|
||||
collectUniformsAndScalars(UserVF);
|
||||
collectInstsToScalarize(UserVF);
|
||||
return expectedCost(UserVF).first.isValid();
|
||||
}
|
||||
|
||||
/// \return The size (in bits) of the smallest and widest types in the code
|
||||
|
@ -5725,8 +5727,14 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
|
|||
auto MaxSafeUserVF =
|
||||
UserVF.isScalable() ? MaxSafeScalableVF : MaxSafeFixedVF;
|
||||
|
||||
if (ElementCount::isKnownLE(UserVF, MaxSafeUserVF))
|
||||
return UserVF;
|
||||
if (ElementCount::isKnownLE(UserVF, MaxSafeUserVF)) {
|
||||
// If `VF=vscale x N` is safe, then so is `VF=N`
|
||||
if (UserVF.isScalable())
|
||||
return FixedScalableVFPair(
|
||||
ElementCount::getFixed(UserVF.getKnownMinValue()), UserVF);
|
||||
else
|
||||
return UserVF;
|
||||
}
|
||||
|
||||
assert(ElementCount::isKnownGT(UserVF, MaxSafeUserVF));
|
||||
|
||||
|
@ -6072,17 +6080,11 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
|
|||
if (i.isScalar())
|
||||
continue;
|
||||
|
||||
// Notice that the vector loop needs to be executed less times, so
|
||||
// we need to divide the cost of the vector loops by the width of
|
||||
// the vector elements.
|
||||
VectorizationCostTy C = expectedCost(i);
|
||||
|
||||
assert(C.first.isValid() && "Unexpected invalid cost for vector loop");
|
||||
VectorizationFactor Candidate(i, C.first);
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "LV: Vector loop of width " << i << " costs: "
|
||||
<< (*Candidate.Cost.getValue() /
|
||||
Candidate.Width.getKnownMinValue())
|
||||
<< (Candidate.Cost / Candidate.Width.getKnownMinValue())
|
||||
<< (i.isScalable() ? " (assuming a minimum vscale of 1)" : "")
|
||||
<< ".\n");
|
||||
|
||||
|
@ -6109,8 +6111,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
|
|||
}
|
||||
|
||||
LLVM_DEBUG(if (ForceVectorization && !ChosenFactor.Width.isScalar() &&
|
||||
*ChosenFactor.Cost.getValue() >= *ScalarCost.Cost.getValue())
|
||||
dbgs()
|
||||
ChosenFactor.Cost >= ScalarCost.Cost) dbgs()
|
||||
<< "LV: Vectorization seems to be not beneficial, "
|
||||
<< "but was forced by a user.\n");
|
||||
LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n");
|
||||
|
@ -6438,8 +6439,9 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
|
|||
// If we did not calculate the cost for VF (because the user selected the VF)
|
||||
// then we calculate the cost of VF here.
|
||||
if (LoopCost == 0) {
|
||||
assert(expectedCost(VF).first.isValid() && "Expected a valid cost");
|
||||
LoopCost = *expectedCost(VF).first.getValue();
|
||||
InstructionCost C = expectedCost(VF).first;
|
||||
assert(C.isValid() && "Expected to have chosen a VF with valid cost");
|
||||
LoopCost = *C.getValue();
|
||||
}
|
||||
|
||||
assert(LoopCost && "Non-zero loop cost expected");
|
||||
|
@ -7295,6 +7297,8 @@ InstructionCost
|
|||
LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
|
||||
ElementCount VF) const {
|
||||
|
||||
// There is no mechanism yet to create a scalable scalarization loop,
|
||||
// so this is currently Invalid.
|
||||
if (VF.isScalable())
|
||||
return InstructionCost::getInvalid();
|
||||
|
||||
|
@ -8013,17 +8017,19 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
|
|||
UserVF.isScalable() ? MaxFactors.ScalableVF : MaxFactors.FixedVF;
|
||||
bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxUserVF);
|
||||
if (!UserVF.isZero() && UserVFIsLegal) {
|
||||
LLVM_DEBUG(dbgs() << "LV: Using " << (UserVFIsLegal ? "user" : "max")
|
||||
<< " VF " << UserVF << ".\n");
|
||||
assert(isPowerOf2_32(UserVF.getKnownMinValue()) &&
|
||||
"VF needs to be a power of two");
|
||||
// Collect the instructions (and their associated costs) that will be more
|
||||
// profitable to scalarize.
|
||||
CM.selectUserVectorizationFactor(UserVF);
|
||||
CM.collectInLoopReductions();
|
||||
buildVPlansWithVPRecipes(UserVF, UserVF);
|
||||
LLVM_DEBUG(printPlans(dbgs()));
|
||||
return {{UserVF, 0}};
|
||||
if (CM.selectUserVectorizationFactor(UserVF)) {
|
||||
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
|
||||
CM.collectInLoopReductions();
|
||||
buildVPlansWithVPRecipes(UserVF, UserVF);
|
||||
LLVM_DEBUG(printPlans(dbgs()));
|
||||
return {{UserVF, 0}};
|
||||
} else
|
||||
reportVectorizationInfo("UserVF ignored because of invalid costs.",
|
||||
"InvalidCost", ORE, OrigLoop);
|
||||
}
|
||||
|
||||
// Populate the set of Vectorization Factor Candidates.
|
||||
|
@ -8798,8 +8804,6 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
|
|||
InstructionCost CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize);
|
||||
InstructionCost IntrinsicCost = ID ? CM.getVectorIntrinsicCost(CI, VF) : 0;
|
||||
bool UseVectorIntrinsic = ID && IntrinsicCost <= CallCost;
|
||||
assert((IntrinsicCost.isValid() || CallCost.isValid()) &&
|
||||
"Either the intrinsic cost or vector call cost must be valid");
|
||||
return UseVectorIntrinsic || !NeedToScalarize;
|
||||
};
|
||||
|
||||
|
|
|
@ -75,7 +75,7 @@ define void @vec_intrinsic(i64 %N, double* nocapture readonly %a) {
|
|||
; CHECK-LABEL: @vec_intrinsic
|
||||
; CHECK: vector.body:
|
||||
; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>*
|
||||
; CHECK: call fast <vscale x 2 x double> @sin_vec(<vscale x 2 x double> %[[LOAD]])
|
||||
; CHECK: call fast <vscale x 2 x double> @sin_vec_nxv2f64(<vscale x 2 x double> %[[LOAD]])
|
||||
entry:
|
||||
%cmp7 = icmp sgt i64 %N, 0
|
||||
br i1 %cmp7, label %for.body, label %for.end
|
||||
|
@ -95,17 +95,90 @@ for.end:
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
|
||||
; CHECK: @vec_sin_no_mapping
|
||||
; CHECK: call fast <2 x float> @llvm.sin.v2f32
|
||||
; CHECK-NOT: <vscale x
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
|
||||
%0 = load float, float* %arrayidx, align 4
|
||||
%1 = tail call fast float @llvm.sin.f32(float %0)
|
||||
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
|
||||
store float %1, float* %arrayidx1, align 4
|
||||
%inc = add nuw nsw i64 %i.07, 1
|
||||
%exitcond.not = icmp eq i64 %inc, %n
|
||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vec_sin_fixed_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
|
||||
; CHECK: @vec_sin_fixed_mapping
|
||||
; CHECK: call fast <2 x float> @llvm.sin.v2f32
|
||||
; CHECK-NOT: <vscale x
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
|
||||
%0 = load float, float* %arrayidx, align 4
|
||||
%1 = tail call fast float @llvm.sin.f32(float %0) #3
|
||||
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
|
||||
store float %1, float* %arrayidx1, align 4
|
||||
%inc = add nuw nsw i64 %i.07, 1
|
||||
%exitcond.not = icmp eq i64 %inc, %n
|
||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; Even though there are no function mappings attached to the call
|
||||
; in the loop below we can still vectorize the loop because SVE has
|
||||
; hardware support in the form of the 'fsqrt' instruction.
|
||||
define void @vec_sqrt_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) #0 {
|
||||
; CHECK: @vec_sqrt_no_mapping
|
||||
; CHECK: call fast <vscale x 2 x float> @llvm.sqrt.nxv2f32
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
|
||||
%0 = load float, float* %arrayidx, align 4
|
||||
%1 = tail call fast float @llvm.sqrt.f32(float %0)
|
||||
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
|
||||
store float %1, float* %arrayidx1, align 4
|
||||
%inc = add nuw nsw i64 %i.07, 1
|
||||
%exitcond.not = icmp eq i64 %inc, %n
|
||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
declare double @foo(double)
|
||||
declare i64 @bar(i64*)
|
||||
declare double @llvm.sin.f64(double)
|
||||
declare float @llvm.sin.f32(float)
|
||||
declare float @llvm.sqrt.f32(float)
|
||||
|
||||
declare <vscale x 2 x double> @foo_vec(<vscale x 2 x double>)
|
||||
declare <vscale x 2 x i64> @bar_vec(<vscale x 2 x i64*>)
|
||||
declare <vscale x 2 x double> @sin_vec(<vscale x 2 x double>)
|
||||
declare <vscale x 2 x double> @sin_vec_nxv2f64(<vscale x 2 x double>)
|
||||
declare <2 x double> @sin_vec_v2f64(<2 x double>)
|
||||
|
||||
attributes #0 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_foo(foo_vec)" }
|
||||
attributes #1 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_bar(bar_vec)" }
|
||||
attributes #2 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_llvm.sin.f64(sin_vec)" }
|
||||
attributes #2 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_llvm.sin.f64(sin_vec_nxv2f64)" }
|
||||
attributes #3 = { "vector-function-abi-variant"="_ZGV_LLVM_N2v_llvm.sin.f64(sin_vec_v2f64)" }
|
||||
|
||||
!1 = distinct !{!1, !2, !3}
|
||||
!2 = !{!"llvm.loop.vectorize.width", i32 2}
|
||||
|
|
Loading…
Reference in New Issue