forked from OSchip/llvm-project
[LV] Consider minimum vscale assmuption for RT check cost.
For scalable VFs, the minimum assumed vscale needs to be included in the cost-computation, otherwise a smaller VF may be used for RT check cost computation than was used for earlier cost computations. Fixes a RISCV test failing with UBSan due to both scalar and vector loops having the same cost.
This commit is contained in:
parent
df5c981be3
commit
774fc63490
|
@ -1559,14 +1559,14 @@ public:
|
|||
Scalars.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned NumPredStores = 0;
|
||||
|
||||
/// Convenience function that returns the value of vscale_range iff
|
||||
/// vscale_range.min == vscale_range.max or otherwise returns the value
|
||||
/// returned by the corresponding TLI method.
|
||||
Optional<unsigned> getVScaleForTuning() const;
|
||||
|
||||
private:
|
||||
unsigned NumPredStores = 0;
|
||||
|
||||
/// \return An upper bound for the vectorization factors for both
|
||||
/// fixed and scalable vectorization, where the minimum-known number of
|
||||
/// elements is a power-of-2 larger than zero. If scalable vectorization is
|
||||
|
@ -10243,7 +10243,8 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) {
|
|||
}
|
||||
|
||||
static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
|
||||
VectorizationFactor &VF, Loop *L,
|
||||
VectorizationFactor &VF,
|
||||
Optional<unsigned> VScale, Loop *L,
|
||||
ScalarEvolution &SE) {
|
||||
InstructionCost CheckCost = Checks.getCost();
|
||||
if (!CheckCost.isValid())
|
||||
|
@ -10295,6 +10296,12 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
|
|||
// the computations are performed on doubles, not integers and the result
|
||||
// is rounded up, hence we get an upper estimate of the TC.
|
||||
unsigned IntVF = VF.Width.getKnownMinValue();
|
||||
if (VF.Width.isScalable()) {
|
||||
unsigned AssumedMinimumVscale = 1;
|
||||
if (VScale)
|
||||
AssumedMinimumVscale = *VScale;
|
||||
IntVF *= AssumedMinimumVscale;
|
||||
}
|
||||
double VecCOverVF = double(*VF.Cost.getValue()) / IntVF;
|
||||
double RtC = *CheckCost.getValue();
|
||||
double MinTC1 = RtC / (ScalarC - VecCOverVF);
|
||||
|
@ -10308,6 +10315,7 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
|
|||
// RtC < ScalarC * TC * (1 / X) ==> RtC * X / ScalarC < TC
|
||||
double MinTC2 = RtC * 10 / ScalarC;
|
||||
|
||||
dbgs() << ScalarC << " " << RtC << " " << VecCOverVF << "\n";
|
||||
// Now pick the larger minimum. If it is not a multiple of VF, choose the
|
||||
// next closest multiple of VF. This should partly compensate for ignoring
|
||||
// the epilogue cost.
|
||||
|
@ -10520,7 +10528,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
|||
bool ForceVectorization =
|
||||
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
|
||||
if (!ForceVectorization &&
|
||||
!areRuntimeChecksProfitable(Checks, VF, L, *PSE.getSE())) {
|
||||
!areRuntimeChecksProfitable(Checks, VF, CM.getVScaleForTuning(), L,
|
||||
*PSE.getSE())) {
|
||||
ORE->emit([&]() {
|
||||
return OptimizationRemarkAnalysisAliasing(
|
||||
DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(),
|
||||
|
|
Loading…
Reference in New Issue