[PGO][PGSO][LV] Fix loop not vectorized issue under profile guided size opts.

D81345 appears to accidentally disable vectorization when it is explicitly
enabled. As PGSO isn't currently accessible from LoopAccessInfo, revert back to
the vectorization with versioning-for-unit-stride for PGSO.

Differential Revision: https://reviews.llvm.org/D85784
This commit is contained in:
Hiroshi Yamauchi 2020-08-11 14:10:30 -07:00
parent b40a3814b6
commit ab401a8c8a
2 changed files with 39 additions and 5 deletions

View File

@ -2822,7 +2822,8 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
return;
assert(!(SCEVCheckBlock->getParent()->hasOptSize() ||
OptForSizeBasedOnProfile) &&
(OptForSizeBasedOnProfile &&
Cost->Hints->getForce() != LoopVectorizeHints::FK_Enabled)) &&
"Cannot SCEV check stride or overflow when optimizing for size");
SCEVCheckBlock->setName("vector.scevcheck");
@ -7914,12 +7915,17 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
LoopVectorizationLegality &LVL) {
bool OptSize =
F->hasOptSize() || llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
PGSOQueryType::IRPass);
// 1) OptSize takes precedence over all other options, i.e. if this is set,
// don't look at hints or options, and don't request a scalar epilogue.
if (OptSize)
// (For PGSO, as shouldOptimizeForSize isn't currently accessible from
// LoopAccessInfo (due to code dependency and not being able to reliably get
// PSI/BFI from a loop analysis under NPM), we cannot suppress the collection
// of strides in LoopAccessInfo::analyzeLoop() and vectorize without
// versioning when the vectorization is forced, unlike hasOptSize. So revert
// back to the old way and vectorize with versioning when forced. See D81345.)
if (F->hasOptSize() || (llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
PGSOQueryType::IRPass) &&
Hints.getForce() != LoopVectorizeHints::FK_Enabled))
return CM_ScalarEpilogueNotAllowedOptSize;
bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() &&

View File

@ -284,6 +284,34 @@ for.end:
ret void
}
; Vectorize with versioning for unit stride for PGSO and enabled vectorization.
;
; The loop below stores the constant 42 to %B[%iv * %BStride] for
; %iv = 0 .. 1024 (it exits once %iv.next equals 1025). The element index is
; scaled by the function argument %BStride, so the access stride is a runtime
; value. Every FileCheck prefix used for this function expects a "vector.body"
; block in the output.
; NOTE(review): the !prof !14 and !llvm.loop !15 metadata nodes are defined
; outside this view; presumably !14 makes the function a candidate for
; profile guided size optimization and !15 explicitly enables vectorization
; for the loop. Confirm against the metadata definitions in the test file.
define void @stride1_pgso(i16* noalias %B, i32 %BStride) !prof !14 {
; CHECK-LABEL: @stride1_pgso(
; CHECK: vector.body
;
; PGSO-LABEL: @stride1_pgso(
; PGSO: vector.body
;
; NPGSO-LABEL: @stride1_pgso(
; NPGSO: vector.body
entry:
br label %for.body
for.body:
; Induction variable, starting at 0 and incremented by 1 per iteration.
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
; Element index is %iv scaled by the runtime stride argument.
%mulB = mul nsw i32 %iv, %BStride
%gepOfB = getelementptr inbounds i16, i16* %B, i32 %mulB
store i16 42, i16* %gepOfB, align 4
%iv.next = add nuw nsw i32 %iv, 1
; Leave the loop once the incremented induction variable reaches 1025.
%exitcond = icmp eq i32 %iv.next, 1025
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15
for.end:
ret void
}
; PR46652: Check that the need for stride==1 check prevents vectorizing a loop
; having tiny trip count, when compiling w/o -Os/-Oz.
; CHECK-LABEL: @pr46652