forked from OSchip/llvm-project
[PGO][PGSO][LV] Fix loop not vectorized issue under profile guided size opts.
D81345 appears to have accidentally disabled vectorization even when it is explicitly enabled. As PGSO isn't currently accessible from LoopAccessInfo, revert to vectorization with versioning-for-unit-stride for PGSO. Differential Revision: https://reviews.llvm.org/D85784
This commit is contained in:
parent
b40a3814b6
commit
ab401a8c8a
|
@ -2822,7 +2822,8 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
|
|||
return;
|
||||
|
||||
assert(!(SCEVCheckBlock->getParent()->hasOptSize() ||
|
||||
OptForSizeBasedOnProfile) &&
|
||||
(OptForSizeBasedOnProfile &&
|
||||
Cost->Hints->getForce() != LoopVectorizeHints::FK_Enabled)) &&
|
||||
"Cannot SCEV check stride or overflow when optimizing for size");
|
||||
|
||||
SCEVCheckBlock->setName("vector.scevcheck");
|
||||
|
@ -7914,12 +7915,17 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
|
|||
BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
|
||||
AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
|
||||
LoopVectorizationLegality &LVL) {
|
||||
bool OptSize =
|
||||
F->hasOptSize() || llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
|
||||
PGSOQueryType::IRPass);
|
||||
// 1) OptSize takes precedence over all other options, i.e. if this is set,
|
||||
// don't look at hints or options, and don't request a scalar epilogue.
|
||||
if (OptSize)
|
||||
// (For PGSO, as shouldOptimizeForSize isn't currently accessible from
|
||||
// LoopAccessInfo (due to code dependency and not being able to reliably get
|
||||
// PSI/BFI from a loop analysis under NPM), we cannot suppress the collection
|
||||
// of strides in LoopAccessInfo::analyzeLoop() and vectorize without
|
||||
// versioning when the vectorization is forced, unlike hasOptSize. So revert
|
||||
// back to the old way and vectorize with versioning when forced. See D81345.)
|
||||
if (F->hasOptSize() || (llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
|
||||
PGSOQueryType::IRPass) &&
|
||||
Hints.getForce() != LoopVectorizeHints::FK_Enabled))
|
||||
return CM_ScalarEpilogueNotAllowedOptSize;
|
||||
|
||||
bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() &&
|
||||
|
|
|
@ -284,6 +284,34 @@ for.end:
|
|||
ret void
|
||||
}
|
||||
|
||||
; Vectorize with versioning for unit stride for PGSO and enabled vectorization.
|
||||
;
|
||||
define void @stride1_pgso(i16* noalias %B, i32 %BStride) !prof !14 {
|
||||
; CHECK-LABEL: @stride1_pgso(
|
||||
; CHECK: vector.body
|
||||
;
|
||||
; PGSO-LABEL: @stride1_pgso(
|
||||
; PGSO: vector.body
|
||||
;
|
||||
; NPGSO-LABEL: @stride1_pgso(
|
||||
; NPGSO: vector.body
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
|
||||
%mulB = mul nsw i32 %iv, %BStride
|
||||
%gepOfB = getelementptr inbounds i16, i16* %B, i32 %mulB
|
||||
store i16 42, i16* %gepOfB, align 4
|
||||
%iv.next = add nuw nsw i32 %iv, 1
|
||||
%exitcond = icmp eq i32 %iv.next, 1025
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; PR46652: Check that the need for stride==1 check prevents vectorizing a loop
|
||||
; having tiny trip count, when compiling w/o -Os/-Oz.
|
||||
; CHECK-LABEL: @pr46652
|
||||
|
|
Loading…
Reference in New Issue