[LoopFlatten] Updating Phi nodes after IV widening

In rG6a076fa9539e, a problem with updating the old/narrow phi nodes after IV
widening was introduced. If after widening of the IV the transformation is
*not* applied, the narrow phi node was incorrectly modified, which should only
happen if flattening happens. This can be seen in the added test widen-iv2.ll,
which incorrectly had 1 incoming value, but should have its original 2 incoming
values, which is now restored.

Differential Revision: https://reviews.llvm.org/D110234
This commit is contained in:
Sjoerd Meijer 2021-09-28 15:06:22 +01:00
parent bd379915de
commit 0ea77502e2
2 changed files with 19 additions and 10 deletions

View File

@ -97,9 +97,17 @@ struct FlattenInfo {
// Holds the old/narrow induction phis, i.e. the Phis before IV widening has // Holds the old/narrow induction phis, i.e. the Phis before IV widening has
// been applied. This bookkeeping is used so we can skip some checks on these // been applied. This bookkeeping is used so we can skip some checks on these
// phi nodes. // phi nodes.
SmallPtrSet<PHINode *, 2> OldInductionPHIs; PHINode *NarrowInnerInductionPHI = nullptr;
PHINode *NarrowOuterInductionPHI = nullptr;
FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {}; FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {};
bool isNarrowInductionPhi(PHINode *Phi) {
// This can't be the narrow phi if we haven't widened the IV first.
if (!Widened)
return false;
return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi;
}
}; };
static bool static bool
@ -268,7 +276,7 @@ static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
// them specially when doing the transformation. // them specially when doing the transformation.
if (&InnerPHI == FI.InnerInductionPHI) if (&InnerPHI == FI.InnerInductionPHI)
continue; continue;
if (FI.Widened && FI.OldInductionPHIs.count(&InnerPHI)) if (FI.isNarrowInductionPhi(&InnerPHI))
continue; continue;
// Each inner loop PHI node must have two incoming values/blocks - one // Each inner loop PHI node must have two incoming values/blocks - one
@ -315,7 +323,7 @@ static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
} }
for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) { for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
if (FI.Widened && FI.OldInductionPHIs.count(&OuterPHI)) if (FI.isNarrowInductionPhi(&OuterPHI))
continue; continue;
if (!SafeOuterPHIs.count(&OuterPHI)) { if (!SafeOuterPHIs.count(&OuterPHI)) {
LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump()); LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
@ -708,10 +716,11 @@ static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
bool Deleted; bool Deleted;
if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, Deleted)) if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, Deleted))
return false; return false;
// If the inner Phi node cannot be trivially deleted, we need to at least // Add the narrow phi to list, so that it will be adjusted later when the
// bring it in a consistent state. // the transformation is performed.
if (!Deleted) if (!Deleted)
FI.InnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch()); FI.InnerPHIsToTransform.insert(FI.InnerInductionPHI);
if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Deleted)) if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Deleted))
return false; return false;
@ -719,8 +728,8 @@ static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
FI.Widened = true; FI.Widened = true;
// Save the old/narrow induction phis, which we need to ignore in CheckPHIs. // Save the old/narrow induction phis, which we need to ignore in CheckPHIs.
FI.OldInductionPHIs.insert(FI.InnerInductionPHI); FI.NarrowInnerInductionPHI = FI.InnerInductionPHI;
FI.OldInductionPHIs.insert(FI.OuterInductionPHI); FI.NarrowOuterInductionPHI = FI.OuterInductionPHI;
// After widening, rediscover all the loop components. // After widening, rediscover all the loop components.
return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI); return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);

View File

@ -37,7 +37,7 @@ define dso_local i32 @fn1() local_unnamed_addr #0 {
; CHECK-NEXT: br label [[FOR_BODY3_US:%.*]] ; CHECK-NEXT: br label [[FOR_BODY3_US:%.*]]
; CHECK: for.body3.us: ; CHECK: for.body3.us:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY3_US]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ] ; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY3_US]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ]
; CHECK-NEXT: [[J_014_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ] ; CHECK-NEXT: [[J_014_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY3_US]] ]
; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[INDVAR]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[INDVAR]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[J_014_US]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[J_014_US]] to i64
; CHECK-NEXT: [[TMP9:%.*]] = add nsw i64 [[TMP8]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = add nsw i64 [[TMP8]], [[TMP5]]
@ -46,7 +46,7 @@ define dso_local i32 @fn1() local_unnamed_addr #0 {
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 [[TMP7]] ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 [[TMP7]]
; CHECK-NEXT: store i32 32, i32* [[ARRAYIDX_US]], align 4 ; CHECK-NEXT: store i32 32, i32* [[ARRAYIDX_US]], align 4
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[INC_US:%.*]] = add nuw nsw i32 [[J_014_US]], 1 ; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[J_014_US]], 1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP1]] ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP1]]
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY3_US]], label [[FOR_COND1_FOR_INC4_CRIT_EDGE_US]] ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY3_US]], label [[FOR_COND1_FOR_INC4_CRIT_EDGE_US]]
; CHECK: for.cond1.for.inc4_crit_edge.us: ; CHECK: for.cond1.for.inc4_crit_edge.us: