forked from OSchip/llvm-project
[LV] Fix-up external IV users after updating dominator tree
This patch delays the fix-up step for external induction variable users until after the dominator tree has been properly updated. This should fix PR30742. The SCEVExpander in InductionDescriptor::transform can generate code in the wrong location if the dominator tree is not up-to-date. We should work towards keeping the dominator tree up-to-date throughout the transformation. Reference: https://llvm.org/bugs/show_bug.cgi?id=30742 Differential Revision: https://reviews.llvm.org/D28168 llvm-svn: 291462
This commit is contained in:
parent
6dca542b4a
commit
cf796478e9
|
@ -783,6 +783,10 @@ protected:
|
|||
// Similarly, we create a new latch condition when setting up the structure
|
||||
// of the new loop, so the old one can become dead.
|
||||
SmallPtrSet<Instruction *, 4> DeadInstructions;
|
||||
|
||||
// Holds the end values for each induction variable. We save the end values
|
||||
// so we can later fix-up the external users of the induction variables.
|
||||
DenseMap<PHINode *, Value *> IVEndValues;
|
||||
};
|
||||
|
||||
class InnerLoopUnroller : public InnerLoopVectorizer {
|
||||
|
@ -3417,7 +3421,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
|
|||
// Create phi nodes to merge from the backedge-taken check block.
|
||||
PHINode *BCResumeVal = PHINode::Create(
|
||||
OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator());
|
||||
Value *EndValue;
|
||||
Value *&EndValue = IVEndValues[OrigPhi];
|
||||
if (OrigPhi == OldInduction) {
|
||||
// We know what the end value is.
|
||||
EndValue = CountRoundDown;
|
||||
|
@ -3436,9 +3440,6 @@ void InnerLoopVectorizer::createEmptyLoop() {
|
|||
// or the value at the end of the vectorized loop.
|
||||
BCResumeVal->addIncoming(EndValue, MiddleBlock);
|
||||
|
||||
// Fix up external users of the induction variable.
|
||||
fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock);
|
||||
|
||||
// Fix the scalar body counter (PHI node).
|
||||
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
|
||||
|
||||
|
@ -4109,11 +4110,23 @@ void InnerLoopVectorizer::vectorizeLoop() {
|
|||
Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
|
||||
} // end of for each Phi in PHIsToFix.
|
||||
|
||||
fixLCSSAPHIs();
|
||||
|
||||
// Make sure DomTree is updated.
|
||||
// Update the dominator tree.
|
||||
//
|
||||
// FIXME: After creating the structure of the new loop, the dominator tree is
|
||||
// no longer up-to-date, and it remains that way until we update it
|
||||
// here. An out-of-date dominator tree is problematic for SCEV,
|
||||
// because SCEVExpander uses it to guide code generation. The
|
||||
// vectorizer uses SCEVExpanders in several places. Instead, we should
|
||||
// keep the dominator tree up-to-date as we go.
|
||||
updateAnalysis();
|
||||
|
||||
// Fix-up external users of the induction variables.
|
||||
for (auto &Entry : *Legal->getInductionVars())
|
||||
fixupIVUsers(Entry.first, Entry.second,
|
||||
getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)),
|
||||
IVEndValues[Entry.first], LoopMiddleBlock);
|
||||
|
||||
fixLCSSAPHIs();
|
||||
predicateInstructions();
|
||||
|
||||
// Remove redundant induction instructions.
|
||||
|
|
|
@ -133,3 +133,48 @@ for.end:
|
|||
store i32 %phi2, i32* %p
|
||||
ret i32 %phi
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @PR30742
|
||||
; CHECK: min.iters.checked
|
||||
; CHECK: %[[N_MOD_VF:.+]] = urem i32 %[[T5:.+]], 2
|
||||
; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]]
|
||||
; CHECK: middle.block
|
||||
; CHECK: %[[CMP:.+]] = icmp eq i32 %[[T5]], %[[N_VEC]]
|
||||
; CHECK: %[[T15:.+]] = add i32 %tmp03, -7
|
||||
; CHECK: %[[T16:.+]] = shl i32 %[[N_MOD_VF]], 3
|
||||
; CHECK: %[[T17:.+]] = add i32 %[[T15]], %[[T16]]
|
||||
; CHECK: %[[T18:.+]] = shl i32 {{.*}}, 3
|
||||
; CHECK: %ind.escape = sub i32 %[[T17]], %[[T18]]
|
||||
; CHECK: br i1 %[[CMP]], label %BB3, label %scalar.ph
|
||||
define void @PR30742() {
|
||||
BB0:
|
||||
br label %BB1
|
||||
|
||||
BB1:
|
||||
%tmp00 = load i32, i32* undef, align 16
|
||||
%tmp01 = sub i32 %tmp00, undef
|
||||
%tmp02 = icmp slt i32 %tmp01, 1
|
||||
%tmp03 = select i1 %tmp02, i32 1, i32 %tmp01
|
||||
%tmp04 = add nsw i32 %tmp03, -7
|
||||
br label %BB2
|
||||
|
||||
BB2:
|
||||
%tmp05 = phi i32 [ %tmp04, %BB1 ], [ %tmp06, %BB2 ]
|
||||
%tmp06 = add i32 %tmp05, -8
|
||||
%tmp07 = icmp sgt i32 %tmp06, 0
|
||||
br i1 %tmp07, label %BB2, label %BB3
|
||||
|
||||
BB3:
|
||||
%tmp08 = phi i32 [ %tmp05, %BB2 ]
|
||||
%tmp09 = sub i32 %tmp00, undef
|
||||
%tmp10 = icmp slt i32 %tmp09, 1
|
||||
%tmp11 = select i1 %tmp10, i32 1, i32 %tmp09
|
||||
%tmp12 = add nsw i32 %tmp11, -7
|
||||
br label %BB4
|
||||
|
||||
BB4:
|
||||
%tmp13 = phi i32 [ %tmp12, %BB3 ], [ %tmp14, %BB4 ]
|
||||
%tmp14 = add i32 %tmp13, -8
|
||||
%tmp15 = icmp sgt i32 %tmp14, 0
|
||||
br i1 %tmp15, label %BB4, label %BB1
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue