forked from OSchip/llvm-project
[SCEV] Guard movement of insertion point for loop-invariants
This reinstates r347934, along with a tweak to address a problem with PHI node ordering that that commit created (or exposed). (That commit was reverted at r348426, due to the PHI node issue.) Original commit message: r320789 suppressed moving the insertion point of SCEV expressions with dev/rem operations to the loop header in non-loop-invariant situations. This, and similar, hoisting is also unsafe in the loop-invariant case, since there may be a guard against a zero denominator. This is an adjustment to the fix of r320789 to suppress the movement even in the loop-invariant case. This fixes PR30806. Differential Revision: https://reviews.llvm.org/D57428 llvm-svn: 356392
This commit is contained in:
parent
270249de2b
commit
ad7d0ded2e
|
@ -1731,49 +1731,55 @@ Value *SCEVExpander::expand(const SCEV *S) {
|
|||
// Compute an insertion point for this SCEV object. Hoist the instructions
|
||||
// as far out in the loop nest as possible.
|
||||
Instruction *InsertPt = &*Builder.GetInsertPoint();
|
||||
for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
|
||||
L = L->getParentLoop())
|
||||
if (SE.isLoopInvariant(S, L)) {
|
||||
if (!L) break;
|
||||
if (BasicBlock *Preheader = L->getLoopPreheader())
|
||||
InsertPt = Preheader->getTerminator();
|
||||
else {
|
||||
// LSR sets the insertion point for AddRec start/step values to the
|
||||
// block start to simplify value reuse, even though it's an invalid
|
||||
// position. SCEVExpander must correct for this in all cases.
|
||||
InsertPt = &*L->getHeader()->getFirstInsertionPt();
|
||||
|
||||
// We can move insertion point only if there is no div or rem operations
|
||||
// otherwise we are risky to move it over the check for zero denominator.
|
||||
auto SafeToHoist = [](const SCEV *S) {
|
||||
return !SCEVExprContains(S, [](const SCEV *S) {
|
||||
if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
|
||||
if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
|
||||
// Division by non-zero constants can be hoisted.
|
||||
return SC->getValue()->isZero();
|
||||
// All other divisions should not be moved as they may be
|
||||
// divisions by zero and should be kept within the
|
||||
// conditions of the surrounding loops that guard their
|
||||
// execution (see PR35406).
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
};
|
||||
if (SafeToHoist(S)) {
|
||||
for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
|
||||
L = L->getParentLoop()) {
|
||||
if (SE.isLoopInvariant(S, L)) {
|
||||
if (!L) break;
|
||||
if (BasicBlock *Preheader = L->getLoopPreheader())
|
||||
InsertPt = Preheader->getTerminator();
|
||||
else
|
||||
// LSR sets the insertion point for AddRec start/step values to the
|
||||
// block start to simplify value reuse, even though it's an invalid
|
||||
// position. SCEVExpander must correct for this in all cases.
|
||||
InsertPt = &*L->getHeader()->getFirstInsertionPt();
|
||||
} else {
|
||||
// If the SCEV is computable at this level, insert it into the header
|
||||
// after the PHIs (and after any other instructions that we've inserted
|
||||
// there) so that it is guaranteed to dominate any user inside the loop.
|
||||
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
|
||||
InsertPt = &*L->getHeader()->getFirstInsertionPt();
|
||||
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
|
||||
(isInsertedInstruction(InsertPt) ||
|
||||
isa<DbgInfoIntrinsic>(InsertPt)))
|
||||
InsertPt = &*std::next(InsertPt->getIterator());
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// We can move insertion point only if there is no div or rem operations
|
||||
// otherwise we are risky to move it over the check for zero denominator.
|
||||
auto SafeToHoist = [](const SCEV *S) {
|
||||
return !SCEVExprContains(S, [](const SCEV *S) {
|
||||
if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
|
||||
if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
|
||||
// Division by non-zero constants can be hoisted.
|
||||
return SC->getValue()->isZero();
|
||||
// All other divisions should not be moved as they may be
|
||||
// divisions by zero and should be kept within the
|
||||
// conditions of the surrounding loops that guard their
|
||||
// execution (see PR35406).
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
};
|
||||
// If the SCEV is computable at this level, insert it into the header
|
||||
// after the PHIs (and after any other instructions that we've inserted
|
||||
// there) so that it is guaranteed to dominate any user inside the loop.
|
||||
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
|
||||
SafeToHoist(S))
|
||||
InsertPt = &*L->getHeader()->getFirstInsertionPt();
|
||||
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
|
||||
(isInsertedInstruction(InsertPt) ||
|
||||
isa<DbgInfoIntrinsic>(InsertPt))) {
|
||||
InsertPt = &*std::next(InsertPt->getIterator());
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// IndVarSimplify sometimes sets the insertion point at the block start, even
|
||||
// when there are PHIs at that point. We must correct for this.
|
||||
if (isa<PHINode>(*InsertPt))
|
||||
InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();
|
||||
|
||||
// Check to see if we already expanded this here.
|
||||
auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
; RUN: opt -S -indvars < %s | FileCheck %s
|
||||
|
||||
; Produced from the test-case:
|
||||
;
|
||||
; extern void foo(char *, unsigned , unsigned *);
|
||||
; extern void bar(int *, long);
|
||||
; extern char *processBuf(char *);
|
||||
;
|
||||
; extern unsigned theSize;
|
||||
;
|
||||
; void foo(char *buf, unsigned denominator, unsigned *flag) {
|
||||
; int incr = (int) (theSize / denominator);
|
||||
; int inx = 0;
|
||||
; while (*flag) {
|
||||
; int itmp = inx + incr;
|
||||
; int i = (int) theSize;
|
||||
; bar(&i, (long) itmp);
|
||||
; buf = processBuf(buf);
|
||||
; inx = itmp;
|
||||
; }
|
||||
; }
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
@theSize = external local_unnamed_addr global i32, align 4
|
||||
|
||||
define void @foo(i8* %buf, i32 %denominator, i32* %flag) local_unnamed_addr {
|
||||
entry:
|
||||
%i = alloca i32, align 4
|
||||
%0 = load i32, i32* @theSize, align 4
|
||||
%div = udiv i32 %0, %denominator
|
||||
%1 = load i32, i32* %flag, align 4
|
||||
%tobool5 = icmp eq i32 %1, 0
|
||||
br i1 %tobool5, label %while.end, label %while.body.lr.ph
|
||||
|
||||
while.body.lr.ph: ; preds = %entry
|
||||
%2 = bitcast i32* %i to i8*
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.lr.ph, %while.body
|
||||
; Check that there are two PHIs followed by a 'sext' in the same block, and that
|
||||
; the test does not crash.
|
||||
; CHECK: phi
|
||||
; CHECK-NEXT: phi
|
||||
; CHECK-NEXT: sext
|
||||
%buf.addr.07 = phi i8* [ %buf, %while.body.lr.ph ], [ %call, %while.body ]
|
||||
%inx.06 = phi i32 [ 0, %while.body.lr.ph ], [ %add, %while.body ]
|
||||
%add = add nsw i32 %inx.06, %div
|
||||
%3 = load i32, i32* @theSize, align 4
|
||||
store i32 %3, i32* %i, align 4
|
||||
%conv = sext i32 %add to i64
|
||||
call void @bar(i32* nonnull %i, i64 %conv)
|
||||
%call = call i8* @processBuf(i8* %buf.addr.07)
|
||||
%4 = load i32, i32* %flag, align 4
|
||||
%tobool = icmp eq i32 %4, 0
|
||||
br i1 %tobool, label %while.end.loopexit, label %while.body
|
||||
|
||||
while.end.loopexit: ; preds = %while.body
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @bar(i32*, i64) local_unnamed_addr
|
||||
declare i8* @processBuf(i8*) local_unnamed_addr
|
|
@ -0,0 +1,65 @@
|
|||
; RUN: opt -loop-vectorize -S < %s 2>&1 | FileCheck %s
|
||||
|
||||
; Produced from test-case:
|
||||
;
|
||||
; void testGuardedInnerLoop(uint32_t *ptr, uint32_t denom, uint32_t numer, uint32_t outer_lim)
|
||||
; {
|
||||
; for(uint32_t outer_i = 0; outer_i < outer_lim; ++outer_i) {
|
||||
; if (denom > 0) {
|
||||
; const uint32_t lim = numer / denom;
|
||||
;
|
||||
; for (uint32_t i = 0; i < lim; ++i)
|
||||
; ptr[i] = 1;
|
||||
; }
|
||||
; }
|
||||
; }
|
||||
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @testGuardedInnerLoop(i32* %ptr, i32 %denom, i32 %numer, i32 %outer_lim) {
|
||||
entry:
|
||||
%cmp1 = icmp eq i32 %outer_lim, 0
|
||||
br i1 %cmp1, label %exit, label %loop1.preheader
|
||||
|
||||
; Verify that a 'udiv' does not appear between the 'loop1.preheader' label, and
|
||||
; whatever label comes next.
|
||||
loop1.preheader:
|
||||
; CHECK-LABEL: loop1.preheader:
|
||||
; CHECK-NOT: udiv
|
||||
; CHECK-LABEL: :
|
||||
br label %loop1
|
||||
|
||||
loop1:
|
||||
%outer_i = phi i32 [ %inc1, %loop2.exit ], [ 0, %loop1.preheader ]
|
||||
%0 = add i32 %denom, -1
|
||||
%1 = icmp ult i32 %0, %numer
|
||||
br i1 %1, label %loop2.preheader, label %loop2.exit
|
||||
|
||||
; Verify that a 'udiv' does appear between the 'loop2.preheader' label, and
|
||||
; whatever label comes next.
|
||||
loop2.preheader:
|
||||
; CHECK-LABEL: loop2.preheader:
|
||||
; CHECK: udiv
|
||||
; CHECK-LABEL: :
|
||||
%lim = udiv i32 %numer, %denom
|
||||
%2 = zext i32 %lim to i64
|
||||
br label %loop2
|
||||
|
||||
loop2:
|
||||
%indvar.loop2 = phi i64 [ 0, %loop2.preheader ], [ %indvar.loop2.next, %loop2 ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %ptr, i64 %indvar.loop2
|
||||
store i32 1, i32* %arrayidx, align 4
|
||||
%indvar.loop2.next = add nuw nsw i64 %indvar.loop2, 1
|
||||
%cmp2 = icmp ult i64 %indvar.loop2.next, %2
|
||||
br i1 %cmp2, label %loop2, label %loop2.exit
|
||||
|
||||
loop2.exit:
|
||||
%inc1 = add nuw i32 %outer_i, 1
|
||||
%exitcond = icmp eq i32 %inc1, %outer_lim
|
||||
br i1 %exitcond, label %exit, label %loop1
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue