[SCEV] Fix the movement of insertion point in expander. PR35406.

We cannot move the insertion point to the loop header if the SCEV contains
div/rem operations, because they could then be hoisted above the check for a
zero denominator.

Reviewers: sanjoy, mkazantsev, sebpop
Reviewed By: sebpop
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D41229

llvm-svn: 320789
This commit is contained in:
Serguei Katkov 2017-12-15 05:24:42 +00:00
parent c41e2f6e7b
commit 67da7696a0
2 changed files with 107 additions and 1 deletions

View File

@ -1732,10 +1732,28 @@ Value *SCEVExpander::expand(const SCEV *S) {
InsertPt = &*L->getHeader()->getFirstInsertionPt(); InsertPt = &*L->getHeader()->getFirstInsertionPt();
} }
} else { } else {
// Hoisting the insertion point is only allowed when the expression has no
// division that might trap: moving such a division could place it above the
// check that guards against a zero denominator (see PR35406).
auto SafeToHoist = [](const SCEV *S) {
  // Returns true for a sub-expression that is an unsigned division whose
  // divisor is not a known non-zero constant.
  auto MayDivideByZero = [](const SCEV *Expr) {
    const auto *UDiv = dyn_cast<SCEVUDivExpr>(Expr);
    if (!UDiv)
      return false;
    // A constant divisor is harmless unless it is zero; any non-constant
    // divisor must stay under the conditions of the surrounding loops that
    // guard its execution.
    const auto *Divisor = dyn_cast<SCEVConstant>(UDiv->getRHS());
    return !Divisor || Divisor->getValue()->isZero();
  };
  return !SCEVExprContains(S, MayDivideByZero);
};
// If the SCEV is computable at this level, insert it into the header // If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted // after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop. // there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
SafeToHoist(S))
InsertPt = &*L->getHeader()->getFirstInsertionPt(); InsertPt = &*L->getHeader()->getFirstInsertionPt();
while (InsertPt->getIterator() != Builder.GetInsertPoint() && while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
(isInsertedInstruction(InsertPt) || (isInsertedInstruction(InsertPt) ||

View File

@ -0,0 +1,88 @@
; RUN: opt -S -indvars %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
target triple = "x86_64-unknown-linux-gnu"
; Regression test for PR35406, udiv variant: the division in general_case24
; uses %0 as its denominator and sits behind the %local_0_ > 14 exit check.
; Note that %p is not used in the body.
define i32 @testDiv(i8* %p, i64* %p1) {
; CHECK-LABEL: @testDiv
entry:
br label %loop1
loop1:
; Outer loop header. %local_0_ starts at 8 and is advanced by 1 in loop2.exit.
%local_0_ = phi i32 [ 8, %entry ], [ %9, %loop2.exit ]
%local_2_ = phi i32 [ 63864, %entry ], [ %local_2_43, %loop2.exit ]
%local_3_ = phi i32 [ 51, %entry ], [ %local_3_44, %loop2.exit ]
; The directive below requires that no udiv is emitted between the function
; label and the general_case24 label.
; CHECK-NOT: udiv
; %0 = 14 / %local_0_, which evaluates to 0 whenever %local_0_ > 14.
%0 = udiv i32 14, %local_0_
; Leave the function when %local_0_ > 14, so general_case24 is only reached
; with %local_0_ <= 14.
%1 = icmp ugt i32 %local_0_, 14
br i1 %1, label %exit, label %general_case24
; CHECK-LABEL: general_case24
general_case24:
; Division whose denominator is %0; it is guarded by the branch above.
%2 = udiv i32 60392, %0
; The condition is the constant false, so control always goes to loop2.exit.
br i1 false, label %loop2, label %loop2.exit
loop2:
; Inner loop: %local_1_56 starts at %2 and is decremented by 1 per iteration;
; %local_2_57 counts up from 1.
%local_1_56 = phi i32 [ %2, %general_case24 ], [ %3, %loop2 ]
%local_2_57 = phi i32 [ 1, %general_case24 ], [ %7, %loop2 ]
%3 = add i32 %local_1_56, -1
; Atomically load the i64 at %p1, subtract the sign-extended %3, store it back.
%4 = load atomic i64, i64* %p1 unordered, align 8
%5 = sext i32 %3 to i64
%6 = sub i64 %4, %5
store atomic i64 %6, i64* %p1 unordered, align 8
%7 = add nuw nsw i32 %local_2_57, 1
; Exit the inner loop once the pre-increment counter exceeds 7.
%8 = icmp ugt i32 %local_2_57, 7
br i1 %8, label %loop2.exit, label %loop2
loop2.exit:
; Merge the values coming from the path that skipped loop2 and from loop2.
%local_2_43 = phi i32 [ %local_2_, %general_case24 ], [ 9, %loop2 ]
%local_3_44 = phi i32 [ %local_3_, %general_case24 ], [ %local_1_56, %loop2 ]
; Advance the outer induction variable; exit when its old value exceeds 129.
%9 = add nuw nsw i32 %local_0_, 1
%10 = icmp ugt i32 %local_0_, 129
br i1 %10, label %exit, label %loop1
exit:
ret i32 0
}
; Regression test for PR35406, urem variant: same control flow as @testDiv,
; but the guarded operation in general_case24 is a urem with %0 as divisor.
; Note that %p is not used in the body.
define i32 @testRem(i8* %p, i64* %p1) {
; CHECK-LABEL: @testRem
entry:
br label %loop1
loop1:
; Outer loop header. %local_0_ starts at 8 and is advanced by 1 in loop2.exit.
%local_0_ = phi i32 [ 8, %entry ], [ %9, %loop2.exit ]
%local_2_ = phi i32 [ 63864, %entry ], [ %local_2_43, %loop2.exit ]
%local_3_ = phi i32 [ 51, %entry ], [ %local_3_44, %loop2.exit ]
; The two directives below require a udiv to appear after the function label,
; and no further udiv after it before the general_case24 label.
; CHECK: udiv
; CHECK-NOT: udiv
; %0 = 14 / %local_0_, which evaluates to 0 whenever %local_0_ > 14.
%0 = udiv i32 14, %local_0_
; Leave the function when %local_0_ > 14, so general_case24 is only reached
; with %local_0_ <= 14.
%1 = icmp ugt i32 %local_0_, 14
br i1 %1, label %exit, label %general_case24
; CHECK-LABEL: general_case24
general_case24:
; Remainder whose divisor is %0; it is guarded by the branch above.
%2 = urem i32 60392, %0
; The condition is the constant false, so control always goes to loop2.exit.
br i1 false, label %loop2, label %loop2.exit
loop2:
; Inner loop: %local_1_56 starts at %2 and is decremented by 1 per iteration;
; %local_2_57 counts up from 1.
%local_1_56 = phi i32 [ %2, %general_case24 ], [ %3, %loop2 ]
%local_2_57 = phi i32 [ 1, %general_case24 ], [ %7, %loop2 ]
%3 = add i32 %local_1_56, -1
; Atomically load the i64 at %p1, subtract the sign-extended %3, store it back.
%4 = load atomic i64, i64* %p1 unordered, align 8
%5 = sext i32 %3 to i64
%6 = sub i64 %4, %5
store atomic i64 %6, i64* %p1 unordered, align 8
%7 = add nuw nsw i32 %local_2_57, 1
; Exit the inner loop once the pre-increment counter exceeds 7.
%8 = icmp ugt i32 %local_2_57, 7
br i1 %8, label %loop2.exit, label %loop2
loop2.exit:
; Merge the values coming from the path that skipped loop2 and from loop2.
%local_2_43 = phi i32 [ %local_2_, %general_case24 ], [ 9, %loop2 ]
%local_3_44 = phi i32 [ %local_3_, %general_case24 ], [ %local_1_56, %loop2 ]
; Advance the outer induction variable; exit when its old value exceeds 129.
%9 = add nuw nsw i32 %local_0_, 1
%10 = icmp ugt i32 %local_0_, 129
br i1 %10, label %exit, label %loop1
exit:
ret i32 0
}