forked from OSchip/llvm-project
[SCEV] Fix the movement of insertion point in expander. PR35406.
We cannot move the insertion point to the loop header if the SCEV contains div/rem operations, because they may then be hoisted above the check for a zero denominator. Reviewers: sanjoy, mkazantsev, sebpop Reviewed By: sebpop Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D41229 llvm-svn: 320789
This commit is contained in:
parent
c41e2f6e7b
commit
67da7696a0
|
@ -1732,10 +1732,28 @@ Value *SCEVExpander::expand(const SCEV *S) {
|
|||
InsertPt = &*L->getHeader()->getFirstInsertionPt();
|
||||
}
|
||||
} else {
|
||||
// We can move the insertion point only if there are no div or rem operations;
|
||||
// otherwise we risk moving it over the check for a zero denominator.
|
||||
auto SafeToHoist = [](const SCEV *S) {
|
||||
return !SCEVExprContains(S, [](const SCEV *S) {
|
||||
if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
|
||||
if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
|
||||
// Division by non-zero constants can be hoisted.
|
||||
return SC->getValue()->isZero();
|
||||
// All other divisions should not be moved as they may be
|
||||
// divisions by zero and should be kept within the
|
||||
// conditions of the surrounding loops that guard their
|
||||
// execution (see PR35406).
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
};
|
||||
// If the SCEV is computable at this level, insert it into the header
|
||||
// after the PHIs (and after any other instructions that we've inserted
|
||||
// there) so that it is guaranteed to dominate any user inside the loop.
|
||||
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
|
||||
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
|
||||
SafeToHoist(S))
|
||||
InsertPt = &*L->getHeader()->getFirstInsertionPt();
|
||||
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
|
||||
(isInsertedInstruction(InsertPt) ||
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
; RUN: opt -S -indvars %s | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @testDiv(i8* %p, i64* %p1) {
|
||||
; CHECK-LABEL: @testDiv
|
||||
entry:
|
||||
br label %loop1
|
||||
|
||||
loop1:
|
||||
%local_0_ = phi i32 [ 8, %entry ], [ %9, %loop2.exit ]
|
||||
%local_2_ = phi i32 [ 63864, %entry ], [ %local_2_43, %loop2.exit ]
|
||||
%local_3_ = phi i32 [ 51, %entry ], [ %local_3_44, %loop2.exit ]
|
||||
; CHECK-NOT: udiv
|
||||
%0 = udiv i32 14, %local_0_
|
||||
%1 = icmp ugt i32 %local_0_, 14
|
||||
br i1 %1, label %exit, label %general_case24
|
||||
|
||||
; CHECK-LABEL: general_case24
|
||||
general_case24:
|
||||
%2 = udiv i32 60392, %0
|
||||
br i1 false, label %loop2, label %loop2.exit
|
||||
|
||||
loop2:
|
||||
%local_1_56 = phi i32 [ %2, %general_case24 ], [ %3, %loop2 ]
|
||||
%local_2_57 = phi i32 [ 1, %general_case24 ], [ %7, %loop2 ]
|
||||
%3 = add i32 %local_1_56, -1
|
||||
%4 = load atomic i64, i64* %p1 unordered, align 8
|
||||
%5 = sext i32 %3 to i64
|
||||
%6 = sub i64 %4, %5
|
||||
store atomic i64 %6, i64* %p1 unordered, align 8
|
||||
%7 = add nuw nsw i32 %local_2_57, 1
|
||||
%8 = icmp ugt i32 %local_2_57, 7
|
||||
br i1 %8, label %loop2.exit, label %loop2
|
||||
|
||||
loop2.exit:
|
||||
%local_2_43 = phi i32 [ %local_2_, %general_case24 ], [ 9, %loop2 ]
|
||||
%local_3_44 = phi i32 [ %local_3_, %general_case24 ], [ %local_1_56, %loop2 ]
|
||||
%9 = add nuw nsw i32 %local_0_, 1
|
||||
%10 = icmp ugt i32 %local_0_, 129
|
||||
br i1 %10, label %exit, label %loop1
|
||||
|
||||
exit:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @testRem(i8* %p, i64* %p1) {
|
||||
; CHECK-LABEL: @testRem
|
||||
entry:
|
||||
br label %loop1
|
||||
|
||||
loop1:
|
||||
%local_0_ = phi i32 [ 8, %entry ], [ %9, %loop2.exit ]
|
||||
%local_2_ = phi i32 [ 63864, %entry ], [ %local_2_43, %loop2.exit ]
|
||||
%local_3_ = phi i32 [ 51, %entry ], [ %local_3_44, %loop2.exit ]
|
||||
; CHECK: udiv
|
||||
; CHECK-NOT: udiv
|
||||
%0 = udiv i32 14, %local_0_
|
||||
%1 = icmp ugt i32 %local_0_, 14
|
||||
br i1 %1, label %exit, label %general_case24
|
||||
|
||||
; CHECK-LABEL: general_case24
|
||||
general_case24:
|
||||
%2 = urem i32 60392, %0
|
||||
br i1 false, label %loop2, label %loop2.exit
|
||||
|
||||
loop2:
|
||||
%local_1_56 = phi i32 [ %2, %general_case24 ], [ %3, %loop2 ]
|
||||
%local_2_57 = phi i32 [ 1, %general_case24 ], [ %7, %loop2 ]
|
||||
%3 = add i32 %local_1_56, -1
|
||||
%4 = load atomic i64, i64* %p1 unordered, align 8
|
||||
%5 = sext i32 %3 to i64
|
||||
%6 = sub i64 %4, %5
|
||||
store atomic i64 %6, i64* %p1 unordered, align 8
|
||||
%7 = add nuw nsw i32 %local_2_57, 1
|
||||
%8 = icmp ugt i32 %local_2_57, 7
|
||||
br i1 %8, label %loop2.exit, label %loop2
|
||||
|
||||
loop2.exit:
|
||||
%local_2_43 = phi i32 [ %local_2_, %general_case24 ], [ 9, %loop2 ]
|
||||
%local_3_44 = phi i32 [ %local_3_, %general_case24 ], [ %local_1_56, %loop2 ]
|
||||
%9 = add nuw nsw i32 %local_0_, 1
|
||||
%10 = icmp ugt i32 %local_0_, 129
|
||||
br i1 %10, label %exit, label %loop1
|
||||
|
||||
exit:
|
||||
ret i32 0
|
||||
}
|
Loading…
Reference in New Issue