forked from OSchip/llvm-project
[SCEV] Fix the movement of insertion point in expander. PR35406.
We cannot move the insertion point to the loop header if the SCEV contains div/rem operations, because they could then be hoisted above the check for a zero denominator. Reviewers: sanjoy, mkazantsev, sebpop Reviewed By: sebpop Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D41229 llvm-svn: 320789
This commit is contained in:
parent
c41e2f6e7b
commit
67da7696a0
|
@ -1732,10 +1732,28 @@ Value *SCEVExpander::expand(const SCEV *S) {
|
||||||
InsertPt = &*L->getHeader()->getFirstInsertionPt();
|
InsertPt = &*L->getHeader()->getFirstInsertionPt();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// We can move the insertion point only if there are no div or rem operations;
|
||||||
|
// otherwise we risk moving it over the check for a zero denominator.
|
||||||
|
auto SafeToHoist = [](const SCEV *S) {
|
||||||
|
return !SCEVExprContains(S, [](const SCEV *S) {
|
||||||
|
if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
|
||||||
|
if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
|
||||||
|
// Division by non-zero constants can be hoisted.
|
||||||
|
return SC->getValue()->isZero();
|
||||||
|
// All other divisions should not be moved as they may be
|
||||||
|
// divisions by zero and should be kept within the
|
||||||
|
// conditions of the surrounding loops that guard their
|
||||||
|
// execution (see PR35406).
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
};
|
||||||
// If the SCEV is computable at this level, insert it into the header
|
// If the SCEV is computable at this level, insert it into the header
|
||||||
// after the PHIs (and after any other instructions that we've inserted
|
// after the PHIs (and after any other instructions that we've inserted
|
||||||
// there) so that it is guaranteed to dominate any user inside the loop.
|
// there) so that it is guaranteed to dominate any user inside the loop.
|
||||||
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
|
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
|
||||||
|
SafeToHoist(S))
|
||||||
InsertPt = &*L->getHeader()->getFirstInsertionPt();
|
InsertPt = &*L->getHeader()->getFirstInsertionPt();
|
||||||
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
|
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
|
||||||
(isInsertedInstruction(InsertPt) ||
|
(isInsertedInstruction(InsertPt) ||
|
||||||
|
|
|
@ -0,0 +1,88 @@
|
||||||
|
; RUN: opt -S -indvars %s | FileCheck %s
|
||||||
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
|
||||||
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
define i32 @testDiv(i8* %p, i64* %p1) {
|
||||||
|
; CHECK-LABEL: @testDiv
|
||||||
|
entry:
|
||||||
|
br label %loop1
|
||||||
|
|
||||||
|
loop1:
|
||||||
|
%local_0_ = phi i32 [ 8, %entry ], [ %9, %loop2.exit ]
|
||||||
|
%local_2_ = phi i32 [ 63864, %entry ], [ %local_2_43, %loop2.exit ]
|
||||||
|
%local_3_ = phi i32 [ 51, %entry ], [ %local_3_44, %loop2.exit ]
|
||||||
|
; CHECK-NOT: udiv
|
||||||
|
%0 = udiv i32 14, %local_0_
|
||||||
|
%1 = icmp ugt i32 %local_0_, 14
|
||||||
|
br i1 %1, label %exit, label %general_case24
|
||||||
|
|
||||||
|
; CHECK-LABEL: general_case24
|
||||||
|
general_case24:
|
||||||
|
%2 = udiv i32 60392, %0
|
||||||
|
br i1 false, label %loop2, label %loop2.exit
|
||||||
|
|
||||||
|
loop2:
|
||||||
|
%local_1_56 = phi i32 [ %2, %general_case24 ], [ %3, %loop2 ]
|
||||||
|
%local_2_57 = phi i32 [ 1, %general_case24 ], [ %7, %loop2 ]
|
||||||
|
%3 = add i32 %local_1_56, -1
|
||||||
|
%4 = load atomic i64, i64* %p1 unordered, align 8
|
||||||
|
%5 = sext i32 %3 to i64
|
||||||
|
%6 = sub i64 %4, %5
|
||||||
|
store atomic i64 %6, i64* %p1 unordered, align 8
|
||||||
|
%7 = add nuw nsw i32 %local_2_57, 1
|
||||||
|
%8 = icmp ugt i32 %local_2_57, 7
|
||||||
|
br i1 %8, label %loop2.exit, label %loop2
|
||||||
|
|
||||||
|
loop2.exit:
|
||||||
|
%local_2_43 = phi i32 [ %local_2_, %general_case24 ], [ 9, %loop2 ]
|
||||||
|
%local_3_44 = phi i32 [ %local_3_, %general_case24 ], [ %local_1_56, %loop2 ]
|
||||||
|
%9 = add nuw nsw i32 %local_0_, 1
|
||||||
|
%10 = icmp ugt i32 %local_0_, 129
|
||||||
|
br i1 %10, label %exit, label %loop1
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret i32 0
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @testRem(i8* %p, i64* %p1) {
|
||||||
|
; CHECK-LABEL: @testRem
|
||||||
|
entry:
|
||||||
|
br label %loop1
|
||||||
|
|
||||||
|
loop1:
|
||||||
|
%local_0_ = phi i32 [ 8, %entry ], [ %9, %loop2.exit ]
|
||||||
|
%local_2_ = phi i32 [ 63864, %entry ], [ %local_2_43, %loop2.exit ]
|
||||||
|
%local_3_ = phi i32 [ 51, %entry ], [ %local_3_44, %loop2.exit ]
|
||||||
|
; CHECK: udiv
|
||||||
|
; CHECK-NOT: udiv
|
||||||
|
%0 = udiv i32 14, %local_0_
|
||||||
|
%1 = icmp ugt i32 %local_0_, 14
|
||||||
|
br i1 %1, label %exit, label %general_case24
|
||||||
|
|
||||||
|
; CHECK-LABEL: general_case24
|
||||||
|
general_case24:
|
||||||
|
%2 = urem i32 60392, %0
|
||||||
|
br i1 false, label %loop2, label %loop2.exit
|
||||||
|
|
||||||
|
loop2:
|
||||||
|
%local_1_56 = phi i32 [ %2, %general_case24 ], [ %3, %loop2 ]
|
||||||
|
%local_2_57 = phi i32 [ 1, %general_case24 ], [ %7, %loop2 ]
|
||||||
|
%3 = add i32 %local_1_56, -1
|
||||||
|
%4 = load atomic i64, i64* %p1 unordered, align 8
|
||||||
|
%5 = sext i32 %3 to i64
|
||||||
|
%6 = sub i64 %4, %5
|
||||||
|
store atomic i64 %6, i64* %p1 unordered, align 8
|
||||||
|
%7 = add nuw nsw i32 %local_2_57, 1
|
||||||
|
%8 = icmp ugt i32 %local_2_57, 7
|
||||||
|
br i1 %8, label %loop2.exit, label %loop2
|
||||||
|
|
||||||
|
loop2.exit:
|
||||||
|
%local_2_43 = phi i32 [ %local_2_, %general_case24 ], [ 9, %loop2 ]
|
||||||
|
%local_3_44 = phi i32 [ %local_3_, %general_case24 ], [ %local_1_56, %loop2 ]
|
||||||
|
%9 = add nuw nsw i32 %local_0_, 1
|
||||||
|
%10 = icmp ugt i32 %local_0_, 129
|
||||||
|
br i1 %10, label %exit, label %loop1
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret i32 0
|
||||||
|
}
|
Loading…
Reference in New Issue