forked from OSchip/llvm-project
[SCEVExpander] Only create trunc when needed.
Commit 9345ab3a45 updated generateOverflowCheck to skip creating checks
that always evaluate to false. This in turn means that we only need to
create TruncTripCount if it is actually used.
Sink the creation of TruncTripCount into ComputeEndCheck, so that it is
only created when there is an actual check.
This commit is contained in:
parent
3a094d8b27
commit
aecad5828e
|
@ -2490,9 +2490,6 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
|
|||
Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
|
||||
Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
|
||||
|
||||
// Get the backedge taken count and truncate or extended to the AR type.
|
||||
Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
|
||||
|
||||
// Compute |Step| * Backedge
|
||||
// Compute:
|
||||
// 1. Start + |Step| * Backedge < Start
|
||||
|
@ -2506,6 +2503,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
|
|||
if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
|
||||
return ConstantInt::getFalse(Loc->getContext());
|
||||
|
||||
// Get the backedge taken count and truncate or extended to the AR type.
|
||||
Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
|
||||
|
||||
Value *MulV, *OfMul;
|
||||
if (Step->isOne()) {
|
||||
// Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
|
||||
|
|
|
@ -13,7 +13,6 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
|
|||
; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
|
||||
; CHECK: for.body.lver.check:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
|
||||
|
@ -149,7 +148,6 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
|
|||
; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
|
||||
; CHECK: for.body.lver.check:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
|
||||
|
|
|
@ -19,7 +19,6 @@ define void @load_clamped_index(i32* %A, i32* %B, i32 %N) {
|
|||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
|
||||
; CHECK: vector.scevcheck:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
|
||||
|
@ -104,7 +103,6 @@ define void @store_clamped_index(i32* %A, i32* %B, i32 %N) {
|
|||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
|
||||
; CHECK: vector.scevcheck:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
|
||||
|
@ -268,7 +266,6 @@ define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) {
|
|||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
|
||||
; CHECK: vector.scevcheck:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
|
||||
|
|
|
@ -30,7 +30,6 @@ define void @f1(i16* noalias %a,
|
|||
; LV-NEXT: for.body.lver.check:
|
||||
; LV-NEXT: [[A5:%.*]] = bitcast i16* [[A:%.*]] to i8*
|
||||
; LV-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
|
||||
; LV-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
|
||||
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
|
||||
; LV-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
|
||||
|
|
Loading…
Reference in New Issue