[SCEVExpander] Only create trunc when needed.

9345ab3a45 updated generateOverflowCheck to skip creating checks that
always evaluate to false. This in turn means that we only need to
create TruncTripCount if it is actually used.

Sink the creation of TruncTripCount into ComputeEndCheck, so that it is
only created when there's an actual check.
This commit is contained in:
Florian Hahn 2022-01-10 11:31:26 +00:00
parent 3a094d8b27
commit aecad5828e
No known key found for this signature in database
GPG Key ID: EEF712BB5E80EBBA
4 changed files with 3 additions and 9 deletions

View File

@ -2490,9 +2490,6 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
// Get the backedge taken count and truncate or extend it to the AR type.
Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
// Compute |Step| * Backedge
// Compute:
// 1. Start + |Step| * Backedge < Start
@ -2506,6 +2503,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
return ConstantInt::getFalse(Loc->getContext());
// Get the backedge taken count and truncate or extend it to the AR type.
Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
Value *MulV, *OfMul;
if (Step->isOne()) {
// Special-case Step of one. Potentially-costly `umul_with_overflow` isn't

View File

@ -13,7 +13,6 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
; CHECK: for.body.lver.check:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
@ -149,7 +148,6 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
; CHECK: for.body.lver.check:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])

View File

@ -19,7 +19,6 @@ define void @load_clamped_index(i32* %A, i32* %B, i32 %N) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
@ -104,7 +103,6 @@ define void @store_clamped_index(i32* %A, i32* %B, i32 %N) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
@ -268,7 +266,6 @@ define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]

View File

@ -30,7 +30,6 @@ define void @f1(i16* noalias %a,
; LV-NEXT: for.body.lver.check:
; LV-NEXT: [[A5:%.*]] = bitcast i16* [[A:%.*]] to i8*
; LV-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; LV-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])