[SCEV] Strengthen nowrap flags after constant folding

We should first try to constant fold the add expression and only
strengthen nowrap flags afterwards. This allows us to determine
stronger flags if e.g. only two operands are left after constant
folding (and thus "guaranteed no wrap region" code applies) or the
resulting operands are non-negative and thus nsw->nuw strengthening
applies.
This commit is contained in:
Nikita Popov 2020-10-25 17:13:38 +01:00
parent c5718253c9
commit 0dda633317
9 changed files with 19 additions and 24 deletions

View File

@@ -2178,8 +2178,6 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 // Sort by complexity, this groups all similar expression types together.
 GroupByComplexity(Ops, &LI, DT);
-Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
 // If there are any constants, fold them together.
 unsigned Idx = 0;
 if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
@@ -2202,6 +2200,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 if (Ops.size() == 1) return Ops[0];
 }
+Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
 // Limit recursion calls depth.
 if (Depth > MaxArithDepth || hasHugeExpression(Ops))
 return getOrCreateAddExpr(Ops, Flags);

View File

@ -358,9 +358,9 @@ define i64 @sext_like_noop(i32 %n) {
; X32-LABEL: 'sext_like_noop'
; X32-NEXT: Classifying expressions for: @sext_like_noop
; X32-NEXT: %ii = sext i32 %i to i64
; X32-NEXT: --> (sext i32 {1,+,1}<nuw><%for.body> to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) --> (sext i32 (-1 + ptrtoint (i64 (i32)* @sext_like_noop to i32)) to i64) U: [-1,65535) S: [-65537,65535)
; X32-NEXT: --> (sext i32 {1,+,1}<nuw><%for.body> to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) --> (-1 + (sext i32 ptrtoint (i64 (i32)* @sext_like_noop to i32) to i64))<nsw> U: [-1,65535) S: [-65537,65535)
; X32-NEXT: %div = sdiv i64 55555, %ii
; X32-NEXT: --> %div U: full-set S: full-set --> sdiv (i64 55555, i64 sext (i32 add (i32 ptrtoint (i64 (i32)* @sext_like_noop to i32), i32 -1) to i64)) U: full-set S: full-set
; X32-NEXT: --> %div U: full-set S: full-set --> sdiv (i64 55555, i64 add (i64 sext (i32 ptrtoint (i64 (i32)* @sext_like_noop to i32) to i64), i64 -1)) U: full-set S: full-set
; X32-NEXT: %i = phi i32 [ %inc, %for.body ], [ 1, %entry ]
; X32-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,0) S: [1,0) Exits: (-1 + ptrtoint (i64 (i32)* @sext_like_noop to i32))<nsw> LoopDispositions: { %for.body: Computable }
; X32-NEXT: %inc = add nuw i32 %i, 1

View File

@ -10,7 +10,7 @@ define void @umin_unsigned_check(i64 %n) {
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4098) S: [0,4098) Exits: (1 + (4096 umin %n))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4099) S: [1,4099) Exits: (2 + (4096 umin %n)) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4099) S: [1,4099) Exits: (2 + (4096 umin %n))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @umin_unsigned_check
; CHECK-NEXT: Loop %loop: backedge-taken count is (1 + (4096 umin %n))<nuw><nsw>
; CHECK-NEXT: Loop %loop: max backedge-taken count is 4097
@ -40,7 +40,7 @@ define void @umin_signed_check(i64 %n) {
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4098) S: [0,4098) Exits: (1 + (4096 umin %n))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4099) S: [1,4099) Exits: (2 + (4096 umin %n)) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4099) S: [1,4099) Exits: (2 + (4096 umin %n))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @umin_signed_check
; CHECK-NEXT: Loop %loop: backedge-taken count is (1 + (4096 umin %n))<nuw><nsw>
; CHECK-NEXT: Loop %loop: max backedge-taken count is 4097

View File

@ -204,9 +204,9 @@ exit:
define void @nsw_start1_step2(i4 %n) {
; CHECK-LABEL: 'nsw_start1_step2'
; CHECK-NEXT: Determining loop execution counts for: @nsw_start1_step2
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-2 + (3 smax %n)) /u 2)
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-2 + (3 smax %n))<nsw> /u 2)
; CHECK-NEXT: Loop %loop: max backedge-taken count is 2
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-2 + (3 smax %n)) /u 2)
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-2 + (3 smax %n))<nsw> /u 2)
; CHECK-NEXT: Predicates:
; CHECK: Loop %loop: Trip multiple is 1
;
@ -333,9 +333,9 @@ exit:
define void @even_start1_step2(i4 %n) {
; CHECK-LABEL: 'even_start1_step2'
; CHECK-NEXT: Determining loop execution counts for: @even_start1_step2
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-2 + (3 smax (2 * %n))) /u 2)
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-2 + (3 smax (2 * %n)))<nsw> /u 2)
; CHECK-NEXT: Loop %loop: max backedge-taken count is 2
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-2 + (3 smax (2 * %n))) /u 2)
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-2 + (3 smax (2 * %n)))<nsw> /u 2)
; CHECK-NEXT: Predicates:
; CHECK: Loop %loop: Trip multiple is 1
;
@ -465,9 +465,9 @@ exit:
define void @even_nsw_start1_step2(i4 %n) {
; CHECK-LABEL: 'even_nsw_start1_step2'
; CHECK-NEXT: Determining loop execution counts for: @even_nsw_start1_step2
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-2 + (3 smax (2 * %n))) /u 2)
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-2 + (3 smax (2 * %n)))<nsw> /u 2)
; CHECK-NEXT: Loop %loop: max backedge-taken count is 2
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-2 + (3 smax (2 * %n))) /u 2)
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-2 + (3 smax (2 * %n)))<nsw> /u 2)
; CHECK-NEXT: Predicates:
; CHECK: Loop %loop: Trip multiple is 1
;

View File

@ -474,7 +474,7 @@ define void @test_10(i32 %n) {
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90
; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMIN]], -99
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[UMIN]], -99
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

View File

@ -79,11 +79,9 @@ define i32 @testRem(i8* %p, i64* %p1) {
; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop1:
; CHECK-NEXT: [[LOCAL_0_:%.*]] = phi i32 [ 8, [[ENTRY:%.*]] ], [ [[I9:%.*]], [[LOOP2_EXIT:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = udiv i32 14, [[LOCAL_0_]]
; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i32 [[LOCAL_0_]], 15
; CHECK-NEXT: br i1 [[EXITCOND1]], label [[EXIT:%.*]], label [[GENERAL_CASE24:%.*]]
; CHECK: general_case24:
; CHECK-NEXT: [[I2:%.*]] = urem i32 60392, [[I]]
; CHECK-NEXT: br i1 false, label [[LOOP2_PREHEADER:%.*]], label [[LOOP2_EXIT]]
; CHECK: loop2.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = udiv i32 14, [[LOCAL_0_]]
@ -95,15 +93,12 @@ define i32 @testRem(i8* %p, i64* %p1) {
; CHECK-NEXT: br label [[LOOP2:%.*]]
; CHECK: loop2:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP5]], [[LOOP2_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP2]] ]
; CHECK-NEXT: [[LOCAL_1_56:%.*]] = phi i32 [ [[I3:%.*]], [[LOOP2]] ], [ [[I2]], [[LOOP2_PREHEADER]] ]
; CHECK-NEXT: [[LOCAL_2_57:%.*]] = phi i32 [ [[I7:%.*]], [[LOOP2]] ], [ 1, [[LOOP2_PREHEADER]] ]
; CHECK-NEXT: [[I3]] = add i32 [[LOCAL_1_56]], -1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[I4:%.*]] = load atomic i64, i64* [[P1:%.*]] unordered, align 8
; CHECK-NEXT: [[I5:%.*]] = sext i32 [[I3]] to i64
; CHECK-NEXT: [[I6:%.*]] = sub i64 [[I4]], [[I5]]
; CHECK-NEXT: [[I6:%.*]] = sub i64 [[I4]], [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store atomic i64 [[I6]], i64* [[P1]] unordered, align 8
; CHECK-NEXT: [[I7]] = add nuw nsw i32 [[LOCAL_2_57]], 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I7]], 9
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP2_EXIT_LOOPEXIT:%.*]], label [[LOOP2]]
; CHECK: loop2.exit.loopexit:

View File

@ -577,7 +577,7 @@ define void @func_17(i32* %len.ptr) {
; CHECK: loop.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[LEN]], 0
; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[LEN]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], -5
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[SMAX]], -5
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_INC:%.*]], [[BE:%.*]] ], [ -6, [[LOOP_PREHEADER]] ]

View File

@ -53,7 +53,7 @@ for.end: ; preds = %for.body, %entry
; CHECK: %1 = sub i32 %0, %m
; CHECK: %2 = lshr i32 %1, 2
; CHECK: %3 = shl nuw i32 %2, 2
; CHECK: %4 = add i32 %3, 3
; CHECK: %4 = add nuw nsw i32 %3, 3
; CHECK: br label %for.body
; CHECK: for.body:
@ -132,7 +132,7 @@ for.end: ; preds = %for.body, %entry
; CHECK: %1 = sub i32 %0, %rem
; CHECK: %2 = lshr i32 %1, 2
; CHECK: %3 = shl nuw i32 %2, 2
; CHECK: %4 = add i32 %3, 3
; CHECK: %4 = add nuw nsw i32 %3, 3
; CHECK: br label %for.body
; CHECK: for.body:

View File

@ -841,7 +841,7 @@ define i32 @test_max_trip_count(i64 %len, i1* %test_base, i64 %n) {
; CHECK-NEXT: call void @init(i32* [[BASE]])
; CHECK-NEXT: [[MIN_CMP:%.*]] = icmp ult i64 4096, [[N:%.*]]
; CHECK-NEXT: [[MIN_N:%.*]] = select i1 [[MIN_CMP]], i64 4096, i64 [[N]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[MIN_N]], 2
; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[MIN_N]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph: