[SCEV] Constant fold MultExpr before applying depth limit.

Summary: Users of SCEV reasonably assume that multiplication of two constant SCEVs will in turn be constant. However, that is not always the case: First, we can get here with reached depth limit, and will create MultExpr SCEV `C1 * C2` and cache it. Then, we can get here with the same operands, but with small depth level. But this time we will find existing MultExpr SCEV and return it, instead of expected constant SCEV. This patch changes getMultExpr to not apply depth limit to all constant operands expression, allowing them to be folded. Reviewers: reames, mkazantsev Subscribers: hiraditya, javed.absar, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D79893
2020-05-13 20:55:07 +03:00 · 2020-05-13 20:55:07 +03:00 · 5451289aba
parent 2419dce5d1
commit 5451289aba
3 changed files with 86 additions and 2 deletions
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@ -2931,8 +2931,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,

  Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);

-  // Limit recursion calls depth.
-  if (Depth > MaxArithDepth || hasHugeExpression(Ops))
+  // Limit recursion calls depth, but fold all-constant expressions.
+  // `Ops` is sorted, so it's enough to check just last one.
+  if ((Depth > MaxArithDepth || hasHugeExpression(Ops)) &&
+      !isa<SCEVConstant>(Ops.back()))
    return getOrCreateMulExpr(Ops, Flags);

  if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) {
--- a/llvm/test/Analysis/ScalarEvolution/depth-limit-overrun.ll
+++ b/llvm/test/Analysis/ScalarEvolution/depth-limit-overrun.ll
@ -0,0 +1,68 @@
+; RUN: opt -passes 'strength-reduce' -scalar-evolution-max-arith-depth=2 -S < %s | FileCheck %s
+; RUN: opt -loop-reduce -scalar-evolution-max-arith-depth=2 -S < %s | FileCheck %s
+
+; This test should just compile cleanly without assertions.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+
+define void @test(i32 %A, i32 %B, i32 %C) {
+; CHECK-LABEL: @test(
+; CHECK:       inner_loop:
+; CHECK-NEXT:    [[LSR_IV3:%.*]] = phi i32
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i32
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32
+; CHECK:         [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 3
+; CHECK-NEXT:    [[LSR_IV_NEXT2:%.*]] = add i32 [[LSR_IV1]], 3
+; CHECK-NEXT:    [[LSR_IV_NEXT4:%.*]] = add i32 [[LSR_IV3]], -3
+;
+entry:
+  br label %outer_loop
+
+outer_loop:
+  %phi2 = phi i32 [ %A, %entry ], [ 204, %outer_tail ]
+  %phi3 = phi i32 [ %A, %entry ], [ 243, %outer_tail ]
+  %phi4 = phi i32 [ %B, %entry ], [ %i35, %outer_tail ]
+  br label %guard
+
+guard:
+  %lcmp.mod = icmp eq i32 %C, 0
+  br i1 %lcmp.mod, label %outer_tail, label %preheader
+
+preheader:
+  %i15 = shl i32 %B, 1
+  br label %inner_loop
+
+inner_loop:
+  %phi5 = phi i32 [ %phi3, %preheader ], [ %i30, %inner_loop ]
+  %phi6 = phi i32 [ %phi2, %preheader ], [ %i33, %inner_loop ]
+  %iter = phi i32 [ %C, %preheader ], [ %iter.sub, %inner_loop ]
+  %i17 = sub i32 %phi4, %phi6
+  %i18 = sub i32 14, %phi5
+  %i19 = mul i32 %i18, %C
+  %factor.prol = shl i32 %phi5, 1
+  %i20 = add i32 %i17, %factor.prol
+  %i21 = add i32 %i20, %B
+  %i22 = add i32 %i21, %i19
+  %i23 = sub i32 14, %i22
+  %i24 = mul i32 %i23, %C
+  %factor.1.prol = shl i32 %i22, 1
+  %i25 = add i32 %i17, %factor.1.prol
+  %i27 = add i32 %i25, %i24
+  %i29 = mul i32 %i25, %C
+  %factor.2.prol = shl i32 %i27, 1
+  %i30 = add i32 %i17, %factor.2.prol
+  %i33 = add nsw i32 %phi6, -3
+  %iter.sub = add i32 %iter, -1
+  %iter.cmp = icmp eq i32 %iter.sub, 0
+  br i1 %iter.cmp, label %outer_tail, label %inner_loop
+
+outer_tail:
+  %phi7 = phi i32 [ %phi2, %guard ], [ %i33, %inner_loop ]
+  %i35 = sub i32 %A, %phi7
+  %cmp = icmp sgt i32 %i35, 9876
+  br i1 %cmp, label %exit, label %outer_loop
+
+exit:
+  ret void
+
+}
--- a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll
+++ b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll
@ -126,3 +126,17 @@ exit:
  %trunc2 = trunc i64 %iv2.inc to i32
  ret void
 }
+
+; Check that all constant SCEVs are folded regardless depth limit.
+define void @test_mul_const(i32 %a) {
+; CHECK-LABEL:  @test_mul_const
+; CHECK:          %test3 = mul i32 %test2, 3
+; CHECK-NEXT:     -->  (9 + (3 * (3 * %a)))
+; CHECK:          %test4 = mul i32 3, 3
+; CHECK-NEXT:     -->  9 U: [9,10) S: [9,10)
+  %test = mul i32 3, %a
+  %test2 = add i32 3, %test
+  %test3 = mul i32 %test2, 3
+  %test4 = mul i32 3, 3
+  ret void
+}