[SCEV] Constant fold MultExpr before applying depth limit.

Summary:
Users of SCEV reasonably assume that multiplication of two constant
SCEVs will in turn be constant.
However, that is not always the case:
First, we can get here with the depth limit already reached, in which
case we create a MulExpr SCEV `C1 * C2` and cache it.
Then, we can get here again with the same operands but at a small depth
level. This time we find the existing MulExpr SCEV in the cache and
return it instead of the expected constant SCEV.

This patch changes getMulExpr to not apply the depth limit to expressions
whose operands are all constants, allowing them to be folded.

Reviewers: reames, mkazantsev

Subscribers: hiraditya, javed.absar, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D79893
This commit is contained in:
Denis Antrushin 2020-05-13 20:55:07 +03:00
parent 2419dce5d1
commit 5451289aba
3 changed files with 86 additions and 2 deletions

View File

@ -2931,8 +2931,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
// Limit recursion calls depth.
if (Depth > MaxArithDepth || hasHugeExpression(Ops))
// Limit recursion calls depth, but fold all-constant expressions.
// `Ops` is sorted, so it's enough to check just last one.
if ((Depth > MaxArithDepth || hasHugeExpression(Ops)) &&
!isa<SCEVConstant>(Ops.back()))
return getOrCreateMulExpr(Ops, Flags);
if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) {

View File

@ -0,0 +1,68 @@
; RUN: opt -passes 'strength-reduce' -scalar-evolution-max-arith-depth=2 -S < %s | FileCheck %s
; RUN: opt -loop-reduce -scalar-evolution-max-arith-depth=2 -S < %s | FileCheck %s
; This test should just compile cleanly without assertions.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
; Regression input: a two-level loop nest whose inner loop computes a deep
; chain of add/sub/shl/mul values over %B, %C and the loop phis. The
; expectations below only pin the shape of the rewritten inner loop: three
; i32 phis followed by increments of 3, 3 and -3.
define void @test(i32 %A, i32 %B, i32 %C) {
; CHECK-LABEL: @test(
; CHECK: inner_loop:
; CHECK-NEXT: [[LSR_IV3:%.*]] = phi i32
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i32
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32
; CHECK: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 3
; CHECK-NEXT: [[LSR_IV_NEXT2:%.*]] = add i32 [[LSR_IV1]], 3
; CHECK-NEXT: [[LSR_IV_NEXT4:%.*]] = add i32 [[LSR_IV3]], -3
;
entry:
br label %outer_loop
outer_loop:
; On re-entry from %outer_tail, %phi2 and %phi3 restart from the constants
; 204 and 243, while %phi4 takes the previous %i35.
%phi2 = phi i32 [ %A, %entry ], [ 204, %outer_tail ]
%phi3 = phi i32 [ %A, %entry ], [ 243, %outer_tail ]
%phi4 = phi i32 [ %B, %entry ], [ %i35, %outer_tail ]
br label %guard
guard:
; The inner loop is skipped entirely when %C is zero.
%lcmp.mod = icmp eq i32 %C, 0
br i1 %lcmp.mod, label %outer_tail, label %preheader
preheader:
; %i15 has no users in this function.
%i15 = shl i32 %B, 1
br label %inner_loop
inner_loop:
; %phi5 is fed back from %i30 (the end of the arithmetic chain below),
; %phi6 decreases by 3 per iteration, and %iter counts down from %C.
%phi5 = phi i32 [ %phi3, %preheader ], [ %i30, %inner_loop ]
%phi6 = phi i32 [ %phi2, %preheader ], [ %i33, %inner_loop ]
%iter = phi i32 [ %C, %preheader ], [ %iter.sub, %inner_loop ]
%i17 = sub i32 %phi4, %phi6
%i18 = sub i32 14, %phi5
%i19 = mul i32 %i18, %C
%factor.prol = shl i32 %phi5, 1
%i20 = add i32 %i17, %factor.prol
%i21 = add i32 %i20, %B
%i22 = add i32 %i21, %i19
%i23 = sub i32 14, %i22
%i24 = mul i32 %i23, %C
%factor.1.prol = shl i32 %i22, 1
%i25 = add i32 %i17, %factor.1.prol
%i27 = add i32 %i25, %i24
; %i29 has no users in this function.
%i29 = mul i32 %i25, %C
%factor.2.prol = shl i32 %i27, 1
%i30 = add i32 %i17, %factor.2.prol
%i33 = add nsw i32 %phi6, -3
; The inner loop exits once the down-counter reaches zero.
%iter.sub = add i32 %iter, -1
%iter.cmp = icmp eq i32 %iter.sub, 0
br i1 %iter.cmp, label %outer_tail, label %inner_loop
outer_tail:
; %phi7 is %phi2 when the inner loop was skipped, otherwise the last %i33.
%phi7 = phi i32 [ %phi2, %guard ], [ %i33, %inner_loop ]
%i35 = sub i32 %A, %phi7
; Leave the outer loop once %A - %phi7 is greater than 9876 (signed compare).
%cmp = icmp sgt i32 %i35, 9876
br i1 %cmp, label %exit, label %outer_loop
exit:
ret void
}

View File

@ -126,3 +126,17 @@ exit:
%trunc2 = trunc i64 %iv2.inc to i32
ret void
}
; Check that all-constant SCEVs are folded regardless of the depth limit.
; Two expectations are pinned here: the SCEV printed for %test3 keeps the
; nested product (3 * (3 * %a)), while the SCEV for %test4 (3 * 3) is the
; folded constant 9 with the single-value ranges [9,10).
; NOTE(review): the RUN line that configures the depth limit for this file is
; outside the visible hunk; confirm its value there.
define void @test_mul_const(i32 %a) {
; CHECK-LABEL: @test_mul_const
; CHECK: %test3 = mul i32 %test2, 3
; CHECK-NEXT: --> (9 + (3 * (3 * %a)))
; CHECK: %test4 = mul i32 3, 3
; CHECK-NEXT: --> 9 U: [9,10) S: [9,10)
; %test3 is 3 * (3 + 3 * %a); %test4 multiplies two literal constants.
%test = mul i32 3, %a
%test2 = add i32 3, %test
%test3 = mul i32 %test2, 3
%test4 = mul i32 3, 3
ret void
}