From faa857dba7426af47269fad1e5feba19050c2d58 Mon Sep 17 00:00:00 2001
From: Li Huang
Date: Fri, 21 Oct 2016 20:05:21 +0000
Subject: [PATCH] [SCEV] Memoize visitMulExpr results in SCEVRewriteVisitor.

Summary:
When SCEVRewriteVisitor traverses the SCEV DAG, it may visit the same SCEV
multiple times if this SCEV is referenced by multiple other SCEVs. This has
exponential time complexity in the worst case. Memoizing the results will
avoid re-visiting the same SCEV. Add a map to save the results, and override
the visit function of SCEVVisitor. Now SCEVRewriteVisitor only visits each
SCEV once and thus returns the same result for the same input SCEV.

This patch fixes PR18606, PR18607.

Reviewers: Sanjoy Das, Mehdi Amini, Michael Zolotukhin

Differential Revision: https://reviews.llvm.org/D25810

llvm-svn: 284868
---
 .../Analysis/ScalarEvolutionExpressions.h     | 25 ++++++-
 llvm/test/Analysis/ScalarEvolution/pr18606.ll | 67 +++++++++++++++++++
 2 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Analysis/ScalarEvolution/pr18606.ll

diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
index ff24cafbe680..9414c7bb5743 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -537,14 +537,37 @@ namespace llvm {
     T.visitAll(Root);
   }
 
-  /// Recursively visits a SCEV expression and re-writes it.
+  /// This visitor recursively visits a SCEV expression and re-writes it.
+  /// The result from each visit is cached, so it will return the same
+  /// SCEV for the same input.
   template<typename SC>
   class SCEVRewriteVisitor : public SCEVVisitor<SC, const SCEV*> {
   protected:
     ScalarEvolution &SE;
+    // Memoize the result of each visit so that we only compute once for
+    // the same input SCEV. This is to avoid redundant computations when
+    // a SCEV is referenced by multiple SCEVs. Without memoization, this
+    // visit algorithm would have exponential time complexity in the worst
+    // case, causing the compiler to hang on certain tests.
+    DenseMap<const SCEV *, const SCEV *> RewriteResults;
+
   public:
     SCEVRewriteVisitor(ScalarEvolution &SE) : SE(SE) {}
 
+    /// Return the rewrite of \p S, computing it only on the first visit and
+    /// serving every later request for the same SCEV from RewriteResults.
+    const SCEV *visit(const SCEV *S) {
+      auto It = RewriteResults.find(S);
+      if (It != RewriteResults.end())
+        return It->second;
+      auto *Visited = SCEVVisitor<SC, const SCEV *>::visit(S);
+      // Insert outside the assert: under NDEBUG the assert operand is not
+      // evaluated, which would leave the cache empty in release builds.
+      auto Result = RewriteResults.try_emplace(S, Visited);
+      assert(Result.second && "Should insert a new entry");
+      return Result.first->second;
+    }
+
     const SCEV *visitConstant(const SCEVConstant *Constant) {
       return Constant;
     }
diff --git a/llvm/test/Analysis/ScalarEvolution/pr18606.ll b/llvm/test/Analysis/ScalarEvolution/pr18606.ll
new file mode 100644
index 000000000000..6154a0f7d424
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/pr18606.ll
@@ -0,0 +1,67 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; CHECK: @main
+; CHECK: %mul.lcssa5 = phi i32 [ %a.promoted4, %entry ], [ %mul.30, %for.body3 ]
+; CHECK: %mul = mul nsw i32 %mul.lcssa5, %mul.lcssa5
+; CHECK: %mul.30 = mul nsw i32 %mul.29, %mul.29
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = local_unnamed_addr global i32 0, align 4
+@b = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: norecurse nounwind uwtable
+define i32 @main() local_unnamed_addr {
+entry:
+  %a.promoted4 = load i32, i32* @a, align 4
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %entry, %for.body3
+  %mul.lcssa5 = phi i32 [ %a.promoted4, %entry ], [ %mul.30, %for.body3 ]
+  %i.03 = phi i32 [ 0, %entry ], [ %inc5, %for.body3 ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.cond1.preheader
+  %mul = mul nsw i32 %mul.lcssa5, %mul.lcssa5
+  %mul.1 = mul nsw i32 %mul, %mul
+  %mul.2 = mul nsw i32 %mul.1, %mul.1
+  %mul.3 = mul nsw i32 %mul.2, %mul.2
+  %mul.4 = mul nsw i32 %mul.3, %mul.3
+  %mul.5 = mul nsw i32 %mul.4, %mul.4
+  %mul.6 = mul nsw i32 %mul.5, %mul.5
+  %mul.7 = mul nsw i32 %mul.6, %mul.6
+  %mul.8 = mul nsw i32 %mul.7, %mul.7
+  %mul.9 = mul nsw i32 %mul.8, %mul.8
+  %mul.10 = mul nsw i32 %mul.9, %mul.9
+  %mul.11 = mul nsw i32 %mul.10, %mul.10
+  %mul.12 = mul nsw i32 %mul.11, %mul.11
+  %mul.13 = mul nsw i32 %mul.12, %mul.12
+  %mul.14 = mul nsw i32 %mul.13, %mul.13
+  %mul.15 = mul nsw i32 %mul.14, %mul.14
+  %mul.16 = mul nsw i32 %mul.15, %mul.15
+  %mul.17 = mul nsw i32 %mul.16, %mul.16
+  %mul.18 = mul nsw i32 %mul.17, %mul.17
+  %mul.19 = mul nsw i32 %mul.18, %mul.18
+  %mul.20 = mul nsw i32 %mul.19, %mul.19
+  %mul.21 = mul nsw i32 %mul.20, %mul.20
+  %mul.22 = mul nsw i32 %mul.21, %mul.21
+  %mul.23 = mul nsw i32 %mul.22, %mul.22
+  %mul.24 = mul nsw i32 %mul.23, %mul.23
+  %mul.25 = mul nsw i32 %mul.24, %mul.24
+  %mul.26 = mul nsw i32 %mul.25, %mul.25
+  %mul.27 = mul nsw i32 %mul.26, %mul.26
+  %mul.28 = mul nsw i32 %mul.27, %mul.27
+  %mul.29 = mul nsw i32 %mul.28, %mul.28
+  %mul.30 = mul nsw i32 %mul.29, %mul.29
+  %inc5 = add nuw nsw i32 %i.03, 1
+  %exitcond = icmp ne i32 %inc5, 10
+  br i1 %exitcond, label %for.cond1.preheader, label %for.end6
+
+for.end6:                                         ; preds = %for.body3
+  %mul.lcssa.lcssa = phi i32 [ %mul.30, %for.body3 ]
+  %inc.lcssa.lcssa = phi i32 [ 31, %for.body3 ]
+  store i32 %mul.lcssa.lcssa, i32* @a, align 4
+  store i32 %inc.lcssa.lcssa, i32* @b, align 4
+  ret i32 0
+}