From bf34f1d2b25f1c9a5e0904bdd8145e730268a498 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Sun, 16 Nov 2014 14:24:53 +0000 Subject: [PATCH] Introduce minimalistic cost model for auto parallelization Instead of parallelizing every parallel outermost loop, we now use a very minimalistic cost model. Specifically, we assume innermost loops are not worth parallelizing and all non-innermost loops are. When parallelizing all loops in LNT we got several slowdowns/timeouts due to us parallelizing innermost loops that are executed only a couple of times (number of iterations not known statically). With this basic heuristic enabled LNT does not show any more timeouts, while several interesting loops are still parallelized. There are many ways to obtain an improved heuristic. Constructing such an improved heuristic from a position of minimal slow-down and zero code size increase seems to be the best, as it allows us to track progress on LNT. llvm-svn: 222096 --- polly/lib/CodeGen/IslAst.cpp | 24 +++++++++++++++++-- .../OpenMP/multiple_loops_outer_parallel.ll | 2 +- .../nested_loop_both_parallel_parametric.ll | 2 +- .../Ast/OpenMP/nested_loop_inner_parallel.ll | 2 +- .../Ast/OpenMP/single_loop_param_parallel.ll | 2 +- .../OpenMP/loop-body-references-outer-iv.ll | 4 ++-- .../loop-body-references-outer-values-2.ll | 6 ++--- .../loop-body-references-outer-values-3.ll | 6 ++--- .../loop-body-references-outer-values.ll | 6 ++--- .../Isl/CodeGen/OpenMP/reference-other-bb.ll | 2 +- .../OpenMP/reference-preceeding-loop.ll | 4 ++-- polly/test/Isl/CodeGen/OpenMP/single_loop.ll | 12 +++++----- ...single_loop_with_loop_invariant_baseptr.ll | 6 ++--- ...o-parallel-loops-reference-outer-indvar.ll | 4 ++-- 14 files changed, 51 insertions(+), 31 deletions(-) diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp index 3d442fb21229..b5fb851c8532 100644 --- a/polly/lib/CodeGen/IslAst.cpp +++ b/polly/lib/CodeGen/IslAst.cpp @@ -47,6 +47,11 @@ static cl::opt 
cl::desc("Generate thread parallel code (isl codegen only)"), cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); +static cl::opt PollyParallelForce( + "polly-parallel-force", + cl::desc("Force generation of thread parallel code ignoring any cost model"), + cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); + static cl::opt UseContext("polly-ast-use-context", cl::desc("Use context"), cl::Hidden, cl::init(false), cl::ZeroOrMore, @@ -454,8 +459,23 @@ bool IslAstInfo::isReductionParallel(__isl_keep isl_ast_node *Node) { } bool IslAstInfo::isExecutedInParallel(__isl_keep isl_ast_node *Node) { - return PollyParallel && isOutermostParallel(Node) && - !isReductionParallel(Node); + + if (!PollyParallel) + return false; + + // Do not parallelize innermost loops. + // + // Parallelizing innermost loops is often not profitable, especially if + // they have a low number of iterations. + // + // TODO: Decide this based on the number of loop iterations that will be + // executed. This can possibly require run-time checks, which again + // raises the question of both run-time check overhead and code size + // costs. 
+ if (!PollyParallelForce && isInnermost(Node)) + return false; + + return isOutermostParallel(Node) && !isReductionParallel(Node); } isl_union_map *IslAstInfo::getSchedule(__isl_keep isl_ast_node *Node) { diff --git a/polly/test/Isl/Ast/OpenMP/multiple_loops_outer_parallel.ll b/polly/test/Isl/Ast/OpenMP/multiple_loops_outer_parallel.ll index bcd36bf1d311..f7cea3d9c57b 100644 --- a/polly/test/Isl/Ast/OpenMP/multiple_loops_outer_parallel.ll +++ b/polly/test/Isl/Ast/OpenMP/multiple_loops_outer_parallel.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-parallel -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel -polly-parallel-force -analyze < %s | FileCheck %s ; ; void jd(int *A) { ; CHECK: #pragma omp parallel for diff --git a/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll b/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll index 99e36e6261c6..de18741e7a15 100644 --- a/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll +++ b/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-parallel -analyze -polly-delinearize < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel -polly-parallel-force -analyze -polly-delinearize < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" ; int A[1024][1024]; diff --git a/polly/test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll b/polly/test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll index ba798da718a4..7abd992dd439 100644 --- a/polly/test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll +++ b/polly/test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-parallel -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel 
-polly-parallel-force -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" diff --git a/polly/test/Isl/Ast/OpenMP/single_loop_param_parallel.ll b/polly/test/Isl/Ast/OpenMP/single_loop_param_parallel.ll index 35c20063d362..364a0bd77198 100644 --- a/polly/test/Isl/Ast/OpenMP/single_loop_param_parallel.ll +++ b/polly/test/Isl/Ast/OpenMP/single_loop_param_parallel.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-parallel -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel -polly-parallel-force -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" diff --git a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll index 02bf3ed4a9c0..00821c181999 100644 --- a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll +++ b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll @@ -1,5 +1,5 @@ -; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST -; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR ; This code has failed the scev based code generation as the scev in the scop ; contains an AddRecExpr of an outer loop. 
When generating code, we did not diff --git a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll index 97f9f05581e2..2ecbaf3e4e15 100644 --- a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll +++ b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll @@ -1,6 +1,6 @@ -; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST -; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR -; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR-SCEV +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR-SCEV ; AST: #pragma simd ; AST: #pragma omp parallel for diff --git a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll index 98d39d1147ca..98072371e008 100644 --- a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll +++ b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll @@ -1,6 +1,6 @@ -; RUN: opt %loadPolly -basicaa -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST -; RUN: opt %loadPolly -basicaa -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR -; RUN: opt %loadPolly -basicaa -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -basicaa -polly-parallel 
-polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -basicaa -polly-parallel -polly-parallel-force -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -basicaa -polly-parallel -polly-parallel-force -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR ; The interesting part of this test case is the instruction: ; %tmp = bitcast i8* %call to i64** diff --git a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll index 75b8a5f5c8dd..cbb2c7f64fe1 100644 --- a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll +++ b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll @@ -1,6 +1,6 @@ -; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST -; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR -; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen-isl -S -polly-codegen-scev < %s | FileCheck %s -check-prefix=IR ; Make sure we correctly forward the reference to 'A' to the OpenMP subfunction. 
; diff --git a/polly/test/Isl/CodeGen/OpenMP/reference-other-bb.ll b/polly/test/Isl/CodeGen/OpenMP/reference-other-bb.ll index 055f717a507d..71f183df5a79 100644 --- a/polly/test/Isl/CodeGen/OpenMP/reference-other-bb.ll +++ b/polly/test/Isl/CodeGen/OpenMP/reference-other-bb.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR ; IR: @foo.polly.subfn target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll b/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll index 4d7d40027054..63fb60b4e3f4 100644 --- a/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll +++ b/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll @@ -1,5 +1,5 @@ -; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze -polly-codegen-scev < %s | FileCheck %s -check-prefix=AST -; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze -polly-codegen-scev < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR ; - Test the case where scalar evolution references a loop that is outside diff --git a/polly/test/Isl/CodeGen/OpenMP/single_loop.ll b/polly/test/Isl/CodeGen/OpenMP/single_loop.ll index 76268ab967d1..768f293cab67 100644 --- a/polly/test/Isl/CodeGen/OpenMP/single_loop.ll +++ b/polly/test/Isl/CodeGen/OpenMP/single_loop.ll @@ -1,10 +1,10 @@ -; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST -; RUN: opt %loadPolly 
-polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR -; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR -; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST-STRIDE4 -; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR-STRIDE4 -; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-codegen-scev -S < %s | FileCheck %s -check-prefix=IR-STRIDE4 +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-import-jscop-dir=%S -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST-STRIDE4 +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR-STRIDE4 +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-codegen-scev -S < %s | FileCheck %s -check-prefix=IR-STRIDE4 ; This extensive test case tests the creation of the full set of OpenMP calls ; as well as the subfunction creation using a trivial loop as example. 
diff --git a/polly/test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll b/polly/test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll index 570f475b5812..e35b366611b7 100644 --- a/polly/test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll +++ b/polly/test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll @@ -1,6 +1,6 @@ -; RUN: opt %loadPolly -tbaa -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST -; RUN: opt %loadPolly -tbaa -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR -; RUN: opt %loadPolly -tbaa -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -tbaa -polly-parallel -polly-parallel-force -polly-parallel-force -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -tbaa -polly-parallel -polly-parallel-force -polly-parallel-force -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -tbaa -polly-parallel -polly-parallel-force -polly-parallel-force -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR ; #define N 1024 ; float A[N]; diff --git a/polly/test/Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll b/polly/test/Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll index 646e93d92d13..32b8b94b16ef 100644 --- a/polly/test/Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll +++ b/polly/test/Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll @@ -1,5 +1,5 @@ -; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze -polly-codegen-scev < %s | FileCheck %s -check-prefix=AST -; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-ast -analyze 
-polly-codegen-scev < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR ; This test case verifies that we create correct code even if two OpenMP loops ; share common outer variables.