DCE: Switch to hybrid precise-imprecise analysis

Instead of giving a choice between a precise (but possibly very complex) analysis and an approximate analysis, we now use a hybrid approach that performs N precise steps followed by one approximating step. The precision of the analysis can be raised by increasing N. With a default of N = 2, we get fully precise results for our current test cases and should not run into performance problems for more complex test cases. We can adjust this value once we have gained more experience with this dead code elimination. llvm-svn: 201888
2014-02-21 20:51:46 +00:00 · 2014-02-21 20:51:46 +00:00 · 817d51dd1b
parent ecae17ffca
commit 817d51dd1b
2 changed files with 26 additions and 36 deletions
--- a/polly/lib/DeadCodeElimination.cpp
+++ b/polly/lib/DeadCodeElimination.cpp
@ -44,23 +44,12 @@ using namespace llvm;
 using namespace polly;

 namespace {
-enum DcePrecision {
-  DCE_PRECISION_AUTO,
-  DCE_PRECISION_HULL,
-  DCE_PRECISION_FULL
-};

-cl::opt<DcePrecision> DcePrecision(
-    "polly-dce-precision", cl::desc("Precision of Polyhedral DCE"),
-    cl::values(
-        clEnumValN(DCE_PRECISION_FULL, "full",
-                   "Live set is not approximated at each iteration"),
-        clEnumValN(
-            DCE_PRECISION_HULL, "hull",
-            "Live set is approximated with an affine hull at each iteration"),
-        clEnumValN(DCE_PRECISION_AUTO, "auto", "Currently the same as hull"),
-        clEnumValEnd),
-    cl::init(DCE_PRECISION_AUTO));
+cl::opt<int> DCEPreciseSteps(
+    "polly-dce-precise-steps",
+    cl::desc(
+        "The number of precise steps between two approximating iterations"),
+    cl::init(2));

 class DeadCodeElim : public ScopPass {

@ -75,7 +64,7 @@ public:

 private:
  isl_union_set *getLastWrites(isl_union_map *Writes, isl_union_map *Schedule);
-  bool eliminateDeadCode(Scop &S);
+  bool eliminateDeadCode(Scop &S, int PreciseSteps);
 };
 }

@ -100,7 +89,11 @@ isl_union_set *DeadCodeElim::getLastWrites(__isl_take isl_union_map *Writes,
 /// o Assuming that the last write to each location is live.
 /// o Following each RAW dependency from a live iteration backwards and adding
 ///   that iteration to the live set.
-bool DeadCodeElim::eliminateDeadCode(Scop &S) {
+///
+/// To ensure the set of live iterations does not get too complex we always
+/// combine a certain number of precise steps with one approximating step that
+/// simplifies the live set with an affine hull.
+bool DeadCodeElim::eliminateDeadCode(Scop &S, int PreciseSteps) {
  isl_union_set *Live = this->getLastWrites(S.getWrites(), S.getSchedule());

  Dependences *D = &getAnalysis<Dependences>();
@ -108,8 +101,10 @@ bool DeadCodeElim::eliminateDeadCode(Scop &S) {
  Dep = isl_union_map_reverse(Dep);

  isl_union_set *OriginalDomain = S.getDomains();
+  int Steps = 0;
   while (true) {
    isl_union_set *Extra;
+    Steps++;

    Extra =
        isl_union_set_apply(isl_union_set_copy(Live), isl_union_map_copy(Dep));
@ -120,8 +115,12 @@ bool DeadCodeElim::eliminateDeadCode(Scop &S) {
    }

    Live = isl_union_set_union(Live, Extra);
-    if (DcePrecision != DCE_PRECISION_FULL)
+
+    if (Steps > PreciseSteps) {
+      Steps = 0;
      Live = isl_union_set_affine_hull(Live);
+    }
+
    Live = isl_union_set_intersect(Live, isl_union_set_copy(OriginalDomain));
  }
  isl_union_map_free(Dep);
@ -130,7 +129,9 @@ bool DeadCodeElim::eliminateDeadCode(Scop &S) {
  return S.restrictDomains(isl_union_set_coalesce(Live));
 }

-bool DeadCodeElim::runOnScop(Scop &S) { return eliminateDeadCode(S); }
+bool DeadCodeElim::runOnScop(Scop &S) {
+  return eliminateDeadCode(S, DCEPreciseSteps);
+}

 void DeadCodeElim::printScop(raw_ostream &OS) const {}

--- a/polly/test/DeadCodeElimination/dead_iteration_elimination.ll
+++ b/polly/test/DeadCodeElimination/dead_iteration_elimination.ll
@ -1,5 +1,4 @@
-; RUN: opt -S %loadPolly -basicaa -polly-dependences-analysis-type=value-based -polly-ast -analyze < %s | FileCheck %s
-; RUN: opt -S %loadPolly -basicaa -polly-dependences-analysis-type=value-based -polly-dce-precision=full -polly-dce -polly-ast -analyze < %s | FileCheck %s -check-prefix=CHECK-DCE
+; RUN: opt -S %loadPolly -basicaa -polly-dependences-analysis-type=value-based -polly-dce -polly-ast -analyze < %s | FileCheck %s -check-prefix=CHECK
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-pc-linux-gnu"
 ;
@ -69,22 +68,12 @@ exit.4:
  ret void
 }

-; CHECK: for (int c1 = 0; c1 <= 199; c1 += 1)
+; CHECK: for (int c1 = 50; c1 <= 99; c1 += 1)
+; CHECK:   Stmt_for_body_1(c1);
+; CHECK: for (int c1 = 110; c1 <= 199; c1 += 1)
 ; CHECK:   Stmt_for_body_1(c1);
 ; CHECK: for (int c1 = 0; c1 <= 49; c1 += 1)
 ; CHECK:   Stmt_for_body_2(c1);
 ; CHECK: for (int c1 = 0; c1 <= 69; c1 += 1)
 ; CHECK:   Stmt_for_body_3(c1);
 ; CHECK: for (int c1 = 0; c1 <= 9; c1 += 1)
-; CHECK:   Stmt_for_body_4(c1);
-
-; CHECK-DCE: for (int c1 = 50; c1 <= 99; c1 += 1)
-; CHECK-DCE:   Stmt_for_body_1(c1);
-; CHECK-DCE: for (int c1 = 110; c1 <= 199; c1 += 1)
-; CHECK-DCE:   Stmt_for_body_1(c1);
-; CHECK-DCE: for (int c1 = 0; c1 <= 49; c1 += 1)
-; CHECK-DCE:   Stmt_for_body_2(c1);
-; CHECK-DCE: for (int c1 = 0; c1 <= 69; c1 += 1)
-; CHECK-DCE:   Stmt_for_body_3(c1);
-; CHECK-DCE: for (int c1 = 0; c1 <= 9; c1 += 1)
-; CHECK-DCE:   Stmt_for_body_4(c1);