Add polyhedral dead code elimination.

This pass eliminates loop iterations that compute results that are never used
later on. This can help e.g. in D, where the default zero-initialization of an
array is often unnecessary because new values are assigned to it right afterwards.

Contributed-by: Peter Conn <conn.peter@gmail.com>
llvm-svn: 201817
This commit is contained in:
Tobias Grosser 2014-02-20 21:43:54 +00:00
parent 3fc9154c40
commit 37eb422f69
7 changed files with 461 additions and 10 deletions

View File

@ -44,6 +44,7 @@ struct isl_basic_map;
struct isl_id;
struct isl_set;
struct isl_union_set;
struct isl_union_map;
struct isl_space;
struct isl_constraint;
@ -368,6 +369,11 @@ public:
/// @return The induction variable at a certain dimension.
const PHINode *getInductionVariableForDimension(unsigned Dimension) const;
/// @brief Restrict the domain of the statement.
///
/// The implementation asserts that the new domain is a subset of the current
/// one and also restricts the statement's scattering to the new domain.
///
/// @param NewDomain The new statement domain. Ownership is taken
///                  (__isl_take).
void restrictDomain(__isl_take isl_set *NewDomain);
/// @brief Get the loop for a dimension.
///
/// @param Dimension The dimension of the induction variable
@ -605,6 +611,20 @@ public:
/// @brief Get a union set containing the iteration domains of all statements.
__isl_give isl_union_set *getDomains();
/// @brief Get a union map of all writes performed in the SCoP.
///
/// @return The union of the access relations of all write accesses, each
///         restricted to the domain of its statement. The caller owns the
///         result (__isl_give).
__isl_give isl_union_map *getWrites();
/// @brief Get a union map of all reads performed in the SCoP.
///
/// @return The union of the access relations of all read accesses, each
///         restricted to the domain of its statement. The caller owns the
///         result (__isl_give).
__isl_give isl_union_map *getReads();
/// @brief Get the schedule of all the statements in the SCoP.
///
/// @return The union of the scattering maps of all statements. The caller
///         owns the result (__isl_give).
__isl_give isl_union_map *getSchedule();
/// @brief Intersect the domain of every statement in the SCoP with @p Domain.
///
/// Statements whose domain is already contained in @p Domain are left
/// untouched.
///
/// @param Domain The union set each statement domain is intersected with.
///               Ownership is taken (__isl_take).
///
/// @return true if the domain of at least one statement was reduced.
bool restrictDomains(__isl_take isl_union_set *Domain);
};
/// @brief Print Scop scop to raw_ostream O.

View File

@ -38,6 +38,7 @@
#include "isl/constraint.h"
#include "isl/set.h"
#include "isl/map.h"
#include "isl/union_map.h"
#include "isl/aff.h"
#include "isl/printer.h"
#include "isl/local_space.h"
@ -490,6 +491,14 @@ void MemoryAccess::setNewAccessRelation(isl_map *newAccess) {
/// Hand out a copy of the statement's scattering map (via isl_map_copy).
isl_map *ScopStmt::getScattering() const {
  isl_map *ScatteringCopy = isl_map_copy(Scattering);
  return ScatteringCopy;
}
/// Replace the statement domain by @p NewDomain and restrict the scattering
/// to it.
///
/// @param NewDomain The new domain; must be a subset of the current domain
///                  (asserted). Ownership is taken.
void ScopStmt::restrictDomain(__isl_take isl_set *NewDomain) {
  assert(isl_set_is_subset(NewDomain, Domain) &&
         "New domain is not a subset of old domain!");

  // Swap in the new domain and drop the reference to the previous one.
  isl_set *PreviousDomain = Domain;
  isl_set_free(PreviousDomain);
  Domain = NewDomain;

  // Keep the scattering defined only on the iterations that are left.
  isl_set *DomainCopy = isl_set_copy(Domain);
  Scattering = isl_map_intersect_domain(Scattering, DomainCopy);
}
void ScopStmt::setScattering(isl_map *NewScattering) {
isl_map_free(Scattering);
Scattering = NewScattering;
@ -954,6 +963,90 @@ __isl_give isl_union_set *Scop::getDomains() {
return Domain;
}
/// Collect the write accesses of all statements into one union map.
///
/// Every write access relation is restricted to the domain of the statement
/// it belongs to before it is added. The result is coalesced.
__isl_give isl_union_map *Scop::getWrites() {
  isl_union_map *AllWrites = isl_union_map_empty(this->getParamSpace());

  for (Scop::iterator StmtIt = this->begin(), StmtEnd = this->end();
       StmtIt != StmtEnd; ++StmtIt) {
    ScopStmt *Statement = *StmtIt;
    ScopStmt::memacc_iterator AccIt = Statement->memacc_begin();
    ScopStmt::memacc_iterator AccEnd = Statement->memacc_end();

    for (; AccIt != AccEnd; ++AccIt) {
      // Only write accesses contribute.
      if ((*AccIt)->isWrite()) {
        isl_set *StmtDomain = Statement->getDomain();
        isl_map *Relation = (*AccIt)->getAccessRelation();
        Relation = isl_map_intersect_domain(Relation, StmtDomain);
        AllWrites = isl_union_map_add_map(AllWrites, Relation);
      }
    }
  }

  return isl_union_map_coalesce(AllWrites);
}
/// Collect the read accesses of all statements into one union map.
///
/// Every read access relation is restricted to the domain of the statement
/// it belongs to before it is added. The result is coalesced.
__isl_give isl_union_map *Scop::getReads() {
  isl_union_map *AllReads = isl_union_map_empty(this->getParamSpace());

  for (Scop::iterator StmtIt = this->begin(), StmtEnd = this->end();
       StmtIt != StmtEnd; ++StmtIt) {
    ScopStmt *Statement = *StmtIt;
    ScopStmt::memacc_iterator AccIt = Statement->memacc_begin();
    ScopStmt::memacc_iterator AccEnd = Statement->memacc_end();

    for (; AccIt != AccEnd; ++AccIt) {
      // Only read accesses contribute.
      if ((*AccIt)->isRead()) {
        isl_set *StmtDomain = Statement->getDomain();
        isl_map *Relation = (*AccIt)->getAccessRelation();
        Relation = isl_map_intersect_domain(Relation, StmtDomain);
        AllReads = isl_union_map_add_map(AllReads, Relation);
      }
    }
  }

  return isl_union_map_coalesce(AllReads);
}
/// Build the union of the scattering maps of all statements, coalesced.
__isl_give isl_union_map *Scop::getSchedule() {
  isl_union_map *Combined = isl_union_map_empty(this->getParamSpace());

  Scop::iterator StmtIt = this->begin();
  Scop::iterator StmtEnd = this->end();
  while (StmtIt != StmtEnd) {
    isl_map *StmtScattering = (*StmtIt)->getScattering();
    Combined = isl_union_map_add_map(Combined, StmtScattering);
    ++StmtIt;
  }

  return isl_union_map_coalesce(Combined);
}
/// Intersect the domain of each statement with @p Domain.
///
/// @param Domain The set to intersect with. Ownership is taken; it is freed
///               before returning.
///
/// @return true if at least one statement domain was reduced.
bool Scop::restrictDomains(__isl_take isl_union_set *Domain) {
  bool AnyRestricted = false;

  for (Scop::iterator StmtIt = this->begin(), StmtEnd = this->end();
       StmtIt != StmtEnd; ++StmtIt) {
    ScopStmt *Statement = *StmtIt;

    isl_union_set *OldDomain = isl_union_set_from_set(Statement->getDomain());
    isl_union_set *Restricted = isl_union_set_intersect(
        isl_union_set_copy(OldDomain), isl_union_set_copy(Domain));

    // If the old domain is contained in the intersection, nothing was cut off.
    const bool Unchanged = isl_union_set_is_subset(OldDomain, Restricted);
    isl_union_set_free(OldDomain);
    if (Unchanged) {
      isl_union_set_free(Restricted);
      continue;
    }

    AnyRestricted = true;
    Restricted = isl_union_set_coalesce(Restricted);

    if (!isl_union_set_is_empty(Restricted)) {
      Statement->restrictDomain(isl_set_from_union_set(Restricted));
      continue;
    }

    // An empty intersection is replaced by an explicit empty set living in
    // the statement's own domain space.
    Statement->restrictDomain(isl_set_empty(Statement->getDomainSpace()));
    isl_union_set_free(Restricted);
  }

  isl_union_set_free(Domain);
  return AnyRestricted;
}
ScalarEvolution *Scop::getSE() const { return SE; }
bool Scop::isTrivialBB(BasicBlock *BB, TempScop &tempScop) {

View File

@ -20,15 +20,34 @@
//===----------------------------------------------------------------------===//
#include "polly/Dependences.h"
#include "isl/aff_type.h"
#include "isl/union_map.h"
#include "polly/LinkAllPasses.h"
#include "polly/ScopInfo.h"
#include "llvm/Support/CommandLine.h"
#include "isl/set.h"
#include "isl/map.h"
#include "isl/union_map.h"
using namespace llvm;
using namespace polly;
namespace {
/// Precision levels selectable for the polyhedral dead code elimination.
enum DcePrecision {
// Default value of the option below; its help text describes it as
// "Currently the same as hull".
DCE_PRECISION_AUTO,
// The live set is approximated with an affine hull at each iteration.
DCE_PRECISION_HULL,
// The live set is not approximated.
DCE_PRECISION_FULL
};
// Command line option -polly-dce-precision=<full|hull|auto>, initialized to
// auto. Note that the option object has the same name as the enum type and
// therefore hides the type name for unqualified uses after this declaration.
cl::opt<DcePrecision> DcePrecision(
"polly-dce-precision", cl::desc("Precision of Polyhedral DCE"),
cl::values(
clEnumValN(DCE_PRECISION_FULL, "full",
"Live set is not approximated at each iteration"),
clEnumValN(
DCE_PRECISION_HULL, "hull",
"Live set is approximated with an affine hull at each iteration"),
clEnumValN(DCE_PRECISION_AUTO, "auto", "Currently the same as hull"),
clEnumValEnd),
cl::init(DCE_PRECISION_AUTO));
class DeadCodeElim : public ScopPass {
@ -37,25 +56,69 @@ public:
explicit DeadCodeElim() : ScopPass(ID) {}
virtual bool runOnScop(Scop &S);
void printScop(llvm::raw_ostream &OS) const;
void getAnalysisUsage(AnalysisUsage &AU) const;
private:
isl_union_set *getLastWrites(isl_union_map *Writes, isl_union_map *Schedule);
bool eliminateDeadCode(Scop &S);
};
}
char DeadCodeElim::ID = 0;
// NOTE(review): this span looks like a diff rendering in which lines of the
// previous runOnScop body (the Kinds/Deps statements and `return false`) are
// interleaved with the new getLastWrites body -- confirm against the
// committed file before editing any code here.
bool DeadCodeElim::runOnScop(Scop &S) {
Dependences *D = &getAnalysis<Dependences>();
/// @brief Return the set of iterations that contain the last write for each
///        location.
///
/// Every written location is mapped to the schedule times at which it is
/// written, the lexicographically largest time per location is kept, and
/// these times are mapped back to iterations through the reversed schedule.
///
/// @param Writes   The writes to consider. Ownership is taken (__isl_take).
/// @param Schedule The schedule used to order the writes. Ownership is taken
///                 (__isl_take).
///
/// @return The coalesced range of the last-write relation.
isl_union_set *DeadCodeElim::getLastWrites(__isl_take isl_union_map *Writes,
__isl_take isl_union_map *Schedule) {
// Reverse the writes and compose them with the schedule.
isl_union_map *WriteIterations = isl_union_map_reverse(Writes);
isl_union_map *WriteTimes =
isl_union_map_apply_range(WriteIterations, isl_union_map_copy(Schedule));
int Kinds =
Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
// Keep only the lexicographic maximum of the write times.
isl_union_map *LastWriteTimes = isl_union_map_lexmax(WriteTimes);
// Translate the remaining times back through the reversed schedule.
isl_union_map *LastWriteIterations = isl_union_map_apply_range(
LastWriteTimes, isl_union_map_reverse(Schedule));
isl_union_map *Deps = D->getDependences(Kinds);
isl_union_map_free(Deps);
return false;
isl_union_set *Live = isl_union_map_range(LastWriteIterations);
return isl_union_set_coalesce(Live);
}
/// Remove statement iterations whose results are never used.
///
/// The live set starts as the iterations holding the last write to each
/// location. It is then grown to a fixed point by following the reversed RAW
/// dependences from live iterations. Unless the precision is "full", the live
/// set is over-approximated by its affine hull after every growth step and
/// clipped back to the original iteration domains.
///
/// @return The result of Scop::restrictDomains() applied to the live set.
bool DeadCodeElim::eliminateDeadCode(Scop &S) {
  isl_union_set *Live = this->getLastWrites(S.getWrites(), S.getSchedule());

  Dependences *DepAnalysis = &getAnalysis<Dependences>();
  isl_union_map *ReversedRAW = DepAnalysis->getDependences(Dependences::TYPE_RAW);
  ReversedRAW = isl_union_map_reverse(ReversedRAW);

  isl_union_set *AllIterations = S.getDomains();

  // Iterations reached from the current live set through the reversed RAW
  // dependences.
  isl_union_set *Reached = isl_union_set_apply(isl_union_set_copy(Live),
                                               isl_union_map_copy(ReversedRAW));

  // Grow the live set until a step adds nothing new.
  while (!isl_union_set_is_subset(Reached, Live)) {
    Live = isl_union_set_union(Live, Reached);

    if (DcePrecision != DCE_PRECISION_FULL)
      Live = isl_union_set_affine_hull(Live);

    Live = isl_union_set_intersect(Live, isl_union_set_copy(AllIterations));

    Reached = isl_union_set_apply(isl_union_set_copy(Live),
                                  isl_union_map_copy(ReversedRAW));
  }
  isl_union_set_free(Reached);

  isl_union_map_free(ReversedRAW);
  isl_union_set_free(AllIterations);

  return S.restrictDomains(isl_union_set_coalesce(Live));
}
/// Run dead code elimination on @p S and forward its result.
bool DeadCodeElim::runOnScop(Scop &S) {
  const bool Changed = eliminateDeadCode(S);
  return Changed;
}
/// Print hook of the pass; the body is empty, so nothing is written to @p OS.
void DeadCodeElim::printScop(raw_ostream &OS) const {
}
void DeadCodeElim::getAnalysisUsage(AnalysisUsage &AU) const {

View File

@ -0,0 +1,62 @@
; RUN: opt -S %loadPolly -basicaa -polly-dependences-analysis-type=value-based -polly-ast -analyze < %s | FileCheck %s
; RUN: opt -S %loadPolly -basicaa -polly-dependences-analysis-type=value-based -polly-dce -polly-ast -analyze < %s | FileCheck %s -check-prefix=CHECK-DCE
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
;
; for(i = 0; i < 200; i++ )
; A[i] = 2;
;
; for (i = 0; i < 200; i++ )
; A[i] = 5;
;
; for (i = 0; i < 200; i++ )
; A[i] = 7;
; Three consecutive loops that each store a constant to A[0..199]. The run
; with -polly-dce expects only the last loop (for.body.3) to remain.
define void @main() nounwind uwtable {
entry:
%A = alloca [200 x i32], align 16
br label %for.body.1
; Loop 1: A[i] = 2 for i = 0..199.
for.body.1:
%indvar.1 = phi i64 [ 0, %entry ], [ %indvar.next.1, %for.body.1 ]
%arrayidx.1 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.1
store i32 2, i32* %arrayidx.1, align 4
%indvar.next.1 = add i64 %indvar.1, 1
%exitcond.1 = icmp ne i64 %indvar.next.1, 200
br i1 %exitcond.1, label %for.body.1, label %exit.1
exit.1:
br label %for.body.2
; Loop 2: A[i] = 5 for i = 0..199.
for.body.2:
%indvar.2 = phi i64 [ 0, %exit.1 ], [ %indvar.next.2, %for.body.2 ]
%arrayidx.2 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.2
store i32 5, i32* %arrayidx.2, align 4
%indvar.next.2 = add i64 %indvar.2, 1
%exitcond.2 = icmp ne i64 %indvar.next.2, 200
br i1 %exitcond.2, label %for.body.2, label %exit.2
exit.2:
br label %for.body.3
; Loop 3: A[i] = 7 for i = 0..199.
for.body.3:
%indvar.3 = phi i64 [ 0, %exit.2 ], [ %indvar.next.3, %for.body.3 ]
%arrayidx.3 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.3
store i32 7, i32* %arrayidx.3, align 4
%indvar.next.3 = add i64 %indvar.3, 1
%exitcond.3 = icmp ne i64 %indvar.next.3, 200
br i1 %exitcond.3, label %for.body.3 , label %exit.3
exit.3:
ret void
}
; CHECK: for (int c1 = 0; c1 <= 199; c1 += 1)
; CHECK: Stmt_for_body_1(c1);
; CHECK: for (int c1 = 0; c1 <= 199; c1 += 1)
; CHECK: Stmt_for_body_2(c1);
; CHECK: for (int c1 = 0; c1 <= 199; c1 += 1)
; CHECK: Stmt_for_body_3(c1);
; CHECK-DCE: for (int c1 = 0; c1 <= 199; c1 += 1)
; CHECK-DCE: Stmt_for_body_3(c1);

View File

@ -0,0 +1,66 @@
; RUN: opt -S %loadPolly -basicaa -polly-dependences-analysis-type=value-based -polly-ast -analyze < %s | FileCheck %s
; RUN: opt -S %loadPolly -basicaa -polly-dependences-analysis-type=value-based -polly-dce -polly-ast -analyze < %s | FileCheck %s -check-prefix=CHECK-DCE
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
;
; for(i = 0; i < 200; i++ )
; A[i] = 2;
;
; for (i = 0; i < 200; i++ )
; B[i] = A[i];
;
; for (i = 0; i < 200; i++ )
; B[i] = A[i] = 5;
; Loop 1 writes A, loop 2 copies A into B, loop 3 overwrites both A and B.
; The run with -polly-dce expects only the last loop (for.body.3) to remain.
define void @main() nounwind uwtable {
entry:
%A = alloca [200 x i32], align 16
%B = alloca [200 x i32], align 16
br label %for.body.1
; Loop 1: A[i] = 2 for i = 0..199.
for.body.1:
%indvar.1 = phi i64 [ 0, %entry ], [ %indvar.next.1, %for.body.1 ]
%arrayidx.1 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.1
store i32 2, i32* %arrayidx.1, align 4
%indvar.next.1 = add i64 %indvar.1, 1
%exitcond.1 = icmp ne i64 %indvar.next.1, 200
br i1 %exitcond.1, label %for.body.1, label %exit.1
exit.1:
br label %for.body.2
; Loop 2: B[i] = A[i] for i = 0..199.
for.body.2:
%indvar.2 = phi i64 [ 0, %exit.1 ], [ %indvar.next.2, %for.body.2 ]
%arrayidx.2.a = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.2
%val = load i32* %arrayidx.2.a, align 4
%arrayidx.2.b = getelementptr [200 x i32]* %B, i64 0, i64 %indvar.2
store i32 %val, i32* %arrayidx.2.b, align 4
%indvar.next.2 = add i64 %indvar.2, 1
%exitcond.2 = icmp ne i64 %indvar.next.2, 200
br i1 %exitcond.2, label %for.body.2, label %exit.2
exit.2:
br label %for.body.3
; Loop 3: A[i] = 5 and B[i] = 5 for i = 0..199.
for.body.3:
%indvar.3 = phi i64 [ 0, %exit.2 ], [ %indvar.next.3, %for.body.3 ]
%arrayidx.3.a = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.3
%arrayidx.3.b = getelementptr [200 x i32]* %B, i64 0, i64 %indvar.3
store i32 5, i32* %arrayidx.3.a, align 4
store i32 5, i32* %arrayidx.3.b, align 4
%indvar.next.3 = add i64 %indvar.3, 1
%exitcond.3 = icmp ne i64 %indvar.next.3, 200
br i1 %exitcond.3, label %for.body.3 , label %exit.3
exit.3:
ret void
}
; CHECK: for (int c1 = 0; c1 <= 199; c1 += 1)
; CHECK: Stmt_for_body_1(c1);
; CHECK: for (int c1 = 0; c1 <= 199; c1 += 1)
; CHECK: Stmt_for_body_2(c1);
; CHECK: for (int c1 = 0; c1 <= 199; c1 += 1)
; CHECK: Stmt_for_body_3(c1);
; CHECK-DCE: for (int c1 = 0; c1 <= 199; c1 += 1)
; CHECK-DCE: Stmt_for_body_3(c1);

View File

@ -0,0 +1,90 @@
; RUN: opt -S %loadPolly -basicaa -polly-dependences-analysis-type=value-based -polly-ast -analyze < %s | FileCheck %s
; RUN: opt -S %loadPolly -basicaa -polly-dependences-analysis-type=value-based -polly-dce-precision=full -polly-dce -polly-ast -analyze < %s | FileCheck %s -check-prefix=CHECK-DCE
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
;
; for(i = 0; i < 200; i++ )
; A[i] = 2;
;
; for (i = 0; i < 50; i++ )
; A[i] = 5;
;
; for (i = 0; i < 70; i++ )
; A[i] = A[i] + 5;
;
; for (i = 100; i < 110; i++ )
; A[i] = i;
;
; Four loops over A with different ranges. With -polly-dce-precision=full the
; DCE run expects loop 1 to be reduced to the iterations 50..99 and 110..199,
; while loops 2, 3 and 4 keep their full ranges.
define void @main() nounwind uwtable {
entry:
%A = alloca [200 x i32], align 16
br label %for.body.1
; Loop 1: A[i] = 2 for i = 0..199.
for.body.1:
%indvar.1 = phi i64 [ 0, %entry ], [ %indvar.next.1, %for.body.1 ]
%arrayidx.1 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.1
store i32 2, i32* %arrayidx.1, align 4
%indvar.next.1 = add i64 %indvar.1, 1
%exitcond.1 = icmp ne i64 %indvar.next.1, 200
br i1 %exitcond.1, label %for.body.1, label %exit.1
exit.1:
br label %for.body.2
; Loop 2: A[i] = 5 for i = 0..49.
for.body.2:
%indvar.2 = phi i64 [ 0, %exit.1 ], [ %indvar.next.2, %for.body.2 ]
%arrayidx.2 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.2
store i32 5, i32* %arrayidx.2, align 4
%indvar.next.2 = add i64 %indvar.2, 1
%exitcond.2 = icmp ne i64 %indvar.next.2, 50
br i1 %exitcond.2, label %for.body.2, label %exit.2
exit.2:
br label %for.body.3
; Loop 3: A[i] = A[i] + 5 for i = 0..69 (reads the value it overwrites).
for.body.3:
%indvar.3 = phi i64 [ 0, %exit.2 ], [ %indvar.next.3, %for.body.3 ]
%arrayidx.3 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.3
%val = load i32* %arrayidx.3, align 4
%add = add nsw i32 %val, 5
store i32 %add, i32* %arrayidx.3, align 4
%indvar.next.3 = add i64 %indvar.3, 1
%exitcond.3 = icmp ne i64 %indvar.next.3, 70
br i1 %exitcond.3, label %for.body.3 , label %exit.3
exit.3:
br label %for.body.4
; Loop 4: A[i + 100] = i + 100 for i = 0..9, i.e. elements 100..109.
for.body.4:
%indvar.4 = phi i64 [ 0, %exit.3 ], [ %indvar.next.4, %for.body.4 ]
%indvar.plus = add i64 %indvar.4, 100
%trunc = trunc i64 %indvar.plus to i32
%arrayidx.4 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.plus
store i32 %trunc, i32* %arrayidx.4, align 4
%indvar.next.4 = add i64 %indvar.4, 1
%exitcond.4 = icmp ne i64 %indvar.next.4, 10
br i1 %exitcond.4, label %for.body.4, label %exit.4
exit.4:
ret void
}
; CHECK: for (int c1 = 0; c1 <= 199; c1 += 1)
; CHECK: Stmt_for_body_1(c1);
; CHECK: for (int c1 = 0; c1 <= 49; c1 += 1)
; CHECK: Stmt_for_body_2(c1);
; CHECK: for (int c1 = 0; c1 <= 69; c1 += 1)
; CHECK: Stmt_for_body_3(c1);
; CHECK: for (int c1 = 0; c1 <= 9; c1 += 1)
; CHECK: Stmt_for_body_4(c1);
; CHECK-DCE: for (int c1 = 50; c1 <= 99; c1 += 1)
; CHECK-DCE: Stmt_for_body_1(c1);
; CHECK-DCE: for (int c1 = 110; c1 <= 199; c1 += 1)
; CHECK-DCE: Stmt_for_body_1(c1);
; CHECK-DCE: for (int c1 = 0; c1 <= 49; c1 += 1)
; CHECK-DCE: Stmt_for_body_2(c1);
; CHECK-DCE: for (int c1 = 0; c1 <= 69; c1 += 1)
; CHECK-DCE: Stmt_for_body_3(c1);
; CHECK-DCE: for (int c1 = 0; c1 <= 9; c1 += 1)
; CHECK-DCE: Stmt_for_body_4(c1);

View File

@ -0,0 +1,57 @@
; RUN: opt -S %loadPolly -basicaa -polly-dependences-analysis-type=value-based -polly-dce -polly-ast -analyze < %s | FileCheck %s -check-prefix=CHECK-DCE
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
; A[0] = 1;
;
; for(i = 0; i < 100; i++ )
; A[i+1] = A[i] * 2;
;
; for (i = 0; i < 200; i++ )
; A[i] = B[i] * 2;
; A[0] is initialized, loop 1 propagates values along A (A[i+1] = A[i] * 2),
; and loop 2 overwrites A[0..199] from B. The run with -polly-dce expects only
; loop 2 (for.body.2) to remain.
define void @main() nounwind uwtable {
entry:
%A = alloca [200 x i32], align 16
%B = alloca [200 x i32], align 16
; A[0] = 1.
%A.zero = getelementptr [200 x i32]* %A, i64 0, i64 0
store i32 1, i32* %A.zero, align 4
br label %for.body.1
; Loop 1: A[i + 1] = A[i] * 2 for i = 0..99.
for.body.1:
%indvar.1 = phi i64 [ 0, %entry ], [ %indvar.next.1, %for.body.1 ]
%indvar.next.1 = add i64 %indvar.1, 1
%A.current.1 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.1
%val1.1 = load i32* %A.current.1, align 4
%val2.1 = mul i32 %val1.1, 2
%A.next.1 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.next.1
store i32 %val2.1, i32* %A.next.1, align 4
%exitcond.1 = icmp ne i64 %indvar.next.1, 100
br i1 %exitcond.1, label %for.body.1, label %exit.1
exit.1:
br label %for.body.2
; Loop 2: A[i] = B[i] * 2 for i = 0..199.
for.body.2:
%indvar.2 = phi i64 [ 0, %exit.1 ], [ %indvar.next.2, %for.body.2 ]
%B.current.2 = getelementptr [200 x i32]* %B, i64 0, i64 %indvar.2
%val1.2 = load i32* %B.current.2, align 4
%val2.2 = mul i32 %val1.2, 2
%A.current.2 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.2
store i32 %val2.2, i32* %A.current.2, align 4
%indvar.next.2 = add i64 %indvar.2, 1
%exitcond.2 = icmp ne i64 %indvar.next.2, 200
br i1 %exitcond.2, label %for.body.2, label %exit.3
exit.3:
ret void
}
; CHECK-DCE: for (int c1 = 0; c1 <= 199; c1 += 1)
; CHECK-DCE: Stmt_for_body_2(c1);