[Polly][Bug fix] Wrong dependences filtering during Fully Indexed expansion

Summary:
When expanding memory accesses, the current version of Polly uses statement-level dependences. The current implementation does not work when a statement has multiple dependences. For example, in the following source code:
```
void mse(double A[Ni], double B[Nj], double C[Nj], double D[Nj]) {
  int i,j;
  for (j = 0; j < Ni; j++) {
    for (int i = 0; i<Nj; i++)
S:    B[i] = i;
    for (int i = 0; i<Nj; i++)
T:    D[i] = i;

U:  A[j] = B[j];
      C[j] = D[j];
  }
}
```
The statement U has two dependences: one on S and one on T. The current version of Polly fails during expansion.

This patch fixes this bug by using reference-level dependences, which make it possible to filter the dependences by statement and by memory reference. The principle of expansion remains the same as before.

We also noticed that we need to bail out if a load comes after a store (to the same location) in the same statement, so a check was added to isExpandable.

Contributed by: Nicholas Bonfante <nicolas.bonfante@insa-lyon.fr>

Reviewers: Meinersbur, simbuerg, bollu

Reviewed By: Meinersbur, simbuerg

Subscribers: pollydev, llvm-commits

Differential Revision: https://reviews.llvm.org/D36791

llvm-svn: 311165
This commit is contained in:
Andreas Simbuerger 2017-08-18 15:01:18 +00:00
parent ccaec26175
commit 8d5b257d02
7 changed files with 427 additions and 208 deletions

View File

@ -66,7 +66,7 @@ private:
bool isExpandable(const ScopArrayInfo *SAI,
SmallPtrSetImpl<MemoryAccess *> &Writes,
SmallPtrSetImpl<MemoryAccess *> &Reads, Scop &S,
isl::union_map &Dependences);
const isl::union_map &Dependences);
/// Expand a write memory access.
///
@ -76,12 +76,21 @@ private:
/// Expand the read memory access.
///
/// @param The SCop in which the memory access appears in.
/// @param The memory access that need to be expanded.
/// @param S The SCop in which the memory access appears in.
/// @param MA The memory access that need to be expanded.
/// @param Dependences The RAW dependences of the SCop.
/// @param ExpandedSAI The expanded SAI created during write expansion.
void expandRead(Scop &S, MemoryAccess *MA, isl::union_map &Dependences,
void expandRead(Scop &S, MemoryAccess *MA, const isl::union_map &Dependences,
ScopArrayInfo *ExpandedSAI);
/// Filter the dependences to have only one related to current memory access.
///
/// @param S The SCop in which the memory access appears in.
/// @param MapDependences The dependences to filter.
/// @param MA The memory access that need to be expanded.
isl::union_map filterDependences(Scop &S,
const isl::union_map &MapDependences,
MemoryAccess *MA);
};
} // namespace
@ -146,13 +155,59 @@ isl::val getConstant(isl::pw_aff PwAff, bool Max, bool Min) {
char MaximalStaticExpander::ID = 0;
/// Keep only the dependences that concern the memory access @p MA.
///
/// Every map of the reversed @p Dependences is inspected. A map is kept, in
/// its domain-factored form, when (1) it can be curried, (2) the array
/// referenced in its wrapped domain is the latest ScopArrayInfo of @p MA, and
/// (3) the domain tuple id of the factored map is the same id as the domain
/// of the access relation of @p MA.
///
/// @param S            The SCoP; only its parameter space is used here, to
///                     create the initially empty result.
/// @param Dependences  The dependences to filter.
/// @param MA           The memory access the result must relate to.
///
/// @returns The union of all factored maps that passed the three checks.
isl::union_map MaximalStaticExpander::filterDependences(
    Scop &S, const isl::union_map &Dependences, MemoryAccess *MA) {
  auto TargetSAI = MA->getLatestScopArrayInfo();
  auto TargetStmtId = MA->getAccessRelation().domain().get_tuple_id();

  isl::union_map Filtered = isl::union_map::empty(S.getParamSpace());

  auto CollectRelevant = [&Filtered, &TargetStmtId,
                          &TargetSAI](isl::map Dep) -> isl::stat {
    // Maps that cannot be curried are statement-to-statement dependences;
    // they carry no array reference to filter on, so they are ignored.
    if (!Dep.can_curry())
      return isl::stat::ok;

    // The range of the unwrapped domain names the accessed array; its id's
    // user pointer is taken to be the corresponding ScopArrayInfo.
    auto ArrayId =
        Dep.get_space().domain().unwrap().range().get_tuple_id(isl::dim::set);
    ScopArrayInfo *DepSAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
    if (TargetSAI != DepSAI)
      return isl::stat::ok;

    // Drop the array part to obtain the plain S1[] -> S2[] dependence and
    // record it only if it starts at the statement of the access.
    auto StmtDep = Dep.factor_domain();
    auto StmtDepId = StmtDep.domain().get_tuple_id();
    if (TargetStmtId.keep() == StmtDepId.keep())
      Filtered = Filtered.add_map(StmtDep);

    return isl::stat::ok;
  };

  Dependences.reverse().foreach_map(CollectRelevant);

  return Filtered;
}
bool MaximalStaticExpander::isExpandable(
const ScopArrayInfo *SAI, SmallPtrSetImpl<MemoryAccess *> &Writes,
SmallPtrSetImpl<MemoryAccess *> &Reads, Scop &S,
isl::union_map &Dependences) {
const isl::union_map &Dependences) {
int NumberWrites = 0;
for (ScopStmt &Stmt : S) {
auto StmtReads = isl::union_map::empty(S.getParamSpace());
auto StmtWrites = isl::union_map::empty(S.getParamSpace());
for (MemoryAccess *MA : Stmt) {
// Check if the current MemoryAccess involved the current SAI.
@ -166,6 +221,27 @@ bool MaximalStaticExpander::isExpandable(
return false;
}
// For now, we are not able to expand array where read come after write
// (to the same location) in a same statement.
auto AccRel = isl::union_map(MA->getAccessRelation());
if (MA->isRead()) {
// Reject load after store to same location.
if (!StmtWrites.is_disjoint(AccRel)) {
emitRemark(SAI->getName() + " has read after write to the same "
"element in same statement. The "
"dependences found during analysis may "
"be wrong because Polly is not able to "
"handle such case for now.",
MA->getAccessInstruction());
return false;
}
StmtReads = give(isl_union_map_union(StmtReads.take(), AccRel.take()));
} else {
StmtWrites =
give(isl_union_map_union(StmtWrites.take(), AccRel.take()));
}
// For now, we are not able to expand MayWrite.
if (MA->isMayWrite()) {
emitRemark(SAI->getName() + " has a maywrite access.",
@ -191,15 +267,13 @@ bool MaximalStaticExpander::isExpandable(
auto StmtDomain = Stmt.getDomain();
// Get the domain of the future Read access.
auto ReadDomainSet = MA->getAccessRelation().domain();
auto ReadDomain = isl::union_set(ReadDomainSet);
auto CurrentReadWriteDependences =
Dependences.reverse().intersect_domain(ReadDomain);
auto DepsDomain = CurrentReadWriteDependences.domain();
unsigned NumberElementMap =
isl_union_map_n_map(CurrentReadWriteDependences.get());
// Get the dependences relevant for this MA
auto MapDependences = filterDependences(S, Dependences, MA);
auto DepsDomain = MapDependences.domain();
unsigned NumberElementMap = isl_union_map_n_map(MapDependences.get());
// If there are multiple maps in the Deps, we cannot handle this case
// for now.
@ -236,7 +310,7 @@ bool MaximalStaticExpander::isExpandable(
}
void MaximalStaticExpander::expandRead(Scop &S, MemoryAccess *MA,
isl::union_map &Dependences,
const isl::union_map &Dependences,
ScopArrayInfo *ExpandedSAI) {
// Get the current AM.
@ -246,17 +320,16 @@ void MaximalStaticExpander::expandRead(Scop &S, MemoryAccess *MA,
auto WriteDomainSet = MA->getAccessRelation().domain();
auto WriteDomain = isl::union_set(WriteDomainSet);
auto CurrentReadWriteDependences =
Dependences.reverse().intersect_domain(WriteDomain);
// Get the dependences relevant for this MA
auto MapDependences = filterDependences(S, Dependences, MA);
// If no dependences, no need to modify anything.
if (CurrentReadWriteDependences.is_empty()) {
if (MapDependences.is_empty())
return;
}
assert(isl_union_map_n_map(CurrentReadWriteDependences.get()) == 1 &&
assert(isl_union_map_n_map(MapDependences.get()) == 1 &&
"There are more than one RAW dependencies in the union map.");
auto NewAccessMap = isl::map::from_union_map(CurrentReadWriteDependences);
auto NewAccessMap = isl::map::from_union_map(MapDependences);
auto Id = ExpandedSAI->getBasePtrId();
@ -348,7 +421,7 @@ bool MaximalStaticExpander::runOnScop(Scop &S) {
// Get the RAW Dependences.
auto &DI = getAnalysis<DependenceInfo>();
auto &D = DI.getDependences(Dependences::AL_Statement);
auto &D = DI.getDependences(Dependences::AL_Reference);
auto Dependences = isl::give(D.getDependences(Dependences::TYPE_RAW));
SmallPtrSet<ScopArrayInfo *, 4> CurrentSAI(S.arrays().begin(),

View File

@ -0,0 +1,68 @@
; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1| FileCheck %s --check-prefix=MSE
;
; Verify that an array with a load after a store to the same element in the same statement is not expanded.
;
; Original source code :
;
; #define Ni 2000
; #define Nj 3000
;
; void mse(double A[Ni], double B[Nj], double C[Nj], double D[Nj]) {
; int i,j;
; for (i = 0; i < Ni; i++) {
; for (int j = 0; j<Nj; j++) {
; B[j] = j;
; C[j] = B[j];
; }
; }
; }
;
; Check that C is expanded
;
; CHECK: i64 MemRef_C_Stmt_for_body4_expanded[10000][10000]; // Element size 8
; CHECK: new: { Stmt_for_body4[i0, i1] -> MemRef_C_Stmt_for_body4_expanded[i0, i1] };
;
; Check that B is not expanded
;
; CHECK-NOT: double MemRef_B_Stmt_for_body4_expanded[10000][10000]; // Element size 8
; MSE: MemRef_B has read after write to the same element in same statement. The dependences found during analysis may be wrong because Polly is not able to handle such case for now.
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Kernel under test: a doubly nested loop whose inner body stores to an
; element of %B and then reloads that same element to copy it into %C.
; %A and %D are not accessed.
define void @mse(double* %A, double* %B, double* %C, double* %D) {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.body
; Outer loop header: %i.02 starts at 0 and the loop exits once it reaches 10000.
for.body: ; preds = %entry.split, %for.inc9
%i.02 = phi i32 [ 0, %entry.split ], [ %inc10, %for.inc9 ]
br label %for.body4
; Inner loop body: %indvars.iv starts at 0 and the loop exits once it reaches 10000.
for.body4: ; preds = %for.body, %for.body4
%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body4 ]
%0 = trunc i64 %indvars.iv to i32
%conv = sitofp i32 %0 to double
; Store the converted induction variable to B[%indvars.iv] ...
%arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv
store double %conv, double* %arrayidx, align 8
; ... then load the very same element back (as i64) within the same block.
%arrayidx6 = getelementptr inbounds double, double* %B, i64 %indvars.iv
%1 = bitcast double* %arrayidx6 to i64*
%2 = load i64, i64* %1, align 8
; Copy the loaded value into C[%indvars.iv].
%arrayidx8 = getelementptr inbounds double, double* %C, i64 %indvars.iv
%3 = bitcast double* %arrayidx8 to i64*
store i64 %2, i64* %3, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 10000
br i1 %exitcond, label %for.body4, label %for.inc9
; Outer loop latch: increment %i.02 and test the exit condition.
for.inc9: ; preds = %for.body4
%inc10 = add nuw nsw i32 %i.02, 1
%exitcond3 = icmp ne i32 %inc10, 10000
br i1 %exitcond3, label %for.body, label %for.end11
for.end11: ; preds = %for.inc9
ret void
}

View File

@ -1,5 +1,5 @@
; RUN: opt %loadPolly -polly-canonicalize -polly-mse -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-canonicalize -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1| FileCheck %s --check-prefix=MSE
; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1| FileCheck %s --check-prefix=MSE
;
; Verify that Polly detects problems and does not expand the array
;
@ -30,76 +30,43 @@
;
; CHECK-NOT: new: { Stmt_for_body3[i0, i1] -> MemRef_B_Stmt_for_body3_expanded[i0, i1] };
; CHECK-NOT: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: noinline nounwind uwtable
define double @mse(double* %A, double* %B) {
entry:
%A.addr = alloca double*, align 8
%B.addr = alloca double*, align 8
%i = alloca i32, align 4
%tmp = alloca double, align 8
%j = alloca i32, align 4
store double* %A, double** %A.addr, align 8
store double* %B, double** %B.addr, align 8
store double 6.000000e+00, double* %tmp, align 8
store i32 0, i32* %i, align 4
br label %for.cond
br label %entry.split
for.cond: ; preds = %for.inc8, %entry
%0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 2000
br i1 %cmp, label %for.body, label %for.end10
entry.split: ; preds = %entry
br label %for.body
for.body: ; preds = %for.cond
store i32 2, i32* %j, align 4
br label %for.cond1
for.body: ; preds = %entry.split, %for.end
%indvars.iv4 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next5, %for.end ]
br label %for.body3
for.cond1: ; preds = %for.inc, %for.body
%1 = load i32, i32* %j, align 4
%cmp2 = icmp slt i32 %1, 3000
br i1 %cmp2, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%2 = load i32, i32* %j, align 4
%conv = sitofp i32 %2 to double
%3 = load double*, double** %B.addr, align 8
%4 = load i32, i32* %j, align 4
%sub = sub nsw i32 %4, 1
%idxprom = sext i32 %sub to i64
%arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom
for.body3: ; preds = %for.body, %for.body3
%indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body3 ]
%0 = trunc i64 %indvars.iv to i32
%conv = sitofp i32 %0 to double
%1 = add nsw i64 %indvars.iv, -1
%arrayidx = getelementptr inbounds double, double* %B, i64 %1
store double %conv, double* %arrayidx, align 8
br label %for.inc
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 3000
br i1 %exitcond, label %for.body3, label %for.end
for.inc: ; preds = %for.body3
%5 = load i32, i32* %j, align 4
%inc = add nsw i32 %5, 1
store i32 %inc, i32* %j, align 4
br label %for.cond1
for.end: ; preds = %for.body3
%arrayidx5 = getelementptr inbounds double, double* %B, i64 %indvars.iv4
%2 = bitcast double* %arrayidx5 to i64*
%3 = load i64, i64* %2, align 8
%arrayidx7 = getelementptr inbounds double, double* %A, i64 %indvars.iv4
%4 = bitcast double* %arrayidx7 to i64*
store i64 %3, i64* %4, align 8
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
%exitcond6 = icmp ne i64 %indvars.iv.next5, 2000
br i1 %exitcond6, label %for.body, label %for.end10
for.end: ; preds = %for.cond1
%6 = load double*, double** %B.addr, align 8
%7 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %7 to i64
%arrayidx5 = getelementptr inbounds double, double* %6, i64 %idxprom4
%8 = load double, double* %arrayidx5, align 8
%9 = load double*, double** %A.addr, align 8
%10 = load i32, i32* %i, align 4
%idxprom6 = sext i32 %10 to i64
%arrayidx7 = getelementptr inbounds double, double* %9, i64 %idxprom6
store double %8, double* %arrayidx7, align 8
br label %for.inc8
for.inc8: ; preds = %for.end
%11 = load i32, i32* %i, align 4
%inc9 = add nsw i32 %11, 1
store i32 %inc9, i32* %i, align 4
br label %for.cond
for.end10: ; preds = %for.cond
%12 = load double, double* %tmp, align 8
ret double %12
for.end10: ; preds = %for.end
ret double 6.000000e+00
}

View File

@ -1,5 +1,5 @@
; RUN: opt %loadPolly -polly-canonicalize -polly-mse -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-canonicalize -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1 | FileCheck %s --check-prefix=MSE
; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1 | FileCheck %s --check-prefix=MSE
;
; Verify that Polly detects problems and does not expand the array
;
@ -33,79 +33,44 @@
;
; CHECK-NOT: new: { Stmt_for_body3[i0, i1] -> MemRef_B_Stmt_for_body3_expanded[i0, i1] };
; CHECK-NOT: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: noinline nounwind uwtable
define double @mse(double* %A, double* %B) {
entry:
%A.addr = alloca double*, align 8
%B.addr = alloca double*, align 8
%i = alloca i32, align 4
%tmp = alloca double, align 8
%j = alloca i32, align 4
store double* %A, double** %A.addr, align 8
store double* %B, double** %B.addr, align 8
store double 6.000000e+00, double* %tmp, align 8
store i32 0, i32* %i, align 4
br label %for.cond
br label %entry.split
for.cond: ; preds = %for.inc10, %entry
%0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 2000
br i1 %cmp, label %for.body, label %for.end12
entry.split: ; preds = %entry
br label %for.body
for.body: ; preds = %for.cond
%1 = load double*, double** %B.addr, align 8
%2 = load i32, i32* %i, align 4
%idxprom = sext i32 %2 to i64
%arrayidx = getelementptr inbounds double, double* %1, i64 %idxprom
for.body: ; preds = %entry.split, %for.end
%indvars.iv3 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next4, %for.end ]
%arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv3
store double 2.000000e+00, double* %arrayidx, align 8
store i32 0, i32* %j, align 4
br label %for.cond1
br label %for.body3
for.cond1: ; preds = %for.inc, %for.body
%3 = load i32, i32* %j, align 4
%cmp2 = icmp slt i32 %3, 2000
br i1 %cmp2, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%4 = load i32, i32* %j, align 4
%conv = sitofp i32 %4 to double
%5 = load double*, double** %B.addr, align 8
%6 = load i32, i32* %j, align 4
%idxprom4 = sext i32 %6 to i64
%arrayidx5 = getelementptr inbounds double, double* %5, i64 %idxprom4
for.body3: ; preds = %for.body, %for.body3
%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
%0 = trunc i64 %indvars.iv to i32
%conv = sitofp i32 %0 to double
%arrayidx5 = getelementptr inbounds double, double* %B, i64 %indvars.iv
store double %conv, double* %arrayidx5, align 8
br label %for.inc
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 2000
br i1 %exitcond, label %for.body3, label %for.end
for.inc: ; preds = %for.body3
%7 = load i32, i32* %j, align 4
%inc = add nsw i32 %7, 1
store i32 %inc, i32* %j, align 4
br label %for.cond1
for.end: ; preds = %for.body3
%arrayidx7 = getelementptr inbounds double, double* %B, i64 %indvars.iv3
%1 = bitcast double* %arrayidx7 to i64*
%2 = load i64, i64* %1, align 8
%arrayidx9 = getelementptr inbounds double, double* %A, i64 %indvars.iv3
%3 = bitcast double* %arrayidx9 to i64*
store i64 %2, i64* %3, align 8
%indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
%exitcond5 = icmp ne i64 %indvars.iv.next4, 2000
br i1 %exitcond5, label %for.body, label %for.end12
for.end: ; preds = %for.cond1
%8 = load double*, double** %B.addr, align 8
%9 = load i32, i32* %i, align 4
%idxprom6 = sext i32 %9 to i64
%arrayidx7 = getelementptr inbounds double, double* %8, i64 %idxprom6
%10 = load double, double* %arrayidx7, align 8
%11 = load double*, double** %A.addr, align 8
%12 = load i32, i32* %i, align 4
%idxprom8 = sext i32 %12 to i64
%arrayidx9 = getelementptr inbounds double, double* %11, i64 %idxprom8
store double %10, double* %arrayidx9, align 8
br label %for.inc10
for.inc10: ; preds = %for.end
%13 = load i32, i32* %i, align 4
%inc11 = add nsw i32 %13, 1
store i32 %inc11, i32* %i, align 4
br label %for.cond
for.end12: ; preds = %for.cond
%14 = load double, double* %tmp, align 8
ret double %14
for.end12: ; preds = %for.end
ret double 6.000000e+00
}

View File

@ -1,4 +1,4 @@
; RUN: opt %loadPolly -polly-canonicalize -polly-mse -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s
;
; Verify that the accesses are correctly expanded
;
@ -27,75 +27,42 @@
;
; CHECK: new: { Stmt_for_body3[i0, i1] -> MemRef_B_Stmt_for_body3_expanded[i0, i1] };
; CHECK: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded[i0, i0] };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: noinline nounwind uwtable
define double @mse(double* %A, double* %B) {
entry:
%A.addr = alloca double*, align 8
%B.addr = alloca double*, align 8
%i = alloca i32, align 4
%tmp = alloca double, align 8
%j = alloca i32, align 4
store double* %A, double** %A.addr, align 8
store double* %B, double** %B.addr, align 8
store double 6.000000e+00, double* %tmp, align 8
store i32 0, i32* %i, align 4
br label %for.cond
br label %entry.split
for.cond: ; preds = %for.inc8, %entry
%0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 2000
br i1 %cmp, label %for.body, label %for.end10
entry.split: ; preds = %entry
br label %for.body
for.body: ; preds = %for.cond
store i32 0, i32* %j, align 4
br label %for.cond1
for.body: ; preds = %entry.split, %for.end
%indvars.iv3 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next4, %for.end ]
br label %for.body3
for.cond1: ; preds = %for.inc, %for.body
%1 = load i32, i32* %j, align 4
%cmp2 = icmp slt i32 %1, 3000
br i1 %cmp2, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%2 = load i32, i32* %j, align 4
%conv = sitofp i32 %2 to double
%3 = load double*, double** %B.addr, align 8
%4 = load i32, i32* %j, align 4
%idxprom = sext i32 %4 to i64
%arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom
for.body3: ; preds = %for.body, %for.body3
%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
%0 = trunc i64 %indvars.iv to i32
%conv = sitofp i32 %0 to double
%arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv
store double %conv, double* %arrayidx, align 8
br label %for.inc
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 3000
br i1 %exitcond, label %for.body3, label %for.end
for.inc: ; preds = %for.body3
%5 = load i32, i32* %j, align 4
%inc = add nsw i32 %5, 1
store i32 %inc, i32* %j, align 4
br label %for.cond1
for.end: ; preds = %for.body3
%arrayidx5 = getelementptr inbounds double, double* %B, i64 %indvars.iv3
%1 = bitcast double* %arrayidx5 to i64*
%2 = load i64, i64* %1, align 8
%arrayidx7 = getelementptr inbounds double, double* %A, i64 %indvars.iv3
%3 = bitcast double* %arrayidx7 to i64*
store i64 %2, i64* %3, align 8
%indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
%exitcond5 = icmp ne i64 %indvars.iv.next4, 2000
br i1 %exitcond5, label %for.body, label %for.end10
for.end: ; preds = %for.cond1
%6 = load double*, double** %B.addr, align 8
%7 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %7 to i64
%arrayidx5 = getelementptr inbounds double, double* %6, i64 %idxprom4
%8 = load double, double* %arrayidx5, align 8
%9 = load double*, double** %A.addr, align 8
%10 = load i32, i32* %i, align 4
%idxprom6 = sext i32 %10 to i64
%arrayidx7 = getelementptr inbounds double, double* %9, i64 %idxprom6
store double %8, double* %arrayidx7, align 8
br label %for.inc8
for.inc8: ; preds = %for.end
%11 = load i32, i32* %i, align 4
%inc9 = add nsw i32 %11, 1
store i32 %inc9, i32* %i, align 4
br label %for.cond
for.end10: ; preds = %for.cond
%12 = load double, double* %tmp, align 8
ret double %12
for.end10: ; preds = %for.end
ret double 6.000000e+00
}

View File

@ -0,0 +1,95 @@
; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s
;
; Verify that the accesses are correctly expanded
;
; Original source code :
;
; #define Ni 2000
; #define Nj 3000
;
; void mse(double A[Ni], double B[Nj], double C[Nj], double D[Nj]) {
; int i,j;
; for (j = 0; j < Ni; j++) {
; for (int i = 0; i<Nj; i++)
; B[i] = i;
;
; for (int i = 0; i<Nj; i++)
; D[i] = i;
;
; A[j] = B[j];
; C[j] = D[j];
; }
; }
;
; Check that expanded SAI are created
;
; CHECK: double MemRef_B_Stmt_for_body4_expanded[10000][10000]; // Element size 8
; CHECK: double MemRef_D_Stmt_for_body9_expanded[10000][10000]; // Element size 8
; CHECK: i64 MemRef_A_Stmt_for_end15_expanded[10000]; // Element size 8
; CHECK: i64 MemRef_C_Stmt_for_end15_expanded[10000]; // Element size 8
;
; Check that the memory accesses are modified
; CHECK: new: { Stmt_for_body4[i0, i1] -> MemRef_B_Stmt_for_body4_expanded[i0, i1] };
; CHECK: new: { Stmt_for_body9[i0, i1] -> MemRef_D_Stmt_for_body9_expanded[i0, i1] };
; CHECK: new: { Stmt_for_end15[i0] -> MemRef_B_Stmt_for_body4_expanded[i0, i0] };
; CHECK: new: { Stmt_for_end15[i0] -> MemRef_A_Stmt_for_end15_expanded[i0] };
; CHECK: new: { Stmt_for_end15[i0] -> MemRef_D_Stmt_for_body9_expanded[i0, i0] };
; CHECK: new: { Stmt_for_end15[i0] -> MemRef_C_Stmt_for_end15_expanded[i0] };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Kernel under test: an outer loop containing two separate inner loops (one
; writing %B, one writing %D) followed by a block that reads one element of
; each and copies them into %A and %C.
define void @mse(double* %A, double* %B, double* %C, double* %D) {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.body
; Outer loop header: %indvars.iv7 starts at 0 and the loop exits once it reaches 10000.
for.body: ; preds = %entry.split, %for.end15
%indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.end15 ]
br label %for.body4
; First inner loop: stores the converted induction variable to B[%indvars.iv].
for.body4: ; preds = %for.body, %for.body4
%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body4 ]
%0 = trunc i64 %indvars.iv to i32
%conv = sitofp i32 %0 to double
%arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv
store double %conv, double* %arrayidx, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 10000
br i1 %exitcond, label %for.body4, label %for.end
for.end: ; preds = %for.body4
br label %for.body9
; Second inner loop: stores the converted induction variable to D[%indvars.iv4].
for.body9: ; preds = %for.end, %for.body9
%indvars.iv4 = phi i64 [ 0, %for.end ], [ %indvars.iv.next5, %for.body9 ]
%1 = trunc i64 %indvars.iv4 to i32
%conv10 = sitofp i32 %1 to double
%arrayidx12 = getelementptr inbounds double, double* %D, i64 %indvars.iv4
store double %conv10, double* %arrayidx12, align 8
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
%exitcond6 = icmp ne i64 %indvars.iv.next5, 10000
br i1 %exitcond6, label %for.body9, label %for.end15
; Tail of the outer loop body: this single block reads from both %B and %D,
; each written by a different inner loop above.
for.end15: ; preds = %for.body9
; A[%indvars.iv7] = B[%indvars.iv7] (moved as i64).
%arrayidx17 = getelementptr inbounds double, double* %B, i64 %indvars.iv7
%2 = bitcast double* %arrayidx17 to i64*
%3 = load i64, i64* %2, align 8
%arrayidx19 = getelementptr inbounds double, double* %A, i64 %indvars.iv7
%4 = bitcast double* %arrayidx19 to i64*
store i64 %3, i64* %4, align 8
; C[%indvars.iv7] = D[%indvars.iv7] (moved as i64).
%arrayidx21 = getelementptr inbounds double, double* %D, i64 %indvars.iv7
%5 = bitcast double* %arrayidx21 to i64*
%6 = load i64, i64* %5, align 8
%arrayidx23 = getelementptr inbounds double, double* %C, i64 %indvars.iv7
%7 = bitcast double* %arrayidx23 to i64*
store i64 %6, i64* %7, align 8
%indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
%exitcond9 = icmp ne i64 %indvars.iv.next8, 10000
br i1 %exitcond9, label %for.body, label %for.end26
for.end26: ; preds = %for.end15
ret void
}

View File

@ -0,0 +1,84 @@
; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s
;
; Verify that the accesses are correctly expanded
;
; Original source code :
;
; #define Ni 2000
; #define Nj 3000
;
; void mse(double A[Ni], double B[Nj], double C[Nj], double D[Nj]) {
; int i,j;
; for (j = 0; j < Nj; j++) {
; for (int i = 0; i<Ni; i++) {
; B[i] = i;
; D[i] = i;
; }
; A[j] = B[j];
; C[j] = D[j];
; }
; }
;
; Check that expanded SAI are created
; CHECK: double MemRef_B_Stmt_for_body4_expanded[10000][10000]; // Element size 8
; CHECK: double MemRef_D_Stmt_for_body4_expanded[10000][10000]; // Element size 8
; CHECK: i64 MemRef_A_Stmt_for_end_expanded[10000]; // Element size 8
; CHECK: i64 MemRef_C_Stmt_for_end_expanded[10000]; // Element size 8
;
; Check that the memory accesses are modified
;
; CHECK: new: { Stmt_for_body4[i0, i1] -> MemRef_B_Stmt_for_body4_expanded[i0, i1] };
; CHECK: new: { Stmt_for_body4[i0, i1] -> MemRef_D_Stmt_for_body4_expanded[i0, i1] };
; CHECK: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body4_expanded[i0, i0] };
; CHECK: new: { Stmt_for_end[i0] -> MemRef_A_Stmt_for_end_expanded[i0] };
; CHECK: new: { Stmt_for_end[i0] -> MemRef_D_Stmt_for_body4_expanded[i0, i0] };
; CHECK: new: { Stmt_for_end[i0] -> MemRef_C_Stmt_for_end_expanded[i0] };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Kernel under test: an outer loop containing one inner loop that writes both
; %B and %D, followed by a block that reads one element of each and copies
; them into %A and %C.
define void @mse(double* %A, double* %B, double* %C, double* %D) {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.body
; Outer loop header: %indvars.iv3 starts at 0 and the loop exits once it reaches 10000.
for.body: ; preds = %entry.split, %for.end
%indvars.iv3 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next4, %for.end ]
br label %for.body4
; Inner loop body: a single block containing two stores, one to
; B[%indvars.iv] and one to D[%indvars.iv].
for.body4: ; preds = %for.body, %for.body4
%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body4 ]
%0 = trunc i64 %indvars.iv to i32
%conv = sitofp i32 %0 to double
%arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv
store double %conv, double* %arrayidx, align 8
%1 = trunc i64 %indvars.iv to i32
%conv5 = sitofp i32 %1 to double
%arrayidx7 = getelementptr inbounds double, double* %D, i64 %indvars.iv
store double %conv5, double* %arrayidx7, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 10000
br i1 %exitcond, label %for.body4, label %for.end
; Tail of the outer loop body: reads from both %B and %D, which were written
; by the same inner-loop block above.
for.end: ; preds = %for.body4
; A[%indvars.iv3] = B[%indvars.iv3] (moved as i64).
%arrayidx9 = getelementptr inbounds double, double* %B, i64 %indvars.iv3
%2 = bitcast double* %arrayidx9 to i64*
%3 = load i64, i64* %2, align 8
%arrayidx11 = getelementptr inbounds double, double* %A, i64 %indvars.iv3
%4 = bitcast double* %arrayidx11 to i64*
store i64 %3, i64* %4, align 8
; C[%indvars.iv3] = D[%indvars.iv3] (moved as i64).
%arrayidx13 = getelementptr inbounds double, double* %D, i64 %indvars.iv3
%5 = bitcast double* %arrayidx13 to i64*
%6 = load i64, i64* %5, align 8
%arrayidx15 = getelementptr inbounds double, double* %C, i64 %indvars.iv3
%7 = bitcast double* %arrayidx15 to i64*
store i64 %6, i64* %7, align 8
%indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
%exitcond5 = icmp ne i64 %indvars.iv.next4, 10000
br i1 %exitcond5, label %for.body, label %for.end18
for.end18: ; preds = %for.end
ret void
}