forked from OSchip/llvm-project
Ensure a correct order between memory accesses.
Ensure that the scalar write access corresponds to the result of a load instruction appears after the generic read access corresponds to the load instruction. llvm-svn: 186419
This commit is contained in:
parent
5a772dcd84
commit
63cc9467af
|
@ -292,7 +292,10 @@ class TempScopInfo : public FunctionPass {
|
|||
///
|
||||
/// @param Inst The instruction to be analyzed
|
||||
/// @param R The SCoP region
|
||||
void buildScalarDependences(Instruction *Inst, Region *R);
|
||||
///
|
||||
/// @return True if the Instruction is used in other BB and a scalar write
|
||||
/// Access is required.
|
||||
bool buildScalarDependences(Instruction *Inst, Region *R);
|
||||
|
||||
void buildAccessFunctions(Region &RefRegion, BasicBlock &BB);
|
||||
|
||||
|
|
|
@ -104,11 +104,11 @@ void TempScop::printDetail(raw_ostream &OS, ScalarEvolution *SE, LoopInfo *LI,
|
|||
}
|
||||
}
|
||||
|
||||
void TempScopInfo::buildScalarDependences(Instruction *Inst, Region *R) {
|
||||
bool TempScopInfo::buildScalarDependences(Instruction *Inst, Region *R) {
|
||||
// No need to translate these scalar dependences into polyhedral form, because
|
||||
// synthesizable scalars can be generated by the code generator.
|
||||
if (canSynthesize(Inst, LI, SE, R))
|
||||
return;
|
||||
return false;
|
||||
|
||||
bool AnyCrossStmtUse = false;
|
||||
BasicBlock *ParentBB = Inst->getParent();
|
||||
|
@ -147,12 +147,7 @@ void TempScopInfo::buildScalarDependences(Instruction *Inst, Region *R) {
|
|||
AccFuncMap[UseParent].push_back(std::make_pair(ScalarAccess, U));
|
||||
}
|
||||
|
||||
// If the Instruction is used outside the statement, we need to build the
|
||||
// write access.
|
||||
if (AnyCrossStmtUse) {
|
||||
IRAccess ScalarAccess(IRAccess::SCALARWRITE, Inst, ZeroOffset, 1, true);
|
||||
AccFuncMap[ParentBB].push_back(std::make_pair(ScalarAccess, Inst));
|
||||
}
|
||||
return AnyCrossStmtUse;
|
||||
}
|
||||
|
||||
IRAccess TempScopInfo::buildIRAccess(Instruction *Inst, Loop *L, Region *R) {
|
||||
|
@ -190,8 +185,12 @@ void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB) {
|
|||
if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
|
||||
Functions.push_back(std::make_pair(buildIRAccess(Inst, L, &R), Inst));
|
||||
|
||||
if (!isa<StoreInst>(Inst))
|
||||
buildScalarDependences(Inst, &R);
|
||||
if (!isa<StoreInst>(Inst) && buildScalarDependences(Inst, &R)) {
|
||||
// If the Instruction is used outside the statement, we need to build the
|
||||
// write access.
|
||||
IRAccess ScalarAccess(IRAccess::SCALARWRITE, Inst, ZeroOffset, 1, true);
|
||||
Functions.push_back(std::make_pair(ScalarAccess, Inst));
|
||||
}
|
||||
}
|
||||
|
||||
if (Functions.empty())
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
|
||||
; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
|
||||
|
||||
; void f(long A[], int N, int *init_ptr) {
|
||||
; long i, j;
|
||||
;
|
||||
; for (i = 0; i < N; ++i) {
|
||||
; init = *init_ptr;
|
||||
; for (i = 0; i < N; ++i) {
|
||||
; A[i] = init + 2;
|
||||
; }
|
||||
; }
|
||||
; }
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
|
||||
entry:
|
||||
br label %for.i
|
||||
|
||||
for.i: ; preds = %for.i.end, %entry
|
||||
%indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
|
||||
%indvar.i.next = add nsw i64 %indvar.i, 1
|
||||
br label %entry.next
|
||||
|
||||
entry.next: ; preds = %for.i
|
||||
%init = load i64* %init_ptr
|
||||
; CHECK: BB: entry.next
|
||||
; CHECK: Read init_ptr[0]
|
||||
; CHECK: Write init[0]
|
||||
br label %for.j
|
||||
|
||||
for.j: ; preds = %for.j, %entry.next
|
||||
%indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
|
||||
%init_plus_two = add i64 %init, 2
|
||||
; CHECK: Read init[0]
|
||||
; CHECK: Write A[{0,+,8}<%for.j>]
|
||||
%scevgep = getelementptr i64* %A, i64 %indvar.j
|
||||
store i64 %init_plus_two, i64* %scevgep
|
||||
%indvar.j.next = add nsw i64 %indvar.j, 1
|
||||
%exitcond.j = icmp eq i64 %indvar.j.next, %N
|
||||
br i1 %exitcond.j, label %for.i.end, label %for.j
|
||||
|
||||
for.i.end: ; preds = %for.j
|
||||
%exitcond.i = icmp eq i64 %indvar.i.next, %N
|
||||
br i1 %exitcond.i, label %return, label %for.i
|
||||
|
||||
return: ; preds = %for.i.end
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -0,0 +1,58 @@
|
|||
; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
|
||||
; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
|
||||
|
||||
; void f(long A[], int N, int *init_ptr) {
|
||||
; long i, j;
|
||||
;
|
||||
; for (i = 0; i < N; ++i) {
|
||||
; init = *init_ptr;
|
||||
; for (i = 0; i < N; ++i) {
|
||||
; init2 = *init_ptr;
|
||||
; A[i] = init + init2;
|
||||
; }
|
||||
; }
|
||||
; }
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
|
||||
entry:
|
||||
br label %for.i
|
||||
|
||||
for.i: ; preds = %for.i.end, %entry
|
||||
%indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
|
||||
%indvar.i.next = add nsw i64 %indvar.i, 1
|
||||
br label %entry.next
|
||||
|
||||
entry.next: ; preds = %for.i
|
||||
%init = load i64* %init_ptr
|
||||
; CHECK: BB: entry.next
|
||||
; CHECK: Read init_ptr[0]
|
||||
; CHECK: Write init[0]
|
||||
br label %for.j
|
||||
|
||||
for.j: ; preds = %for.j, %entry.next
|
||||
%indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
|
||||
%init_2 = load i64* %init_ptr
|
||||
%init_sum = add i64 %init, %init_2
|
||||
; CHECK: BB: for.j
|
||||
; CHECK: Read init[0]
|
||||
; CHECK: Read init_ptr[0]
|
||||
; CHECK: Write A[{0,+,8}<%for.j>]
|
||||
%scevgep = getelementptr i64* %A, i64 %indvar.j
|
||||
store i64 %init_sum, i64* %scevgep
|
||||
%indvar.j.next = add nsw i64 %indvar.j, 1
|
||||
%exitcond.j = icmp eq i64 %indvar.j.next, %N
|
||||
br i1 %exitcond.j, label %for.i.end, label %for.j
|
||||
|
||||
for.i.end: ; preds = %for.j
|
||||
%exitcond.i = icmp eq i64 %indvar.i.next, %N
|
||||
br i1 %exitcond.i, label %return, label %for.i
|
||||
|
||||
return: ; preds = %for.i.end
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -0,0 +1,52 @@
|
|||
; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
|
||||
; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
|
||||
|
||||
; void f(long A[], int N, int *init_ptr) {
|
||||
; long i, j;
|
||||
;
|
||||
; for (i = 0; i < N; ++i) {
|
||||
; for (i = 0; i < N; ++i) {
|
||||
; init = *init_ptr;
|
||||
; A[i] = init + 2;
|
||||
; }
|
||||
; }
|
||||
; }
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
|
||||
entry:
|
||||
br label %for.i
|
||||
|
||||
for.i: ; preds = %for.i.end, %entry
|
||||
%indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
|
||||
%indvar.i.next = add nsw i64 %indvar.i, 1
|
||||
br label %entry.next
|
||||
|
||||
entry.next: ; preds = %for.i
|
||||
br label %for.j
|
||||
|
||||
for.j: ; preds = %for.j, %entry.next
|
||||
%indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
|
||||
%init = load i64* %init_ptr
|
||||
%init_plus_two = add i64 %init, 2
|
||||
%scevgep = getelementptr i64* %A, i64 %indvar.j
|
||||
store i64 %init_plus_two, i64* %scevgep
|
||||
; CHECK: BB: for.j
|
||||
; CHECK: Read init_ptr[0]
|
||||
; CHECK: Write A[{0,+,8}<%for.j>]
|
||||
%indvar.j.next = add nsw i64 %indvar.j, 1
|
||||
%exitcond.j = icmp eq i64 %indvar.j.next, %N
|
||||
br i1 %exitcond.j, label %for.i.end, label %for.j
|
||||
|
||||
for.i.end: ; preds = %for.j
|
||||
%exitcond.i = icmp eq i64 %indvar.i.next, %N
|
||||
br i1 %exitcond.i, label %return, label %for.i
|
||||
|
||||
return: ; preds = %for.i.end
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -0,0 +1,186 @@
|
|||
; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
|
||||
; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -polly-codegen-scev -analyze < %s | FileCheck %s
|
||||
|
||||
; ModuleID = 'scalar_to_array.ll'
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@A = common global [1024 x float] zeroinitializer, align 8
|
||||
|
||||
; CHECK: empty
|
||||
; Function Attrs: nounwind
|
||||
define i32 @empty() #0 {
|
||||
entry:
|
||||
fence seq_cst
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
|
||||
%exitcond = icmp ne i64 %indvar, 1024
|
||||
br i1 %exitcond, label %for.body, label %return
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
br label %for.cond
|
||||
|
||||
return: ; preds = %for.cond
|
||||
fence seq_cst
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK: array_access
|
||||
; Function Attrs: nounwind
|
||||
define i32 @array_access() #0 {
|
||||
entry:
|
||||
fence seq_cst
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
|
||||
%exitcond = icmp ne i64 %indvar, 1024
|
||||
br i1 %exitcond, label %for.body, label %return
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
|
||||
%float = uitofp i64 %indvar to float
|
||||
store float %float, float* %arrayidx
|
||||
br label %for.inc
|
||||
; CHECK: BB: for.body
|
||||
; CHECK-NOT: Read
|
||||
; CHECK: Write A[{0,+,4}<%for.cond>]
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
br label %for.cond
|
||||
|
||||
return: ; preds = %for.cond
|
||||
fence seq_cst
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; CHECK: intra_scop_dep
|
||||
define i32 @intra_scop_dep() #0 {
|
||||
entry:
|
||||
fence seq_cst
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
|
||||
%exitcond = icmp ne i64 %indvar, 1024
|
||||
br i1 %exitcond, label %for.body.a, label %return
|
||||
|
||||
for.body.a: ; preds = %for.cond
|
||||
%arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
|
||||
%scalar = load float* %arrayidx
|
||||
br label %for.body.b
|
||||
; CHECK: BB: for.body.a
|
||||
; CHECK: Read A[{0,+,4}<%for.cond>]
|
||||
; CHECK: Write scalar[0]
|
||||
|
||||
for.body.b: ; preds = %for.body.a
|
||||
%arrayidx2 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
|
||||
%float = uitofp i64 %indvar to float
|
||||
%sum = fadd float %scalar, %float
|
||||
store float %sum, float* %arrayidx2
|
||||
br label %for.inc
|
||||
; CHECK: BB: for.body.b
|
||||
; CHECK: Read scalar[0]
|
||||
; CHECK: Write A[{0,+,4}<%for.cond>]
|
||||
|
||||
for.inc: ; preds = %for.body.b
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
br label %for.cond
|
||||
|
||||
return: ; preds = %for.cond
|
||||
fence seq_cst
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; It is not possible to have a scop which accesses a scalar element that is
|
||||
; a global variable. All global variables are pointers containing possibly
|
||||
; a single element. Hence they do not need to be handled anyways.
|
||||
; Please note that this is still required when scalar to array rewritting is
|
||||
; disabled.
|
||||
|
||||
; CHECK: use_after_scop
|
||||
; Function Attrs: nounwind
|
||||
define i32 @use_after_scop() #0 {
|
||||
entry:
|
||||
%scalar.s2a = alloca float
|
||||
fence seq_cst
|
||||
br label %for.head
|
||||
|
||||
for.head: ; preds = %for.inc, %entry
|
||||
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.head
|
||||
%arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
|
||||
%scalar = load float* %arrayidx
|
||||
store float %scalar, float* %scalar.s2a
|
||||
; Escaped uses are still required to be rewritten to stack variable.
|
||||
; CHECK: BB: for.body
|
||||
; CHECK: Read A[{0,+,4}<%for.head>]
|
||||
; CHECK: Write scalar.s2a[0]
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
%exitcond = icmp ne i64 %indvar, 1024
|
||||
br i1 %exitcond, label %for.head, label %for.after
|
||||
|
||||
for.after: ; preds = %for.inc
|
||||
%scalar.loadoutside = load float* %scalar.s2a
|
||||
fence seq_cst
|
||||
%return_value = fptosi float %scalar.loadoutside to i32
|
||||
br label %return
|
||||
|
||||
return: ; preds = %for.after
|
||||
ret i32 %return_value
|
||||
}
|
||||
|
||||
; We currently do not transform scalar references, that have only read accesses
|
||||
; in the scop. There are two reasons for this:
|
||||
;
|
||||
; o We don't introduce additional memory references which may yield to compile
|
||||
; time overhead.
|
||||
; o For integer values, such a translation may block the use of scalar
|
||||
; evolution on those values.
|
||||
;
|
||||
; CHECK: before_scop
|
||||
; Function Attrs: nounwind
|
||||
define i32 @before_scop() #0 {
|
||||
entry:
|
||||
br label %preheader
|
||||
|
||||
preheader: ; preds = %entry
|
||||
%scalar = fadd float 4.000000e+00, 5.000000e+00
|
||||
fence seq_cst
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %preheader
|
||||
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %preheader ]
|
||||
%exitcond = icmp ne i64 %indvar, 1024
|
||||
br i1 %exitcond, label %for.body, label %return
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
|
||||
store float %scalar, float* %arrayidx
|
||||
br label %for.inc
|
||||
; CHECK: BB: for.body
|
||||
; CHECK: Write A[{0,+,4}<%for.cond>]
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
br label %for.cond
|
||||
|
||||
return: ; preds = %for.cond
|
||||
fence seq_cst
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze < %s | FileCheck %s
|
||||
; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze -disable-polly-intra-scop-scalar-to-array < %s | FileCheck %s -check-prefix=SCALARACCESS
|
||||
|
||||
; void f(long A[], int N, int *init_ptr) {
|
||||
; long i, j;
|
||||
|
@ -25,9 +26,12 @@ for.i:
|
|||
|
||||
entry.next:
|
||||
; CHECK: BB: entry.next
|
||||
; SCALARACCESS: BB: entry.next
|
||||
%init = load i64* %init_ptr
|
||||
; CHECK: Read init_ptr[0]
|
||||
; CHECK: Write init.s2a[0]
|
||||
; SCALARACCESS: Read init_ptr[0]
|
||||
; SCALARACCESS: Write init[0]
|
||||
br label %for.j
|
||||
|
||||
for.j:
|
||||
|
@ -35,6 +39,10 @@ for.j:
|
|||
; CHECK: BB: for.j
|
||||
; CHECK: Read init.s2a[0]
|
||||
; CHECK: Write A[{0,+,8}<%for.j>]
|
||||
|
||||
; SCALARACCESS: BB: for.j
|
||||
; SCALARACCESS: Read init
|
||||
; SCALARACCESS: Write A[{0,+,8}<%for.j>]
|
||||
%init_plus_two = add i64 %init, 2
|
||||
%scevgep = getelementptr i64* %A, i64 %indvar.j
|
||||
store i64 %init_plus_two, i64* %scevgep
|
||||
|
@ -49,3 +57,38 @@ for.i.end:
|
|||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @g(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
|
||||
entry:
|
||||
br label %for.i
|
||||
|
||||
for.i:
|
||||
%indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
|
||||
%indvar.i.next = add nsw i64 %indvar.i, 1
|
||||
br label %entry.next
|
||||
|
||||
entry.next:
|
||||
; SCALARACCESS: BB: entry.next
|
||||
%init = load i64* %init_ptr
|
||||
; SCALARACCESS: Read init_ptr[0]
|
||||
; SCALARACCESS: Write init[0]
|
||||
br label %for.j
|
||||
|
||||
for.j:
|
||||
; SCALARACCESS: BB: for.j
|
||||
%indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
|
||||
%scevgep = getelementptr i64* %A, i64 %indvar.j
|
||||
store i64 %init, i64* %scevgep
|
||||
; SCALARACCESS: Read init
|
||||
; SCALARACCESS: Write A[{0,+,8}<%for.j>]
|
||||
%indvar.j.next = add nsw i64 %indvar.j, 1
|
||||
%exitcond.j = icmp eq i64 %indvar.j.next, %N
|
||||
br i1 %exitcond.j, label %for.i.end, label %for.j
|
||||
|
||||
for.i.end:
|
||||
%exitcond.i = icmp eq i64 %indvar.i.next, %N
|
||||
br i1 %exitcond.i, label %return, label %for.i
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue