forked from OSchip/llvm-project
Annotate reduction parallel loops in the IslAst textual output
+ Introduced dependency type TYPE_TC_RED to represent the transitive closure (& the reverse) of reduction dependences. These are used when we check for reduction parallel loops. + Test cases including loop reversals and modulo schedules which compute reductions in an alternating order. llvm-svn: 213019
This commit is contained in:
parent
f171cf23b8
commit
457f73eaee
|
@ -49,6 +49,9 @@ struct IslAstUser {
|
|||
|
||||
// The node is the innermost parallel loop.
|
||||
int IsInnermostParallel;
|
||||
|
||||
// The node is only parallel because of reductions
|
||||
bool IsReductionParallel;
|
||||
};
|
||||
|
||||
class IslAstInfo : public ScopPass {
|
||||
|
@ -87,7 +90,7 @@ static inline bool isInnermostParallel(__isl_keep isl_ast_node *Node) {
|
|||
|
||||
bool Res = false;
|
||||
if (Info)
|
||||
Res = Info->IsInnermostParallel;
|
||||
Res = Info->IsInnermostParallel && !Info->IsReductionParallel;
|
||||
isl_id_free(Id);
|
||||
return Res;
|
||||
}
|
||||
|
@ -101,7 +104,7 @@ static inline bool isOutermostParallel(__isl_keep isl_ast_node *Node) {
|
|||
|
||||
bool Res = false;
|
||||
if (Info)
|
||||
Res = Info->IsOutermostParallel;
|
||||
Res = Info->IsOutermostParallel && !Info->IsReductionParallel;
|
||||
isl_id_free(Id);
|
||||
return Res;
|
||||
}
|
||||
|
|
|
@ -47,23 +47,30 @@ public:
|
|||
|
||||
/// @brief The type of the dependences.
|
||||
///
|
||||
/// Reduction dependences are separated because they can be ignored during
|
||||
/// the scheduling. This is the case since the order in which the reduction
|
||||
/// statements are executed does not matter. However, if they are executed
|
||||
/// in parallel we need to take additional measures (e.g., privatization)
|
||||
/// to ensure a correct result.
|
||||
/// Reduction dependences are separated from RAW/WAW/WAR dependences because
|
||||
/// we can ignore them during the scheduling. This is the case since the order
|
||||
/// in which the reduction statements are executed does not matter. However,
|
||||
/// if they are executed in parallel we need to take additional measures
|
||||
/// (e.g., privatization) to ensure a correct result. The (reverse) transitive
|
||||
/// closure of the reduction dependences is used to check for parallel
|
||||
/// executed reduction statements during code generation. These dependences
|
||||
/// connect all instances of a reduction with each other, they are therefore
|
||||
/// cyclic and possibly "reversed".
|
||||
enum Type {
|
||||
// Write after read
|
||||
TYPE_WAR = 0x1,
|
||||
TYPE_WAR = 1 << 0,
|
||||
|
||||
// Read after write
|
||||
TYPE_RAW = 0x2,
|
||||
TYPE_RAW = 1 << 1,
|
||||
|
||||
// Write after write
|
||||
TYPE_WAW = 0x4,
|
||||
TYPE_WAW = 1 << 2,
|
||||
|
||||
// Reduction dependences
|
||||
TYPE_RED = 0x8,
|
||||
TYPE_RED = 1 << 3,
|
||||
|
||||
// Transitive closure of the reduction dependences (& the reverse)
|
||||
TYPE_TC_RED = 1 << 4,
|
||||
};
|
||||
|
||||
typedef std::map<ScopStmt *, isl_map *> StatementToIslMapTy;
|
||||
|
@ -114,6 +121,9 @@ private:
|
|||
/// @brief The map of reduction dependences
|
||||
isl_union_map *RED = nullptr;
|
||||
|
||||
/// @brief The (reverse) transitive closure of reduction dependences
|
||||
isl_union_map *TC_RED = nullptr;
|
||||
|
||||
/// @brief Collect information about the SCoP.
|
||||
void collectInfo(Scop &S, isl_union_map **Read, isl_union_map **Write,
|
||||
isl_union_map **MayWrite, isl_union_map **AccessSchedule,
|
||||
|
|
|
@ -124,6 +124,15 @@ void Dependences::collectInfo(Scop &S, isl_union_map **Read,
|
|||
}
|
||||
}
|
||||
|
||||
/// @brief Fix all dimensions of @p Zero to 0 and add it to @p user
|
||||
static int fixSetToZero(__isl_take isl_set *Zero, void *user) {
|
||||
isl_union_set **User = (isl_union_set **)user;
|
||||
for (unsigned i = 0; i < isl_set_dim(Zero, isl_dim_set); i++)
|
||||
Zero = isl_set_fix_si(Zero, isl_dim_set, i, 0);
|
||||
*User = isl_union_set_add_set(*User, Zero);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// @brief Compute the privatization dependences for a given dependency @p Map
|
||||
///
|
||||
/// Privatization dependences are widened original dependences which originate
|
||||
|
@ -164,14 +173,34 @@ void Dependences::collectInfo(Scop &S, isl_union_map **Read,
|
|||
/// S1[i0] -> S2[] : i0 >= 0 and i0 <= 1023}
|
||||
/// RED:
|
||||
/// { S1[i0] -> S1[1 + i0] : i0 >= 0 and i0 <= 1022 }
|
||||
///
|
||||
/// Note: This function also computes the (reverse) transitive closure of the
|
||||
/// reduction dependences.
|
||||
void Dependences::addPrivatizationDependences() {
|
||||
isl_union_map *PrivRAW, *PrivWAW, *PrivWAR, *TransClosure;
|
||||
isl_union_map *PrivRAW, *PrivWAW, *PrivWAR;
|
||||
|
||||
// The transitive closure might be over approximated but we only use it to
|
||||
// compute the privatization dependences. Thus, overapproximation will lead
|
||||
// "only" to more conservative privatization dependences.
|
||||
// FIXME: Take precautions to ensure only forward dependences are created.
|
||||
TransClosure = isl_union_map_transitive_closure(isl_union_map_copy(RED), 0);
|
||||
// The transitive closure might be over approximated, thus could lead to
|
||||
// dependency cycles in the privatization dependences. To make sure this
|
||||
// will not happen we remove all negative dependences after we computed
|
||||
// the transitive closure.
|
||||
TC_RED = isl_union_map_transitive_closure(isl_union_map_copy(RED), 0);
|
||||
|
||||
// FIXME: Apply the current schedule instead of assuming the identity schedule
|
||||
// here. The current approach is only valid as long as we compute the
|
||||
// dependences only with the initial (identity) schedule. Any other
|
||||
// schedule could change "the direction of the backward dependences" we
|
||||
// want to eliminate here.
|
||||
isl_union_set *UDeltas = isl_union_map_deltas(isl_union_map_copy(TC_RED));
|
||||
isl_union_set *Universe = isl_union_set_universe(isl_union_set_copy(UDeltas));
|
||||
isl_union_set *Zero = isl_union_set_empty(isl_union_set_get_space(Universe));
|
||||
isl_union_set_foreach_set(Universe, fixSetToZero, &Zero);
|
||||
isl_union_map *NonPositive = isl_union_set_lex_le_union_set(UDeltas, Zero);
|
||||
|
||||
TC_RED = isl_union_map_subtract(TC_RED, NonPositive);
|
||||
|
||||
TC_RED = isl_union_map_union(
|
||||
TC_RED, isl_union_map_reverse(isl_union_map_copy(TC_RED)));
|
||||
TC_RED = isl_union_map_coalesce(TC_RED);
|
||||
|
||||
isl_union_map **Maps[] = {&RAW, &WAW, &WAR};
|
||||
isl_union_map **PrivMaps[] = {&PrivRAW, &PrivWAW, &PrivWAR};
|
||||
|
@ -179,15 +208,15 @@ void Dependences::addPrivatizationDependences() {
|
|||
isl_union_map **Map = Maps[u], **PrivMap = PrivMaps[u];
|
||||
|
||||
*PrivMap = isl_union_map_apply_range(isl_union_map_copy(*Map),
|
||||
isl_union_map_copy(TransClosure));
|
||||
isl_union_map_copy(TC_RED));
|
||||
*PrivMap = isl_union_map_union(
|
||||
*PrivMap, isl_union_map_apply_range(isl_union_map_copy(TransClosure),
|
||||
*PrivMap, isl_union_map_apply_range(isl_union_map_copy(TC_RED),
|
||||
isl_union_map_copy(*Map)));
|
||||
|
||||
*Map = isl_union_map_union(*Map, *PrivMap);
|
||||
}
|
||||
|
||||
isl_union_map_free(TransClosure);
|
||||
isl_union_set_free(Universe);
|
||||
}
|
||||
|
||||
void Dependences::calculateDependences(Scop &S) {
|
||||
|
@ -330,6 +359,7 @@ void Dependences::calculateDependences(Scop &S) {
|
|||
WAW = isl_union_map_zip(WAW);
|
||||
WAR = isl_union_map_zip(WAR);
|
||||
RED = isl_union_map_zip(RED);
|
||||
TC_RED = isl_union_map_zip(TC_RED);
|
||||
|
||||
DEBUG(dbgs() << "Zipped Dependences:\n"; printScop(dbgs()); dbgs() << "\n");
|
||||
|
||||
|
@ -337,6 +367,7 @@ void Dependences::calculateDependences(Scop &S) {
|
|||
WAW = isl_union_set_unwrap(isl_union_map_domain(WAW));
|
||||
WAR = isl_union_set_unwrap(isl_union_map_domain(WAR));
|
||||
RED = isl_union_set_unwrap(isl_union_map_domain(RED));
|
||||
TC_RED = isl_union_set_unwrap(isl_union_map_domain(TC_RED));
|
||||
|
||||
DEBUG(dbgs() << "Unwrapped Dependences:\n"; printScop(dbgs());
|
||||
dbgs() << "\n");
|
||||
|
@ -349,6 +380,7 @@ void Dependences::calculateDependences(Scop &S) {
|
|||
WAW = isl_union_map_coalesce(WAW);
|
||||
WAR = isl_union_map_coalesce(WAR);
|
||||
RED = isl_union_map_coalesce(RED);
|
||||
TC_RED = isl_union_map_coalesce(TC_RED);
|
||||
|
||||
DEBUG(printScop(dbgs()));
|
||||
}
|
||||
|
@ -495,6 +527,8 @@ void Dependences::printScop(raw_ostream &OS) const {
|
|||
printDependencyMap(OS, WAW);
|
||||
OS << "\tReduction dependences:\n\t\t";
|
||||
printDependencyMap(OS, RED);
|
||||
OS << "\tTransitive closure of reduction dependences:\n\t\t";
|
||||
printDependencyMap(OS, TC_RED);
|
||||
}
|
||||
|
||||
void Dependences::releaseMemory() {
|
||||
|
@ -502,8 +536,9 @@ void Dependences::releaseMemory() {
|
|||
isl_union_map_free(WAR);
|
||||
isl_union_map_free(WAW);
|
||||
isl_union_map_free(RED);
|
||||
isl_union_map_free(TC_RED);
|
||||
|
||||
RED = RAW = WAR = WAW = nullptr;
|
||||
RED = RAW = WAR = WAW = TC_RED = nullptr;
|
||||
}
|
||||
|
||||
isl_union_map *Dependences::getDependences(int Kinds) {
|
||||
|
@ -523,6 +558,9 @@ isl_union_map *Dependences::getDependences(int Kinds) {
|
|||
if (Kinds & TYPE_RED)
|
||||
Deps = isl_union_map_union(Deps, isl_union_map_copy(RED));
|
||||
|
||||
if (Kinds & TYPE_TC_RED)
|
||||
Deps = isl_union_map_union(Deps, isl_union_map_copy(TC_RED));
|
||||
|
||||
Deps = isl_union_map_coalesce(Deps);
|
||||
Deps = isl_union_map_detect_equalities(Deps);
|
||||
return Deps;
|
||||
|
|
|
@ -91,11 +91,15 @@ printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer,
|
|||
if (Info->IsInnermostParallel) {
|
||||
Printer = isl_printer_start_line(Printer);
|
||||
Printer = isl_printer_print_str(Printer, "#pragma simd");
|
||||
if (Info->IsReductionParallel)
|
||||
Printer = isl_printer_print_str(Printer, " reduction");
|
||||
Printer = isl_printer_end_line(Printer);
|
||||
}
|
||||
if (Info->IsOutermostParallel) {
|
||||
Printer = isl_printer_start_line(Printer);
|
||||
Printer = isl_printer_print_str(Printer, "#pragma omp parallel for");
|
||||
if (Info->IsReductionParallel)
|
||||
Printer = isl_printer_print_str(Printer, " reduction");
|
||||
Printer = isl_printer_end_line(Printer);
|
||||
}
|
||||
}
|
||||
|
@ -124,6 +128,7 @@ static struct IslAstUser *allocateIslAstUser() {
|
|||
NodeInfo->Context = 0;
|
||||
NodeInfo->IsOutermostParallel = 0;
|
||||
NodeInfo->IsInnermostParallel = 0;
|
||||
NodeInfo->IsReductionParallel = false;
|
||||
return NodeInfo;
|
||||
}
|
||||
|
||||
|
@ -148,25 +153,17 @@ static void freeIslAstUser(void *Ptr) {
|
|||
// dimension if it is a subset of a map with equal values for the current
|
||||
// dimension.
|
||||
static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
|
||||
Dependences *D) {
|
||||
isl_union_map *Schedule, *Deps;
|
||||
__isl_take isl_union_map *Deps) {
|
||||
isl_union_map *Schedule;
|
||||
isl_map *ScheduleDeps, *Test;
|
||||
isl_space *ScheduleSpace;
|
||||
unsigned Dimension, IsParallel;
|
||||
|
||||
if (!D->hasValidDependences()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Schedule = isl_ast_build_get_schedule(Build);
|
||||
ScheduleSpace = isl_ast_build_get_schedule_space(Build);
|
||||
|
||||
Dimension = isl_space_dim(ScheduleSpace, isl_dim_out) - 1;
|
||||
|
||||
// FIXME: We can ignore reduction dependences in case we privatize the
|
||||
// memory locations the reduction statements reduce into.
|
||||
Deps = D->getDependences(Dependences::TYPE_RAW | Dependences::TYPE_WAW |
|
||||
Dependences::TYPE_WAR | Dependences::TYPE_RED);
|
||||
Deps = isl_union_map_apply_range(Deps, isl_union_map_copy(Schedule));
|
||||
Deps = isl_union_map_apply_domain(Deps, Schedule);
|
||||
|
||||
|
@ -192,6 +189,35 @@ static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
|
|||
return IsParallel;
|
||||
}
|
||||
|
||||
/// @brief Check if the current scheduling dimension is parallel
|
||||
///
|
||||
/// In case the dimension is parallel we also check if any reduction
|
||||
/// dependence is broken when we exploit this parallelism. If so,
|
||||
/// @p IsReductionParallel will be set to true. The reduction dependences we use
|
||||
/// to check are actually the union of the transitive closure of the initial
|
||||
/// reduction dependences together with their reversal. Even though these
|
||||
/// dependences connect all iterations with each other (thus they are cyclic)
|
||||
/// we can perform the parallelism check as we are only interested in a zero
|
||||
/// (or non-zero) dependence distance on the dimension in question.
|
||||
static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
|
||||
Dependences *D,
|
||||
bool &IsReductionParallel) {
|
||||
if (!D->hasValidDependences())
|
||||
return false;
|
||||
|
||||
isl_union_map *Deps = D->getDependences(
|
||||
Dependences::TYPE_RAW | Dependences::TYPE_WAW | Dependences::TYPE_WAR);
|
||||
if (!astScheduleDimIsParallel(Build, Deps))
|
||||
return false;
|
||||
|
||||
isl_union_map *RedDeps =
|
||||
D->getDependences(Dependences::TYPE_TC_RED);
|
||||
if (!astScheduleDimIsParallel(Build, RedDeps))
|
||||
IsReductionParallel = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Mark a for node openmp parallel, if it is the outermost parallel for node.
|
||||
static void markOpenmpParallel(__isl_keep isl_ast_build *Build,
|
||||
struct AstBuildUserInfo *BuildInfo,
|
||||
|
@ -199,7 +225,8 @@ static void markOpenmpParallel(__isl_keep isl_ast_build *Build,
|
|||
if (BuildInfo->InParallelFor)
|
||||
return;
|
||||
|
||||
if (astScheduleDimIsParallel(Build, BuildInfo->Deps)) {
|
||||
if (astScheduleDimIsParallel(Build, BuildInfo->Deps,
|
||||
NodeInfo->IsReductionParallel)) {
|
||||
BuildInfo->InParallelFor = 1;
|
||||
NodeInfo->IsOutermostParallel = 1;
|
||||
}
|
||||
|
@ -284,7 +311,8 @@ astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build,
|
|||
if (Info->IsOutermostParallel)
|
||||
BuildInfo->InParallelFor = 0;
|
||||
if (!containsLoops(isl_ast_node_for_get_body(Node)))
|
||||
if (astScheduleDimIsParallel(Build, BuildInfo->Deps))
|
||||
if (astScheduleDimIsParallel(Build, BuildInfo->Deps,
|
||||
Info->IsReductionParallel))
|
||||
Info->IsInnermostParallel = 1;
|
||||
if (!Info->Context)
|
||||
Info->Context = isl_ast_build_copy(Build);
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
; RUN: opt %loadPolly -basicaa -polly-dependences -analyze < %s | FileCheck %s
|
||||
;
|
||||
; This loopnest contains a reduction which imposes the same dependences as the
|
||||
; accesses to the array A. We need to ensure we keep the dependences of A.
|
||||
;
|
||||
; CHECK: RAW dependences:
|
||||
; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 >= 0 and i0 <= 1022 }
|
||||
; CHECK: WAR dependences:
|
||||
; CHECK: { }
|
||||
; CHECK: WAW dependences:
|
||||
; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 >= 0 and i0 <= 1022 }
|
||||
; CHECK: Reduction dependences:
|
||||
; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 <= 1022 and i0 >= 0 }
|
||||
;
|
||||
;
|
||||
; void AandSum(int *restrict sum, int *restrict A) {
|
||||
; for (int i = 0; i < 1024; i++) {
|
||||
; A[i] = A[i] + A[i - 1];
|
||||
; A[i - 1] = A[i] + A[i - 2];
|
||||
; *sum += i;
|
||||
; }
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
define void @AandSum(i32* noalias %sum, i32* noalias %A) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%exitcond = icmp ne i32 %i.0, 1024
|
||||
br i1 %exitcond, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%sub = add nsw i32 %i.0, -1
|
||||
%arrayidx1 = getelementptr inbounds i32* %A, i32 %sub
|
||||
%tmp1 = load i32* %arrayidx1, align 4
|
||||
%add = add nsw i32 %tmp, %tmp1
|
||||
%arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0
|
||||
store i32 %add, i32* %arrayidx2, align 4
|
||||
%sub4 = add nsw i32 %i.0, -2
|
||||
%arrayidx5 = getelementptr inbounds i32* %A, i32 %sub4
|
||||
%tmp2 = load i32* %arrayidx5, align 4
|
||||
%add6 = add nsw i32 %add, %tmp2
|
||||
%sub7 = add nsw i32 %i.0, -1
|
||||
%arrayidx8 = getelementptr inbounds i32* %A, i32 %sub7
|
||||
store i32 %add6, i32* %arrayidx8, align 4
|
||||
%tmp3 = load i32* %sum, align 4
|
||||
%add9 = add nsw i32 %tmp3, %i.0
|
||||
store i32 %add9, i32* %sum, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -9,7 +9,7 @@
|
|||
; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, -1 + i1] : i0 <= 1022 and i0 >= 0 and i1 <= 511 and i1 >= 2
|
||||
; CHECK-DAG: Stmt_for_body3[i0, 2] -> Stmt_for_body3[2 + i0, 0] : i0 <= 1021 and i0 >= 0
|
||||
; CHECK: Reduction dependences:
|
||||
; CHECK: { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 >= 0 and i0 <= 1022 }
|
||||
; CHECK: { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 <= 1022 and i0 >= 0 }
|
||||
;
|
||||
; void f(int *sum) {
|
||||
; for (int i = 0; i < 1024; i++)
|
||||
|
|
|
@ -53,7 +53,6 @@ ret:
|
|||
; Note that we do not delinearize this access function because it is considered
|
||||
; to already be affine: {{0,+,4}<%loop.i>,+,4096}<%loop.j>.
|
||||
|
||||
;
|
||||
; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
|
||||
; CHECK: #pragma simd
|
||||
; CHECK: #pragma omp parallel for
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
; RUN: opt %loadPolly -basicaa -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; This loopnest contains a reduction which imposes the same dependences as the
|
||||
; accesses to the array A. We need to ensure we do __not__ parallelize anything
|
||||
; here.
|
||||
;
|
||||
; CHECK-NOT: pragma
|
||||
; CHECK-NOT: reduction
|
||||
;
|
||||
; void AandSum(int *restrict sum, int *restrict A) {
|
||||
; for (int i = 0; i < 1024; i++) {
|
||||
; A[i] = A[i] + A[i - 1];
|
||||
; A[i - 1] = A[i] + A[i - 2];
|
||||
; *sum += i;
|
||||
; }
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
define void @AandSum(i32* noalias %sum, i32* noalias %A) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%exitcond = icmp ne i32 %i.0, 1024
|
||||
br i1 %exitcond, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%sub = add nsw i32 %i.0, -1
|
||||
%arrayidx1 = getelementptr inbounds i32* %A, i32 %sub
|
||||
%tmp1 = load i32* %arrayidx1, align 4
|
||||
%add = add nsw i32 %tmp, %tmp1
|
||||
%arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0
|
||||
store i32 %add, i32* %arrayidx2, align 4
|
||||
%sub4 = add nsw i32 %i.0, -2
|
||||
%arrayidx5 = getelementptr inbounds i32* %A, i32 %sub4
|
||||
%tmp2 = load i32* %arrayidx5, align 4
|
||||
%add6 = add nsw i32 %add, %tmp2
|
||||
%sub7 = add nsw i32 %i.0, -1
|
||||
%arrayidx8 = getelementptr inbounds i32* %A, i32 %sub7
|
||||
store i32 %add6, i32* %arrayidx8, align 4
|
||||
%tmp3 = load i32* %sum, align 4
|
||||
%add9 = add nsw i32 %tmp3, %i.0
|
||||
store i32 %add9, i32* %sum, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; Verify that we won't privatize anything in the outer dimension
|
||||
;
|
||||
; CHECK: #pragma omp parallel for
|
||||
; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1)
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
|
||||
; CHECK: Stmt_for_body3(c1, c3);
|
||||
;
|
||||
; void foo(int *A, long n) {
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; A[i] += i;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
define void @foo(i32* %A, i32 %n) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc4, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
|
||||
%mul = shl nsw i32 %n, 1
|
||||
%cmp = icmp slt i32 %i.0, %mul
|
||||
br i1 %cmp, label %for.body, label %for.end6
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
for.cond1: ; preds = %for.inc, %for.body
|
||||
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
|
||||
%exitcond = icmp ne i32 %j.0, 1024
|
||||
br i1 %exitcond, label %for.body3, label %for.end
|
||||
|
||||
for.body3: ; preds = %for.cond1
|
||||
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body3
|
||||
%inc = add nsw i32 %j.0, 1
|
||||
br label %for.cond1
|
||||
|
||||
for.end: ; preds = %for.cond1
|
||||
br label %for.inc4
|
||||
|
||||
for.inc4: ; preds = %for.end
|
||||
%inc5 = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end6: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-NOT: #pragma simd{{\s*$}}
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: Stmt_S0(n - c1)
|
||||
; CHECK: #pragma simd{{\s*$}}
|
||||
; CHECK: Stmt_S1(n - c1)
|
||||
;
|
||||
; void rlr(int *A, long n) {
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; S0: A[0] += i;
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; S1: A[i + 1] = 1;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
define void @rlr(i32* %A, i32 %n) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%mul = shl nsw i32 %n, 1
|
||||
%cmp = icmp slt i32 %i.0, %mul
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %S0
|
||||
|
||||
S0: ; preds = %for.body
|
||||
%tmp = load i32* %A, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %A, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %S0
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
br label %for.cond2
|
||||
|
||||
for.cond2: ; preds = %for.inc8, %for.end
|
||||
%i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ]
|
||||
%mul3 = shl nsw i32 %n, 1
|
||||
%cmp4 = icmp slt i32 %i1.0, %mul3
|
||||
br i1 %cmp4, label %for.body5, label %for.end10
|
||||
|
||||
for.body5: ; preds = %for.cond2
|
||||
br label %S1
|
||||
|
||||
S1: ; preds = %for.body5
|
||||
%add6 = add nsw i32 %i1.0, 1
|
||||
%arrayidx7 = getelementptr inbounds i32* %A, i32 %add6
|
||||
store i32 1, i32* %arrayidx7, align 4
|
||||
br label %for.inc8
|
||||
|
||||
for.inc8: ; preds = %S1
|
||||
%inc9 = add nsw i32 %i1.0, 1
|
||||
br label %for.cond2
|
||||
|
||||
for.end10: ; preds = %for.cond2
|
||||
ret void
|
||||
}
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-NOT: #pragma simd{{\s*$}}
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: Stmt_S0(2 * n - c1)
|
||||
; CHECK: #pragma simd{{\s*$}}
|
||||
; CHECK: Stmt_S1
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: Stmt_S0(2 * n - c1)
|
||||
; CHECK-NOT: #pragma simd{{\s*$}}
|
||||
;
|
||||
; void rmalrs(int *A, long n) {
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; S0: A[0] += i;
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; S1: A[i + 1] = 1;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
define void @rmalrs(i32* %A, i32 %n) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%mul = shl nsw i32 %n, 1
|
||||
%cmp = icmp slt i32 %i.0, %mul
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %S0
|
||||
|
||||
S0: ; preds = %for.body
|
||||
%tmp = load i32* %A, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %A, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %S0
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
br label %for.cond2
|
||||
|
||||
for.cond2: ; preds = %for.inc8, %for.end
|
||||
%i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ]
|
||||
%mul3 = shl nsw i32 %n, 1
|
||||
%cmp4 = icmp slt i32 %i1.0, %mul3
|
||||
br i1 %cmp4, label %for.body5, label %for.end10
|
||||
|
||||
for.body5: ; preds = %for.cond2
|
||||
br label %S1
|
||||
|
||||
S1: ; preds = %for.body5
|
||||
%add6 = add nsw i32 %i1.0, 1
|
||||
%arrayidx7 = getelementptr inbounds i32* %A, i32 %add6
|
||||
store i32 1, i32* %arrayidx7, align 4
|
||||
br label %for.inc8
|
||||
|
||||
for.inc8: ; preds = %S1
|
||||
%inc9 = add nsw i32 %i1.0, 1
|
||||
br label %for.cond2
|
||||
|
||||
for.end10: ; preds = %for.cond2
|
||||
ret void
|
||||
}
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; CHECK: #pragma omp parallel for reduction
|
||||
; CHECK: for (int c0 = 0; c0 <= 2; c0 += 1) {
|
||||
; CHECK: if (c0 == 2) {
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c1 = 1; c1 < 2 * n; c1 += 2)
|
||||
; CHECK: Stmt_S0(c1);
|
||||
; CHECK: } else if (c0 == 1) {
|
||||
; CHECK: #pragma simd
|
||||
; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1)
|
||||
; CHECK: Stmt_S1(c1);
|
||||
; CHECK: } else
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c1 = -2 * n + 2; c1 <= 0; c1 += 2)
|
||||
; CHECK: Stmt_S0(-c1);
|
||||
; CHECK: }
|
||||
;
|
||||
; void rmalrs2(int *A, long n) {
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; S0: A[0] += i;
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; S1: A[i + 1] = 1;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
define void @rmalrs2(i32* %A, i32 %n) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%mul = shl nsw i32 %n, 1
|
||||
%cmp = icmp slt i32 %i.0, %mul
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %S0
|
||||
|
||||
S0: ; preds = %for.body
|
||||
%tmp = load i32* %A, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %A, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %S0
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
br label %for.cond2
|
||||
|
||||
for.cond2: ; preds = %for.inc8, %for.end
|
||||
%i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ]
|
||||
%mul3 = shl nsw i32 %n, 1
|
||||
%cmp4 = icmp slt i32 %i1.0, %mul3
|
||||
br i1 %cmp4, label %for.body5, label %for.end10
|
||||
|
||||
for.body5: ; preds = %for.cond2
|
||||
br label %S1
|
||||
|
||||
S1: ; preds = %for.body5
|
||||
%add6 = add nsw i32 %i1.0, 1
|
||||
%arrayidx7 = getelementptr inbounds i32* %A, i32 %add6
|
||||
store i32 1, i32* %arrayidx7, align 4
|
||||
br label %for.inc8
|
||||
|
||||
for.inc8: ; preds = %S1
|
||||
%inc9 = add nsw i32 %i1.0, 1
|
||||
br label %for.cond2
|
||||
|
||||
for.end10: ; preds = %for.cond2
|
||||
ret void
|
||||
}
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-NOT: #pragma simd{{\s*$}}
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: Stmt_S0
|
||||
; CHECK: #pragma simd{{\s*$}}
|
||||
; CHECK: Stmt_S1
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: Stmt_S0
|
||||
; CHECK-NOT: #pragma simd{{\s*$}}
|
||||
;
|
||||
; void rms(int *A, long n) {
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; S0: A[0] += i;
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; S1: A[i + 1] = 1;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
define void @rms(i32* %A, i32 %n) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%mul = shl nsw i32 %n, 1
|
||||
%cmp = icmp slt i32 %i.0, %mul
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %S0
|
||||
|
||||
S0: ; preds = %for.body
|
||||
%tmp = load i32* %A, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %A, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %S0
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
br label %for.cond2
|
||||
|
||||
for.cond2: ; preds = %for.inc8, %for.end
|
||||
%i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ]
|
||||
%mul3 = shl nsw i32 %n, 1
|
||||
%cmp4 = icmp slt i32 %i1.0, %mul3
|
||||
br i1 %cmp4, label %for.body5, label %for.end10
|
||||
|
||||
for.body5: ; preds = %for.cond2
|
||||
br label %S1
|
||||
|
||||
S1: ; preds = %for.body5
|
||||
%add6 = add nsw i32 %i1.0, 1
|
||||
%arrayidx7 = getelementptr inbounds i32* %A, i32 %add6
|
||||
store i32 1, i32* %arrayidx7, align 4
|
||||
br label %for.inc8
|
||||
|
||||
for.inc8: ; preds = %S1
|
||||
%inc9 = add nsw i32 %i1.0, 1
|
||||
br label %for.cond2
|
||||
|
||||
for.end10: ; preds = %for.cond2
|
||||
ret void
|
||||
}
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; CHECK: #pragma omp parallel for
|
||||
; CHECK: for (int c0 = 0; c0 <= 1; c0 += 1) {
|
||||
; CHECK: if (c0 == 1) {
|
||||
; CHECK: for (int c1 = 1; c1 < 2 * n; c1 += 2)
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
|
||||
; CHECK: Stmt_for_body3(c1, c3);
|
||||
; CHECK: } else
|
||||
; CHECK: for (int c1 = 0; c1 < 2 * n - 1; c1 += 2)
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
|
||||
; CHECK: Stmt_for_body3(c1, c3);
|
||||
; CHECK: }
|
||||
;
|
||||
; void rmsmd(int *A, long n) {
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; A[i] += i;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
define void @rmsmd(i32* %A, i32 %n) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc4, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
|
||||
%mul = shl nsw i32 %n, 1
|
||||
%cmp = icmp slt i32 %i.0, %mul
|
||||
br i1 %cmp, label %for.body, label %for.end6
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
for.cond1: ; preds = %for.inc, %for.body
|
||||
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
|
||||
%exitcond = icmp ne i32 %j.0, 1024
|
||||
br i1 %exitcond, label %for.body3, label %for.end
|
||||
|
||||
for.body3: ; preds = %for.cond1
|
||||
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body3
|
||||
%inc = add nsw i32 %j.0, 1
|
||||
br label %for.cond1
|
||||
|
||||
for.end: ; preds = %for.cond1
|
||||
br label %for.inc4
|
||||
|
||||
for.inc4: ; preds = %for.end
|
||||
%inc5 = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end6: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; Verify that the outer dimension doesn't carry reduction dependences
|
||||
;
|
||||
; CHECK-NOT:#pragma omp parallel for reduction
|
||||
; CHECK: #pragma omp parallel for
|
||||
; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) {
|
||||
; CHECK: if (c1 % 2 == 0) {
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
|
||||
; CHECK: Stmt_for_body3(c1, c3);
|
||||
; CHECK: } else
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
|
||||
; CHECK: Stmt_for_body3(c1, c3);
|
||||
; CHECK: }
|
||||
;
|
||||
; void rmsmd2(int *A, long n) {
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; A[i] += i;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
; Loop nest with two levels. The outer induction variable %i.0 starts at 0 and
; the nest is left once %i.0 is no longer signed-less-than %n shifted left by 1.
; The inner induction variable %j.0 starts at 0 and the inner loop is left when
; it equals 1024. Each inner iteration adds %i.0 to the i32 at %A[%i.0].
define void @rmsmd2(i32* %A, i32 %n) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; Outer loop header.
for.cond: ; preds = %for.inc4, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
|
||||
; Outer loop bound: %n shifted left by one bit.
%mul = shl nsw i32 %n, 1
|
||||
%cmp = icmp slt i32 %i.0, %mul
|
||||
br i1 %cmp, label %for.body, label %for.end6
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
; Inner loop header.
for.cond1: ; preds = %for.inc, %for.body
|
||||
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
|
||||
; Stay in the inner loop while %j.0 differs from 1024.
%exitcond = icmp ne i32 %j.0, 1024
|
||||
br i1 %exitcond, label %for.body3, label %for.end
|
||||
|
||||
; Loop body: read-modify-write of a single array element.
for.body3: ; preds = %for.cond1
|
||||
; The address is computed from %A and %i.0 only; %j.0 is not involved.
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
; Inner loop latch: advance %j.0 by one.
for.inc: ; preds = %for.body3
|
||||
%inc = add nsw i32 %j.0, 1
|
||||
br label %for.cond1
|
||||
|
||||
for.end: ; preds = %for.cond1
|
||||
br label %for.inc4
|
||||
|
||||
; Outer loop latch: advance %i.0 by one.
for.inc4: ; preds = %for.end
|
||||
%inc5 = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end6: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; Verify that the outer dimension doesn't carry reduction dependences
|
||||
;
|
||||
; CHECK-NOT:#pragma omp parallel for reduction
|
||||
; CHECK: #pragma omp parallel for
|
||||
; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1)
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) {
|
||||
; CHECK: if (c3 % 2 == 0) {
|
||||
; CHECK: Stmt_for_body3(c1, c3);
|
||||
; CHECK: } else
|
||||
; CHECK: Stmt_for_body3(c1, c3);
|
||||
; CHECK: }
|
||||
;
|
||||
; void rmsmd3(int *A, long n) {
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; A[i] += i;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
; Loop nest with two levels. The outer induction variable %i.0 starts at 0 and
; the nest is left once %i.0 is no longer signed-less-than %n shifted left by 1.
; The inner induction variable %j.0 starts at 0 and the inner loop is left when
; it equals 1024. Each inner iteration adds %i.0 to the i32 at %A[%i.0].
define void @rmsmd3(i32* %A, i32 %n) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; Outer loop header.
for.cond: ; preds = %for.inc4, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
|
||||
; Outer loop bound: %n shifted left by one bit.
%mul = shl nsw i32 %n, 1
|
||||
%cmp = icmp slt i32 %i.0, %mul
|
||||
br i1 %cmp, label %for.body, label %for.end6
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
; Inner loop header.
for.cond1: ; preds = %for.inc, %for.body
|
||||
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
|
||||
; Stay in the inner loop while %j.0 differs from 1024.
%exitcond = icmp ne i32 %j.0, 1024
|
||||
br i1 %exitcond, label %for.body3, label %for.end
|
||||
|
||||
; Loop body: read-modify-write of a single array element.
for.body3: ; preds = %for.cond1
|
||||
; The address is computed from %A and %i.0 only; %j.0 is not involved.
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
; Inner loop latch: advance %j.0 by one.
for.inc: ; preds = %for.body3
|
||||
%inc = add nsw i32 %j.0, 1
|
||||
br label %for.cond1
|
||||
|
||||
for.end: ; preds = %for.cond1
|
||||
br label %for.inc4
|
||||
|
||||
; Outer loop latch: advance %i.0 by one.
for.inc4: ; preds = %for.end
|
||||
%inc5 = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end6: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; Verify that the outer dimension doesn't carry reduction dependences
|
||||
;
|
||||
; CHECK-NOT:#pragma omp parallel for reduction
|
||||
; CHECK: #pragma omp parallel for
|
||||
; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1)
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c3 = -1022; c3 <= 1023; c3 += 1) {
|
||||
; CHECK: if ((c3 + 1022) % 2 == 0 && c3 <= 0) {
|
||||
; CHECK: Stmt_for_body3(c1, -c3);
|
||||
; CHECK: } else if ((c3 + 1023) % 2 == 0 && c3 >= 1)
|
||||
; CHECK: Stmt_for_body3(c1, c3);
|
||||
; CHECK: }
|
||||
;
|
||||
; void rmsmd4(int *A, long n) {
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; A[i] += i;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
; Loop nest with two levels. The outer induction variable %i.0 starts at 0 and
; the nest is left once %i.0 is no longer signed-less-than %n shifted left by 1.
; The inner induction variable %j.0 starts at 0 and the inner loop is left when
; it equals 1024. Each inner iteration adds %i.0 to the i32 at %A[%i.0].
define void @rmsmd4(i32* %A, i32 %n) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; Outer loop header.
for.cond: ; preds = %for.inc4, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
|
||||
; Outer loop bound: %n shifted left by one bit.
%mul = shl nsw i32 %n, 1
|
||||
%cmp = icmp slt i32 %i.0, %mul
|
||||
br i1 %cmp, label %for.body, label %for.end6
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
; Inner loop header.
for.cond1: ; preds = %for.inc, %for.body
|
||||
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
|
||||
; Stay in the inner loop while %j.0 differs from 1024.
%exitcond = icmp ne i32 %j.0, 1024
|
||||
br i1 %exitcond, label %for.body3, label %for.end
|
||||
|
||||
; Loop body: read-modify-write of a single array element.
for.body3: ; preds = %for.cond1
|
||||
; The address is computed from %A and %i.0 only; %j.0 is not involved.
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
; Inner loop latch: advance %j.0 by one.
for.inc: ; preds = %for.body3
|
||||
%inc = add nsw i32 %j.0, 1
|
||||
br label %for.cond1
|
||||
|
||||
for.end: ; preds = %for.cond1
|
||||
br label %for.inc4
|
||||
|
||||
; Outer loop latch: advance %i.0 by one.
for.inc4: ; preds = %for.end
|
||||
%inc5 = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end6: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; Verify that only the outer dimension needs privatization
|
||||
;
|
||||
; CHECK: #pragma omp parallel for reduction
|
||||
; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1) {
|
||||
; CHECK: if (c1 % 2 == 0) {
|
||||
; CHECK-NOT: #pragma simd reduction
|
||||
; CHECK: #pragma simd
|
||||
; CHECK: for (int c3 = 0; c3 < 2 * n; c3 += 1)
|
||||
; CHECK: Stmt_for_body3(c3, c1);
|
||||
; CHECK: } else
|
||||
; CHECK-NOT: #pragma simd reduction
|
||||
; CHECK: #pragma simd
|
||||
; CHECK: for (int c3 = -2 * n + 1; c3 <= 0; c3 += 1)
|
||||
; CHECK: Stmt_for_body3(-c3, c1);
|
||||
; CHECK: }
|
||||
;
|
||||
; void rmsmd5(int *A, long n) {
|
||||
; for (long i = 0; i < 2 * n; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; A[i] += i;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
; Loop nest with two levels. The outer induction variable %i.0 starts at 0 and
; the nest is left once %i.0 is no longer signed-less-than %n shifted left by 1.
; The inner induction variable %j.0 starts at 0 and the inner loop is left when
; it equals 1024. Each inner iteration adds %i.0 to the i32 at %A[%i.0].
define void @rmsmd5(i32* %A, i32 %n) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; Outer loop header.
for.cond: ; preds = %for.inc4, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
|
||||
; Outer loop bound: %n shifted left by one bit.
%mul = shl nsw i32 %n, 1
|
||||
%cmp = icmp slt i32 %i.0, %mul
|
||||
br i1 %cmp, label %for.body, label %for.end6
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
; Inner loop header.
for.cond1: ; preds = %for.inc, %for.body
|
||||
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
|
||||
; Stay in the inner loop while %j.0 differs from 1024.
%exitcond = icmp ne i32 %j.0, 1024
|
||||
br i1 %exitcond, label %for.body3, label %for.end
|
||||
|
||||
; Loop body: read-modify-write of a single array element.
for.body3: ; preds = %for.cond1
|
||||
; The address is computed from %A and %i.0 only; %j.0 is not involved.
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
; Inner loop latch: advance %j.0 by one.
for.inc: ; preds = %for.body3
|
||||
%inc = add nsw i32 %j.0, 1
|
||||
br label %for.cond1
|
||||
|
||||
for.end: ; preds = %for.cond1
|
||||
br label %for.inc4
|
||||
|
||||
; Outer loop latch: advance %i.0 by one.
for.inc4: ; preds = %for.end
|
||||
%inc5 = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end6: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-NOT:#pragma omp parallel for reduction
|
||||
; CHECK: #pragma omp parallel for
|
||||
; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1)
|
||||
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1)
|
||||
; CHECK: Stmt_for_body6(c1, c3, c5);
|
||||
;
|
||||
; void rmd(int *A) {
|
||||
; for (long i = 0; i < 2048; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; for (long k = 0; k < 512; k++)
|
||||
; A[i] += i;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
; Loop nest with three levels and constant bounds: %i.0 starts at 0 and its loop
; is left when it equals 2048, %j.0 starts at 0 and its loop is left when it
; equals 1024, %k.0 starts at 0 and its loop is left when it equals 512.
; Each innermost iteration adds %i.0 to the i32 at %A[%i.0].
define void @rmd(i32* %A) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; Outermost loop header.
for.cond: ; preds = %for.inc10, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
|
||||
%exitcond2 = icmp ne i32 %i.0, 2048
|
||||
br i1 %exitcond2, label %for.body, label %for.end12
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
; Middle loop header.
for.cond1: ; preds = %for.inc7, %for.body
|
||||
%j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ]
|
||||
%exitcond1 = icmp ne i32 %j.0, 1024
|
||||
br i1 %exitcond1, label %for.body3, label %for.end9
|
||||
|
||||
for.body3: ; preds = %for.cond1
|
||||
br label %for.cond4
|
||||
|
||||
; Innermost loop header.
for.cond4: ; preds = %for.inc, %for.body3
|
||||
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
|
||||
%exitcond = icmp ne i32 %k.0, 512
|
||||
br i1 %exitcond, label %for.body6, label %for.end
|
||||
|
||||
; Loop body: read-modify-write of a single array element.
for.body6: ; preds = %for.cond4
|
||||
; The address is computed from %A and %i.0 only; %j.0 and %k.0 are not involved.
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
; Innermost loop latch: advance %k.0 by one.
for.inc: ; preds = %for.body6
|
||||
%inc = add nsw i32 %k.0, 1
|
||||
br label %for.cond4
|
||||
|
||||
for.end: ; preds = %for.cond4
|
||||
br label %for.inc7
|
||||
|
||||
; Middle loop latch: advance %j.0 by one.
for.inc7: ; preds = %for.end
|
||||
%inc8 = add nsw i32 %j.0, 1
|
||||
br label %for.cond1
|
||||
|
||||
for.end9: ; preds = %for.cond1
|
||||
br label %for.inc10
|
||||
|
||||
; Outermost loop latch: advance %i.0 by one.
for.inc10: ; preds = %for.end9
|
||||
%inc11 = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end12: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-NOT:#pragma omp parallel for reduction
|
||||
; CHECK: #pragma omp parallel for
|
||||
; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1)
|
||||
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1)
|
||||
; CHECK: Stmt_for_body6(c1, c3, c5);
|
||||
;
|
||||
; void rmd2(int *A) {
|
||||
; for (long i = 0; i < 2048; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; for (long k = 0; k < 512; k++)
|
||||
; A[i] += i;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
; Loop nest with three levels and constant bounds: %i.0 starts at 0 and its loop
; is left when it equals 2048, %j.0 starts at 0 and its loop is left when it
; equals 1024, %k.0 starts at 0 and its loop is left when it equals 512.
; Each innermost iteration adds %i.0 to the i32 at %A[%i.0].
define void @rmd2(i32* %A) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; Outermost loop header.
for.cond: ; preds = %for.inc10, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
|
||||
%exitcond2 = icmp ne i32 %i.0, 2048
|
||||
br i1 %exitcond2, label %for.body, label %for.end12
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
; Middle loop header.
for.cond1: ; preds = %for.inc7, %for.body
|
||||
%j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ]
|
||||
%exitcond1 = icmp ne i32 %j.0, 1024
|
||||
br i1 %exitcond1, label %for.body3, label %for.end9
|
||||
|
||||
for.body3: ; preds = %for.cond1
|
||||
br label %for.cond4
|
||||
|
||||
; Innermost loop header.
for.cond4: ; preds = %for.inc, %for.body3
|
||||
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
|
||||
%exitcond = icmp ne i32 %k.0, 512
|
||||
br i1 %exitcond, label %for.body6, label %for.end
|
||||
|
||||
; Loop body: read-modify-write of a single array element.
for.body6: ; preds = %for.cond4
|
||||
; The address is computed from %A and %i.0 only; %j.0 and %k.0 are not involved.
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
; Innermost loop latch: advance %k.0 by one.
for.inc: ; preds = %for.body6
|
||||
%inc = add nsw i32 %k.0, 1
|
||||
br label %for.cond4
|
||||
|
||||
for.end: ; preds = %for.cond4
|
||||
br label %for.inc7
|
||||
|
||||
; Middle loop latch: advance %j.0 by one.
for.inc7: ; preds = %for.end
|
||||
%inc8 = add nsw i32 %j.0, 1
|
||||
br label %for.cond1
|
||||
|
||||
for.end9: ; preds = %for.cond1
|
||||
br label %for.inc10
|
||||
|
||||
; Outermost loop latch: advance %i.0 by one.
for.inc10: ; preds = %for.end9
|
||||
%inc11 = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end12: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-NOT:#pragma omp parallel for reduction
|
||||
; CHECK: #pragma omp parallel for
|
||||
; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1)
|
||||
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1)
|
||||
; CHECK: Stmt_for_body6(c1, c3, c5);
|
||||
;
|
||||
; void rmd3(int *A) {
|
||||
; for (long i = 0; i < 2048; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; for (long k = 0; k < 512; k++)
|
||||
; A[i] += i;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
; Loop nest with three levels and constant bounds: %i.0 starts at 0 and its loop
; is left when it equals 2048, %j.0 starts at 0 and its loop is left when it
; equals 1024, %k.0 starts at 0 and its loop is left when it equals 512.
; Each innermost iteration adds %i.0 to the i32 at %A[%i.0].
define void @rmd3(i32* %A) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; Outermost loop header.
for.cond: ; preds = %for.inc10, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
|
||||
%exitcond2 = icmp ne i32 %i.0, 2048
|
||||
br i1 %exitcond2, label %for.body, label %for.end12
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
; Middle loop header.
for.cond1: ; preds = %for.inc7, %for.body
|
||||
%j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ]
|
||||
%exitcond1 = icmp ne i32 %j.0, 1024
|
||||
br i1 %exitcond1, label %for.body3, label %for.end9
|
||||
|
||||
for.body3: ; preds = %for.cond1
|
||||
br label %for.cond4
|
||||
|
||||
; Innermost loop header.
for.cond4: ; preds = %for.inc, %for.body3
|
||||
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
|
||||
%exitcond = icmp ne i32 %k.0, 512
|
||||
br i1 %exitcond, label %for.body6, label %for.end
|
||||
|
||||
; Loop body: read-modify-write of a single array element.
for.body6: ; preds = %for.cond4
|
||||
; The address is computed from %A and %i.0 only; %j.0 and %k.0 are not involved.
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
; Innermost loop latch: advance %k.0 by one.
for.inc: ; preds = %for.body6
|
||||
%inc = add nsw i32 %k.0, 1
|
||||
br label %for.cond4
|
||||
|
||||
for.end: ; preds = %for.cond4
|
||||
br label %for.inc7
|
||||
|
||||
; Middle loop latch: advance %j.0 by one.
for.inc7: ; preds = %for.end
|
||||
%inc8 = add nsw i32 %j.0, 1
|
||||
br label %for.cond1
|
||||
|
||||
for.end9: ; preds = %for.cond1
|
||||
br label %for.inc10
|
||||
|
||||
; Outermost loop latch: advance %i.0 by one.
for.inc10: ; preds = %for.end9
|
||||
%inc11 = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end12: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-NOT:#pragma omp parallel for reduction
|
||||
; CHECK: #pragma omp parallel for
|
||||
; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1)
|
||||
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
|
||||
; CHECK: #pragma simd reduction
|
||||
; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1)
|
||||
; CHECK: Stmt_for_body6(c1, c3, c5);
|
||||
;
|
||||
; void rmd4(int *A) {
|
||||
; for (long i = 0; i < 2048; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; for (long k = 0; k < 512; k++)
|
||||
; A[i] += i;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
; Loop nest with three levels and constant bounds: %i.0 starts at 0 and its loop
; is left when it equals 2048, %j.0 starts at 0 and its loop is left when it
; equals 1024, %k.0 starts at 0 and its loop is left when it equals 512.
; Each innermost iteration adds %i.0 to the i32 at %A[%i.0].
define void @rmd4(i32* %A) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; Outermost loop header.
for.cond: ; preds = %for.inc10, %entry
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
|
||||
%exitcond2 = icmp ne i32 %i.0, 2048
|
||||
br i1 %exitcond2, label %for.body, label %for.end12
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
; Middle loop header.
for.cond1: ; preds = %for.inc7, %for.body
|
||||
%j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ]
|
||||
%exitcond1 = icmp ne i32 %j.0, 1024
|
||||
br i1 %exitcond1, label %for.body3, label %for.end9
|
||||
|
||||
for.body3: ; preds = %for.cond1
|
||||
br label %for.cond4
|
||||
|
||||
; Innermost loop header.
for.cond4: ; preds = %for.inc, %for.body3
|
||||
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
|
||||
%exitcond = icmp ne i32 %k.0, 512
|
||||
br i1 %exitcond, label %for.body6, label %for.end
|
||||
|
||||
; Loop body: read-modify-write of a single array element.
for.body6: ; preds = %for.cond4
|
||||
; The address is computed from %A and %i.0 only; %j.0 and %k.0 are not involved.
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
|
||||
%tmp = load i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %tmp, %i.0
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
; Innermost loop latch: advance %k.0 by one.
for.inc: ; preds = %for.body6
|
||||
%inc = add nsw i32 %k.0, 1
|
||||
br label %for.cond4
|
||||
|
||||
for.end: ; preds = %for.cond4
|
||||
br label %for.inc7
|
||||
|
||||
; Middle loop latch: advance %j.0 by one.
for.inc7: ; preds = %for.end
|
||||
%inc8 = add nsw i32 %j.0, 1
|
||||
br label %for.cond1
|
||||
|
||||
for.end9: ; preds = %for.cond1
|
||||
br label %for.inc10
|
||||
|
||||
; Outermost loop latch: advance %i.0 by one.
for.inc10: ; preds = %for.end9
|
||||
%inc11 = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end12: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
|
||||
"name" : "for.cond => for.end10",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
|
||||
"name" : "Stmt_S0",
|
||||
"schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, n - i0, 0] }"
|
||||
},
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
|
||||
"name" : "Stmt_S1",
|
||||
"schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, n - i0, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
|
||||
"name" : "for.cond => for.end10",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
|
||||
"name" : "Stmt_S0",
|
||||
"schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, -i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, i0, 0]: i0 % 2 = 1 }"
|
||||
},
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
|
||||
"name" : "Stmt_S1",
|
||||
"schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
|
||||
"name" : "for.cond => for.end10",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
|
||||
"name" : "Stmt_S0",
|
||||
"schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, 2 * n - i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, 2 * n - i0, 0]: i0 % 2 = 1 }"
|
||||
},
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
|
||||
"name" : "Stmt_S1",
|
||||
"schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"context" : "{ : }",
|
||||
"name" : "for.cond => for.end12",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }",
|
||||
"name" : "Stmt_for_body6",
|
||||
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i1, 0, i0, 0, i2, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"context" : "{ : }",
|
||||
"name" : "for.cond => for.end12",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }",
|
||||
"name" : "Stmt_for_body6",
|
||||
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i2, 0, i1, 0, i0, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"context" : "{ : }",
|
||||
"name" : "for.cond => for.end12",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }",
|
||||
"name" : "Stmt_for_body6",
|
||||
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i2, 0, i0, 0, i1, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"context" : "{ : }",
|
||||
"name" : "for.cond => for.end12",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }",
|
||||
"name" : "Stmt_for_body6",
|
||||
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
|
||||
"name" : "for.cond => for.end10",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
|
||||
"name" : "Stmt_S0",
|
||||
"schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, i0, 0]: i0 % 2 = 1 }"
|
||||
},
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
|
||||
"name" : "Stmt_S1",
|
||||
"schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
|
||||
"name" : "for.cond => for.end6",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i0 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 1, i1, 0]: i0 % 2 = 1 }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
|
||||
"name" : "for.cond => for.end6",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 1]: i1 % 2 = 1 }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
|
||||
"name" : "for.cond => for.end6",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, -i1, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 1]: i1 % 2 = 1 }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
|
||||
"name" : "for.cond => for.end6",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i1, 0, i0, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i1, 1, -i0, 0]: i1 % 2 = 1 }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
|
||||
"name" : "for.cond => for.end6",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i0 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[1, i0, 0, i1, 0]: i0 % 2 = 1 }"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
|
||||
;
|
||||
; CHECK: pragma simd reduction
|
||||
; CHECK: pragma omp parallel for reduction
|
||||
;
|
||||
; int prod;
|
||||
; void f() {
|
||||
; for (int i = 0; i < 100; i++)
|
||||
; prod *= i + 3;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
; Zero-initialized i32 global; @f loads it, multiplies it and stores it back.
@prod = common global i32 0, align 4
|
||||
|
||||
; Single loop: %i1.0 starts at 0 and the loop is left when it equals 100. Each
; iteration multiplies the global @prod by (%i1.0 + 3) and stores the product
; back to @prod.
define void @f() {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; Loop header.
for.cond: ; preds = %for.inc, %entry
|
||||
%i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
; Stay in the loop while %i1.0 differs from 100.
%exitcond = icmp ne i32 %i1.0, 100
|
||||
br i1 %exitcond, label %for.body, label %for.end
|
||||
|
||||
; Loop body: read-modify-write of @prod.
for.body: ; preds = %for.cond
|
||||
; The multiplier is the induction variable plus 3, not the bare induction variable.
%add2 = add nsw i32 %i1.0, 3
|
||||
%tmp1 = load i32* @prod, align 4
|
||||
%mul3 = mul nsw i32 %tmp1, %add2
|
||||
store i32 %mul3, i32* @prod, align 4
|
||||
br label %for.inc
|
||||
|
||||
; Loop latch: advance %i1.0 by one.
for.inc: ; preds = %for.body
|
||||
%inc = add nsw i32 %i1.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue