Add OpenMP code generation to isl backend

This backend supports besides the classical code generation the upcoming SCEV
based code generation (which the existing CLooG backend does not support
robustly).

OpenMP code generation in the isl backend benefits from our run-time alias
checks such that the set of loops that can possibly be parallelized is a lot
larger.

The code was tested on LNT. We do not regress on builds without -polly-parallel.
When using -polly-parallel most tests work flawlessly, but a few issues still
remain and will be addressed in follow up commits.

SCEV/non-SCEV codegen:
  - Compile time failure in ldecod and TimberWolfMC due a problem in our
    run-time alias check generation triggered by pointers that escape through
    the OpenMP subfunction (OpenMP specific).

  - Several execution time failures. Due to the larger set of loops that we now
    parallelize (compared to the classical code generation),  we currently run
    into some timeouts in tests with a lot loops that have a low trip count and
    are slowed down by parallelizing them.

SCEV only:

  - One existing failure in lencod due to llvm.org/PR21204 (not OpenMP specific)

OpenMP code generation is the last feature that was only available in the CLooG
backend. With the isl backend being the only one supporting features such as
run-time alias checks and delinearization, we will soon switch to use the isl
ast generator by the default and subsequently remove our dependency on CLooG.

http://reviews.llvm.org/D5517

llvm-svn: 222088
This commit is contained in:
Tobias Grosser 2014-11-15 21:32:53 +00:00
parent 6d675f4e35
commit e3c0558e35
16 changed files with 1047 additions and 11 deletions

View File

@ -14,9 +14,9 @@
#include "polly/CodeGen/IRBuilder.h"
#include "isl/ast.h"
#include "llvm/ADT/MapVector.h"
#include <map>
#include "isl/ast.h"
namespace llvm {
class SCEVExpander;
@ -81,7 +81,7 @@ namespace polly {
class IslExprBuilder {
public:
/// @brief A map from isl_ids to llvm::Values.
typedef std::map<isl_id *, llvm::Value *> IDToValueTy;
typedef llvm::MapVector<isl_id *, llvm::Value *> IDToValueTy;
/// @brief Construct an IslExprBuilder.
///
@ -125,7 +125,7 @@ public:
private:
PollyIRBuilder &Builder;
std::map<isl_id *, llvm::Value *> &IDToValue;
IDToValueTy &IDToValue;
/// @brief A SCEVExpander to translate dimension sizes to llvm values.
llvm::SCEVExpander &Expander;

View File

@ -12,6 +12,7 @@
#ifndef POLLY_SCEV_VALIDATOR_H
#define POLLY_SCEV_VALIDATOR_H
#include "llvm/ADT/SetVector.h"
#include <vector>
namespace llvm {
@ -19,9 +20,24 @@ class Region;
class SCEV;
class ScalarEvolution;
class Value;
class Loop;
}
namespace polly {
/// @brief Find the loops referenced from a SCEV expression.
///
/// @param Expr The SCEV expression to scan for loops.
/// @param Loops A vector into which the found loops are inserted.
void findLoops(const llvm::SCEV *Expr,
llvm::SetVector<const llvm::Loop *> &Loops);
/// @brief Find the values referenced by SCEVUnknowns in a given SCEV
/// expression.
///
/// @param Expr The SCEV expression to scan for SCEVUnknowns.
/// @param Expr A vector into which the found values are inserted.
void findValues(const llvm::SCEV *Expr, llvm::SetVector<llvm::Value *> &Values);
/// Returns true when the SCEV contains references to instructions within the
/// region.
///

View File

@ -30,7 +30,11 @@
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/TempScopInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
@ -56,11 +60,12 @@ using namespace llvm;
class IslNodeBuilder {
public:
IslNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P,
LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT)
: Builder(Builder), Annotator(Annotator),
const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE,
DominatorTree &DT, Scop &S)
: S(S), Builder(Builder), Annotator(Annotator),
Rewriter(new SCEVExpander(SE, "polly")),
ExprBuilder(Builder, IDToValue, *Rewriter), P(P), LI(LI), SE(SE),
DT(DT) {}
ExprBuilder(Builder, IDToValue, *Rewriter), P(P), DL(DL), LI(LI),
SE(SE), DT(DT) {}
~IslNodeBuilder() { delete Rewriter; }
@ -69,6 +74,7 @@ public:
IslExprBuilder &getExprBuilder() { return ExprBuilder; }
private:
Scop &S;
PollyIRBuilder &Builder;
ScopAnnotator &Annotator;
@ -77,10 +83,17 @@ private:
IslExprBuilder ExprBuilder;
Pass *P;
const DataLayout &DL;
LoopInfo &LI;
ScalarEvolution &SE;
DominatorTree &DT;
/// @brief The current iteration of out-of-scop loops
///
/// This map provides for a given loop a llvm::Value that contains the current
/// loop iteration.
LoopToScevMapT OutsideLoopIterations;
// This maps an isl_id* to the Value* it has in the generated program. For now
// on, the only isl_ids that are stored here are the newly calculated loop
// ivs.
@ -95,6 +108,12 @@ private:
/// @param Expr The expression to code generate.
Value *generateSCEV(const SCEV *Expr);
/// A set of Value -> Value remappings to apply when generating new code.
///
/// When generating new code for a ScopStmt this map is used to map certain
/// llvm::Values to new llvm::Values.
ValueMapT ValueMap;
// Extract the upper bound of this loop
//
// The isl code generation can generate arbitrary expressions to check if the
@ -119,10 +138,49 @@ private:
unsigned getNumberOfIterations(__isl_keep isl_ast_node *For);
/// Compute the values and loops referenced in this subtree.
///
/// This function looks at all ScopStmts scheduled below the provided For node
/// and finds the llvm::Value[s] and llvm::Loops[s] which are referenced but
/// not locally defined.
///
/// Values that can be synthesized or that are available as globals are
/// considered locally defined.
///
/// Loops that contain the scop or that are part of the scop are considered
/// locally defined. Loops that are before the scop, but do not contain the
/// scop itself are considered not locally defined.
///
/// @param For The node defining the subtree.
/// @param Values A vector that will be filled with the Values referenced in
/// this subtree.
/// @param Loops A vector that will be filled with the Loops referenced in
/// this subtree.
void getReferencesInSubtree(__isl_keep isl_ast_node *For,
SetVector<Value *> &Values,
SetVector<const Loop *> &Loops);
/// Change the llvm::Value(s) used for code generation.
///
/// When generating code certain values (e.g., references to induction
/// variables or array base pointers) in the original code may be replaced by
/// new values. This function allows to (partially) update the set of values
/// used. A typical use case for this function is the case when we continue
/// code generation in a subfunction/kernel function and need to explicitly
/// pass down certain values.
///
/// @param NewValues A map that maps certain llvm::Values to new llvm::Values.
void updateValues(ParallelLoopGenerator::ValueToValueMapTy &NewValues);
void createFor(__isl_take isl_ast_node *For);
void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
void createForSequential(__isl_take isl_ast_node *For);
/// Create LLVM-IR that executes a for node thread parallel.
///
/// @param For The FOR isl_ast_node for which code is generated.
void createForParallel(__isl_take isl_ast_node *For);
/// Generate LLVM-IR that computes the values of the original induction
/// variables in function of the newly generated loop induction variables.
///
@ -238,6 +296,98 @@ unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) {
return NumberOfIterations + 1;
}
struct FindValuesUser {
LoopInfo &LI;
ScalarEvolution &SE;
Region &R;
SetVector<Value *> &Values;
SetVector<const SCEV *> &SCEVs;
};
/// Extract the values and SCEVs needed to generate code for a ScopStmt.
///
/// This function extracts a ScopStmt from a given isl_set and computes the
/// Values this statement depends on as well as a set of SCEV expressions that
/// need to be synthesized when generating code for this statment.
static int findValuesInStmt(isl_set *Set, void *UserPtr) {
isl_id *Id = isl_set_get_tuple_id(Set);
struct FindValuesUser &User = *static_cast<struct FindValuesUser *>(UserPtr);
const ScopStmt *Stmt = static_cast<const ScopStmt *>(isl_id_get_user(Id));
const BasicBlock *BB = Stmt->getBasicBlock();
// Check all the operands of instructions in the basic block.
for (const Instruction &Inst : *BB) {
for (Value *SrcVal : Inst.operands()) {
if (Instruction *OpInst = dyn_cast<Instruction>(SrcVal))
if (canSynthesize(OpInst, &User.LI, &User.SE, &User.R)) {
User.SCEVs.insert(
User.SE.getSCEVAtScope(OpInst, User.LI.getLoopFor(BB)));
continue;
}
if (Instruction *OpInst = dyn_cast<Instruction>(SrcVal))
if (Stmt->getParent()->getRegion().contains(OpInst))
continue;
if (isa<Instruction>(SrcVal) || isa<Argument>(SrcVal))
User.Values.insert(SrcVal);
}
}
isl_id_free(Id);
isl_set_free(Set);
return 0;
}
void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For,
SetVector<Value *> &Values,
SetVector<const Loop *> &Loops) {
SetVector<const SCEV *> SCEVs;
struct FindValuesUser FindValues = {LI, SE, S.getRegion(), Values, SCEVs};
for (const auto &I : IDToValue)
Values.insert(I.second);
for (const auto &I : OutsideLoopIterations)
Values.insert(cast<SCEVUnknown>(I.second)->getValue());
isl_union_set *Schedule = isl_union_map_domain(IslAstInfo::getSchedule(For));
isl_union_set_foreach_set(Schedule, findValuesInStmt, &FindValues);
isl_union_set_free(Schedule);
for (const SCEV *Expr : SCEVs) {
findValues(Expr, Values);
findLoops(Expr, Loops);
}
Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); });
/// Remove loops that contain the scop or that are part of the scop, as they
/// are considered local. This leaves only loops that are before the scop, but
/// do not contain the scop itself.
Loops.remove_if([this](const Loop *L) {
return this->S.getRegion().contains(L) ||
L->contains(S.getRegion().getEntry());
});
}
void IslNodeBuilder::updateValues(
ParallelLoopGenerator::ValueToValueMapTy &NewValues) {
SmallPtrSet<Value *, 5> Inserted;
for (const auto &I : IDToValue) {
IDToValue[I.first] = NewValues[I.second];
Inserted.insert(I.second);
}
for (const auto &I : NewValues) {
if (Inserted.count(I.first))
continue;
ValueMap[I.first] = I.second;
}
}
void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
std::vector<Value *> &IVS,
__isl_take isl_id *IteratorID,
@ -315,7 +465,7 @@ void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
llvm_unreachable("Unhandled isl_ast_node in vectorizer");
}
IDToValue.erase(IteratorID);
IDToValue.erase(IDToValue.find(IteratorID));
isl_id_free(IteratorID);
isl_union_map_free(Schedule);
@ -379,7 +529,7 @@ void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) {
Annotator.popLoop(Parallel);
IDToValue.erase(IteratorID);
IDToValue.erase(IDToValue.find(IteratorID));
Builder.SetInsertPoint(ExitBlock->begin());
@ -388,6 +538,139 @@ void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) {
isl_id_free(IteratorID);
}
/// @brief Remove the BBs contained in a (sub)function from the dominator tree.
///
/// This function removes the basic blocks that are part of a subfunction from
/// the dominator tree. Specifically, when generating code it may happen that at
/// some point the code generation continues in a new sub-function (e.g., when
/// generating OpenMP code). The basic blocks that are created in this
/// sub-function are then still part of the dominator tree of the original
/// function, such that the dominator tree reaches over function boundaries.
/// This is not only incorrect, but also causes crashes. This function now
/// removes from the dominator tree all basic blocks that are dominated (and
/// consequently reachable) from the entry block of this (sub)function.
///
/// FIXME: A LLVM (function or region) pass should not touch anything outside of
/// the function/region it runs on. Hence, the pure need for this function shows
/// that we do not comply to this rule. At the moment, this does not cause any
/// issues, but we should be aware that such issues may appear. Unfortunately
/// the current LLVM pass infrastructure does not allow to make Polly a module
/// or call-graph pass to solve this issue, as such a pass would not have access
/// to the per-function analyses passes needed by Polly. A future pass manager
/// infrastructure is supposed to enable such kind of access possibly allowing
/// us to create a cleaner solution here.
///
/// FIXME: Instead of adding the dominance information and then dropping it
/// later on, we should try to just not add it in the first place. This requires
/// some careful testing to make sure this does not break in interaction with
/// the SCEVBuilder and SplitBlock which may rely on the dominator tree or
/// which may try to update it.
///
/// @param F The function which contains the BBs to removed.
/// @param DT The dominator tree from which to remove the BBs.
static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) {
DomTreeNode *N = DT.getNode(&F->getEntryBlock());
std::vector<BasicBlock *> Nodes;
// We can only remove an element from the dominator tree, if all its children
// have been removed. To ensure this we obtain the list of nodes to remove
// using a post-order tree traversal.
for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I)
Nodes.push_back(I->getBlock());
for (BasicBlock *BB : Nodes)
DT.eraseNode(BB);
}
void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
isl_ast_node *Body;
isl_ast_expr *Init, *Inc, *Iterator, *UB;
isl_id *IteratorID;
Value *ValueLB, *ValueUB, *ValueInc;
Type *MaxType;
Value *IV;
CmpInst::Predicate Predicate;
Body = isl_ast_node_for_get_body(For);
Init = isl_ast_node_for_get_init(For);
Inc = isl_ast_node_for_get_inc(For);
Iterator = isl_ast_node_for_get_iterator(For);
IteratorID = isl_ast_expr_get_id(Iterator);
UB = getUpperBound(For, Predicate);
ValueLB = ExprBuilder.create(Init);
ValueUB = ExprBuilder.create(UB);
ValueInc = ExprBuilder.create(Inc);
// OpenMP always uses SLE. In case the isl generated AST uses a SLT
// expression, we need to adjust the loop blound by one.
if (Predicate == CmpInst::ICMP_SLT)
ValueUB = Builder.CreateAdd(
ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType()));
MaxType = ExprBuilder.getType(Iterator);
MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
if (MaxType != ValueLB->getType())
ValueLB = Builder.CreateSExt(ValueLB, MaxType);
if (MaxType != ValueUB->getType())
ValueUB = Builder.CreateSExt(ValueUB, MaxType);
if (MaxType != ValueInc->getType())
ValueInc = Builder.CreateSExt(ValueInc, MaxType);
BasicBlock::iterator LoopBody;
SetVector<Value *> SubtreeValues;
SetVector<const Loop *> Loops;
getReferencesInSubtree(For, SubtreeValues, Loops);
// Create for all loops we depend on values that contain the current loop
// iteration. These values are necessary to generate code for SCEVs that
// depend on such loops. As a result we need to pass them to the subfunction.
for (const Loop *L : Loops) {
const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)),
SE.getUnknown(Builder.getInt64(1)),
L, SCEV::FlagAnyWrap);
Value *V = generateSCEV(OuterLIV);
OutsideLoopIterations[L] = SE.getUnknown(V);
SubtreeValues.insert(V);
}
ParallelLoopGenerator::ValueToValueMapTy NewValues;
ParallelLoopGenerator ParallelLoopGen(Builder, P, LI, DT, DL);
IV = ParallelLoopGen.createParallelLoop(ValueLB, ValueUB, ValueInc,
SubtreeValues, NewValues, &LoopBody);
BasicBlock::iterator AfterLoop = Builder.GetInsertPoint();
Builder.SetInsertPoint(LoopBody);
// Save the current values.
ValueMapT ValueMapCopy = ValueMap;
IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue;
updateValues(NewValues);
IDToValue[IteratorID] = IV;
create(Body);
// Restore the original values.
ValueMap = ValueMapCopy;
IDToValue = IDToValueCopy;
Builder.SetInsertPoint(AfterLoop);
removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT);
for (const Loop *L : Loops)
OutsideLoopIterations.erase(L);
isl_ast_node_free(For);
isl_ast_expr_free(Iterator);
isl_id_free(IteratorID);
}
void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
bool Vector = PollyVectorizerChoice != VECTORIZER_NONE;
@ -399,6 +682,11 @@ void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
return;
}
}
if (IslAstInfo::isExecutedInParallel(For)) {
createForParallel(For);
return;
}
createForSequential(For);
}
@ -474,6 +762,12 @@ void IslNodeBuilder::createSubstitutions(isl_ast_expr *Expr, ScopStmt *Stmt,
}
}
// Add the current ValueMap to our per-statement value map.
//
// This is needed e.g. to rewrite array base addresses when moving code
// into a parallely executed subfunction.
VMap.insert(ValueMap.begin(), ValueMap.end());
isl_ast_expr_free(Expr);
}
@ -506,6 +800,8 @@ void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
Id = isl_ast_expr_get_id(StmtExpr);
isl_ast_expr_free(StmtExpr);
LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end());
Stmt = (ScopStmt *)isl_id_get_user(Id);
createSubstitutions(Expr, Stmt, VMap, LTS);
@ -558,6 +854,27 @@ void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
isl_id_free(Id);
}
// Generate values for the current loop iteration for all surrounding loops.
//
// We may also reference loops outside of the scop which do not contain the
// scop itself, but as the number of such scops may be arbitrarily large we do
// not generate code for them here, but only at the point of code generation
// where these values are needed.
Region &R = S.getRegion();
Loop *L = LI.getLoopFor(R.getEntry());
while (L != nullptr && R.contains(L))
L = L->getParentLoop();
while (L != nullptr) {
const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)),
SE.getUnknown(Builder.getInt64(1)),
L, SCEV::FlagAnyWrap);
Value *V = generateSCEV(OuterLIV);
OutsideLoopIterations[L] = SE.getUnknown(V);
L = L->getParentLoop();
}
isl_set_free(Context);
}
@ -574,6 +891,9 @@ public:
IslCodeGeneration() : ScopPass(ID) {}
/// @brief The datalayout used
const DataLayout *DL;
/// @name The analysis passes we need to generate code.
///
///{
@ -605,6 +925,7 @@ public:
AI = &getAnalysis<IslAstInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
DL = &getAnalysis<DataLayoutPass>().getDataLayout();
assert(!S.getRegion().isTopLevelRegion() &&
"Top level regions are not supported");
@ -616,7 +937,7 @@ public:
BasicBlock *EnteringBB = simplifyRegion(&S, this);
PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator);
IslNodeBuilder NodeBuilder(Builder, Annotator, this, *LI, *SE, *DT);
IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S);
NodeBuilder.addParameters(S.getContext());
Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder());
@ -630,6 +951,7 @@ public:
virtual void printScop(raw_ostream &OS) const {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DataLayoutPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<IslAstInfo>();
AU.addRequired<RegionInfoPass>();

View File

@ -461,6 +461,48 @@ private:
};
namespace polly {
/// Find all loops referenced in SCEVAddRecExprs.
class SCEVFindLoops {
SetVector<const Loop *> &Loops;
public:
SCEVFindLoops(SetVector<const Loop *> &Loops) : Loops(Loops) {}
bool follow(const SCEV *S) {
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S))
Loops.insert(AddRec->getLoop());
return true;
}
bool isDone() { return false; }
};
void findLoops(const SCEV *Expr, SetVector<const Loop *> &Loops) {
SCEVFindLoops FindLoops(Loops);
SCEVTraversal<SCEVFindLoops> ST(FindLoops);
ST.visitAll(Expr);
}
/// Find all values referenced in SCEVUnknowns.
class SCEVFindValues {
SetVector<Value *> &Values;
public:
SCEVFindValues(SetVector<Value *> &Values) : Values(Values) {}
bool follow(const SCEV *S) {
if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(S))
Values.insert(Unknown->getValue());
return true;
}
bool isDone() { return false; }
};
void findValues(const SCEV *Expr, SetVector<Value *> &Values) {
SCEVFindValues FindValues(Values);
SCEVTraversal<SCEVFindValues> ST(FindValues);
ST.visitAll(Expr);
}
bool hasScalarDepsInsideRegion(const SCEV *Expr, const Region *R) {
return SCEVInRegionDependences::hasDependences(Expr, R);
}

View File

@ -0,0 +1,55 @@
; RUN: opt %loadPolly -basicaa -tbaa -polly-parallel -polly-codegen-isl -polly-code-generator=isl -S < %s | FileCheck %s
; CHECK: polly.split_new_and_old:
; CHECK-NOT: polly.split_new_and_old:
; CHECK: @CalculateQuant8Param.polly.subfn
; In this test case the first loop is only detected as a scop because
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@active_sps = external global [8 x i32]*
define void @CalculateQuant8Param() {
entry:
%present = alloca i64, align 8
%present19 = bitcast i64* %present to i8*
br label %for.1
for.1:
%indvar.1 = phi i64 [ %indvar.1.next, %for.1 ], [ 0, %entry ]
%tmp = load [8 x i32]** @active_sps, !tbaa !1
%arrayidx.1b = getelementptr [8 x i32]* %tmp, i32 0, i64 0
%tmp1 = load i32* %arrayidx.1b, !tbaa !5
%arrayidx.1a = getelementptr i8* %present19, i64 0
%arrayidx.1c = bitcast i8* %arrayidx.1a to i32*
store i32 %tmp1, i32* %arrayidx.1c
%indvar.1.next = add i64 %indvar.1, 1
br i1 false, label %for.1, label %fence
fence:
fence seq_cst
br label %for.2
for.2:
%indvar.2 = phi i64 [ %indvar.2.next, %for.2 ], [ 0, %fence ]
%uglygep = getelementptr i8* %present19, i64 %indvar.2
%arrayidx.2 = bitcast i8* %uglygep to i32*
store i32 42, i32* %arrayidx.2
%indvar.2.next = add i64 %indvar.2, 1
%exitcond18 = icmp ne i64 %indvar.2.next, 2
br i1 %exitcond18, label %for.2, label %end
end:
ret void
}
!1 = metadata !{metadata !2, metadata !2, i64 0}
!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
!4 = metadata !{metadata !"Simple C/C++ TBAA"}
!5 = metadata !{metadata !6, metadata !6, i64 0}
!6 = metadata !{metadata !"int", metadata !3, i64 0}

View File

@ -0,0 +1,44 @@
; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; This code has failed the scev based code generation as the scev in the scop
; contains an AddRecExpr of an outer loop. When generating code, we did not
; properly forward the value of this expression to the subfunction.
; AST: #pragma omp parallel for
; AST: for (int c1 = 0; c1 <= 1023; c1 += 1)
; AST: Stmt_for_j(c1);
; IR: @single_parallel_loop.polly.subfn
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
define void @single_parallel_loop() nounwind {
entry:
br label %for.i
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i.inc], [ 0, %entry ]
br label %for.j
for.j:
%indvar.j = phi i64 [ %indvar.j.next, %for.j], [ 0, %for.i ]
%sum = add i64 %indvar.j, %indvar.i
%scevgep = getelementptr [1024 x float]* @A, i64 0, i64 %sum
store float 0.0, float *%scevgep
%indvar.j.next = add i64 %indvar.j, 1
%exitcond.j = icmp slt i64 %indvar.j.next, 1024
br i1 %exitcond.j, label %for.j, label %for.i.inc
for.i.inc:
fence seq_cst
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, 1024
br i1 %exitcond.i, label %for.i, label %exit
exit:
ret void
}

View File

@ -0,0 +1,36 @@
; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR-SCEV
; AST: #pragma simd
; AST: #pragma omp parallel for
; AST: for (int c1 = 0; c1 <= 1023; c1 += 1)
; AST: Stmt_for_i(c1);
; IR: %[[gep:[._a-zA-Z0-9]*]] = getelementptr inbounds { [1024 x double]*, i64 }* %polly.par.userContext, i32 0, i32 1
; IR: store i64 %extern, i64* %[[gep]]
; IR-SCEV: getelementptr inbounds { [1024 x double]* }* %polly.par.userContext, i32 0, i32 0
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @kernel_trmm([1024 x double]* %B) {
entry:
br label %for.cond1.preheader
for.cond1.preheader:
%extern = add i64 1, 0
br label %for.i
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i ], [ 0, %for.cond1.preheader ]
%getelementptr = getelementptr [1024 x double]* %B, i64 %extern, i64 %indvar.i
store double 0.000000e+00, double* %getelementptr
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, 1024
br i1 %exitcond.i, label %for.i, label %end
end:
ret void
}

View File

@ -0,0 +1,65 @@
; RUN: opt %loadPolly -basicaa -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -basicaa -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; RUN: opt %loadPolly -basicaa -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; The interesting part of this test case is the instruction:
; %tmp = bitcast i8* %call to i64**
; which is not part of the scop. In the SCEV based code generation not '%tmp',
; but %call is a parameter of the SCoP and we need to make sure its value is
; properly forwarded to the subfunction.
; AST: #pragma omp parallel for
; AST: for (int c1 = 0; c1 < cols; c1 += 1)
; AST: Stmt_for_body(c1);
; IR: @foo.polly.subfn
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @foo(i64 %cols, i8* noalias %call) {
entry:
%tmp = bitcast i8* %call to i64**
br label %for.body
for.body:
%indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i64** %tmp, i64 0
%tmp1 = load i64** %arrayidx, align 8
%arrayidx.2 = getelementptr inbounds i64* %tmp1, i64 %indvar
store i64 1, i64* %arrayidx.2, align 4
%indvar.next = add nsw i64 %indvar, 1
%cmp = icmp slt i64 %indvar.next, %cols
br i1 %cmp, label %for.body, label %end
end:
ret void
}
; Another variation of this test case, now with even more of the index
; expression defined outside of the scop.
; AST: #pragma omp parallel for
; AST: for (int c1 = 0; c1 < cols; c1 += 1)
; AST: Stmt_for_body(c1);
; IR: @bar.polly.subfn
define void @bar(i64 %cols, i8* noalias %call) {
entry:
%tmp = bitcast i8* %call to i64**
%arrayidx = getelementptr inbounds i64** %tmp, i64 0
br label %for.body
for.body:
%indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
%tmp1 = load i64** %arrayidx, align 8
%arrayidx.2 = getelementptr inbounds i64* %tmp1, i64 %indvar
store i64 1, i64* %arrayidx.2, align 4
%indvar.next = add nsw i64 %indvar, 1
%cmp = icmp slt i64 %indvar.next, %cols
br i1 %cmp, label %for.body, label %end
end:
ret void
}

View File

@ -0,0 +1,48 @@
; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev < %s | FileCheck %s -check-prefix=IR
; Make sure we correctly forward the reference to 'A' to the OpenMP subfunction.
;
; void loop_references_outer_ids(float *A) {
; for (long i = 0; i < 100; i++)
; A[i] = i;
; }
; AST: #pragma simd
; AST: #pragma omp parallel for
; AST: for (int c1 = 0; c1 <= 99; c1 += 1)
; AST: Stmt_for_body(c1);
; IR-LABEL: polly.start:
; IR-NEXT: %0 = bitcast { float* }* %polly.par.userContext to i8*
; IR-NEXT: call void @llvm.lifetime.start(i64 8, i8* %0)
; IR-NEXT: %1 = getelementptr inbounds { float* }* %polly.par.userContext, i32 0, i32 0
; IR-NEXT: store float* %A, float** %1
; IR-NEXT: %polly.par.userContext1 = bitcast { float* }* %polly.par.userContext to i8*
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @loop_references_outer_ids(float* %A) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond = icmp ne i64 %i.0, 100
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%conv = sitofp i64 %i.0 to float
%arrayidx = getelementptr inbounds float* %A, i64 %i.0
store float %conv, float* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i64 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,102 @@
; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev < %s | FileCheck %s -check-prefix=IR
;
; float A[100];
;
; void loop_references_outer_ids(long n) {
; for (long i = 0; i < 100; i++)
; for (long j = 0; j < 100; j++)
; for (long k = 0; k < n + i; k++)
; A[j] += i + j + k;
; }
; In this test case we verify that the j-loop is generated as OpenMP parallel
; loop and that the values of 'i' and 'n', needed in the loop bounds of the
; k-loop, are correctly passed to the subfunction.
; AST: #pragma minimal dependence distance: 1
; AST: for (int c1 = max(0, -n + 1); c1 <= 99; c1 += 1)
; AST: #pragma omp parallel for
; AST: for (int c3 = 0; c3 <= 99; c3 += 1)
; AST: #pragma minimal dependence distance: 1
; AST: for (int c5 = 0; c5 < n + c1; c5 += 1)
; AST: Stmt_for_body6(c1, c3, c5);
; IR: %polly.par.userContext = alloca { i64, i64 }
; IR: %4 = bitcast { i64, i64 }* %polly.par.userContext to i8*
; IR-NEXT: call void @llvm.lifetime.start(i64 16, i8* %4)
; IR-NEXT: %5 = getelementptr inbounds { i64, i64 }* %polly.par.userContext, i32 0, i32 0
; IR-NEXT: store i64 %n, i64* %5
; IR-NEXT: %6 = getelementptr inbounds { i64, i64 }* %polly.par.userContext, i32 0, i32 1
; IR-NEXT: store i64 %polly.indvar, i64* %6
; IR-NEXT: %polly.par.userContext1 = bitcast { i64, i64 }* %polly.par.userContext to i8*
; IR-LABEL: @loop_references_outer_ids.polly.subfn(i8* %polly.par.userContext)
; IR: %polly.par.userContext1 = bitcast i8* %polly.par.userContext to { i64, i64 }*
; IR-NEXT: %0 = getelementptr inbounds { i64, i64 }* %polly.par.userContext1, i32 0, i32 0
; IR-NEXT: %1 = load i64* %0
; IR-NEXT: %2 = getelementptr inbounds { i64, i64 }* %polly.par.userContext1, i32 0, i32 1
; IR-NEXT: %3 = load i64* %2
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@A = common global [100 x float] zeroinitializer, align 16
define void @loop_references_outer_ids(i64 %n) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc13, %entry
%i.0 = phi i64 [ 0, %entry ], [ %inc14, %for.inc13 ]
%exitcond1 = icmp ne i64 %i.0, 100
br i1 %exitcond1, label %for.body, label %for.end15
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc10, %for.body
%j.0 = phi i64 [ 0, %for.body ], [ %inc11, %for.inc10 ]
%exitcond = icmp ne i64 %j.0, 100
br i1 %exitcond, label %for.body3, label %for.end12
for.body3: ; preds = %for.cond1
br label %for.cond4
for.cond4: ; preds = %for.inc, %for.body3
%k.0 = phi i64 [ 0, %for.body3 ], [ %inc, %for.inc ]
%add = add nsw i64 %i.0, %n
%cmp5 = icmp slt i64 %k.0, %add
br i1 %cmp5, label %for.body6, label %for.end
for.body6: ; preds = %for.cond4
%add7 = add nsw i64 %i.0, %j.0
%add8 = add nsw i64 %add7, %k.0
%conv = sitofp i64 %add8 to float
%arrayidx = getelementptr inbounds [100 x float]* @A, i64 0, i64 %j.0
%tmp = load float* %arrayidx, align 4
%add9 = fadd float %tmp, %conv
store float %add9, float* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body6
%inc = add nsw i64 %k.0, 1
br label %for.cond4
for.end: ; preds = %for.cond4
br label %for.inc10
for.inc10: ; preds = %for.end
%inc11 = add nsw i64 %j.0, 1
br label %for.cond1
for.end12: ; preds = %for.cond1
br label %for.inc13
for.inc13: ; preds = %for.end12
%inc14 = add nsw i64 %i.0, 1
br label %for.cond
for.end15: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,27 @@
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; IR: @foo.polly.subfn
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @foo(i32 %sendcount, i8* %recvbuf) {
entry:
br label %sw.bb3
sw.bb3:
%tmp = bitcast i8* %recvbuf to double*
%cmp75 = icmp sgt i32 %sendcount, 0
br i1 %cmp75, label %for.body, label %end
for.body:
%i.16 = phi i32 [ %inc14, %for.body ], [ 0, %sw.bb3 ]
%idxprom11 = sext i32 %i.16 to i64
%arrayidx12 = getelementptr inbounds double* %tmp, i64 %idxprom11
store double 1.0, double* %arrayidx12, align 8
%inc14 = add nsw i32 %i.16, 1
%cmp7 = icmp slt i32 %inc14, %sendcount
br i1 %cmp7, label %for.body, label %end
end:
ret void
}

View File

@ -0,0 +1,47 @@
; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze -polly-codegen-scev < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; - Test the case where scalar evolution references a loop that is outside
; of the scop, but does not contain the scop.
; - Test the case where two parallel subfunctions are created.
; AST: if (symbol >= p_2 + 1) {
; AST-NEXT: #pragma simd
; AST-NEXT: #pragma omp parallel for
; AST-NEXT: for (int c1 = 0; c1 < p_0 + symbol; c1 += 1)
; AST-NEXT: Stmt_while_body(c1);
; AST-NEXT: } else
; AST-NEXT: #pragma simd
; AST-NEXT: #pragma omp parallel for
; AST-NEXT: for (int c1 = 0; c1 <= p_0 + p_2; c1 += 1)
; AST-NEXT: Stmt_while_body(c1);
; IR: @update_model.polly.subfn
; IR: @update_model.polly.subfn1
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@cum_freq = external global [258 x i64], align 16
define void @update_model(i64 %symbol) {
entry:
br label %for.one
for.one:
%i.1 = phi i64 [ %dec17, %for.one ], [ %symbol, %entry ]
%dec17 = add nsw i64 %i.1, -1
br i1 undef, label %for.one, label %while.body
while.body:
%indvar = phi i64 [ %sub42, %while.body ], [ %i.1, %for.one ]
%sub42 = add nsw i64 %indvar, -1
%arrayidx44 = getelementptr inbounds [258 x i64]* @cum_freq, i64 0, i64 %sub42
store i64 1, i64* %arrayidx44, align 4
%cmp40 = icmp sgt i64 %sub42, 0
br i1 %cmp40, label %while.body, label %while.end
while.end:
ret void
}

View File

@ -0,0 +1,118 @@
; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST-STRIDE4
; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR-STRIDE4
; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-codegen-scev -S < %s | FileCheck %s -check-prefix=IR-STRIDE4
; This extensive test case tests the creation of the full set of OpenMP calls
; as well as the subfunction creation using a trivial loop as example.
; #define N 1024
; float A[N];
;
; void single_parallel_loop(void) {
; for (long i = 0; i < N; i++)
; A[i] = 1;
; }
; AST: #pragma simd
; AST: #pragma omp parallel for
; AST: for (int c1 = 0; c1 <= 1023; c1 += 1)
; AST: Stmt_S(c1);
; AST-STRIDE4: #pragma omp parallel for
; AST-STRIDE4: for (int c1 = 0; c1 <= 1023; c1 += 4)
; AST-STRIDE4: #pragma simd
; AST-STRIDE4: for (int c2 = c1; c2 <= c1 + 3; c2 += 1)
; AST-STRIDE4: Stmt_S(c2);
; IR-LABEL: single_parallel_loop()
; IR-NEXT: entry
; IR-NEXT: %polly.par.userContext = alloca
; IR-LABEL: polly.start:
; IR-NEXT: %0 = bitcast {}* %polly.par.userContext to i8*
; IR-NEXT: call void @llvm.lifetime.start(i64 0, i8* %0)
; IR-NEXT: %polly.par.userContext1 = bitcast {}* %polly.par.userContext to i8*
; IR-NEXT: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop.polly.subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 1)
; IR-NEXT: call void @single_parallel_loop.polly.subfn(i8* %polly.par.userContext1)
; IR-NEXT: call void @GOMP_parallel_end()
; IR-NEXT: %1 = bitcast {}* %polly.par.userContext to i8*
; IR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %1)
; IR-NEXT: br label %polly.merge_new_and_old
; IR: define internal void @single_parallel_loop.polly.subfn(i8* %polly.par.userContext) #1
; IR-LABEL: polly.par.setup:
; IR-NEXT: %polly.par.LBPtr = alloca i64
; IR-NEXT: %polly.par.UBPtr = alloca i64
; IR-NEXT: %polly.par.userContext1 =
; IR: br label %polly.par.checkNext
; IR-LABEL: polly.par.exit:
; IR-NEXT: call void @GOMP_loop_end_nowait()
; IR-NEXT: ret void
; IR-LABEL: polly.par.checkNext:
; IR-NEXT: %[[parnext:[._a-zA-Z0-9]*]] = call i8 @GOMP_loop_runtime_next(i64* %polly.par.LBPtr, i64* %polly.par.UBPtr)
; IR-NEXT: %[[cmp:[._a-zA-Z0-9]*]] = icmp ne i8 %[[parnext]], 0
; IR-NEXT: br i1 %[[cmp]], label %polly.par.loadIVBounds, label %polly.par.exit
; IR-LABEL: polly.par.loadIVBounds:
; IR-NEXT: %polly.par.LB = load i64* %polly.par.LBPtr
; IR-NEXT: %polly.par.UB = load i64* %polly.par.UBPtr
; IR-NEXT: %polly.par.UBAdjusted = sub i64 %polly.par.UB, 1
; IR-NEXT: br label %polly.loop_preheader
; IR-LABEL: polly.loop_exit:
; IR-NEXT: br label %polly.par.checkNext
; IR-LABEL: polly.loop_header:
; IR-NEXT: %polly.indvar = phi i64 [ %polly.par.LB, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.S ]
; IR-NEXT: br label %polly.stmt.S
; IR-LABEL: polly.stmt.S:
; IR-NEXT: %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar
; IR-NEXT: store float 1.000000e+00, float* %[[gep]]
; IR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
; IR-NEXT: %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 1
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, %polly.adjust_ub
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
; IR-LABEL: polly.loop_preheader:
; IR-NEXT: br label %polly.loop_header
; IR: attributes #1 = { "polly.skip.fn" }
; IR-STRIDE4: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop.polly.subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 4)
; IR-STRIDE4: add nsw i64 %polly.indvar, 3
; IR-STRIDE4: %polly.indvar_next = add nsw i64 %polly.indvar, 4
; IR-STRIDE4 %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 4
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
define void @single_parallel_loop() nounwind {
entry:
br label %for.i
for.i:
%indvar = phi i64 [ %indvar.next, %for.inc], [ 0, %entry ]
%scevgep = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %S, label %exit
S:
store float 1.0, float* %scevgep
br label %for.inc
for.inc:
%indvar.next = add i64 %indvar, 1
br label %for.i
exit:
ret void
}

View File

@ -0,0 +1,50 @@
; RUN: opt %loadPolly -tbaa -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -tbaa -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; RUN: opt %loadPolly -tbaa -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; #define N 1024
; float A[N];
;
; void single_parallel_loop(void) {
; for (long i = 0; i < N; i++)
; A[i] = 1;
; }
; AST: #pragma simd
; AST: #pragma omp parallel for
; AST: for (int c1 = 0; c1 <= 1023; c1 += 1)
; AST: Stmt_S(c1);
; IR: @single_parallel_loop.polly.subfn
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
define void @single_parallel_loop(float** %A) nounwind {
entry:
br label %for.i
for.i:
%indvar = phi i64 [ %indvar.next, %for.inc], [ 0, %entry ]
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %S, label %exit
S:
%ptr = load float** %A, !tbaa !2
%scevgep = getelementptr float* %ptr, i64 %indvar
%val = load float* %scevgep, !tbaa !6
%sum = fadd float %val, 1.0
store float %sum, float* %scevgep, !tbaa !6
br label %for.inc
for.inc:
%indvar.next = add i64 %indvar, 1
br label %for.i
exit:
ret void
}
!2 = metadata !{metadata !"float", metadata !3, i64 0}
!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
!4 = metadata !{metadata !"Simple C/C++ TBAA"}
!6 = metadata !{metadata !"float *ptr", metadata !3, i64 0}

View File

@ -0,0 +1,17 @@
{
"context" : "{ : }",
"name" : "for.i => exit",
"statements" : [
{
"accesses" : [
{
"kind" : "write",
"relation" : "{ Stmt_S[i0] -> MemRef_A[i0] }"
}
],
"domain" : "{ Stmt_S[i0] : i0 >= 0 and i0 <= 1023 }",
"name" : "Stmt_S",
"schedule" : "{ Stmt_S[i0] -> scattering[0, floor(i0/4) * 4, i0] }"
}
]
}

View File

@ -0,0 +1,47 @@
; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze -polly-codegen-scev < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR
; This test case verifies that we create correct code even if two OpenMP loops
; share common outer variables.
; AST: if (nj >= p_1 + 3) {
; AST: #pragma simd
; AST: #pragma omp parallel for
; AST: for (int c1 = 0; c1 < p_0 + nj - 1; c1 += 1)
; AST: Stmt_for_body35(c1);
; AST: } else
; AST: #pragma simd
; AST: #pragma omp parallel for
; AST: for (int c1 = 0; c1 <= p_0 + p_1; c1 += 1)
; AST: Stmt_for_body35(c1);
; IR: @foo.polly.subfn
; IR: @foo.polly.subfn1
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @foo(i64 %nj, [512 x double]* %R) {
entry:
br label %for.cond1.preheader
for.cond1.preheader:
%k.014 = phi i64 [ %inc87, %for.inc86 ], [ 0, %entry ]
%j.010 = add nsw i64 %k.014, 1
br i1 undef, label %for.body35, label %for.inc86
for.body35:
%j.012 = phi i64 [ %j.0, %for.body35 ], [ %j.010, %for.cond1.preheader ]
%arrayidx39 = getelementptr inbounds [512 x double]* %R, i64 0, i64 %j.012
store double 0.000000e+00, double* %arrayidx39
%j.0 = add nsw i64 %j.012, 1
%cmp34 = icmp slt i64 %j.0, %nj
br i1 %cmp34, label %for.body35, label %for.inc86
for.inc86:
%inc87 = add nsw i64 %k.014, 1
br i1 undef, label %for.cond1.preheader, label %for.end88
for.end88:
ret void
}